import re, string, htmlentitydefs, urllib, urlparse, locale, time, calendar
import os, sys
import error

# Matches a leading URL scheme, i.e. one or more word characters followed
# by "://" at the very start of the string.
url_re = re.compile(r'^\w+://')
# Matches an entity-like token: '&', exactly one arbitrary character, as few
# word characters as possible, then ';' (e.g. "&amp;", "&#38;", "&#x26;").
entity = re.compile(r'\&.\w*?\;')
# Matches the shortest run of text enclosed in '<' ... '>' on a single line.
html_tags = re.compile(r'\<.*?\>')

def is_url(text):
    """Tell whether text starts with a URL scheme ("something://").

    Returns the regex match object (truthy) when it does, None otherwise.
    """
    scheme_match = url_re.match(text)
    return scheme_match

def read_text(fragment, chars):
    """Read up to chars visible characters from the HTML fragment fragment.

    Markup is skipped: everything between '<' and the matching '>' is
    dropped, and a '>' inside a double-quoted attribute value does not end
    the tag.  An entity ("&...;") is copied through verbatim and counts as
    a single character.  Leading whitespace is dropped, newlines and
    carriage returns become spaces, and consecutive spaces are collapsed
    into one.

    Parameters:
        fragment -- the HTML text, or None
        chars    -- maximum number of visible characters to return

    Returns the extracted text; "" when fragment is None.
    """
    if fragment is None:
        return ""

    res = []
    res_len = 0            # visible characters collected so far
    in_tag = False
    in_attr = False
    entity_start = None    # index in res of the '&' of a pending entity
    previous = None        # last visible character appended

    for current in fragment:
        if res_len >= chars:
            break

        if in_tag:
            if current == '"':
                in_attr = not in_attr
            elif current == '>' and not in_attr:
                in_tag = False
            continue

        if entity_start is not None:
            if current == ';':
                # complete entity: counts as one visible character
                res.append(current)
                entity_start = None
                previous = ""
                res_len += 1
                continue
            if current.isalnum() or current == '#':
                res.append(current)
                continue
            # Not an entity after all (a bare '&', as in "AT&T rocks").
            # Count what was copied as ordinary text, otherwise the scanner
            # would stay in entity mode and ignore the chars limit for the
            # rest of the fragment.  The current character is then handled
            # as normal text below.
            res_len += len(res) - entity_start
            entity_start = None
            previous = res[-1]
            if res_len >= chars:
                break

        if current == "<":
            in_tag = True
        elif current == "&":
            entity_start = len(res)
            res.append(current)
        # avoid beginning whitespace, newlines and returns, and multiple
        # whitespaces
        elif res or current not in string.whitespace:
            if current == '\n' or current == '\r':
                current = ' '
            if current == previous == ' ':
                continue
            res.append(current)
            previous = current
            res_len += 1

    return "".join(res)

def convert_entities(text):
    """Replace HTML entities in text with the characters they denote.

    Named entities are looked up in htmlentitydefs.entitydefs; numeric
    references may be decimal ("&#38;") or hexadecimal ("&#x26;" or
    "&#X26;").  Anything that looks like an entity but cannot be resolved
    (unknown name, malformed or out-of-range number) is left in the text
    unchanged.

    Returns text itself when it contains no entity-like token, otherwise
    the converted string.
    """
    def conv(match):
        original = match.group(0)
        ent = original
        ent_code = htmlentitydefs.entitydefs.get(ent[1:-1], None)
        if ent_code is not None:
            # entitydefs values are ISO Latin-1 byte strings (or a numeric
            # "&#NNNN;" reference for characters outside Latin-1), so they
            # must be decoded as latin-1 whatever the locale is; that
            # decode cannot fail.
            ent = unicode(ent_code, 'latin-1')

            # check if it still needs conversion
            if entity.search(ent) is None:
                return ent

        if ent[1] == '#':
            code = ent[2:-1]
            base = 10
            if code[:1] in ('x', 'X'):
                code = code[1:]
                base = 16
            try:
                return unichr(int(code, base))
            except (ValueError, OverflowError):
                # empty/garbled digits or a code point unichr() rejects
                return original

        # Unknown entity: keep the text.  Returning None here would make
        # re.sub() silently delete it.
        return original

    if entity.search(text) is None:
        return text
    return entity.sub(conv, text)

def is_html(text):
    """Return 1 if text contains something shaped like an HTML tag, else 0."""
    if html_tags.search(text) is None:
        return 0
    return 1

def complete_url(url, feed_location):
    url = urllib.quote(url, safe=string.punctuation)
    if urlparse.urlparse(url)[0] == '':
        return urlparse.urljoin(feed_location, url)
    else:
        return url

def get_url_location(url):
    url = urllib.quote(url, safe=string.punctuation)
    parsed_url = urlparse.urlsplit(url)
    return urlparse.urlunsplit((parsed_url[0], parsed_url[1], '','',''))

def get_locale_encoding():
    """Return the name of the encoding preferred by the current locale.

    Falls back to the interpreter's default encoding when the locale
    module raises locale.Error.
    """
    try:
        return locale.getpreferredencoding()
    except locale.Error:
        return sys.getdefaultencoding()

def format_date(date, format=None, encoding=None):
    """Format date as local time and return the result as unicode.

    Parameters:
        date     -- a time tuple as accepted by calendar.timegm()
        format   -- strftime format; defaults to get_date_format()
        encoding -- encoding used to decode strftime's output; defaults
                    to get_locale_encoding()
    """
    if format is None:
        format = get_date_format()
    if encoding is None:
        encoding = get_locale_encoding()
    seconds = calendar.timegm(date)
    local_time = time.localtime(seconds)
    formatted = time.strftime(format, local_time)
    return unicode(formatted, encoding)

def get_date_format():
    """Return the translated strftime format used to display dates.

    The format string is passed through _(), which is not defined in this
    module -- presumably gettext's translation function installed as a
    builtin; confirm against the application start-up code.
    """
    # this is here just to make xgettext happy: it should be defined in
    # only one place, and a good one would be MainWindow.py module level.
    # however, we can't access _ there.
    # The format: %A is the full weekday name
    #             %B is the full month name
    #             %e is the day of the month as a decimal number,
    #                without leading zero
    # This should be translated to be suitable for the locale in
    # question, feel free to alter the order and the parameters (they
    # are strftime(3) parameters, the whole string is passed to the
    # function, Straw does no additional interpretation) if you feel
    # it's necessary in the translation file.
    return _('%A %B %e %H:%M')

# Maximum number of visible characters kept when a title is derived from
# HTML text.
MAX_DISPLAY_CHAR = 60
def convert_title(title, description=''):
    """Return a plain-text title suitable for display.

    When title is None or empty, a title is built from the first
    MAX_DISPLAY_CHAR visible characters of description, followed by
    '...'.  Entities in the title are converted, and if the result still
    contains HTML tags it is reduced to its first MAX_DISPLAY_CHAR visible
    characters.

    Returns None when there is no title and nothing to derive one from:
    description is None (or has no length), or title is None and
    description is empty.
    """
    if title is None or title == '':
        try:
            if len(description):
                title = read_text(description, MAX_DISPLAY_CHAR) + '...'
        except TypeError:
            error.log("empty title and description, leave as is...")
            # no title, no description, leave it.
            return
        if title is None:
            # The description was empty, so title is still None, and
            # convert_entities() cannot handle None (it would raise
            # TypeError).  Treat it like the other "nothing to show" case.
            error.log("empty title and description, leave as is...")
            return

    title = convert_entities(title)

    if is_html(title):
        title = read_text(title, MAX_DISPLAY_CHAR)

    return title

def find_lib_dir():
    """Locate the directory holding Straw's data files.

    The STRAW_DATA environment variable wins when it is set; its value is
    returned without checking that it exists.  Otherwise, if the running
    script (sys.argv[0]) lives in a directory named 'bin', the sibling
    directories share/straw and lib/straw are tried in that order.

    Raises IOError when no directory is found.
    """
    if "STRAW_DATA" in os.environ:
        return os.environ["STRAW_DATA"]
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    h, t = os.path.split(script_dir)
    if t == 'bin':
        for subdir in ('share', 'lib'):
            fp = os.path.join(h, subdir, 'straw')
            if os.path.isdir(fp):
                return fp
    # A real exception class is raised here: string exceptions
    # (raise "name", "message") fail with TypeError on Python 2.6+.
    raise IOError("couldn't find Straw data directory")

def find_locale_dir():
    """Locate the directory holding the translation catalogs.

    If the running script (sys.argv[0]) lives in a directory named 'bin',
    the sibling directory share/locale is returned when it exists.

    Raises IOError when no directory is found.
    """
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    h, t = os.path.split(script_dir)
    if t == 'bin':
        fp = os.path.join(h, 'share', 'locale')
        if os.path.isdir(fp):
            return fp
    # A real exception class is raised here: string exceptions
    # (raise "name", "message") fail with TypeError on Python 2.6+.
    raise IOError("couldn't find locale data directory")

def find_glade_file(libdir=None):
    """Return the path of the straw.glade interface description.

    The STRAW_GLADE environment variable wins when it is set; its value is
    returned without checking that it exists.  Otherwise straw.glade is
    looked up in libdir, which defaults to the result of find_lib_dir().

    Raises IOError when the file does not exist.
    """
    if "STRAW_GLADE" in os.environ:
        return os.environ["STRAW_GLADE"]

    if libdir is None:
        libdir = find_lib_dir()

    fp = os.path.join(libdir, 'straw.glade')
    if os.path.isfile(fp):
        return fp
    # A real exception class is raised here: string exceptions
    # (raise "name", "message") fail with TypeError on Python 2.6+.
    raise IOError("couldn't find straw.glade")

def listdiff(l1, l2, test=None):
    """Split two lists into shared and exclusive elements.

    Returns a tuple (common, inl1, inl2):
        common -- elements of l1 that also occur in l2, in l1 order
        inl1   -- elements of l1 that do not occur in l2
        inl2   -- elements of l2 that do not occur in l1

    Duplicates are kept.  Membership is decided with the `in` operator, so
    elements need not be hashable.

    When test is not None the work is delegated to _listdifftest(l1, l2,
    test), which is defined outside this section.
    """
    if test is not None:
        return _listdifftest(l1, l2, test)
    common = []
    inl1 = []
    for e in l1:
        if e in l2:
            common.append(e)
        else:
            inl1.append(e)
    # Every element of l2 that also occurs in l1 was already added to
    # common by the loop above, so only the l2-only elements remain to be
    # collected.
    inl2 = [e for e in l2 if e not in l1]
    return (common, inl1, inl2)
