########################################################################
#
# File Name:            ReaderBase.py
#
# Documentation:        http://docs.ftsuite.com/4Suite/Lib/ReaderBase.py.html
#
"""
The base class for all readers in 4Suite
WWW: http://4suite.org/4Suite         e-mail: support@4suite.org

Copyright (c) 2000-2001 Fourthought Inc., USA.   All Rights Reserved.
See  http://4suite.org/COPYRIGHT  for license and copyright information
"""

# stringTypes lists the string types known to this interpreter; it is used
# by HandlerBase.resolveEntity to decide whether a resolved source is a
# string (to be opened with urllib) or an already-open stream.
from types import StringType
stringTypes = [StringType]
try:
    # If UnicodeType cannot be imported, only StringType is recognized.
    from types import UnicodeType
    stringTypes.append(UnicodeType)
except ImportError:
    pass

import string, urllib, cStringIO, os
from xml.sax import saxlib, sax2exts, handler, xmlreader
from xml.dom import XML_NAMESPACE, XMLNS_NAMESPACE
from xml.dom.ext import SplitQName
from Ft.Lib import Uri, ReaderException, XIncludeException

# StrStream(st) wraps a string in a cStringIO.StringIO file-like object.
# When the imports below succeed, unicode input is encoded to UTF-8 first;
# when any of them raises ImportError, the string is passed through as is.
try:
    import codecs, encodings
    from types import UnicodeType
    encoder = codecs.lookup("utf-8")[0] # encode,decode,reader,writer
    def StrStream(st):
        # The encoder's result is indexed with [0] to keep only the
        # encoded data.
        if type(st) is UnicodeType:
            st = encoder(st)[0]
        return cStringIO.StringIO(st)
except ImportError:
    # Fallback: no encoding step, hand the string straight to cStringIO.
    StrStream = lambda x: cStringIO.StringIO(x)

class DomletteReader:
    """
    Base class for readers that build a DOM.

    Subclasses must override fromStream() and releaseNode(); the versions
    here raise ReaderException(NOT_IMPLEMENTED).  fromString() and fromUri()
    are implemented in terms of fromStream().
    """
    def __init__(self, force8Bit=0):
        self.uriResolver = Uri.BASIC_RESOLVER
        self._force8Bit = force8Bit
        return

    def clone(self):
        """
        Used to create a new copy of this instance.

        If the instance provides __getinitargs__(), its result is passed
        as positional arguments to the class; otherwise the class is
        called with no arguments.
        """
        if hasattr(self, '__getinitargs__'):
            # Argument unpacking replaces the deprecated apply() builtin.
            return self.__class__(*self.__getinitargs__())
        return self.__class__()

    def fromStream(self, stream, refUri='', ownerDoc=None,
                   stripElements=None):
        "Create a DOM from a stream"
        raise ReaderException(ReaderException.NOT_IMPLEMENTED, 'fromStream')

    def releaseNode(self, dom):
        "Free a DOM tree"
        raise ReaderException(ReaderException.NOT_IMPLEMENTED, 'releaseNode')

    def fromString(self, st, refUri='', ownerDoc=None, stripElements=None):
        """
        Create a DOM from a string.

        The string is wrapped with StrStream() and handed to fromStream().
        The temporary stream is closed even when fromStream() raises.
        """
        #The default implementation
        self._baseUri = refUri
        stream = StrStream(st)
        try:
            return self.fromStream(stream, refUri, ownerDoc, stripElements)
        finally:
            stream.close()

    def fromUri(self, uri, baseUri='', ownerDoc=None, stripElements=None):
        """
        Create a DOM from a URI.

        The URI is opened through self.uriResolver.resolve() and the
        normalized URI is passed to fromStream() as the reference URI.
        The stream is closed even when fromStream() raises.
        """
        stream = self.uriResolver.resolve(uri, baseUri)
        try:
            newBaseUri = self.uriResolver.normalize(uri, baseUri)
            return self.fromStream(stream, newBaseUri, ownerDoc,
                                   stripElements)
        finally:
            stream.close()


class PreserveSpaceMixin:
    """
    Keeps a stack of preserve-space flags, one per open element.

    self._stripElements is a sequence of (namespaceURI, localName, strip)
    triples; a localName of '*' acts as a wildcard, and ('', '*') matches
    every element.  The first matching triple decides the new state.
    """
    def _initState(self, top=1, stripElements=None):
        """Reset the stack to the single entry `top` and store the rules."""
        if not stripElements:
            stripElements = []
        self._stripElements = stripElements
        self._preserveStateStack = [top]
        return

    def _peekPreserveStateStack(self):
        """Return the current (innermost) state without removing it."""
        stack = self._preserveStateStack
        return stack[-1]

    def _popPreserveStateStack(self):
        """Remove and return the current (innermost) state."""
        return self._preserveStateStack.pop()

    def _updatePreserveStateStack(self, newElement):
        """
        Push the state that applies to `newElement`.

        The parent's state is inherited unless one of the strip rules
        matches, in which case the state becomes `not strip`.
        """
        state = self._preserveStateStack[-1]
        for (rule_ns, rule_local, strip) in self._stripElements:
            elem_ns = newElement.namespaceURI
            elem_local = newElement.localName
            candidates = ((elem_ns, elem_local), ('', '*'), (elem_ns, '*'))
            if (rule_ns, rule_local) in candidates:
                state = not strip
                break
        self._preserveStateStack.append(state)


class StringIndexMixin:
    """
    Optionally accumulates text into a `stringValue` attribute on nodes.

    All methods are no-ops (apart from attribute look-ups) when the
    indexStringValues flag given to the constructor is false.
    """
    def __init__(self, indexStringValues=1):
        self._indexStringValues = indexStringValues
        return

    def _initState(self):
        """
        Give the root node an empty stringValue.

        Pre-condition: the root node of the handler must be set up
        """
        if not self._indexStringValues:
            return
        self._rootNode.stringValue = ""
        return

    def _updateTopNodeStringIndex(self, text):
        """Append `text` to the stringValue of the node on top of the stack."""
        # The top node is looked up before the flag is tested.
        current = self._nodeStack[-1]
        if not self._indexStringValues:
            return
        current.stringValue = current.stringValue + text
        return

    def _reset(self, node):
        """Set the stringValue of `node` back to the empty string."""
        if not self._indexStringValues:
            return
        node.stringValue = ""
        return


class DocIndexMixin:
    """
    Numbers nodes with an increasing `docIndex` attribute.

    The root node gets index 1; every node passed to _docIndex() gets the
    next integer.
    """
    def _initState(self):
        """
        Start the counter and index the root node.

        Pre-condition: the root node of the handler must be set up
        """
        first = 1
        self._currIndex = first
        self._rootNode.docIndex = first
        return

    def _docIndex(self, node):
        """Advance the counter and store the new value on `node`."""
        next_index = self._currIndex + 1
        self._currIndex = next_index
        node.docIndex = next_index
        return


class XincludeMixin:
    """
    XInclude handling for a handler that drives its parser itself.

    The host class is expected to provide self.parser (used through
    GetBase, SetBase, ExternalEntityParserCreate and ParseFile),
    self._uriResolver, self._entity_stack and self._currText.
    """
    def __init__(self, processIncludes=1, visitedHrefs=None):
        self._visitedHrefs = visitedHrefs or []
        self._processIncludes = processIncludes
        return

    def _initState(self):
        self._includeDepth = 0
        return

    def _handleIncludes(self, name, qname, nsattribs):
        """
        Process a start tag that may be an XInclude `include` element.

        Returns 0 when include processing is disabled or the element is
        not an include, the new nesting depth when the element is nested
        inside an include element, and 1 after an include was processed.

        Raises XIncludeException when the href attribute is missing or
        when the href is already being included (circular include).
        """
        if not self._processIncludes: return 0
        if self._includeDepth:
            # Already inside an include element: only track the nesting.
            self._includeDepth = self._includeDepth + 1
            return self._includeDepth
        from Ft.Lib.XInclude import XINCLUDE_NAMESPACE, XIncludeException
        if name != (XINCLUDE_NAMESPACE, 'include'):
            return self._includeDepth
        href = nsattribs.get(('', 'href'))
        if not href:
            raise XIncludeException(XIncludeException.XINCLUDE_MISSING_HREF)
        if href in self._visitedHrefs:
            raise XIncludeException(
                XIncludeException.CIRCULAR_INCLUDE_ERROR, href
                )
        parse = nsattribs.get(('', 'parse'))

        curr_base = self.parser.GetBase()
        new_base = urllib.basejoin(curr_base, href)
        stream = self._uriResolver.resolve(href, curr_base)
        try:
            if not parse or parse == 'xml':
                context = qname
                if self._entity_stack is None: self._entity_stack = []
                # Create the sub-parser before touching any state so that a
                # failure here leaves the parser stack untouched.
                sub_parser = self.parser.ExternalEntityParserCreate(context)
                self._entity_stack.append(self.parser)
                self.parser = sub_parser
                self._visitedHrefs.append(href)
                try:
                    sub_parser.SetBase(new_base)
                    sub_parser.ParseFile(stream)
                finally:
                    # Always undo the bookkeeping and hand control back to
                    # the including parser, as entityRef() does.
                    self._visitedHrefs.remove(href)
                    self.parser = self._entity_stack.pop()
            else:
                self._currText = self._currText + stream.read()
        finally:
            stream.close()
        self._includeDepth = 1
        return 1

    def _checkIncludeDepth(self):
        """Return the current include nesting depth (0 when outside)."""
        return self._includeDepth

    def _popInclude(self):
        """
        Leave one nesting level of an include element.

        Returns 1 when a level was left, 0 when not inside an include.
        """
        if self._includeDepth:
            self._includeDepth = self._includeDepth - 1
            return 1
        else:
            return 0

    
class NamespaceAdapterMixin:
    """
    Turns non-namespace-aware start-element events into namespace-aware
    ones, using the stack of prefix mappings in self._namespaces.
    """
    def _splitNssName(self, name):
        """
        Return (prefix, local) for an element or attribute name.

        `name` is either a qualified-name string, split with SplitQName,
        or a (namespace, local) tuple whose namespace must be empty.

        Raises NotImplementedError for a tuple with a non-empty namespace.
        """
        if isinstance(name, tuple):
            if name[0]:
                # A real exception class: string exceptions cannot be
                # raised on Python 2.6 and later.
                raise NotImplementedError(
                    "Namespaces in validating docs not supported")
            return ('', name[1])
        return SplitQName(name)

    def _handleStartElementNss(self, name, attribs):
        """
        Resolve the namespaces of an element and its attributes.

        A copy of the enclosing prefix mapping is pushed on
        self._namespaces and updated with the declarations found in
        `attribs`.

        Returns ((namespace, local), name, nsattribs) where nsattribs is
        an xmlreader.AttributesNSImpl keyed by (namespace, local).
        Namespace declarations are reported in XMLNS_NAMESPACE with the
        declared prefix as local name.

        Raises ReaderException for an attribute without a local part and
        for an element prefix that has no declaration in scope.
        """
        scope = self._namespaces[-1].copy()
        self._namespaces.append(scope)

        # First pass: record namespace declarations, remembering how each
        # attribute name was split so it is not split a second time.
        split_attribs = []
        for key in attribs.keys():
            (prefix, local) = self._splitNssName(key)
            if not local:
                raise ReaderException(ReaderException.INVALID_XMLNS, key)
            if local == 'xmlns':
                scope[prefix] = attribs[key]
            split_attribs.append((key, prefix, local))

        (prefix, local) = self._splitNssName(name)
        if prefix and prefix not in scope:
            raise ReaderException(ReaderException.XMLNS_UNKNOWN_PREFIX, prefix)
        namespace = scope.get(prefix, '')

        # Second pass: key every attribute by (namespace, local).
        attrs = {}
        qnames = {}
        for (key, a_prefix, a_local) in split_attribs:
            if a_local == 'xmlns':
                ns = XMLNS_NAMESPACE
                a_local = a_prefix
            elif a_prefix and a_prefix in scope:
                ns = scope[a_prefix]
            else:
                ns = ''
            attrs[(ns, a_local)] = attribs[key]
            qnames[(ns, a_local)] = key

        nsattribs = xmlreader.AttributesNSImpl(attrs, qnames)
        return ((namespace, local), name, nsattribs)


class Force8BitMixin:
    """
    Optionally converts text and attribute dictionaries to UTF-8 encoded
    strings.  When force8Bit is false, values are returned unchanged.
    """
    def __init__(self, force8Bit=0):
        self._force8Bit = force8Bit
        if self._force8Bit:
            try:
                import codecs
            except ImportError:
                # Only a missing codecs module is translated; any other
                # error is allowed to propagate unchanged.
                raise Exception("Don't use force8Bit on Python versions that don't support unicode")
            utf8_encode = codecs.lookup('utf-8')[0]
            # The encoder's result is indexed with [0] to keep only the
            # encoded data.
            self._encoder = lambda x, e=utf8_encode: e(x)[0]
        return

    def _checkString(self, text):
        """Return `text`, UTF-8 encoded when force8Bit is set."""
        if self._force8Bit:
            return self._encoder(text)
        else:
            return text

    def _checkDict(self, attribs):
        """
        Return `attribs`, or when force8Bit is set a new dictionary whose
        keys and values are the UTF-8 encoded originals.
        """
        if self._force8Bit:
            new_attribs = {}
            for k in attribs.keys():
                new_attribs[self._encoder(k)] = self._encoder(attribs[k])
            return new_attribs
        else:
            return attribs


class HandlerBase:
    """
    Common parser-event handling shared by the concrete DOM builders.

    The concrete class (usually together with the mixins in this module)
    must provide: _rootNode, _ownerDoc, parser, _force8Bit, _encoder,
    _includeDepth, _handleStartElementNss(), _completeTextNode() and the
    factory methods _createElement(), _createAttribute(),
    _createComment() and _createProcessingInstruction().
    """
    def __init__(self, resolveEntity=None, uriResolver=None):
        self._resolveEntity = resolveEntity
        self._uriResolver = uriResolver or Uri.BASIC_RESOLVER
        return

    def initState(self):
        """
        Pre-condition: the root node of the handler must be set up
        by the concrete class
        """
        #Set up the stack which keeps track of the nesting of DOM nodes.
        self._nodeStack = [self._rootNode]
        self._namespaces = [{'xml': XML_NAMESPACE}]
        self._currText = ''
        return

    def processingInstruction(self, target, data):
        """Append a processing instruction to the node on top of the stack."""
        # Flush pending text first; called without arguments like every
        # other call site in this class.
        self._completeTextNode()
        pi = self._createProcessingInstruction(target, data)
        self._nodeStack[-1].appendChild(pi)
        return

    def comment(self, data):
        """Append a comment to the node on top of the stack."""
        self._completeTextNode()
        comment = self._createComment(data)
        self._nodeStack[-1].appendChild(comment)
        return

    def _startElementPrep(self, name, qname):
        """
        Flush pending text and return (namespace, local, prefix) for an
        element whose expanded name is `name` and qualified name `qname`.
        """
        namespace = name[0]
        local = name[1]
        prefix = SplitQName(qname)[0]
        self._completeTextNode()
        return (namespace, local, prefix)

    def startElement(self, name, attribs):
        """
        Create an element with its attributes and push it on the node
        stack; it is attached to its parent in endElement().
        """
        (name, qname, nsattribs) = self._handleStartElementNss(name, attribs)
        (namespace, local, prefix) = self._startElementPrep(name, qname)
        new_element = self._createElement(self._ownerDoc, namespace, local,
                                          prefix)
        for attr_qname in nsattribs.getQNames():
            (attr_ns, attr_local) = nsattribs.getNameByQName(attr_qname)
            attr_prefix = SplitQName(attr_qname)[0]
            attr = self._createAttribute(self._ownerDoc, attr_ns, attr_local,
                                         attr_prefix)
            attr.value = nsattribs.getValueByQName(attr_qname)
            attr.ownerElement = new_element
            new_element.attributes[(attr_ns, attr_local)] = attr
        self._nodeStack.append(new_element)
        return

    def endElement(self, name):
        """
        Close the current element: drop its namespace scope, flush pending
        text, pop it from the node stack and append it to its parent.
        """
        del self._namespaces[-1]
        self._completeTextNode()
        new_element = self._nodeStack[-1]
        del self._nodeStack[-1]
        self._nodeStack[-1].appendChild(new_element)
        return

    def characters(self, data):
        """
        Accumulate character data in self._currText.  Data reported while
        inside an include element is discarded.
        """
        if self._force8Bit:
            data = self._encoder(data)
        if self._includeDepth: return
        self._currText = self._currText + data
        return

    _entity_stack= None
    def entityRef(self, context, base, sysid, pubid):
        """
        Parse an external entity with a sub-parser and return 1.

        The current parser is saved on self._entity_stack while the
        sub-parser runs.  It is restored, and the entity stream closed,
        even when parsing raises.
        """
        stream = self.resolveEntity(pubid, _combineSysid(base, sysid))
        try:
            if self._entity_stack is None: self._entity_stack = []
            # Create the sub-parser before touching any state so that a
            # failure here leaves the parser stack untouched.
            sub_parser = self.parser.ExternalEntityParserCreate(context)
            self._entity_stack.append(self.parser)
            self.parser = sub_parser
            try:
                sub_parser.SetBase(_getBase(stream))
                sub_parser.ParseFile(stream)
            finally:
                self.parser = self._entity_stack.pop()
        finally:
            stream.close()
        return 1

    def resolveEntity(self, pubid, sysid):
        """
        Return an open stream for an external entity.

        Without a user-supplied resolver the system identifier itself is
        the source; otherwise the resolver is called with (pubid, sysid).
        A string result is opened with urllib.urlopen; any other result
        is returned as is.
        """
        if self._resolveEntity is None:
            source = sysid
        else:
            source = self._resolveEntity(pubid, sysid)

        if type(source) in stringTypes:
            source = urllib.urlopen(source)

        return source

    def getRootNode(self):
        """Flush pending text and return the root node (or owner document)."""
        self._completeTextNode()
        return self._rootNode or self._ownerDoc


def _combineSysid(base, sysid):
    """Join the system identifier `sysid` onto `base` with urllib.basejoin."""
    combined = urllib.basejoin(base, sysid)
    return combined


def _getBase(stream):
    #FIXME: This is an odd request.  What does it mean to get the "base"
    #Of an arbitrary URI?
    name = getattr(stream, 'url', None) or getattr(stream, 'name', '')
    return name


#Doesn't really belong there
from xml.dom.ext.reader import Reader

def AssertReader(reader):
    """
    Check that `reader` is an xml.dom.ext.reader Reader or a
    DomletteReader instance.

    Raises TypeError, with the offending object in the message, otherwise.
    """
    if not isinstance(reader, Reader) and not isinstance(reader, DomletteReader):
        # A real exception class: string exceptions cannot be raised on
        # Python 2.6 and later.
        raise TypeError("Not a Reader %s" % str(reader))

    # NOTE(review): the function below is nested inside AssertReader and is
    # never called, so it has no effect.  It looks like a handler method
    # that ended up at the wrong indentation; it also refers to the names
    # `data` and `Node`, which are not defined in this module.  Left as
    # found -- confirm where it belongs before moving or removing it.
    def _completeTextNode(self):
        #Note some parsers don't report ignorable white space properly
        if self._currText and len(self._nodeStack) and self._nodeStack[-1].nodeType != Node.DOCUMENT_NODE:
            new_text = self._createtext(data)
            top_node = self._nodeStack[-1]
            top_node.appendChild(new_text)
        self._currText = ''
        return

