Skip to content
Snippets Groups Projects
_base.py 11.75 KiB
from html5lib.constants import scopingElements, tableInsertModeElements
try:
    frozenset
except NameError:
    # Import from the sets module for python 2.3
    from sets import Set as set
    from sets import ImmutableSet as frozenset

# The scope markers are inserted when entering buttons, object elements,
# marquees, table cells, and table captions, and are used to prevent formatting
# from "leaking" into tables, buttons, object elements, and marquees.
Marker = None

#XXX - TODO; make the default interface more ElementTree-like
#            rather than DOM-like

class Node(object):
    def __init__(self, name):
        """Node representing an item in the tree.
        name - The tag name associated with the node
        parent - The parent of the current node (or None for the document node)
        value - The value of the current node (applies to text nodes and 
        comments
        attributes - a dict holding name, value pairs for attributes of the node
        childNodes - a list of child nodes of the current node. This must 
        include all elements but not necessarily other node types
        _flags - A list of miscellaneous flags that can be set on the node
        """
        self.name = name
        self.parent = None
        self.value = None
        self.attributes = {}
        self.childNodes = []
        self._flags = []

    def __unicode__(self):
        attributesStr =  " ".join(["%s=\"%s\""%(name, value) 
                                   for name, value in 
                                   self.attributes.iteritems()])
        if attributesStr:
            return "<%s %s>"%(self.name,attributesStr)
        else:
            return "<%s>"%(self.name)

    def __repr__(self):
        return "<%s %s>" % (self.__class__, self.name)

    def appendChild(self, node):
        """Insert node as a child of the current node
        """
        raise NotImplementedError

    def insertText(self, data, insertBefore=None):
        """Insert data as text in the current node, positioned before the 
        start of node insertBefore or to the end of the node's text.
        """
        raise NotImplementedError

    def insertBefore(self, node, refNode):
        """Insert node as a child of the current node, before refNode in the 
        list of child nodes. Raises ValueError if refNode is not a child of 
        the current node"""
        raise NotImplementedError

    def removeChild(self, node):
        """Remove node from the children of the current node
        """
        raise NotImplementedError

    def reparentChildren(self, newParent):
        """Move all the children of the current node to newParent. 
        This is needed so that trees that don't store text as nodes move the 
        text in the correct way
        """
        #XXX - should this method be made more general?
        for child in self.childNodes:
            newParent.appendChild(child)
        self.childNodes = []

    def cloneNode(self):
        """Return a shallow copy of the current node i.e. a node with the same
        name and attributes but with no parent or child nodes
        """
        raise NotImplementedError


    def hasContent(self):
        """Return true if the node has children or text, false otherwise
        """
        raise NotImplementedError

class TreeBuilder(object):
    """Base treebuilder implementation
    documentClass - the class to use for the bottommost node of a document
    elementClass - the class to use for HTML Elements
    commentClass - the class to use for comments
    doctypeClass - the class to use for doctypes
    """

    #Document class
    documentClass = None

    #The class to use for creating a node
    elementClass = None

    #The class to use for creating comments
    commentClass = None

    #The class to use for creating doctypes
    doctypeClass = None
    
    #Fragment class
    fragmentClass = None

    def __init__(self):
        self.reset()
    
    def reset(self):
        self.openElements = []
        self.activeFormattingElements = []

        #XXX - rename these to headElement, formElement
        self.headPointer = None
        self.formPointer = None

        self.insertFromTable = False

        self.document = self.documentClass()

    def elementInScope(self, target, tableVariant=False):
        # Exit early when possible.
        if self.openElements[-1].name == target:
            return True

        # AT Use reverse instead of [::-1] when we can rely on Python 2.4
        # AT How about while True and simply set node to [-1] and set it to
        # [-2] at the end...
        for node in self.openElements[::-1]:
            if node.name == target:
                return True
            elif node.name == "table":
                return False
            elif not tableVariant and node.name in scopingElements:
                return False
            elif node.name == "html":
                return False
        assert False # We should never reach this point

    def reconstructActiveFormattingElements(self):
        # Within this algorithm the order of steps described in the
        # specification is not quite the same as the order of steps in the
        # code. It should still do the same though.

        # Step 1: stop the algorithm when there's nothing to do.
        if not self.activeFormattingElements:
            return

        # Step 2 and step 3: we start with the last element. So i is -1.
        i = -1
        entry = self.activeFormattingElements[i]
        if entry == Marker or entry in self.openElements:
            return

        # Step 6
        while entry != Marker and entry not in self.openElements:
            # Step 5: let entry be one earlier in the list.
            i -= 1
            try:
                entry = self.activeFormattingElements[i]
            except:
                # Step 4: at this point we need to jump to step 8. By not doing
                # i += 1 which is also done in step 7 we achieve that.
                break
        while True:
            # Step 7
            i += 1

            # Step 8
            clone = self.activeFormattingElements[i].cloneNode()

            # Step 9
            element = self.insertElement(clone.name, clone.attributes)

            # Step 10
            self.activeFormattingElements[i] = element

            # Step 11
            if element == self.activeFormattingElements[-1]:
                break

    def clearActiveFormattingElements(self):
        entry = self.activeFormattingElements.pop()
        while self.activeFormattingElements and entry != Marker:
            entry = self.activeFormattingElements.pop()

    def elementInActiveFormattingElements(self, name):
        """Check if an element exists between the end of the active
        formatting elements and the last marker. If it does, return it, else
        return false"""

        for item in self.activeFormattingElements[::-1]:
            # Check for Marker first because if it's a Marker it doesn't have a
            # name attribute.
            if item == Marker:
                break
            elif item.name == name:
                return item
        return False

    def insertDoctype(self, name, publicId, systemId):
        doctype = self.doctypeClass(name)
        doctype.publicId = publicId
        doctype.systemId = systemId
        self.document.appendChild(doctype)

    def insertComment(self, data, parent=None):
        if parent is None:
            parent = self.openElements[-1]
        parent.appendChild(self.commentClass(data))
                           
    def createElement(self, name, attributes):
        """Create an element but don't insert it anywhere"""
        element = self.elementClass(name)
        element.attributes = attributes
        return element

    def _getInsertFromTable(self):
        return self._insertFromTable

    def _setInsertFromTable(self, value):
        """Switch the function used to insert an element from the
        normal one to the misnested table one and back again"""
        self._insertFromTable = value
        if value:
            self.insertElement = self.insertElementTable
        else:
            self.insertElement = self.insertElementNormal

    insertFromTable = property(_getInsertFromTable, _setInsertFromTable)
        
    def insertElementNormal(self, name, attributes):
        element = self.elementClass(name)
        element.attributes = attributes
        self.openElements[-1].appendChild(element)
        self.openElements.append(element)
        return element

    def insertElementTable(self, name, attributes):
        """Create an element and insert it into the tree""" 
        element = self.elementClass(name)
        element.attributes = attributes
        if self.openElements[-1].name not in tableInsertModeElements:
            return self.insertElementNormal(name, attributes)
        else:
            #We should be in the InTable mode. This means we want to do
            #special magic element rearranging
            parent, insertBefore = self.getTableMisnestedNodePosition()
            if insertBefore is None:
                parent.appendChild(element)
            else:
                parent.insertBefore(element, insertBefore)
            self.openElements.append(element)
        return element

    def insertText(self, data, parent=None):
        """Insert text data."""
        if parent is None:
            parent = self.openElements[-1]

        if (not(self.insertFromTable) or (self.insertFromTable and
                                          self.openElements[-1].name not in
                                          tableInsertModeElements)):
            parent.insertText(data)
        else:
            #We should be in the InTable mode. This means we want to do
            #special magic element rearranging
            parent, insertBefore = self.getTableMisnestedNodePosition()
            parent.insertText(data, insertBefore)
            
    def getTableMisnestedNodePosition(self):
        """Get the foster parent element, and sibling to insert before
        (or None) when inserting a misnested table node"""
        #The foster parent element is the one which comes before the most
        #recently opened table element
        #XXX - this is really inelegant
        lastTable=None
        fosterParent = None
        insertBefore = None
        for elm in self.openElements[::-1]:
            if elm.name == u"table":
                lastTable = elm
                break
        if lastTable:
            #XXX - we should really check that this parent is actually a
            #node here
            if lastTable.parent:
                fosterParent = lastTable.parent
                insertBefore = lastTable
            else:
                fosterParent = self.openElements[
                    self.openElements.index(lastTable) - 1]
        else:
            fosterParent = self.openElements[0]
        return fosterParent, insertBefore

    def generateImpliedEndTags(self, exclude=None):
        name = self.openElements[-1].name
        # XXX td, th and tr are not actually needed
        if (name in frozenset(("dd", "dt", "li", "p", "td", "th", "tr"))
            and name != exclude):
            self.openElements.pop()
            # XXX This is not entirely what the specification says. We should
            # investigate it more closely.
            self.generateImpliedEndTags(exclude)

    def getDocument(self):
        "Return the final tree"
        return self.document
    
    def getFragment(self):
        "Return the final fragment"
        #assert self.innerHTML
        fragment = self.fragmentClass()
        self.openElements[0].reparentChildren(fragment)
        return fragment

    def testSerializer(self, node):
        """Serialize the subtree of node in the format required by unit tests
        node - the node from which to start serializing"""
        raise NotImplementedError