Skip to content
Snippets Groups Projects
_base.py 11.7 KiB
Newer Older
  • Learn to ignore specific revisions
  • from html5lib.constants import scopingElements, tableInsertModeElements
    try:
        frozenset
    except NameError:
        # Import from the sets module for python 2.3
        from sets import Set as set
        from sets import ImmutableSet as frozenset
    
    # The scope markers are inserted when entering buttons, object elements,
    # marquees, table cells, and table captions, and are used to prevent formatting
    # from "leaking" into tables, buttons, object elements, and marquees.
    # A marker is represented by None, so an entry taken from the list of active
    # formatting elements has to be compared with Marker before any attribute
    # such as .name is read from it.
    Marker = None
    
    #XXX - TODO; make the default interface more ElementTree-like
    #            rather than DOM-like
    
    class Node(object):
        """Base class describing the node interface.

        Every tree-mutating method raises NotImplementedError here, so a usable
        node type has to override them.
        """

        def __init__(self, name):
            """Node representing an item in the tree.
            name - The tag name associated with the node
            parent - The parent of the current node (or None for the document node)
            value - The value of the current node (applies to text nodes and
            comments)
            attributes - a dict holding name, value pairs for attributes of the node
            childNodes - a list of child nodes of the current node. This must
            include all elements but not necessarily other node types
            _flags - A list of miscellaneous flags that can be set on the node
            """
            self.name = name
            self.parent = None
            self.value = None
            self.attributes = {}
            self.childNodes = []
            self._flags = []

        def __unicode__(self):
            """Return the node rendered as a start tag, e.g. <a href="x">.
            """
            # items() exists on every Python version, whereas iteritems() was
            # removed from dict in Python 3.
            attributesStr = " ".join(["%s=\"%s\"" % (name, value)
                                      for name, value in
                                      self.attributes.items()])
            if attributesStr:
                return "<%s %s>" % (self.name, attributesStr)
            else:
                return "<%s>" % (self.name)

        def __repr__(self):
            return "<%s %s>" % (self.__class__, self.name)

        def appendChild(self, node):
            """Insert node as a child of the current node
            """
            raise NotImplementedError

        def insertText(self, data, insertBefore=None):
            """Insert data as text in the current node, positioned before the
            start of node insertBefore or to the end of the node's text.
            """
            raise NotImplementedError

        def insertBefore(self, node, refNode):
            """Insert node as a child of the current node, before refNode in the
            list of child nodes. Raises ValueError if refNode is not a child of
            the current node"""
            raise NotImplementedError

        def removeChild(self, node):
            """Remove node from the children of the current node
            """
            raise NotImplementedError

        def reparentChildren(self, newParent):
            """Move all the children of the current node to newParent.
            This is needed so that trees that don't store text as nodes move the
            text in the correct way
            """
            #XXX - should this method be made more general?
            # Walk a snapshot of the child list: an appendChild implementation
            # that detaches the child from its old parent would otherwise
            # shrink the list being iterated and cause children to be skipped.
            for child in list(self.childNodes):
                newParent.appendChild(child)
            self.childNodes = []

        def cloneNode(self):
            """Return a shallow copy of the current node i.e. a node with the same
            name and attributes but with no parent or child nodes
            """
            raise NotImplementedError

        def hasContent(self):
            """Return true if the node has children or text, false otherwise
            """
            raise NotImplementedError
    
    class TreeBuilder(object):
        """Base treebuilder implementation
        documentClass - the class to use for the bottommost node of a document
        elementClass - the class to use for HTML Elements
        commentClass - the class to use for comments
        doctypeClass - the class to use for doctypes
        """
    
        #Document class
        documentClass = None
    
        #The class to use for creating a node
        elementClass = None
    
        #The class to use for creating comments
        commentClass = None
    
        #The class to use for creating doctypes
        doctypeClass = None
        
        #Fragment class
        fragmentClass = None
    
        def __init__(self):
            """Create the builder; all per-parse state is set up by reset()."""
            self.reset()
        
        def reset(self):
            """Put the builder back into its initial state: empty element
            stacks, no head/form pointers, normal insertion mode and a new
            document created from documentClass.
            """
            self.openElements = []
            self.activeFormattingElements = []
    
            #XXX - rename these to headElement, formElement
            self.headPointer = None
            self.formPointer = None
    
            # Assigned through the insertFromTable property, which also binds
            # self.insertElement to insertElementNormal.
            self.insertFromTable = False
    
            self.document = self.documentClass()
    
        def elementInScope(self, target, tableVariant=False):
            # Exit early when possible.
            if self.openElements[-1].name == target:
                return True
    
            # AT Use reverse instead of [::-1] when we can rely on Python 2.4
            # AT How about while True and simply set node to [-1] and set it to
            # [-2] at the end...
            for node in self.openElements[::-1]:
                if node.name == target:
                    return True
                elif node.name == "table":
                    return False
                elif not tableVariant and node.name in scopingElements:
                    return False
                elif node.name == "html":
                    return False
            assert False # We should never reach this point
    
        def reconstructActiveFormattingElements(self):
            # Within this algorithm the order of steps described in the
            # specification is not quite the same as the order of steps in the
            # code. It should still do the same though.
    
            # Step 1: stop the algorithm when there's nothing to do.
            if not self.activeFormattingElements:
                return
    
            # Step 2 and step 3: we start with the last element. So i is -1.
            i = -1
            entry = self.activeFormattingElements[i]
            if entry == Marker or entry in self.openElements:
                return
    
            # Step 6
            while entry != Marker and entry not in self.openElements:
                # Step 5: let entry be one earlier in the list.
                i -= 1
                try:
                    entry = self.activeFormattingElements[i]
                except:
                    # Step 4: at this point we need to jump to step 8. By not doing
                    # i += 1 which is also done in step 7 we achieve that.
                    break
            while True:
                # Step 7
                i += 1
    
                # Step 8
                clone = self.activeFormattingElements[i].cloneNode()
    
                # Step 9
                element = self.insertElement(clone.name, clone.attributes)
    
                # Step 10
                self.activeFormattingElements[i] = element
    
                # Step 11
                if element == self.activeFormattingElements[-1]:
                    break
    
        def clearActiveFormattingElements(self):
            entry = self.activeFormattingElements.pop()
            while self.activeFormattingElements and entry != Marker:
                entry = self.activeFormattingElements.pop()
    
        def elementInActiveFormattingElements(self, name):
            """Check if an element exists between the end of the active
            formatting elements and the last marker. If it does, return it, else
            return false"""
    
            for item in self.activeFormattingElements[::-1]:
                # Check for Marker first because if it's a Marker it doesn't have a
                # name attribute.
                if item == Marker:
                    break
                elif item.name == name:
                    return item
            return False
    
        def insertDoctype(self, name, publicId, systemId):
            doctype = self.doctypeClass(name)
            doctype.publicId = publicId
            doctype.systemId = systemId
            self.document.appendChild(doctype)
    
        def insertComment(self, data, parent=None):
            if parent is None:
                parent = self.openElements[-1]
            parent.appendChild(self.commentClass(data))
                               
        def createElement(self, name, attributes):
            """Create an element but don't insert it anywhere"""
            element = self.elementClass(name)
            element.attributes = attributes
            return element
    
        def _getInsertFromTable(self):
            """Getter of the insertFromTable property: return the flag last
            stored by _setInsertFromTable."""
            return self._insertFromTable
    
        def _setInsertFromTable(self, value):
            """Switch the function used to insert an element from the
            normal one to the misnested table one and back again"""
            self._insertFromTable = value
            if value:
                self.insertElement = self.insertElementTable
            else:
                self.insertElement = self.insertElementNormal
    
        insertFromTable = property(_getInsertFromTable, _setInsertFromTable)
            
        def insertElementNormal(self, name, attributes):
            element = self.elementClass(name)
            element.attributes = attributes
            self.openElements[-1].appendChild(element)
            self.openElements.append(element)
            return element
    
        def insertElementTable(self, name, attributes):
            """Create an element and insert it into the tree

            When the current element's name is not in tableInsertModeElements
            the work is delegated to insertElementNormal. Otherwise the new
            element is placed at the position reported by
            getTableMisnestedNodePosition and pushed onto the stack of open
            elements. The inserted element is returned in both cases.
            """
            # Decide before building anything: insertElementNormal creates its
            # own element, so constructing one here first would be wasted.
            if self.openElements[-1].name not in tableInsertModeElements:
                return self.insertElementNormal(name, attributes)

            element = self.elementClass(name)
            element.attributes = attributes
            #We should be in the InTable mode. This means we want to do
            #special magic element rearranging
            parent, insertBefore = self.getTableMisnestedNodePosition()
            if insertBefore is None:
                parent.appendChild(element)
            else:
                parent.insertBefore(element, insertBefore)
            self.openElements.append(element)
            return element
    
        def insertText(self, data, parent=None):
            """Insert text data."""
            if parent is None:
                parent = self.openElements[-1]
    
            if (not(self.insertFromTable) or (self.insertFromTable and
                                              self.openElements[-1].name not in
                                              tableInsertModeElements)):
                parent.insertText(data)
            else:
                #We should be in the InTable mode. This means we want to do
                #special magic element rearranging
                parent, insertBefore = self.getTableMisnestedNodePosition()
                parent.insertText(data, insertBefore)
                
        def getTableMisnestedNodePosition(self):
            """Get the foster parent element, and sibling to insert before
            (or None) when inserting a misnested table node"""
            #The foster parent element is the one which comes before the most
            #recently opened table element
            #XXX - this is really inelegant
            lastTable=None
            fosterParent = None
            insertBefore = None
            for elm in self.openElements[::-1]:
                if elm.name == u"table":
                    lastTable = elm
                    break
            if lastTable:
                #XXX - we should really check that this parent is actually a
                #node here
                if lastTable.parent:
                    fosterParent = lastTable.parent
                    insertBefore = lastTable
                else:
                    fosterParent = self.openElements[
                        self.openElements.index(lastTable) - 1]
            else:
                fosterParent = self.openElements[0]
            return fosterParent, insertBefore
    
        def generateImpliedEndTags(self, exclude=None):
            name = self.openElements[-1].name
            # XXX td, th and tr are not actually needed
            if (name in frozenset(("dd", "dt", "li", "p", "td", "th", "tr"))
                and name != exclude):
                self.openElements.pop()
                # XXX This is not entirely what the specification says. We should
                # investigate it more closely.
                self.generateImpliedEndTags(exclude)
    
        def getDocument(self):
            "Return the final tree, i.e. the document object created by reset()"
            return self.document
        
        def getFragment(self):
            "Return the final fragment"
            #assert self.innerHTML
            fragment = self.fragmentClass()
            self.openElements[0].reparentChildren(fragment)
            return fragment
    
        def testSerializer(self, node):
            """Serialize the subtree of node in the format required by unit tests
            node - the node from which to start serializing

            Not implemented in this base class; always raises
            NotImplementedError."""
            raise NotImplementedError