Skip to content
Snippets Groups Projects
soup.py 1.1 KiB
Newer Older
  • Learn to ignore specific revisions
  • import gettext
    _ = gettext.gettext
    
    from BeautifulSoup import BeautifulSoup, Declaration, Comment, Tag
    
    import _base
    
    class TreeWalker(_base.NonRecursiveTreeWalker):
        """Tree walker over a BeautifulSoup parse tree.

        Implements the node-inspection and navigation hooks
        (getNodeDetails, getFirstChild, getNextSibling, getParentNode)
        on top of BeautifulSoup's node objects.
        """

        def getNodeDetails(self, node):
            """Classify ``node`` and return its details as a tuple.

            The first item of the returned tuple is one of the ``_base``
            node-type constants; the remaining items depend on the type:

            * ``(_base.DOCUMENT,)`` for a ``BeautifulSoup`` object
            * ``(_base.DOCTYPE, text)`` for a ``Declaration``
            * ``(_base.COMMENT, text)`` for a ``Comment``
            * ``(_base.TEXT, node)`` for a ``unicode`` string
            * ``(_base.ELEMENT, name, attribute_pairs, contents)`` for a ``Tag``
            * ``(_base.UNKNOWN, class_name)`` for anything else

            The order of the checks is significant: the more specific
            classes are tested before the generic ``unicode`` and ``Tag``
            checks.
            """
            if isinstance(node, BeautifulSoup): # Document or DocumentFragment
                return (_base.DOCUMENT,)

            elif isinstance(node, Declaration): # DocumentType
                #Slice needed to remove markup added during unicode conversion
                return _base.DOCTYPE, unicode(node.string)[2:-1]

            elif isinstance(node, Comment):
                # NOTE(review): the slice presumably strips the "<!--" / "-->"
                # delimiters added during unicode conversion -- confirm.
                return _base.COMMENT, unicode(node.string)[4:-3]

            elif isinstance(node, unicode): # TextNode
                return _base.TEXT, node

            elif isinstance(node, Tag): # Element
                # Attributes are passed through a dict before being returned
                # as a list of (name, value) pairs; the contents list is
                # returned as the last item.
                return _base.ELEMENT, node.name, \
                    dict(node.attrs).items(), node.contents
            else:
                return _base.UNKNOWN, node.__class__.__name__

        def getFirstChild(self, node):
            """Return the first child of ``node``, or None if it has none.

            An empty ``contents`` list yields None instead of raising
            IndexError.
            """
            children = node.contents
            if not children:
                # NOTE(review): assumes the base walker treats None as
                # "no child", as it would for a None sibling/parent -- confirm.
                return None
            return children[0]

        def getNextSibling(self, node):
            """Return the node's ``nextSibling`` attribute."""
            return node.nextSibling

        def getParentNode(self, node):
            """Return the node's ``parent`` attribute."""
            return node.parent