Skip to content
Snippets Groups Projects
pulldom.py 1.79 KiB
Newer Older
  • Learn to ignore specific revisions
  • from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \
        COMMENT, IGNORABLE_WHITESPACE, CHARACTERS
    
    import _base
    
    from html5lib.constants import voidElements
    
    class TreeWalker(_base.TreeWalker):
        """Tree walker that turns a pulldom-style event stream into tokens.

        ``self.tree`` is iterated as a sequence of ``(event_type, node)``
        pairs, where ``event_type`` is compared against the constants
        imported from ``xml.dom.pulldom``.
        """

        def __iter__(self):
            """Yield the tokens for every event in ``self.tree``.

            Events are handled one step late: each event is passed to
            ``tokens`` together with the event that follows it, so that
            ``tokens`` can inspect the lookahead.

            Once an ``"EmptyTag"`` token has been produced for a node, all
            subsequent events are skipped until an event carrying that same
            node object is reached.

            An empty event stream yields no tokens.

            Raises:
                ValueError: if the stream ends while events are still being
                    skipped, i.e. the node that produced the ``"EmptyTag"``
                    token never showed up again.
            """
            ignore_until = None
            previous = None
            for event in self.tree:
                # Process the *previous* event now that its successor is known,
                # unless we are still skipping the content of a void element.
                if previous is not None and \
                  (ignore_until is None or previous[1] is ignore_until):
                    if previous[1] is ignore_until:
                        # Reached the node we were waiting for: stop skipping.
                        ignore_until = None
                    for token in self.tokens(previous, event):
                        yield token
                        if token["type"] == "EmptyTag":
                            # Skip everything up to the matching event for
                            # this same node.
                            ignore_until = previous[1]
                previous = event

            if previous is None:
                # The stream produced no events at all; there is nothing to
                # flush (calling tokens(None, None) would fail to unpack).
                return

            # Flush the last event, which has no successor.
            if ignore_until is None or previous[1] is ignore_until:
                for token in self.tokens(previous, None):
                    yield token
            else:
                raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")

        def tokens(self, event, next):
            """Yield the token(s) corresponding to a single event.

            Args:
                event: an ``(event_type, node)`` pair.
                next: the event that follows ``event`` in the stream, or
                    ``None`` when ``event`` is the last one.

            Dispatch by event type:
                * START_ELEMENT: for names in ``voidElements``, the tokens
                  from ``self.emptyTag``; otherwise ``self.startTag``.
                * END_ELEMENT: ``self.endTag`` unless the name is in
                  ``voidElements`` (in which case nothing is yielded).
                * COMMENT: ``self.comment`` on the node value.
                * IGNORABLE_WHITESPACE / CHARACTERS: the tokens from
                  ``self.text`` on the node value.
                * anything else: ``self.unknown`` on the event type.
            """
            event_type, node = event
            if event_type == START_ELEMENT:
                name = node.nodeName
                if name in voidElements:
                    # Third argument is true when there is no following event
                    # or the following event belongs to a different node
                    # (presumably a "has children" flag -- confirm against
                    # _base.TreeWalker.emptyTag).
                    for token in self.emptyTag(name, \
                      node.attributes.items(), not next or next[1] is not node):
                        yield token
                else:
                    yield self.startTag(name, node.attributes.items())

            elif event_type == END_ELEMENT:
                name = node.nodeName
                if name not in voidElements:
                    yield self.endTag(name)

            elif event_type == COMMENT:
                yield self.comment(node.nodeValue)

            elif event_type in (IGNORABLE_WHITESPACE, CHARACTERS):
                for token in self.text(node.nodeValue):
                    yield token

            else:
                yield self.unknown(event_type)