import gettext _ = gettext.gettext import new import copy import _base from html5lib.constants import voidElements moduleCache = {} def getETreeModule(ElementTreeImplementation): name = "_" + ElementTreeImplementation.__name__+"builder" if name in moduleCache: return moduleCache[name] else: mod = new.module("_" + ElementTreeImplementation.__name__+"builder") objs = getETreeBuilder(ElementTreeImplementation) mod.__dict__.update(objs) moduleCache[name] = mod return mod def getETreeBuilder(ElementTreeImplementation): ElementTree = ElementTreeImplementation class TreeWalker(_base.NonRecursiveTreeWalker): """Given the particular ElementTree representation, this implementation, to avoid using recursion, returns "nodes" as tuples with the following content: 1. An Element node serving as *context* (it cannot be called the parent node due to the particular ``tail`` text nodes. 2. Either the string literals ``"text"`` or ``"tail"`` or a child index 3. A list used as a stack of all ancestor *context nodes*. It is a pair tuple whose first item is an Element and second item is a child index. """ def getNodeDetails(self, node): if isinstance(node, tuple): # It might be the root Element elt, key, parents = node if key in ("text", "tail"): return _base.TEXT, getattr(elt, key) else: node = elt[int(key)] if not(hasattr(node, "tag")): node = node.getroot() if node.tag in ("<DOCUMENT_ROOT>", "<DOCUMENT_FRAGMENT>"): return (_base.DOCUMENT,) elif node.tag == "<!DOCTYPE>": return _base.DOCTYPE, node.text elif type(node.tag) == type(ElementTree.Comment): return _base.COMMENT, node.text else: #This is assumed to be an ordinary element return _base.ELEMENT, node.tag, node.attrib.items(), len(node) or node.text def getFirstChild(self, node): if isinstance(node, tuple): # It might be the root Element elt, key, parents = node assert key not in ("text", "tail"), "Text nodes have no children" parents.append((elt, int(key))) node = elt[int(key)] else: parents = [] assert len(node) or node.text, "Node has no children" if node.text: return (node, "text", parents) else: return (node, 0, parents) def getNextSibling(self, node): assert isinstance(node, tuple), "Node is not a tuple: " + str(node) elt, key, parents = node if key == "text": key = -1 elif key == "tail": elt, key = parents.pop() else: # Look for "tail" of the "revisited" node child = elt[key] if child.tail: parents.append((elt, key)) return (child, "tail", parents) # case where key were "text" or "tail" or elt[key] had a tail key += 1 if len(elt) > key: return (elt, key, parents) else: return None def getParentNode(self, node): assert isinstance(node, tuple) elt, key, parents = node if parents: elt, key = parents.pop() return elt, key, parents else: # HACK: We could return ``elt`` but None will stop the algorithm the same way return None return locals()