"README.md" did not exist on "3f7f511e189c93578caaf8e2c9708481d6152d52"
Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import gettext
_ = gettext.gettext
import new
import copy
import _base
from html5lib.constants import voidElements
# Cache of the modules generated by getETreeModule, keyed by the synthetic
# module name ("_" + implementation __name__ + "builder").
moduleCache = {}
def getETreeModule(ElementTreeImplementation):
    """Return a module object exposing the tree walker for an implementation.

    The module is named ``"_" + ElementTreeImplementation.__name__ +
    "builder"`` and its namespace is filled with the dict returned by
    ``getETreeBuilder(ElementTreeImplementation)``.  Modules are cached in
    ``moduleCache`` under that name, so repeated calls with an implementation
    of the same ``__name__`` return the very same module object.

    ElementTreeImplementation -- the ElementTree-style module to build for;
    only its ``__name__`` is read here, the rest is up to getETreeBuilder.
    """
    # ``types.ModuleType`` is the class the deprecated ``new.module`` aliased;
    # unlike the ``new`` module it is also available on Python 3.
    import types

    name = "_" + ElementTreeImplementation.__name__ + "builder"
    if name in moduleCache:
        return moduleCache[name]

    mod = types.ModuleType(name)
    mod.__dict__.update(getETreeBuilder(ElementTreeImplementation))
    # Cache only after the namespace has been populated successfully.
    moduleCache[name] = mod
    return mod
def getETreeBuilder(ElementTreeImplementation):
    """Build the tree walker class bound to one ElementTree implementation.

    Returns this function's local namespace as a dict, i.e. the names
    ``ElementTreeImplementation``, ``ElementTree`` and ``TreeWalker``.
    """
    # NOTE: the function ends with ``return locals()``, so every name bound at
    # this level becomes part of the returned namespace.  Do not introduce
    # extra function-level variables here.
    ElementTree = ElementTreeImplementation

    class TreeWalker(_base.NonRecursiveTreeWalker):
        """Non-recursive walker over an ElementTree-style tree.

        Apart from the root (which is passed around as the plain object),
        every "node" handled by this walker is a 3-tuple:

        1. An Element acting as the *context*.  It is not always the parent
           of the node, because ``tail`` text is stored on the element it
           follows.
        2. Either the string ``"text"`` or ``"tail"`` (a text node held by
           the context element) or a child index into the context element.
        3. A list used as a stack of ancestor contexts; each entry is an
           ``(Element, child index)`` pair.  The list is shared between the
           tuples produced during a walk and is mutated in place.
        """

        def getNodeDetails(self, node):
            """Return the ``_base`` details tuple describing *node*."""
            if isinstance(node, tuple):  # otherwise it is the root object
                context, position, _ancestors = node
                if position in ("text", "tail"):
                    return _base.TEXT, getattr(context, position)
                node = context[int(position)]

            # Objects without a ``tag`` are asked for their root element.
            if not hasattr(node, "tag"):
                node = node.getroot()

            tag = node.tag
            if tag in ("<DOCUMENT_ROOT>", "<DOCUMENT_FRAGMENT>"):
                return (_base.DOCUMENT,)
            if tag == "<!DOCTYPE>":
                return _base.DOCTYPE, node.text
            if type(tag) == type(ElementTree.Comment):
                return _base.COMMENT, node.text

            # Anything else is assumed to be an ordinary element.  The last
            # item is truthy when the element has child elements or leading
            # text -- presumably the "has children" flag; confirm in _base.
            return _base.ELEMENT, tag, node.attrib.items(), len(node) or node.text

        def getFirstChild(self, node):
            """Return the node tuple for the first child of *node*.

            The leading ``text`` of the element comes first when it is
            non-empty, otherwise the child at index 0.
            """
            if not isinstance(node, tuple):
                # Root object: start a fresh ancestor stack.
                ancestors = []
            else:
                context, position, ancestors = node
                assert position not in ("text", "tail"), "Text nodes have no children"
                index = int(position)
                # Remember where we came from before descending.
                ancestors.append((context, index))
                node = context[index]

            assert len(node) or node.text, "Node has no children"
            if node.text:
                return (node, "text", ancestors)
            return (node, 0, ancestors)

        def getNextSibling(self, node):
            """Return the node tuple following *node*, or None at the end."""
            assert isinstance(node, tuple), "Node is not a tuple: " + str(node)

            context, position, ancestors = node
            if position == "text":
                # Leading text precedes child 0.
                position = -1
            elif position == "tail":
                # Back to the element this tail belongs to.
                context, position = ancestors.pop()
            else:
                # An element's non-empty tail is visited right after it.
                current = context[position]
                if current.tail:
                    ancestors.append((context, position))
                    return (current, "tail", ancestors)

            # Reached from "text", from "tail", or from an element without a
            # tail: move on to the next child index, if there is one.
            position += 1
            if position < len(context):
                return (context, position, ancestors)
            return None

        def getParentNode(self, node):
            """Return the node tuple of the parent of *node*, or None."""
            assert isinstance(node, tuple)
            _context, _position, ancestors = node
            if not ancestors:
                # HACK: the context element could be returned here, but None
                # stops the algorithm the same way.
                return None
            context, position = ancestors.pop()
            return context, position, ancestors

    return locals()