Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, \
START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
from genshi.output import NamespaceFlattener
import _base
from html5lib.constants import voidElements
class TreeWalker(_base.TreeWalker):
def __iter__(self):
depth = 0
ignore_until = None
previous = None
for event in NamespaceFlattener(prefixes={
'http://www.w3.org/1999/xhtml': ''
})(self.tree):
if previous is not None:
if previous[0] == START:
depth += 1
if ignore_until <= depth:
ignore_until = None
if ignore_until is None:
for token in self.tokens(previous, event):
yield token
if token["type"] == "EmptyTag":
ignore_until = depth
if previous[0] == END:
depth -= 1
previous = event
if previous is not None:
if ignore_until is None or ignore_until <= depth:
for token in self.tokens(previous, None):
yield token
elif ignore_until is not None:
raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")
def tokens(self, event, next):
kind, data, pos = event
if kind == START:
tag, attrib = data
if tag in voidElements:
for token in self.emptyTag(tag, list(attrib), \
not next or next[0] != END or next[1] != tag):
yield token
else:
yield self.startTag(tag, list(attrib))
elif kind == END:
if data not in voidElements:
yield self.endTag(data)
elif kind == COMMENT:
yield self.comment(data)
elif kind == TEXT:
for token in self.text(data):
yield token
elif kind == DOCTYPE:
yield self.doctype(*data)
elif kind in (XML_DECL, DOCTYPE, START_NS, END_NS, \
START_CDATA, END_CDATA, PI):
pass
else:
yield self.unknown(kind)