_genshi.py

from StringIO import StringIO
from xml.sax.saxutils import escape

from genshi.input import HTMLParser, XMLParser
from genshi.template import Context, MarkupTemplate

subscriptions = []
feed_types = [
    'application/atom+xml',
    'application/rss+xml',
    'application/rdf+xml'
]

def norm(value):
    """ Convert to Unicode """
    if hasattr(value,'items'):
        return dict([(norm(n),norm(v)) for n,v in value.items()])

    try:
        return value.decode('utf-8')
    except:
        return value.decode('iso-8859-1')

def find_config(config, feed):
    # match based on self link
    for link in feed.links:
        if link.has_key('rel') and link.rel=='self':
            if link.has_key('type') and link.type in feed_types:
                if link.has_key('href') and link.href in subscriptions:
                    return norm(dict(config.parser.items(link.href)))

    # match based on name
    for sub in subscriptions:
        if config.parser.has_option(sub, 'name') and \
            norm(config.parser.get(sub, 'name')) == feed.planet_name:
            return norm(dict(config.parser.items(sub)))

    return {}

class XHTMLParser(object):
    """ parse an XHTML fragment """
    def __init__(self, text):
        self.parser = XMLParser(StringIO("<div>%s</div>" % text))
        self.depth = 0
    def __iter__(self):
        self.iter = self.parser.__iter__()
        return self
    def next(self):
        object = self.iter.next()
        if object[0] == 'END': self.depth = self.depth - 1
        predepth = self.depth
        if object[0] == 'START': self.depth = self.depth + 1
        if predepth: return object
        return self.next()

def streamify(text,bozo):
    """ add a .stream to a _detail textConstruct """
    if text.type == 'text/plain':
        text.stream = HTMLParser(StringIO(escape(text.value)))
    elif text.type == 'text/html' or bozo != 'false':
        text.stream = HTMLParser(StringIO(text.value))
    else:
        text.stream = XHTMLParser(text.value)

def run(script, doc, output_file=None, options={}):
    """ process an Genshi template """

    context = Context(**options)

    tmpl_fileobj = open(script)
    tmpl = MarkupTemplate(tmpl_fileobj, script)
    tmpl_fileobj.close()

    if not output_file: 
        # filter
        context.push({'input':XMLParser(StringIO(doc))})
    else:
        # template
        import time
        from planet import config,feedparser
        from planet.spider import filename

        # gather a list of subscriptions, feeds
        global subscriptions
        feeds = []
        sources = config.cache_sources_directory()
        for sub in config.subscriptions():
            data=feedparser.parse(filename(sources,sub))
            data.feed.config = norm(dict(config.parser.items(sub)))
            if data.feed.has_key('link'):
                feeds.append((data.feed.config.get('name',''),data.feed))
            subscriptions.append(norm(sub))
        feeds.sort()

        # annotate each entry
        new_date_format = config.new_date_format()
        vars = feedparser.parse(StringIO(doc))
        vars.feeds = [value for name,value in feeds]
        last_feed = None
        last_date = None
        for entry in vars.entries:
             entry.source.config = find_config(config, entry.source)

             # add new_feed and new_date fields
             entry.new_feed = entry.source.id
             entry.new_date = date = None
             if entry.has_key('published_parsed'): date=entry.published_parsed
             if entry.has_key('updated_parsed'): date=entry.updated_parsed
             if date: entry.new_date = time.strftime(new_date_format, date)

             # remove new_feed and new_date fields if not "new"
             if entry.new_date == last_date:
                 entry.new_date = None
                 if entry.new_feed == last_feed:
                     entry.new_feed = None
                 else:
                     last_feed = entry.new_feed
             elif entry.new_date:
                 last_date = entry.new_date
                 last_feed = None

             # add streams for all text constructs
             for key in entry.keys():
                 if key.endswith("_detail") and entry[key].has_key('type') and \
                     entry[key].has_key('value'):
                     streamify(entry[key],entry.source.planet_bozo)
             if entry.has_key('content'):
                 for content in entry.content:
                     streamify(content,entry.source.planet_bozo)
     
        # add cumulative feed information to the Genshi context
        vars.feed.config = dict(config.parser.items('Planet',True))
        context.push(vars)

    # apply template
    output=tmpl.generate(context).render('xml')

    if output_file:
        out_file = open(output_file,'w')
        out_file.write(output)
        out_file.close()
    else:
        return output