Skip to content
Snippets Groups Projects
_genshi.py 4.82 KiB
Newer Older
  • Learn to ignore specific revisions
  • from StringIO import StringIO
    from xml.sax.saxutils import escape
    
    from genshi.input import HTMLParser, XMLParser
    from genshi.template import Context, MarkupTemplate
    
    subscriptions = []
    feed_types = [
        'application/atom+xml',
        'application/rss+xml',
        'application/rdf+xml'
    ]
    
    def norm(value):
        """ Convert to Unicode """
        if hasattr(value,'items'):
            return dict([(norm(n),norm(v)) for n,v in value.items()])
    
        try:
            return value.decode('utf-8')
        except:
            return value.decode('iso-8859-1')
    
    def find_config(config, feed):
        # match based on self link
        for link in feed.links:
            if link.has_key('rel') and link.rel=='self':
                if link.has_key('type') and link.type in feed_types:
                    if link.has_key('href') and link.href in subscriptions:
                        return norm(dict(config.parser.items(link.href)))
    
        # match based on name
        for sub in subscriptions:
            if config.parser.has_option(sub, 'name') and \
                norm(config.parser.get(sub, 'name')) == feed.planet_name:
                return norm(dict(config.parser.items(sub)))
    
        return {}
    
    class XHTMLParser(object):
        """ parse an XHTML fragment """
        def __init__(self, text):
            self.parser = XMLParser(StringIO("<div>%s</div>" % text))
            self.depth = 0
        def __iter__(self):
            self.iter = self.parser.__iter__()
            return self
        def next(self):
            object = self.iter.next()
            if object[0] == 'END': self.depth = self.depth - 1
            predepth = self.depth
            if object[0] == 'START': self.depth = self.depth + 1
            if predepth: return object
            return self.next()
    
    def streamify(text,bozo):
        """ add a .stream to a _detail textConstruct """
        if text.type == 'text/plain':
            text.stream = HTMLParser(StringIO(escape(text.value)))
        elif text.type == 'text/html' or bozo != 'false':
            text.stream = HTMLParser(StringIO(text.value))
        else:
            text.stream = XHTMLParser(text.value)
    
    def run(script, doc, output_file=None, options={}):
        """ process an Genshi template """
    
        context = Context(**options)
    
        tmpl_fileobj = open(script)
        tmpl = MarkupTemplate(tmpl_fileobj, script)
        tmpl_fileobj.close()
    
        if not output_file: 
            # filter
            context.push({'input':XMLParser(StringIO(doc))})
        else:
            # template
            import time
            from planet import config,feedparser
            from planet.spider import filename
    
            # gather a list of subscriptions, feeds
            global subscriptions
            feeds = []
            sources = config.cache_sources_directory()
            for sub in config.subscriptions():
                data=feedparser.parse(filename(sources,sub))
                data.feed.config = norm(dict(config.parser.items(sub)))
                if data.feed.has_key('link'):
                    feeds.append((data.feed.config.get('name',''),data.feed))
                subscriptions.append(norm(sub))
            feeds.sort()
    
            # annotate each entry
            new_date_format = config.new_date_format()
            vars = feedparser.parse(StringIO(doc))
            vars.feeds = [value for name,value in feeds]
            last_feed = None
            last_date = None
            for entry in vars.entries:
                 entry.source.config = find_config(config, entry.source)
    
                 # add new_feed and new_date fields
                 entry.new_feed = entry.source.id
                 entry.new_date = date = None
                 if entry.has_key('published_parsed'): date=entry.published_parsed
                 if entry.has_key('updated_parsed'): date=entry.updated_parsed
                 if date: entry.new_date = time.strftime(new_date_format, date)
    
                 # remove new_feed and new_date fields if not "new"
                 if entry.new_date == last_date:
                     entry.new_date = None
                     if entry.new_feed == last_feed:
                         entry.new_feed = None
                     else:
                         last_feed = entry.new_feed
                 elif entry.new_date:
                     last_date = entry.new_date
                     last_feed = None
    
                 # add streams for all text constructs
                 for key in entry.keys():
                     if key.endswith("_detail") and entry[key].has_key('type') and \
                         entry[key].has_key('value'):
                         streamify(entry[key],entry.source.planet_bozo)
                 if entry.has_key('content'):
                     for content in entry.content:
                         streamify(content,entry.source.planet_bozo)
         
            # add cumulative feed information to the Genshi context
            vars.feed.config = dict(config.parser.items('Planet',True))
            context.push(vars)
    
        # apply template
        output=tmpl.generate(context).render('xml')
    
        if output_file:
            out_file = open(output_file,'w')
            out_file.write(output)
            out_file.close()
        else:
            return output