"""PyRSS2Gen - A Python library for generating RSS 2.0 feeds.""" __name__ = "PyRSS2Gen" __version__ = (1, 0, 0) __author__ = "Andrew Dalke " _generator_name = __name__ + "-" + ".".join(map(str, __version__)) import datetime # Could make this the base class; will need to add 'publish' class WriteXmlMixin: def write_xml(self, outfile, encoding = "iso-8859-1"): from xml.sax import saxutils handler = saxutils.XMLGenerator(outfile, encoding) handler.startDocument() self.publish(handler) handler.endDocument() def to_xml(self, encoding = "iso-8859-1"): try: import cStringIO as StringIO except ImportError: import StringIO f = StringIO.StringIO() self.write_xml(f, encoding) return f.getvalue() def _element(handler, name, obj, d = {}): if isinstance(obj, basestring) or obj is None: # special-case handling to make the API easier # to use for the common case. handler.startElement(name, d) if obj is not None: handler.characters(obj) handler.endElement(name) else: # It better know how to emit the correct XML. obj.publish(handler) def _opt_element(handler, name, obj): if obj is None: return _element(handler, name, obj) def _format_date(dt): """convert a datetime into an RFC 822 formatted date Input date must be in GMT. """ # Looks like: # Sat, 07 Sep 2002 00:00:01 GMT # Can't use strftime because that's locale dependent # # Isn't there a standard way to do this for Python? The # rfc822 and email.Utils modules assume a timestamp. The # following is based on the rfc822 module. return "%s, %02d %s %04d %02d:%02d:%02d GMT" % ( ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][dt.weekday()], dt.day, ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][dt.month-1], dt.year, dt.hour, dt.minute, dt.second) ## # A couple simple wrapper objects for the fields which # take a simple value other than a string. class IntElement: """implements the 'publish' API for integers Takes the tag name and the integer value to publish. (Could be used for anything which uses str() to be published to text for XML.) """ element_attrs = {} def __init__(self, name, val): self.name = name self.val = val def publish(self, handler): handler.startElement(self.name, self.element_attrs) handler.characters(str(self.val)) handler.endElement(self.name) class DateElement: """implements the 'publish' API for a datetime.datetime Takes the tag name and the datetime to publish. Converts the datetime to RFC 2822 timestamp (4-digit year). """ def __init__(self, name, dt): self.name = name self.dt = dt def publish(self, handler): _element(handler, self.name, _format_date(self.dt)) #### class Category: """Publish a category element""" def __init__(self, category, domain = None): self.category = category self.domain = domain def publish(self, handler): d = {} if self.domain is not None: d["domain"] = self.domain _element(handler, "category", self.category, d) class Cloud: """Publish a cloud""" def __init__(self, domain, port, path, registerProcedure, protocol): self.domain = domain self.port = port self.path = path self.registerProcedure = registerProcedure self.protocol = protocol def publish(self, handler): _element(handler, "cloud", None, { "domain": self.domain, "port": str(self.port), "path": self.path, "registerProcedure": self.registerProcedure, "protocol": self.protocol}) class Image: """Publish a channel Image""" element_attrs = {} def __init__(self, url, title, link, width = None, height = None, description = None): self.url = url self.title = title self.link = link self.width = width self.height = height self.description = description def publish(self, handler): handler.startElement("image", self.element_attrs) _element(handler, "url", self.url) _element(handler, "title", self.title) _element(handler, "link", self.link) width = self.width if isinstance(width, int): width = IntElement("width", width) _opt_element(handler, "width", width) height = self.height if isinstance(height, int): height = IntElement("height", height) _opt_element(handler, "height", height) _opt_element(handler, "description", self.description) handler.endElement("image") class Guid: """Publish a guid Defaults to being a permalink, which is the assumption if it's omitted. Hence strings are always permalinks. """ def __init__(self, guid, isPermaLink = 1): self.guid = guid self.isPermaLink = isPermaLink def publish(self, handler): d = {} if self.isPermaLink: d["isPermaLink"] = "true" else: d["isPermaLink"] = "false" _element(handler, "guid", self.guid, d) class TextInput: """Publish a textInput Apparently this is rarely used. """ element_attrs = {} def __init__(self, title, description, name, link): self.title = title self.description = description self.name = name self.link = link def publish(self, handler): handler.startElement("textInput", self.element_attrs) _element(handler, "title", self.title) _element(handler, "description", self.description) _element(handler, "name", self.name) _element(handler, "link", self.link) handler.endElement("textInput") class Enclosure: """Publish an enclosure""" def __init__(self, url, length, type): self.url = url self.length = length self.type = type def publish(self, handler): _element(handler, "enclosure", None, {"url": self.url, "length": str(self.length), "type": self.type, }) class Source: """Publish the item's original source, used by aggregators""" def __init__(self, name, url): self.name = name self.url = url def publish(self, handler): _element(handler, "source", self.name, {"url": self.url}) class SkipHours: """Publish the skipHours This takes a list of hours, as integers. """ element_attrs = {} def __init__(self, hours): self.hours = hours def publish(self, handler): if self.hours: handler.startElement("skipHours", self.element_attrs) for hour in self.hours: _element(handler, "hour", str(hour)) handler.endElement("skipHours") class SkipDays: """Publish the skipDays This takes a list of days as strings. """ element_attrs = {} def __init__(self, days): self.days = days def publish(self, handler): if self.days: handler.startElement("skipDays", self.element_attrs) for day in self.days: _element(handler, "day", day) handler.endElement("skipDays") class RSS2(WriteXmlMixin): """The main RSS class. Stores the channel attributes, with the "category" elements under ".categories" and the RSS items under ".items". """ rss_attrs = {"version": "2.0"} element_attrs = {} def __init__(self, title, link, description, language = None, copyright = None, managingEditor = None, webMaster = None, pubDate = None, # a datetime, *in* *GMT* lastBuildDate = None, # a datetime categories = None, # list of strings or Category generator = _generator_name, docs = "http://blogs.law.harvard.edu/tech/rss", cloud = None, # a Cloud ttl = None, # integer number of minutes image = None, # an Image rating = None, # a string; I don't know how it's used textInput = None, # a TextInput skipHours = None, # a SkipHours with a list of integers skipDays = None, # a SkipDays with a list of strings items = None, # list of RSSItems ): self.title = title self.link = link self.description = description self.language = language self.copyright = copyright self.managingEditor = managingEditor self.webMaster = webMaster self.pubDate = pubDate self.lastBuildDate = lastBuildDate if categories is None: categories = [] self.categories = categories self.generator = generator self.docs = docs self.cloud = cloud self.ttl = ttl self.image = image self.rating = rating self.textInput = textInput self.skipHours = skipHours self.skipDays = skipDays if items is None: items = [] self.items = items def publish(self, handler): handler.startElement("rss", self.rss_attrs) handler.startElement("channel", self.element_attrs) _element(handler, "title", self.title) _element(handler, "link", self.link) _element(handler, "description", self.description) self.publish_extensions(handler) _opt_element(handler, "language", self.language) _opt_element(handler, "copyright", self.copyright) _opt_element(handler, "managingEditor", self.managingEditor) _opt_element(handler, "webMaster", self.webMaster) pubDate = self.pubDate if isinstance(pubDate, datetime.datetime): pubDate = DateElement("pubDate", pubDate) _opt_element(handler, "pubDate", pubDate) lastBuildDate = self.lastBuildDate if isinstance(lastBuildDate, datetime.datetime): lastBuildDate = DateElement("lastBuildDate", lastBuildDate) _opt_element(handler, "lastBuildDate", lastBuildDate) for category in self.categories: if isinstance(category, basestring): category = Category(category) category.publish(handler) _opt_element(handler, "generator", self.generator) _opt_element(handler, "docs", self.docs) if self.cloud is not None: self.cloud.publish(handler) ttl = self.ttl if isinstance(self.ttl, int): ttl = IntElement("ttl", ttl) _opt_element(handler, "tt", ttl) if self.image is not None: self.image.publish(handler) _opt_element(handler, "rating", self.rating) if self.textInput is not None: self.textInput.publish(handler) if self.skipHours is not None: self.skipHours.publish(handler) if self.skipDays is not None: self.skipDays.publish(handler) for item in self.items: item.publish(handler) handler.endElement("channel") handler.endElement("rss") def publish_extensions(self, handler): # Derived classes can hook into this to insert # output after the three required fields. pass class RSSItem(WriteXmlMixin): """Publish an RSS Item""" element_attrs = {} def __init__(self, title = None, # string link = None, # url as string description = None, # string author = None, # email address as string categories = None, # list of string or Category comments = None, # url as string enclosure = None, # an Enclosure guid = None, # a unique string pubDate = None, # a datetime source = None, # a Source ): if title is None and description is None: raise TypeError( "must define at least one of 'title' or 'description'") self.title = title self.link = link self.description = description self.author = author if categories is None: categories = [] self.categories = categories self.comments = comments self.enclosure = enclosure self.guid = guid self.pubDate = pubDate self.source = source # It sure does get tedious typing these names three times... def publish(self, handler): handler.startElement("item", self.element_attrs) _opt_element(handler, "title", self.title) _opt_element(handler, "link", self.link) self.publish_extensions(handler) _opt_element(handler, "description", self.description) _opt_element(handler, "author", self.author) for category in self.categories: if isinstance(category, basestring): category = Category(category) category.publish(handler) _opt_element(handler, "comments", self.comments) if self.enclosure is not None: self.enclosure.publish(handler) _opt_element(handler, "guid", self.guid) pubDate = self.pubDate if isinstance(pubDate, datetime.datetime): pubDate = DateElement("pubDate", pubDate) _opt_element(handler, "pubDate", pubDate) if self.source is not None: self.source.publish(handler) handler.endElement("item") def publish_extensions(self, handler): # Derived classes can hook into this to insert # output after the title and link elements pass