1"""PyRSS2Gen - A Python library for generating RSS 2.0 feeds."""
2
# NOTE: this rebinds the module's own __name__.  In the visible code the
# value is only used to build the generator string below.
__name__ = "PyRSS2Gen"
# Version as a tuple of integers; joined with "." when rendered.
__version__ = (1, 0, 0)
__author__ = "Andrew Dalke <dalke@dalkescientific.com>"

# "<name>-<version>" string, used as the default value of the channel's
# <generator> element (see the 'generator' argument of RSS2).
_generator_name = __name__ + "-" + ".".join(map(str, __version__))
8
9import datetime
10
11# Could make this the base class; will need to add 'publish'
class WriteXmlMixin:
    """Mixin that serializes an object to XML.

    The host class must provide a publish(handler) method which emits
    its content as SAX events on the given handler.
    """
    def write_xml(self, outfile, encoding = "iso-8859-1"):
        """Write the complete XML document to the file-like 'outfile'.

        'encoding' is passed to the XML generator and appears in the
        XML declaration.
        """
        from xml.sax import saxutils
        handler = saxutils.XMLGenerator(outfile, encoding)
        handler.startDocument()
        self.publish(handler)
        handler.endDocument()

    def to_xml(self, encoding = "iso-8859-1"):
        """Return the complete XML document as a string."""
        # Prefer the Python 2 in-memory buffers (unchanged behaviour
        # there); those modules no longer exist on Python 3, where
        # io.StringIO is the replacement.
        try:
            from cStringIO import StringIO
        except ImportError:
            try:
                from StringIO import StringIO
            except ImportError:
                from io import StringIO
        f = StringIO()
        self.write_xml(f, encoding)
        return f.getvalue()
28
29
# 'basestring' only exists on Python 2 (where it covers str and unicode);
# fall back to 'str' on interpreters that no longer define it.
try:
    _string_types = basestring
except NameError:
    _string_types = str

def _element(handler, name, obj, d = None):
    """Emit one element for 'obj' on the SAX 'handler'.

    If 'obj' is a string or None, a <name> element is written with the
    attributes in 'd' (none when 'd' is omitted); a string becomes the
    element text and None produces an empty element.

    Any other object is expected to provide publish(handler) and is
    asked to emit itself; 'name' and 'd' are not used in that case.
    """
    if d is None:
        # A fresh dict per call instead of a shared mutable default.
        d = {}
    if isinstance(obj, _string_types) or obj is None:
        # special-case handling to make the API easier
        # to use for the common case.
        handler.startElement(name, d)
        if obj is not None:
            handler.characters(obj)
        handler.endElement(name)
    else:
        # It better know how to emit the correct XML.
        obj.publish(handler)
41
def _opt_element(handler, name, obj):
    """Emit 'obj' through _element, unless it is None.

    None means "field not set", so nothing at all is written for it.
    """
    if obj is not None:
        _element(handler, name, obj)
46
47
def _format_date(dt):
    """convert a datetime into an RFC 822 formatted date

    A naive datetime (no tzinfo) must already be in GMT.  A
    timezone-aware datetime is shifted to GMT before formatting, so
    the trailing "GMT" is correct whatever zone it was created in.
    """
    # Looks like:
    #   Sat, 07 Sep 2002 00:00:01 GMT
    # Can't use strftime because that's locale dependent
    #
    # Isn't there a standard way to do this for Python?  The
    # rfc822 and email.Utils modules assume a timestamp.  The
    # following is based on the rfc822 module.
    offset = dt.utcoffset()
    if offset is not None:
        # Aware datetime: remove the UTC offset and drop tzinfo so the
        # fields read below are GMT wall-clock values.
        dt = (dt - offset).replace(tzinfo=None)
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
            ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][dt.weekday()],
            dt.day,
            ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
             "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][dt.month-1],
            dt.year, dt.hour, dt.minute, dt.second)
66
67
68##
69# A couple simple wrapper objects for the fields which
70# take a simple value other than a string.
class IntElement:
    """Publishable wrapper for an integer-valued element.

    Constructed from the tag name and the value.  The value is turned
    into text with str(), so any object with a suitable str() form can
    be published this way.
    """
    # Attributes written on the element's start tag.
    element_attrs = {}

    def __init__(self, name, val):
        self.name, self.val = name, val

    def publish(self, handler):
        """Emit <name>str(val)</name> on the SAX handler."""
        tag = self.name
        handler.startElement(tag, self.element_attrs)
        handler.characters(str(self.val))
        handler.endElement(tag)
87
class DateElement:
    """Publishable wrapper for a datetime.datetime-valued element.

    Constructed from the tag name and the datetime.  On publish the
    datetime is rendered by _format_date (a timestamp with a 4-digit
    year) and written as the element's text.
    """
    def __init__(self, name, dt):
        self.name, self.dt = name, dt

    def publish(self, handler):
        """Emit <name>formatted date</name> on the SAX handler."""
        stamp = _format_date(self.dt)
        _element(handler, self.name, stamp)
100####
101
class Category:
    """A <category> element with an optional 'domain' attribute."""
    def __init__(self, category, domain = None):
        self.category, self.domain = category, domain

    def publish(self, handler):
        """Emit the category; 'domain' is written only when it is set."""
        attrs = {}
        domain = self.domain
        if domain is not None:
            attrs["domain"] = domain
        _element(handler, "category", self.category, attrs)
112
class Cloud:
    """A <cloud> element; all of its data is carried in attributes."""
    def __init__(self, domain, port, path,
                 registerProcedure, protocol):
        self.domain, self.port, self.path = domain, port, path
        self.registerProcedure = registerProcedure
        self.protocol = protocol

    def publish(self, handler):
        """Emit an empty <cloud> element carrying the five attributes."""
        # 'port' is converted with str() so it can be given as a number.
        attrs = dict([
            ("domain", self.domain),
            ("port", str(self.port)),
            ("path", self.path),
            ("registerProcedure", self.registerProcedure),
            ("protocol", self.protocol),
        ])
        _element(handler, "cloud", None, attrs)
129
class Image:
    """The channel's <image> element.

    'url', 'title' and 'link' are always written; 'width', 'height'
    and 'description' are written only when they are not None.
    """
    # Attributes written on the <image> start tag.
    element_attrs = {}

    def __init__(self, url, title, link,
                 width = None, height = None, description = None):
        self.url, self.title, self.link = url, title, link
        self.width, self.height = width, height
        self.description = description

    def publish(self, handler):
        """Emit <image> and its child elements on the SAX handler."""
        handler.startElement("image", self.element_attrs)

        # Required children, in output order.
        for tag in ("url", "title", "link"):
            _element(handler, tag, getattr(self, tag))

        # Optional dimensions: integers are wrapped so they are
        # rendered through str(); other values are passed on as given.
        for tag in ("width", "height"):
            size = getattr(self, tag)
            if isinstance(size, int):
                size = IntElement(tag, size)
            _opt_element(handler, tag, size)

        _opt_element(handler, "description", self.description)

        handler.endElement("image")
162
class Guid:
    """A <guid> element with an explicit isPermaLink attribute.

    isPermaLink defaults to true, which is also what readers assume
    when the attribute is missing; a plain string guid is therefore
    always treated as a permalink.
    """
    def __init__(self, guid, isPermaLink = 1):
        self.guid, self.isPermaLink = guid, isPermaLink

    def publish(self, handler):
        """Emit <guid isPermaLink="true|false">guid</guid>."""
        # Any truthy value counts as "true".
        flag = {True: "true", False: "false"}[bool(self.isPermaLink)]
        _element(handler, "guid", self.guid, {"isPermaLink": flag})
179
class TextInput:
    """The channel's <textInput> element.

    Apparently this is rarely used.
    """
    # Attributes written on the <textInput> start tag.
    element_attrs = {}

    def __init__(self, title, description, name, link):
        self.title, self.description = title, description
        self.name, self.link = name, link

    def publish(self, handler):
        """Emit <textInput> with its four child elements."""
        handler.startElement("textInput", self.element_attrs)
        # Children are written in this fixed order.
        for tag in ("title", "description", "name", "link"):
            _element(handler, tag, getattr(self, tag))
        handler.endElement("textInput")
199
200
class Enclosure:
    """An <enclosure> element; all of its data is carried in attributes."""
    def __init__(self, url, length, type):
        self.url, self.length, self.type = url, length, type

    def publish(self, handler):
        """Emit an empty <enclosure> with url, length and type attributes."""
        # 'length' is converted with str() so it can be given as a number.
        attrs = {
            "url": self.url,
            "length": str(self.length),
            "type": self.type,
        }
        _element(handler, "enclosure", None, attrs)
213
class Source:
    """An item's <source> element: where the item originally came from."""
    def __init__(self, name, url):
        self.name, self.url = name, url

    def publish(self, handler):
        """Emit <source url="...">name</source>."""
        label = self.name
        attrs = {"url": self.url}
        _element(handler, "source", label, attrs)
221
class SkipHours:
    """The channel's <skipHours> element.

    Constructed from a list of hours given as integers.
    """
    # Attributes written on the <skipHours> start tag.
    element_attrs = {}

    def __init__(self, hours):
        self.hours = hours

    def publish(self, handler):
        """Emit <skipHours> with one <hour> child per entry.

        Nothing is written when the list is empty.
        """
        if not self.hours:
            return
        handler.startElement("skipHours", self.element_attrs)
        for hour in self.hours:
            _element(handler, "hour", str(hour))
        handler.endElement("skipHours")
236
class SkipDays:
    """The channel's <skipDays> element.

    Constructed from a list of days given as strings.
    """
    # Attributes written on the <skipDays> start tag.
    element_attrs = {}

    def __init__(self, days):
        self.days = days

    def publish(self, handler):
        """Emit <skipDays> with one <day> child per entry.

        Nothing is written when the list is empty.
        """
        if not self.days:
            return
        handler.startElement("skipDays", self.element_attrs)
        for day in self.days:
            _element(handler, "day", day)
        handler.endElement("skipDays")
251
class RSS2(WriteXmlMixin):
    """The main RSS class.

    Stores the channel attributes, with the "category" elements under
    ".categories" and the RSS items under ".items".

    'title', 'link' and 'description' are always written.  Every other
    field is optional and is left out of the output when it is None.
    """

    # Attributes of the outer <rss> element and of <channel>.
    rss_attrs = {"version": "2.0"}
    element_attrs = {}
    def __init__(self,
                 title,
                 link,
                 description,

                 language = None,
                 copyright = None,
                 managingEditor = None,
                 webMaster = None,
                 pubDate = None,  # a datetime, *in* *GMT*
                 lastBuildDate = None, # a datetime

                 categories = None, # list of strings or Category
                 generator = _generator_name,
                 docs = "http://blogs.law.harvard.edu/tech/rss",
                 cloud = None,    # a Cloud
                 ttl = None,      # integer number of minutes

                 image = None,     # an Image
                 rating = None,    # a string; I don't know how it's used
                 textInput = None, # a TextInput
                 skipHours = None, # a SkipHours with a list of integers
                 skipDays = None,  # a SkipDays with a list of strings

                 items = None,     # list of RSSItems
                 ):
        self.title = title
        self.link = link
        self.description = description
        self.language = language
        self.copyright = copyright
        self.managingEditor = managingEditor

        self.webMaster = webMaster
        self.pubDate = pubDate
        self.lastBuildDate = lastBuildDate

        # Each instance gets its own list when none is supplied.
        if categories is None:
            categories = []
        self.categories = categories
        self.generator = generator
        self.docs = docs
        self.cloud = cloud
        self.ttl = ttl
        self.image = image
        self.rating = rating
        self.textInput = textInput
        self.skipHours = skipHours
        self.skipDays = skipDays

        if items is None:
            items = []
        self.items = items

    def publish(self, handler):
        """Emit the whole <rss><channel>...</channel></rss> tree.

        'handler' is the SAX content handler receiving the events.
        """
        handler.startElement("rss", self.rss_attrs)
        handler.startElement("channel", self.element_attrs)
        _element(handler, "title", self.title)
        _element(handler, "link", self.link)
        _element(handler, "description", self.description)

        self.publish_extensions(handler)

        _opt_element(handler, "language", self.language)
        _opt_element(handler, "copyright", self.copyright)
        _opt_element(handler, "managingEditor", self.managingEditor)
        _opt_element(handler, "webMaster", self.webMaster)

        # datetimes are wrapped so they are rendered as formatted
        # dates; any other value is passed through unchanged.
        pubDate = self.pubDate
        if isinstance(pubDate, datetime.datetime):
            pubDate = DateElement("pubDate", pubDate)
        _opt_element(handler, "pubDate", pubDate)

        lastBuildDate = self.lastBuildDate
        if isinstance(lastBuildDate, datetime.datetime):
            lastBuildDate = DateElement("lastBuildDate", lastBuildDate)
        _opt_element(handler, "lastBuildDate", lastBuildDate)

        # _element writes a string as a bare <category> element and
        # asks anything else (e.g. a Category) to publish itself.
        for category in self.categories:
            _element(handler, "category", category)

        _opt_element(handler, "generator", self.generator)
        _opt_element(handler, "docs", self.docs)

        if self.cloud is not None:
            self.cloud.publish(handler)

        ttl = self.ttl
        if isinstance(self.ttl, int):
            ttl = IntElement("ttl", ttl)
        # The tag must be "ttl" here as well: it is the name used when
        # the value is given as a string rather than an integer.
        _opt_element(handler, "ttl", ttl)

        if self.image is not None:
            self.image.publish(handler)

        _opt_element(handler, "rating", self.rating)
        if self.textInput is not None:
            self.textInput.publish(handler)
        if self.skipHours is not None:
            self.skipHours.publish(handler)
        if self.skipDays is not None:
            self.skipDays.publish(handler)

        for item in self.items:
            item.publish(handler)

        handler.endElement("channel")
        handler.endElement("rss")

    def publish_extensions(self, handler):
        """Hook for subclasses; does nothing by default."""
        # Derived classes can hook into this to insert
        # output after the three required fields.
        pass
376
377
378
class RSSItem(WriteXmlMixin):
    """Publish an RSS Item

    Every field is optional, but at least one of 'title' and
    'description' must be given.  Fields left as None are omitted
    from the output.
    """
    # Attributes written on the <item> start tag.
    element_attrs = {}
    def __init__(self,
                 title = None,  # string
                 link = None,   # url as string
                 description = None, # string
                 author = None,      # email address as string
                 categories = None,  # list of string or Category
                 comments = None,  # url as string
                 enclosure = None, # an Enclosure
                 guid = None,    # a unique string
                 pubDate = None, # a datetime
                 source = None,  # a Source
                 ):
        """Store the item's fields.

        Raises TypeError when both 'title' and 'description' are None.
        """

        if title is None and description is None:
            raise TypeError(
                "must define at least one of 'title' or 'description'")
        self.title = title
        self.link = link
        self.description = description
        self.author = author
        # Each instance gets its own list when none is supplied.
        if categories is None:
            categories = []
        self.categories = categories
        self.comments = comments
        self.enclosure = enclosure
        self.guid = guid
        self.pubDate = pubDate
        self.source = source
        # It sure does get tedious typing these names three times...

    def publish(self, handler):
        """Emit the <item> element and its children on the SAX handler."""
        handler.startElement("item", self.element_attrs)
        _opt_element(handler, "title", self.title)
        _opt_element(handler, "link", self.link)
        self.publish_extensions(handler)
        _opt_element(handler, "description", self.description)
        _opt_element(handler, "author", self.author)

        # _element writes a string as a bare <category> element and
        # asks anything else (e.g. a Category) to publish itself.
        for category in self.categories:
            _element(handler, "category", category)

        _opt_element(handler, "comments", self.comments)
        if self.enclosure is not None:
            self.enclosure.publish(handler)
        _opt_element(handler, "guid", self.guid)

        # A datetime is wrapped so it is rendered as a formatted date;
        # any other value is passed through unchanged.
        pubDate = self.pubDate
        if isinstance(pubDate, datetime.datetime):
            pubDate = DateElement("pubDate", pubDate)
        _opt_element(handler, "pubDate", pubDate)

        if self.source is not None:
            self.source.publish(handler)

        handler.endElement("item")

    def publish_extensions(self, handler):
        """Hook for subclasses; does nothing by default."""
        # Derived classes can hook into this to insert
        # output after the title and link elements
        pass
444