#!/usr/bin/env python
# RSS->Zephyr parser
# Waseem Daher
# First release: Mar 5, 2005
# Slight revision: Apr 6, 2005
# (try to write each line of
# sent zephyrs by hand, that way
# when we get a bad one, we don't
# clear the list)
# Feb 17, 2008:
# - use .guid when it exists
# - use w3m for output
# June 21, 2008:
# - Rewrite to use a sql backend
# - write-ahead logging so we don't re-send buggy feeds
# - etag support
# - set a reasonable user-agent
# Eventual todo:
# - last-modified-time in addition to etags?
# - web interface (??)
# Questions, comments, concerns?
# Email sipb-zephyrss@mit.edu
# Run-time switches; 0 means off, any non-zero value means on.
# Poll every feed on every run, ignoring its update interval.
FORCE_SCHEDULE = 0
# Make debug() print its messages (it is silent otherwise).
DEBUG = 0
# Go through all the motions but skip the actual zwrite call.
DONT_ZEPHYR = 0
# Send every zephyr to class 'wdaher-test' instead of its configured class.
SEND_TO_WDAHERTEST = 0
import os, re, time, sys
# The `secrets` imported below supplies PASSWD for the database login;
# presumably it is a private module living in this directory.
# NOTE(review): the directory is appended (lowest priority), so on an
# interpreter that ships its own `secrets` module that one would be
# imported instead -- confirm before porting.
sys.path.append('/mit/wdaher/web_scripts/puzzles/')
import secrets
import MySQLdb
import feedparser
# Identify ourselves to the servers we poll.
feedparser.USER_AGENT = "rssZephyr/2 +http://www.mit.edu/~wdaher/rssZephyr.html"
def debug(str):
    """Print `str` to stdout, but only when the DEBUG flag is set.

    Printing is best effort: a message that cannot be written (for
    example text the output encoding cannot represent) is dropped
    rather than allowed to abort the run.
    """
    if not DEBUG:
        return
    try:
        print(str)
    except Exception:
        # Deliberately ignored -- diagnostics must never break polling.
        # Unlike a bare `except:`, this still lets KeyboardInterrupt and
        # SystemExit through.
        pass
# Open the database that stores per-feed etags and already-sent items.
con = MySQLdb.connect(host='sql.mit.edu',
                      user='wdaher',
                      passwd=secrets.PASSWD,
                      db='wdaher+rss')
cur = con.cursor()

# Read the list of feed descriptors, one per line.
feeds = "/mit/wdaher/Public/rssZephyr/feedsToPoll/feeds"
# Named feedFile (not `file`) so the builtin is not shadowed; try/finally
# guarantees the handle is closed even if reading fails.
feedFile = open(feeds, 'r')
try:
    feedList = feedFile.readlines()
finally:
    feedFile.close()
def sendZephyr(msg, classname, instance, sig):
    """Render `msg` to plain text with w3m and send it with zwrite.

    msg       -- message body; it is fed to w3m as text/html
    classname -- zephyr class to write to (replaced by 'wdaher-test'
                 when SEND_TO_WDAHERTEST is set)
    instance  -- zephyr instance to write to
    sig       -- zephyr signature, passed to `zwrite -s`

    Nothing is sent when DONT_ZEPHYR is set.  Sending is best effort:
    any failure is reported through debug() and never raised.
    """
    # Local import so this function does not depend on the file's
    # top-level import list.
    import subprocess

    if SEND_TO_WDAHERTEST:
        classname = 'wdaher-test'
    try:
        # The text is parsed as HTML by w3m, so characters that do not
        # fit in Latin-1 can be sent as numeric character references
        # instead of making the whole message fail to encode.
        text = msg.encode('iso-8859-1', 'xmlcharrefreplace')
        debug(text)
        if DONT_ZEPHYR:
            return

        # Equivalent of `w3m ... | zwrite ...`, built from argument
        # lists so class/instance/signature are never seen by a shell.
        w3m = subprocess.Popen(
            ['w3m', '-cols', '70', '-dump', '-T', 'text/html'],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        zwrite = None
        try:
            zwrite = subprocess.Popen(
                ['zwrite', '-q', '-d', '-c', classname, '-i', instance,
                 '-O', 'auto', '-s', sig],
                stdin=w3m.stdout)
            # zwrite now owns the read end; drop our copy so w3m sees a
            # broken pipe if zwrite exits early.
            w3m.stdout.close()
            w3m.stdin.write(text)
        finally:
            # Always release both pipe ends and reap the children, even
            # if zwrite failed to start or the write failed.
            for pipe in (w3m.stdin, w3m.stdout):
                try:
                    pipe.close()
                except (IOError, OSError):
                    # Flushing into an already-dead child; nothing
                    # more can be done with this pipe.
                    pass
            w3m.wait()
            if zwrite is not None:
                zwrite.wait()
    except Exception:
        debug("Error zephyring out some text :(")
## Turn the feed file into a list of
## (url, (classname, instancename), interval_in_minutes) tuples.
## Expected line format:  url,classname,instancename,interval
toHandle = []
for feedLine in feedList:
    # Kill switch: any line starting with STOP aborts the whole run.
    # startswith() also catches a bare "STOP" on the last line of a
    # file that has no trailing newline.
    if feedLine.startswith('STOP'):
        print('Emergency stop invoked, quitting')
        sys.exit(1)

    # Comments and blank (or whitespace-only) lines are ignored.
    if feedLine.startswith('#') or not feedLine.strip():
        continue

    tokens = feedLine.rstrip().split(',')
    try:
        # ValueError covers both a wrong number of fields and an
        # interval that is not an integer.
        url, classname, instancename, updateInt = tokens
        interval = int(updateInt)
    except ValueError:
        interval = 0

    # The interval is later used as a modulus, so it must be a positive
    # number of minutes; 0 would raise ZeroDivisionError there.
    if interval <= 0:
        print('Invalid feed descriptor: %s' % feedLine)
        print("Make sure there are no spaces or quotation marks?")
        continue

    toHandle.append((url, (classname, instancename), interval))
## Bring the database in line with the feed file: every listed feed
## gets a row in feeds_and_tags (with no etag yet) so that its etag
## can be recorded later on.
for url, _target, _interval in toHandle:
    cur.execute('select count(*) from feeds_and_tags where feed_url=%s', (url,))
    rows = cur.fetchall()
    alreadyKnown = rows[0][0] != 0
    if alreadyKnown:
        continue
    # First time we see this feed: create its row and make it durable.
    cur.execute('insert into feeds_and_tags (feed_url, etag) values(%s,%s)', (url, None))
    con.commit()
## Actually do your thing: poll every feed that is due and zephyr out
## each entry that has not been sent before.

# Take the schedule decision from one timestamp for the whole run.
# Re-reading the clock per feed lets a slow feed push later feeds into
# the next minute, where they would be wrongly treated as not due.
startMinute = int(time.time() / 60)

for url, target, interval in toHandle:
    # A zero interval cannot be used as a modulus (ZeroDivisionError);
    # such a feed is simply never due unless the schedule is forced.
    isDue = interval != 0 and startMinute % interval == 0
    if not isDue and not FORCE_SCHEDULE:
        # Not your time
        debug("Not time for %s, skipping" % url)
        continue

    # Get the previous e-tag this guy had, if any
    cur.execute("select etag from feeds_and_tags where feed_url=%s", (url, ))
    taglist = cur.fetchall()
    etag = None
    if len(taglist):
        etag = taglist[0][0]
    debug("Last tag was: %s for %s" % (etag, url))

    # Parse, handing the stored etag to feedparser for the request.
    f = feedparser.parse(url, etag=etag)
    newetag = None
    if hasattr(f, 'etag'):
        newetag = f.etag
    # Insert new etag into the database (NULL when none was returned).
    cur.execute("update feeds_and_tags set etag=%s where feed_url=%s", (newetag, url))
    con.commit()

    if not hasattr(f, 'status'):
        # The result carries no HTTP status: nothing to go on.
        continue
    if f.status == 304:
        # Not modified since the stored etag.
        continue
    if f.status != 200:
        sendZephyr("HELP, this isn't working: %s returned a code of %i" % (url, f.status),
                   'wdaher-test', 'rssZephyr', 'SOMETHING IS BROKEN')
        # NOTE(review): there is no `continue` here, so whatever entries
        # were parsed from a non-200 response are still processed below;
        # presumably intentional (e.g. redirects) -- confirm.

    for entry in f.entries:
        # Identify the entry by its guid, falling back to its link.
        if hasattr(entry, 'guid'):
            g = entry.guid
        elif hasattr(entry, 'link'):
            g = entry.link
        else:
            # Don't know how to deal with this
            continue

        # Have we seen the guid?
        cur.execute("select * from seen_items where guid=%s limit 1", (g, ))
        if len(cur.fetchall()) != 0:
            continue

        # Mark having seen the guid BEFORE sending (write-ahead), so an
        # entry that breaks the sender is not retried on every run.
        cur.execute("insert into seen_items values(%s)", (g,))
        con.commit()

        debug("Zephyring...")
        # Zephyr: title, link and description, one per line; any field
        # the entry lacks is sent as an empty line.
        title = getattr(entry, 'title', "")
        link = getattr(entry, 'link', "").rstrip()
        desc = getattr(entry, 'description', "")
        output = "%s\n%s\n%s\n" % (title, link, desc)
        sendZephyr(output, target[0], target[1],
                   'http://www.mit.edu/~wdaher/rssZephyr.html')
        debug("Done zephyring.")

con.close()