# 6.00 Problem Set 6
# RSS Feed Filter

import feedparser
import string
import time
from project_util import translate_html
from news_gui import Popup

#-----------------------------------------------------------------------
#
# Problem Set 6

#======================
# Code for retrieving and parsing
# Google News feed
# Do not change this code
#======================

def process(url):
    """
    Fetches news items from the rss url and parses them.
    Returns a list of NewsStory-s.
    """
    feed = feedparser.parse(url)
    entries = feed.entries
    ret = []
    for entry in entries:
        guid = entry.guid
        title = translate_html(entry.title)
        link = entry.link
        summary = translate_html(entry.summary)
        try:
            subject = translate_html(entry.tags[0]['term'])
        except AttributeError:
            # Fall back to an empty subject when the tag lookup raises
            # AttributeError.
            subject = ""
        newsStory = NewsStory(guid, title, subject, summary, link)
        ret.append(newsStory)
    return ret

#======================
# Part 1
# Data structure design
#======================

# Question 1

class NewsStory(object):
    """A single news item: guid, title, subject, summary and link."""

    def __init__(self, guid, title, subject, summary, link):
        """Store the five fields of the story unchanged."""
        self.guid = guid
        self.title = title
        self.subject = subject
        self.summary = summary
        self.link = link

    def getGuid(self):
        """Return the story's guid."""
        return self.guid

    def getTitle(self):
        """Return the story's title."""
        return self.title

    def getSubject(self):
        """Return the story's subject."""
        return self.subject

    def getSummary(self):
        """Return the story's summary."""
        return self.summary

    def getLink(self):
        """Return the story's link."""
        return self.link

#======================
# Part 2
# Triggers
#======================

class Trigger(object):
    """Base class for triggers; subclasses override evaluate()."""

    def evaluate(self, story):
        """
        Returns True if an alert should be generated for the given
        story. The base implementation fires for every story.
        """
        return True

# Whole Word Triggers
# Questions 2-5

class WordTrigger(Trigger):
    """Common logic for triggers that look for one whole word in a text."""

    def __init__(self, word):
        """Remember the word to search for."""
        self.word = word

    def isWordIn(self, text):
        """
        Returns True if self.word occurs in text as a whole word,
        ignoring case. Every character in string.punctuation is treated
        as a word separator.
        """
        # Turn punctuation into spaces so that e.g. "soft!" or "soft's"
        # yields the standalone word "soft" after splitting.
        for punc in string.punctuation:
            text = text.replace(punc, " ")
        return self.word.lower() in text.lower().split()


class TitleTrigger(WordTrigger):
    """Fires when the word appears in the story's title."""

    def evaluate(self, story):
        return self.isWordIn(story.getTitle())


class SubjectTrigger(WordTrigger):
    """Fires when the word appears in the story's subject."""

    def evaluate(self, story):
        return self.isWordIn(story.getSubject())


class SummaryTrigger(WordTrigger):
    """Fires when the word appears in the story's summary."""

    def evaluate(self, story):
        return self.isWordIn(story.getSummary())

# Composite Triggers
# Questions 6-8

class NotTrigger(Trigger):
    """Fires exactly when the wrapped trigger does not."""

    def __init__(self, t):
        self.t = t

    def evaluate(self, story):
        return not self.t.evaluate(story)


class AndTrigger(Trigger):
    """Fires when both wrapped triggers fire (t2 is skipped if t1 fails)."""

    def __init__(self, t1, t2):
        self.t1 = t1
        self.t2 = t2

    def evaluate(self, story):
        return self.t1.evaluate(story) and self.t2.evaluate(story)


class OrTrigger(Trigger):
    """Fires when either wrapped trigger fires (t2 is skipped if t1 fires)."""

    def __init__(self, t1, t2):
        self.t1 = t1
        self.t2 = t2

    def evaluate(self, story):
        return self.t1.evaluate(story) or self.t2.evaluate(story)

# Phrase Trigger
# Question 9

class PhraseTrigger(Trigger):
    """
    Fires when the exact phrase (case-sensitive substring match) occurs
    in the story's title, subject or summary.
    """

    def __init__(self, phrase):
        self.phrase = phrase

    def evaluate(self, story):
        return self.phrase in story.getTitle() \
            or self.phrase in story.getSubject() \
            or self.phrase in story.getSummary()

#======================
# Part 3
# Filtering
#======================

def filter_stories(stories, triggerlist):
    """
    Takes in a list of NewsStory-s.
    Returns only those stories for whom
    a trigger in triggerlist fires.

    Each story appears at most once in the result, in its original
    order; triggers after the first one that fires are not evaluated.
    """
    return [story for story in stories
            if any(trigger.evaluate(story) for trigger in triggerlist)]

#======================
# Part 4
# User-Specified Triggers
#======================

def readTriggerConfig(filename):
    """
    Returns a list of trigger objects
    that correspond to the rules set
    in the file filename

    Blank lines and lines starting with '#' are skipped. Every other
    line is either
        ADD name [name ...]      -- append the named triggers to the result
        name TYPE arg [arg ...]  -- define a trigger called name
    where TYPE is TITLE, SUBJECT, SUMMARY, NOT, AND, OR or PHRASE.
    A definition with any other TYPE is ignored. Referring to a name
    that has not been defined yet raises KeyError.
    """
    # Read the file and eliminate blank lines and comments. The with
    # block guarantees the file is closed even if reading fails.
    with open(filename, "r") as triggerfile:
        stripped = [line.rstrip() for line in triggerfile]
    lines = [line for line in stripped if line and line[0] != '#']

    # 'lines' has the list of lines to parse.
    # Build a set of triggers from it and return the appropriate ones.
    triggers = {}   # trigger name -> trigger object
    result = []
    for line in lines:
        words = line.split()
        if words[0] == "ADD":
            for trigger_name in words[1:]:
                result.append(triggers[trigger_name])
            continue

        trigger_name = words[0]
        trigger_type = words[1]
        if trigger_type == "TITLE":
            triggers[trigger_name] = TitleTrigger(words[2])
        elif trigger_type == "SUBJECT":
            triggers[trigger_name] = SubjectTrigger(words[2])
        elif trigger_type == "SUMMARY":
            triggers[trigger_name] = SummaryTrigger(words[2])
        elif trigger_type == "NOT":
            triggers[trigger_name] = NotTrigger(triggers[words[2]])
        elif trigger_type == "AND":
            triggers[trigger_name] = AndTrigger(triggers[words[2]],
                                                triggers[words[3]])
        elif trigger_type == "OR":
            triggers[trigger_name] = OrTrigger(triggers[words[2]],
                                               triggers[words[3]])
        elif trigger_type == "PHRASE":
            # A phrase may contain spaces: everything after the type
            # keyword is the phrase.
            triggers[trigger_name] = PhraseTrigger(' '.join(words[2:]))
    return result

SLEEPTIME = 60 #seconds -- how often we poll

if __name__ == '__main__':
    # A sample trigger list - you'll replace
    # this with something more configurable in Question 11
    #t1 = SubjectTrigger("world")
    #t2 = SummaryTrigger("H1N1")
    #t3 = PhraseTrigger("health care")
    #t4 = OrTrigger(t2, t3)
    #triggerlist = [t1, t4]

    # After implementing readTriggerConfig, uncomment this line
    triggerlist = readTriggerConfig("triggers.txt")

    # Guids of stories already shown; a set keeps the membership test
    # cheap as the program keeps polling.
    guidShown = set()

    p = Popup()

    while True:
        print("Polling...")

        # Get stories from Google's Top Stories RSS news feed
        stories = process("http://news.google.com/?output=rss")
        # Get stories from Yahoo's Top Stories RSS news feed
        stories.extend(process("http://rss.news.yahoo.com/rss/topstories"))

        # Only select stories we're interested in
        stories = filter_stories(stories, triggerlist)

        # Don't print a story if we have already printed it before
        newstories = [story for story in stories
                      if story.getGuid() not in guidShown]

        for story in newstories:
            guidShown.add(story.getGuid())
            p.newWindow(story)

        print("Sleeping...")
        time.sleep(SLEEPTIME)