diff --git a/feeds.py b/feeds.py index ef87bb2..2b340e8 100644 --- a/feeds.py +++ b/feeds.py @@ -29,6 +29,7 @@ def __init__(self, feed, id): self.description = '' self.link = '' self.author = '' + self.categories = [] self.read = False @property def time_since(self): @@ -136,6 +137,7 @@ def poll(self, timestamp, filters): item.description = util.format(util.get(entry, 'description', ''), settings.POPUP_BODY_LENGTH) item.link = util.get(entry, 'link', '') item.author = util.format(util.get(entry, 'author', '')) # TODO: max length + item.categories = util.get(entry, 'tags', []) if all(filter.filter(item) for filter in filters): result.append(item) self.clean_cache(settings.FEED_CACHE_SIZE) diff --git a/filters.py b/filters.py index 40da2d4..a13abdc 100644 --- a/filters.py +++ b/filters.py @@ -8,6 +8,7 @@ LINK = 2 AUTHOR = 4 CONTENT = 8 +CATEGORY = 16 TYPES = { None: INCLUDE, @@ -21,6 +22,7 @@ 'link:': LINK, 'author:': AUTHOR, 'content:': CONTENT, + 'category:': CATEGORY, } TYPE_STR = { @@ -34,6 +36,7 @@ LINK: 'link', AUTHOR: 'author', CONTENT: 'content', + CATEGORY: 'category', } class Rule(object): @@ -51,6 +54,12 @@ def evaluate(self, item, ignore_case=True, whole_word=True): strings.append(item.author) if self.qualifier & CONTENT: strings.append(item.description) + if self.qualifier & CATEGORY: + if item.categories: + for category_item in item.categories: + if category_item: + if 'term' in category_item: + strings.append(category_item['term']) text = '\n'.join(strings) word = self.word if ignore_case: @@ -113,6 +122,7 @@ def __str__(self): 'LINK', 'AUTHOR', 'CONTENT', + 'CATEGORY', 'WORD', ] + reserved.values() @@ -136,7 +146,11 @@ def t_AUTHOR(t): def t_CONTENT(t): r'content:' return t - + +def t_CATEGORY(t): + r'category:' + return t + def t_WORD(t): r'(\'[^\']+\') | (\"[^\"]+\") | ([^ \n\t\r+\-()\'"]+)' t.type = reserved.get(t.value, 'WORD') @@ -197,6 +211,7 @@ def p_qualifier(t): | LINK | AUTHOR | CONTENT + | CATEGORY | empty''' t[0] = t[1] diff --git 
a/util.py b/util.py index 61ff2a5..6621179 100644 --- a/util.py +++ b/util.py @@ -8,6 +8,8 @@ import urlparse import threading import feedparser +from HTMLParser import HTMLParser +import operator from htmlentitydefs import name2codepoint from settings import settings @@ -75,13 +77,75 @@ def abspath(path): path = os.path.abspath(path) path = 'file:///%s' % path.replace('\\', '/') return path + +def unescHTMLSpcChr(str): + return HTMLParser().unescape(str) + +def keyExistsAndNotNull(fpElement, *keys): + if len(keys) == 1: + if keys[0] in fpElement: + if fpElement[keys[0]]: + return True + return False + if len(keys) == 2: + if keys[0] in fpElement: + if fpElement[keys[0]]: + if keys[1] in fpElement[keys[0]]: + if fpElement[keys[0]][keys[1]]: + return True + return False + else: + raise Exception("Unknown signature of doIfExists() call!") + +def unescapeRSSObject(fpDict): + ''' + Walk the known human-readable fields of an entry dict + returned by feedparser (author, comments, summary, tags, title, ...) + and, for each one that is present and non-empty, replace the stored + value in place with its HTML-unescaped form; returns the same dict. 
+ ''' + unescapeToVar = lambda item, key: operator.setitem(item, key, unescHTMLSpcChr(item[key])) + + if keyExistsAndNotNull(fpDict, 'author'): + unescapeToVar(fpDict, 'author') + if keyExistsAndNotNull(fpDict, 'author_detail', 'name'): + unescapeToVar(fpDict['author_detail'], 'name') + if keyExistsAndNotNull(fpDict, 'comments'): + unescapeToVar(fpDict, 'comments') + if keyExistsAndNotNull(fpDict, 'content'): + unescapeToVar(fpDict, 'content') + if keyExistsAndNotNull(fpDict, 'contributors'): + unescapeToVar(fpDict, 'contributors') + if keyExistsAndNotNull(fpDict, 'summary'): + unescapeToVar(fpDict, 'summary') + if keyExistsAndNotNull(fpDict, 'summary_detail', 'value'): + unescapeToVar(fpDict['summary_detail'], 'value') + if keyExistsAndNotNull(fpDict, 'tags'): + for index, tag in enumerate(fpDict['tags']): + if keyExistsAndNotNull(tag, 'term'): + unescapeToVar(fpDict['tags'][index], 'term') + if keyExistsAndNotNull(tag, 'label'): + unescapeToVar(fpDict['tags'][index], 'label') + if keyExistsAndNotNull(fpDict, 'title'): + unescapeToVar(fpDict, 'title') + if keyExistsAndNotNull(fpDict, 'title_detail', 'value'): + unescapeToVar(fpDict['title_detail'], 'value') + return fpDict + +def decodeRSS(rss): + for index, record in enumerate(rss): + rss[index] = unescapeRSSObject(record) + return rss def parse(url, username=None, password=None, etag=None, modified=None): agent = settings.USER_AGENT handlers = [get_proxy()] if username and password: url = insert_credentials(url, username, password) - return feedparser.parse(url, etag=etag, modified=modified, agent=agent, handlers=handlers) + response = feedparser.parse(url, etag=etag, modified=modified, agent=agent, handlers=handlers) + if "entries" in response: + response["entries"] = decodeRSS(response["entries"]) + return response def is_valid_feed(data): entries = get(data, 'entries', [])