fogleman · yma-het · Jul 11, 2015 · Jul 12, 2015 · Jul 16, 2015
diff --git a/feeds.py b/feeds.py
@@ -29,6 +29,7 @@ def __init__(self, feed, id):
         self.description = ''
         self.link = ''
         self.author = ''
+        self.categories = []
         self.read = False
     @property
     def time_since(self):
@@ -136,6 +137,7 @@ def poll(self, timestamp, filters):
             item.description = util.format(util.get(entry, 'description', ''), settings.POPUP_BODY_LENGTH)
             item.link = util.get(entry, 'link', '')
             item.author = util.format(util.get(entry, 'author', '')) # TODO: max length
+            item.categories = util.get(entry, 'tags', [])
             if all(filter.filter(item) for filter in filters):
                 result.append(item)
         self.clean_cache(settings.FEED_CACHE_SIZE)

diff --git a/filters.py b/filters.py
@@ -8,6 +8,7 @@
 LINK = 2
 AUTHOR = 4
 CONTENT = 8
+CATEGORY = 16
 
 TYPES = {
     None: INCLUDE,
@@ -21,6 +22,7 @@
     'link:': LINK,
     'author:': AUTHOR,
     'content:': CONTENT,
+    'category:': CATEGORY,
 }
 
 TYPE_STR = {
@@ -34,6 +36,7 @@
     LINK: 'link',
     AUTHOR: 'author',
     CONTENT: 'content',
+    CATEGORY: 'category',
 }
 
 class Rule(object):
@@ -51,6 +54,12 @@ def evaluate(self, item, ignore_case=True, whole_word=True):
             strings.append(item.author)
         if self.qualifier & CONTENT:
             strings.append(item.description)
+        if self.qualifier & CATEGORY:
+            if item.categories:
+                for category_item in item.categories:
+                    if category_item:
+                        if 'term' in category_item:
+                            strings.append(category_item['term'])
         text = '\n'.join(strings)
         word = self.word
         if ignore_case:
@@ -113,6 +122,7 @@ def __str__(self):
     'LINK',
     'AUTHOR',
     'CONTENT',
+    'CATEGORY',
     'WORD',
 ] + reserved.values()
 
@@ -136,7 +146,11 @@ def t_AUTHOR(t):
 def t_CONTENT(t):
     r'content:'
     return t
-
+
+def t_CATEGORY(t):
+    r'category:'
+    # The raw string above must stay the first statement: it mirrors the
+    # sibling t_* rules, where the docstring presumably serves as the
+    # token's regular expression (confirm against the lexer setup).
+    return t
+
 def t_WORD(t):
     r'(\'[^\']+\') | (\"[^\"]+\") | ([^ \n\t\r+\-()\'"]+)'
     t.type = reserved.get(t.value, 'WORD')
@@ -197,6 +211,7 @@ def p_qualifier(t):
                  | LINK 
                  | AUTHOR 
                  | CONTENT
+                 | CATEGORY
                  | empty'''
     t[0] = t[1]
 

diff --git a/util.py b/util.py
@@ -8,6 +8,8 @@
 import urlparse
 import threading
 import feedparser
+from HTMLParser import HTMLParser
+import operator
 from htmlentitydefs import name2codepoint
 from settings import settings
 
@@ -75,13 +77,75 @@ def abspath(path):
     path = os.path.abspath(path)
     path = 'file:///%s' % path.replace('\\', '/')
     return path
+
+def unescHTMLSpcChr(str):
+    '''
+    Return str with its HTML character references decoded,
+    using a fresh HTMLParser instance for the conversion.
+    '''
+    parser = HTMLParser()
+    decoded = parser.unescape(str)
+    return decoded
+
+def keyExistsAndNotNull(fpElement, *keys):
+    '''
+    Tell whether a chain of keys leads through non-empty values.
+
+    Starting at fpElement, each key in keys is looked up in the
+    value found by the previous key. Returns True only if every
+    key is present and every value along the way is truthy,
+    otherwise False. Any number of keys (one or more) is accepted.
+
+    Raises TypeError when called without any key.
+    '''
+    if not keys:
+        raise TypeError("keyExistsAndNotNull() requires at least one key")
+    node = fpElement
+    for key in keys:
+        # A missing key and an empty value are treated the same way.
+        if key not in node:
+            return False
+        node = node[key]
+        if not node:
+            return False
+    return True
+
+def unescapeRSSObject(fpDict):
+    '''
+    Decode HTML character references (for example "&amp;" becomes
+    "&") in the human readable strings of one dict returned by
+    feedparser. Only keys that are present with a non-empty value
+    are touched. The dict is changed in place and also returned.
+    '''
+    def unescapeToVar(item, key):
+        item[key] = unescHTMLSpcChr(item[key])
+
+    # Strings stored directly under a top-level key.
+    for key in ('author', 'comments', 'summary', 'title'):
+        if keyExistsAndNotNull(fpDict, key):
+            unescapeToVar(fpDict, key)
+
+    # Strings stored one level down, inside a detail dict.
+    for outer, inner in (('author_detail', 'name'),
+                         ('summary_detail', 'value'),
+                         ('title_detail', 'value')):
+        if keyExistsAndNotNull(fpDict, outer, inner):
+            unescapeToVar(fpDict[outer], inner)
+
+    # feedparser documents 'content' and 'contributors' as lists of
+    # dicts. The unescaper works on strings, so handing it the list
+    # does not decode the text inside; decode each element's text
+    # field instead.
+    for listKey, field in (('content', 'value'), ('contributors', 'name')):
+        if not keyExistsAndNotNull(fpDict, listKey):
+            continue
+        if not isinstance(fpDict[listKey], list):
+            # Not the documented shape: keep treating it as one string.
+            unescapeToVar(fpDict, listKey)
+            continue
+        for element in fpDict[listKey]:
+            if keyExistsAndNotNull(element, field):
+                unescapeToVar(element, field)
+
+    # Each tag may carry a machine 'term' and a display 'label'.
+    if keyExistsAndNotNull(fpDict, 'tags'):
+        for tag in fpDict['tags']:
+            for field in ('term', 'label'):
+                if keyExistsAndNotNull(tag, field):
+                    unescapeToVar(tag, field)
+    return fpDict
+
+def decodeRSS(rss):
+    '''
+    Pass every record of rss through unescapeRSSObject, store each
+    result back at the same position, and return rss itself.
+    '''
+    for position, entry in enumerate(rss):
+        cleaned = unescapeRSSObject(entry)
+        rss[position] = cleaned
+    return rss
 
 def parse(url, username=None, password=None, etag=None, modified=None):
     agent = settings.USER_AGENT
     handlers = [get_proxy()]
     if username and password:
         url = insert_credentials(url, username, password)
-    return feedparser.parse(url, etag=etag, modified=modified, agent=agent, handlers=handlers)
+    response = feedparser.parse(url, etag=etag, modified=modified, agent=agent, handlers=handlers)
+    # Run the parsed entries through decodeRSS (HTML unescaping of their
+    # text fields) before handing the result back to the caller.
+    if "entries" in response:
+        response["entries"] = decodeRSS(response["entries"])
+    return response
 
 def is_valid_feed(data):
     entries = get(data, 'entries', [])