Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions feeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def __init__(self, feed, id):
self.description = ''
self.link = ''
self.author = ''
self.categories = []
self.read = False
@property
def time_since(self):
Expand Down Expand Up @@ -136,6 +137,7 @@ def poll(self, timestamp, filters):
item.description = util.format(util.get(entry, 'description', ''), settings.POPUP_BODY_LENGTH)
item.link = util.get(entry, 'link', '')
item.author = util.format(util.get(entry, 'author', '')) # TODO: max length
item.categories = util.get(entry, 'tags', [])
if all(filter.filter(item) for filter in filters):
result.append(item)
self.clean_cache(settings.FEED_CACHE_SIZE)
Expand Down
17 changes: 16 additions & 1 deletion filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
LINK = 2
AUTHOR = 4
CONTENT = 8
CATEGORY = 16

TYPES = {
None: INCLUDE,
Expand All @@ -21,6 +22,7 @@
'link:': LINK,
'author:': AUTHOR,
'content:': CONTENT,
'category:': CATEGORY,
}

TYPE_STR = {
Expand All @@ -34,6 +36,7 @@
LINK: 'link',
AUTHOR: 'author',
CONTENT: 'content',
CATEGORY: 'category',
}

class Rule(object):
Expand All @@ -51,6 +54,12 @@ def evaluate(self, item, ignore_case=True, whole_word=True):
strings.append(item.author)
if self.qualifier & CONTENT:
strings.append(item.description)
if self.qualifier & CATEGORY:
if item.categories:
for category_item in item.categories:
if category_item:
if 'term' in category_item:
strings.append(category_item['term'])
text = '\n'.join(strings)
word = self.word
if ignore_case:
Expand Down Expand Up @@ -113,6 +122,7 @@ def __str__(self):
'LINK',
'AUTHOR',
'CONTENT',
'CATEGORY',
'WORD',
] + reserved.values()

Expand All @@ -136,7 +146,11 @@ def t_AUTHOR(t):
# Lexer rule for the 'content:' qualifier token. It returns the token object
# unchanged. NOTE(review): the raw string below looks like the rule's match
# pattern (presumably read from the docstring by a PLY-style lexer - confirm),
# so it should be treated as code, not documentation.
def t_CONTENT(t):
r'content:'
return t


# Lexer rule for the 'category:' qualifier token. It returns the token object
# unchanged. NOTE(review): the raw string below looks like the rule's match
# pattern (presumably read from the docstring by a PLY-style lexer - confirm),
# so it should be treated as code, not documentation.
def t_CATEGORY(t):
r'category:'
return t

def t_WORD(t):
r'(\'[^\']+\') | (\"[^\"]+\") | ([^ \n\t\r+\-()\'"]+)'
t.type = reserved.get(t.value, 'WORD')
Expand Down Expand Up @@ -197,6 +211,7 @@ def p_qualifier(t):
| LINK
| AUTHOR
| CONTENT
| CATEGORY
| empty'''
t[0] = t[1]

Expand Down
66 changes: 65 additions & 1 deletion util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import urlparse
import threading
import feedparser
from HTMLParser import HTMLParser
import operator
from htmlentitydefs import name2codepoint
from settings import settings

Expand Down Expand Up @@ -75,13 +77,75 @@ def abspath(path):
path = os.path.abspath(path)
path = 'file:///%s' % path.replace('\\', '/')
return path

def unescHTMLSpcChr(str):
    '''
    Return `str` with HTML character references replaced by the characters
    they stand for (e.g. ``&amp;`` becomes ``&``, ``&#39;`` becomes ``'``).

    str -- text to unescape. The parameter name shadows the builtin but is
           kept so existing keyword callers continue to work.
    '''
    # HTMLParser.unescape() was never a documented API and no longer exists in
    # recent Python releases, so prefer the supported html.unescape() and only
    # fall back to the legacy method where the html module cannot provide it.
    try:
        from html import unescape
    except ImportError:
        from HTMLParser import HTMLParser
        unescape = HTMLParser().unescape
    return unescape(str)

def keyExistsAndNotNull(fpElement, *keys):
    '''
    Tell whether a chain of nested keys exists and every value on it is truthy.

    fpElement -- outermost container (normally a dict-like object).
    keys      -- one or more keys; keys[0] is looked up in fpElement, keys[1]
                 in fpElement[keys[0]], and so on.

    Returns True only when each key is present in its container and each
    value reached along the way is truthy; otherwise returns False. A value
    on the path that cannot be indexed by the next key (for example a plain
    string) counts as "missing" rather than raising.

    Raises TypeError (an Exception subclass) when called without any key.
    '''
    if not keys:
        raise TypeError("keyExistsAndNotNull() requires at least one key")
    current = fpElement
    for key in keys:
        try:
            if key not in current:
                return False
            current = current[key]
        except TypeError:
            # `current` is not a container that can be indexed by `key`.
            return False
        if not current:
            return False
    return True

def unescapeRSSObject(fpDict):
    '''
    Unescape HTML character references in the human readable fields of one
    feedparser entry (e.g. ``&amp;`` becomes ``&``).

    Every field listed below is rewritten in place, but only when it is
    present and non-empty according to keyExistsAndNotNull():

      author, author_detail.name, comments, summary, summary_detail.value,
      title, title_detail.value,
      content[i].value, contributors[i].name, tags[i].term, tags[i].label

    fpDict -- dict-like entry as returned by feedparser; it is modified in
              place.

    Returns the same fpDict object.
    '''
    def unescape_field(container, *path):
        # Replace the value found at `path` with its unescaped form, but only
        # when every key on the path exists and holds a non-empty value.
        if not keyExistsAndNotNull(container, *path):
            return
        for key in path[:-1]:
            container = container[key]
        container[path[-1]] = unescHTMLSpcChr(container[path[-1]])

    def unescape_each(list_key, *item_keys):
        # feedparser documents these fields as lists of dicts, so the text
        # lives in the items; unescaping the list object itself does nothing.
        if not keyExistsAndNotNull(fpDict, list_key):
            return
        items = fpDict[list_key]
        if not isinstance(items, (list, tuple)):
            # Unexpected scalar value: unescape it directly.
            unescape_field(fpDict, list_key)
            return
        for item in items:
            for item_key in item_keys:
                unescape_field(item, item_key)

    unescape_field(fpDict, 'author')
    unescape_field(fpDict, 'author_detail', 'name')
    unescape_field(fpDict, 'comments')
    unescape_each('content', 'value')
    unescape_each('contributors', 'name')
    unescape_field(fpDict, 'summary')
    unescape_field(fpDict, 'summary_detail', 'value')
    unescape_each('tags', 'term', 'label')
    unescape_field(fpDict, 'title')
    unescape_field(fpDict, 'title_detail', 'value')
    return fpDict

def decodeRSS(rss):
    '''
    Run unescapeRSSObject() over every entry of `rss`.

    rss -- mutable sequence of feed entries; each slot is overwritten with
           the value unescapeRSSObject() returns for it.

    Returns the same `rss` sequence.
    '''
    rss[:] = [unescapeRSSObject(entry) for entry in rss]
    return rss

def parse(url, username=None, password=None, etag=None, modified=None):
    '''
    Fetch and parse the feed at `url`, then unescape its entries.

    url      -- feed address handed to feedparser.parse().
    username -- optional credentials; they are inserted into the URL with
    password    insert_credentials() only when both are truthy.
    etag     -- passed through unchanged to feedparser.parse().
    modified -- passed through unchanged to feedparser.parse().

    The request uses settings.USER_AGENT as the agent string and the handler
    returned by get_proxy().

    Returns the feedparser result. When it contains an "entries" key, that
    value is replaced by the result of decodeRSS().
    '''
    agent = settings.USER_AGENT
    handlers = [get_proxy()]
    if username and password:
        url = insert_credentials(url, username, password)
    # Keep the parsed result instead of returning it immediately, otherwise
    # the entry decoding below would never run.
    response = feedparser.parse(url, etag=etag, modified=modified, agent=agent, handlers=handlers)
    if "entries" in response:
        response["entries"] = decodeRSS(response["entries"])
    return response

def is_valid_feed(data):
entries = get(data, 'entries', [])
Expand Down