Skip to content

Commit

Permalink
fixed retrieval and parsing issues
Browse files Browse the repository at this point in the history
  • Loading branch information
gassc committed May 8, 2018
1 parent 715916c commit ab44d62
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 29 deletions.
66 changes: 45 additions & 21 deletions app/get_email.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ def __init__(
# Restrict mail search. Be very specific.
# Machine should be very selective to receive messages.
self.mail_search_criteria = {
'FROM': MAIL_FROM,
'SUBJECT': MAIL_SUBJECT
'FROM': MAIL_FROM
# 'SUBJECT': MAIL_SUBJECT
# 'BODY': MAIL_BODY_SIGNATURE,
}

Expand Down Expand Up @@ -87,15 +87,34 @@ def _imap_search_string_lite(self, criteria):
return c

def _get_first_text_block(self, msg):
type = msg.get_content_maintype()

if type == 'multipart':
t = msg.get_content_maintype()
if t == 'multipart':
for part in msg.get_payload():
print(part.get_payload())
if part.get_content_maintype() == 'text':
return part.get_payload()
elif type == 'text':
elif t == 'text':
return msg.get_payload()

def _get_first_html_block(self, msg):
t = msg.get_content_maintype()
if t == 'multipart':
for part in msg.walk():
# print(part)
ctype = part.get_content_type()
cdispo = str(part.get('Content-Disposition'))
if part.get('Date'):
print("\t", str(part.get('Date')))
if part.get('Subject'):
print("\t", str(part.get('Subject')))
# skip any text/plain (txt) attachments
if ctype == 'text/html' and 'attachment' not in cdispo:
body = part.get_payload() # decode
# print(body)
return body
# else:
# print(msg)

# ------------------------------------------------------
# WORK

Expand All @@ -113,38 +132,43 @@ def retrieve(self):
server.select(self.folder)

# find messages in the INBOX meeting the search criteria
# result, data = server.uid('search', None, _imap_search_string(self.mail_search_criteria))
result, data = server.search(
result, data = server.uid(
'search',
None,
self._imap_search_string_lite(self.mail_search_criteria)
)
# print(result, data)
uids_on_server = [int(s) for s in data[0].split()]
# print(result, uids_on_server)
log.info(
"{0} matching e-mails found on the server".format(
"{0} e-mails matching search criteria were found on the server".format(
len(uids_on_server))
)

# new_uids = list(set(uids_on_server) - set(logged_uids))
# print("new_uids", new_uids)

for uid in uids_on_server:
log.info('Getting message {0} :::::::::::::::::::::'.format(uid))
log.info('Checking message {0} :::::::::::::::::::::'.format(uid))

result, data = server.uid(
'fetch', str(uid), '(RFC822)') # fetch entire message
server.uid
if data and data[0]:
msg = email.message_from_string(data[0][1].decode())

# parse the email here:
mail = self._get_first_text_block(msg)

# log.info(text)
# parse_email.go(text.split("\n"))
self.mails.append(mail)
try:
msg = email.message_from_string(data[0][1].decode())

# parse the email here:
html_mail = self._get_first_html_block(msg)
if html_mail:
self.mails.append(html_mail)
# text_mail = self._get_first_text_block(msg)
# if text_mail:
# self.mails.append(text_mail)
except:
log.error("\t could not parse messsage")

# record the uid in the db here
# append_uids_to_log([uid])
else:
print("\t", result, data)

server.logout()

Expand Down
12 changes: 7 additions & 5 deletions app/publish_geodata.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,17 @@ def _check_if_exists_in_esri_agol_feature_service(self, token):
)
published = published.json()
if "features" in published:
# this list of ids from the already published data:
published_ids = list(
set([str(f["attributes"]["SerialNumber"]) for f in published["features"]]))
# print("published", published_ids)
# print('records', self.records)
# compare records we've found in e-mail inbox to published features
to_publish = [
# return each record
r for r in self.records
# but only if the id is not in
if str(r["serial_number"]) not in list(set(
# this list of ids from the already published data:
[str(f["attributes"]["SerialNumber"])
for f in published["features"]]
))
if str(r["serial_number"]) not in published_ids
]
if to_publish:
return to_publish
Expand Down
12 changes: 9 additions & 3 deletions app/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,15 @@
class Utils(object):

def list_of_seq_unique_by_key(self, seq, key):
seen = set()
seen_add = seen.add
return [x for x in seq if x[key] not in seen and not seen_add(x[key])]
# make sure spec'd key exists in provided records first
seq = [x for x in seq if key in x.keys()]
# create a list of unique keys from the records provided
if seq:
seen = set()
seen_add = seen.add
return [x for x in seq if x[key] not in seen and not seen_add(x[key])]
else:
return []

def find_floats(self, string, lower=-180, upper=180):
'''This will do its best to get a positive or negative float from a string
Expand Down

0 comments on commit ab44d62

Please sign in to comment.