Added Functionality to Apod-API #106

Open · wants to merge 2 commits into master
141 changes: 72 additions & 69 deletions apod/utility.py
@@ -13,9 +13,10 @@
import json
import re
import urllib3

# import urllib.request

LOG = logging.getLogger(__name__)
LOG = logging.getLogger(__name__) # setting up logger
logging.basicConfig(level=logging.WARN)

# location of backing APOD service
@@ -27,14 +28,15 @@
# function for getting video thumbnails
def _get_thumbs(data):
global video_thumb
if "youtube" in data or "youtu.be" in data:
if "youtube" in data or "youtu.be" in data: # checking if video is from youtube
# get ID from YouTube URL
youtube_id_regex = re.compile("(?:(?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)")
video_id = youtube_id_regex.findall(data)
video_id = ''.join(''.join(elements) for elements in video_id).replace("?", "").replace("&", "")
# get URL of thumbnail
video_thumb = "https://img.youtube.com/vi/" + video_id + "/0.jpg"
elif "vimeo" in data:

elif "vimeo" in data: # checking if video is from vimeo
# get ID from Vimeo URL
vimeo_id_regex = re.compile("(?:/video/)(\d+)")
vimeo_id = vimeo_id_regex.findall(data)[0]
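As a quick illustration of the YouTube branch above (the URL is hypothetical): findall returns one tuple per match, one slot per capture group, which the join-and-replace then flattens and cleans.

import re

youtube_id_regex = re.compile(r"(?:(?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)")
url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"  # hypothetical example URL
matches = youtube_id_regex.findall(url)  # e.g. [('', '?', 'dQw4w9WgXcQ')]
video_id = ''.join(''.join(m) for m in matches).replace("?", "").replace("&", "")
print("https://img.youtube.com/vi/" + video_id + "/0.jpg")
# -> https://img.youtube.com/vi/dQw4w9WgXcQ/0.jpg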
@@ -56,11 +58,11 @@ def _get_last_url(data):
def _get_apod_chars(dt, thumbs):
media_type = 'image'
if dt:
date_str = dt.strftime('%y%m%d')
apod_url = '%sap%s.html' % (BASE, date_str)
date_str = dt.strftime('%y%m%d') # formatting the date as YYMMDD
apod_url = '%sap%s.html' % (BASE, date_str) # building the page URL from BASE
else:
apod_url = '%sastropix.html' % BASE
LOG.debug('OPENING URL:' + apod_url)
apod_url = '%sastropix.html' % BASE # defaulting to the latest APOD page
LOG.debug('OPENING URL:' + apod_url) # logging the URL being opened
res = requests.get(apod_url)

if res.status_code == 404:
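For reference, the URL construction a few lines up follows APOD's ap<YYMMDD>.html naming scheme; a minimal sketch, assuming BASE is the apod.nasa.gov root defined in the collapsed module header:

import datetime

BASE = 'https://apod.nasa.gov/apod/'  # assumed value of the module-level BASE
dt = datetime.date(2021, 6, 1)
date_str = dt.strftime('%y%m%d')  # '210601'
apod_url = '%sap%s.html' % (BASE, date_str)
print(apod_url)  # https://apod.nasa.gov/apod/ap210601.html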
@@ -75,18 +77,18 @@ def _get_apod_chars(dt, thumbs):

# return default_obj_props

soup = BeautifulSoup(res.text, 'html.parser')
LOG.debug('getting the data url')
soup = BeautifulSoup(res.text, 'html.parser') # creating soup obj for html
LOG.debug('getting the data url') # logs message
hd_data = None
if soup.img:
# it is an image, so get both the low- and high-resolution data
data = BASE + soup.img['src']
data = BASE + soup.img['src'] # adding base to src
hd_data = data

LOG.debug('getting the link for hd_data')
for link in soup.find_all('a', href=True):
for link in soup.find_all('a', href=True): # scanning <a> tags for a hi-res image link
if link['href'] and link['href'].startswith('image'):
hd_data = BASE + link['href']
hd_data = BASE + link['href'] # adding base to found link
break
elif soup.iframe:
# it's a video
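The loop above that upgrades hd_data relies on APOD's hi-res links being relative paths starting with image/; a self-contained sketch using a made-up HTML fragment:

from bs4 import BeautifulSoup

BASE = 'https://apod.nasa.gov/apod/'  # assumed value of the module-level BASE
html = '<a href="image/2106/ngc1234_big.jpg">Click for hi-res</a>'  # hypothetical fragment
soup = BeautifulSoup(html, 'html.parser')
hd_data = None
for link in soup.find_all('a', href=True):
    if link['href'].startswith('image'):  # hi-res links are relative, e.g. image/...
        hd_data = BASE + link['href']
        break
print(hd_data)  # https://apod.nasa.gov/apod/image/2106/ngc1234_big.jpg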
@@ -99,27 +101,28 @@ def _get_apod_chars(dt, thumbs):

props = {}

props['explanation'] = _explanation(soup)
props['title'] = _title(soup)
props['explanation'] = _explanation(soup) # adding explanation to dict
props['title'] = _title(soup) # adding title to dict
copyright_text = _copyright(soup)
if copyright_text:
props['copyright'] = copyright_text
props['media_type'] = media_type
props['copyright'] = copyright_text # adding copyright to dict
props['media_type'] = media_type # adding media type
if data:
props['url'] = _get_last_url(data)
props['url'] = _get_last_url(data) # adding url
if dt:
props['date'] = dt.strftime('%Y-%m-%d')
props['date'] = dt.strftime('%Y-%m-%d') # adding date
else:
props['date'] = _date(soup)

if hd_data:
props['hdurl'] = _get_last_url(hd_data)
props['hdurl'] = _get_last_url(hd_data) # adding hd data

if thumbs and media_type == "video":
if thumbs.lower() == "true":
props['thumbnail_url'] = _get_thumbs(data)
props['thumbnail_url'] = _get_thumbs(data) # adding the video thumbnail URL

return props

return props # returning the props dictionary
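For a sense of the returned shape: a video entry requested with thumbs="true" (the parameter arrives as a string, hence the .lower() == "true" check) would yield a dict roughly like the following sketch; all values are hypothetical:

props = {
    'title': 'A Total Lunar Eclipse',
    'explanation': '...',
    'copyright': 'Jane Doe',  # only present when the page credits someone
    'media_type': 'video',
    'url': 'https://www.youtube.com/embed/abc123',  # hypothetical
    'date': '2021-06-01',
    'thumbnail_url': 'https://img.youtube.com/vi/abc123/0.jpg',  # videos only
}
# image entries carry 'hdurl' instead of 'thumbnail_url'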


def _title(soup):
@@ -131,33 +134,33 @@ def _title(soup):
LOG.debug('getting the title')
try:
# Handler for later APOD entries
number_of_center_elements = len(soup.find_all('center'))
if(number_of_center_elements == 2):
center_selection = soup.find_all('center')[0]
bold_selection = center_selection.find_all('b')[0]
title = bold_selection.text.strip(' ')
number_of_center_elements = len(soup.find_all('center')) # finds all center tags in HTML doc
if(number_of_center_elements == 2): # checks number of elements
center_selection = soup.find_all('center')[0] # selecting the first center tag
bold_selection = center_selection.find_all('b')[0] # finds all b tags in HTML doc
title = bold_selection.text.strip(' ') # removes whitespace
try:
title = title.encode('latin1').decode('cp1252')
title = title.encode('latin1').decode('cp1252') # encodes text as latin1 --> decodes as cp1252
except Exception as ex:
LOG.error(str(ex))
else:
center_selection = soup.find_all('center')[1]
bold_selection = center_selection.find_all('b')[0]
title = bold_selection.text.strip(' ')
LOG.error(str(ex)) # logging the error
else: # other cases
center_selection = soup.find_all('center')[1] # selecting the second center tag
bold_selection = center_selection.find_all('b')[0] # gets b tags
title = bold_selection.text.strip(' ') # removes whitespace
try:
title = title.encode('latin1').decode('cp1252')
title = title.encode('latin1').decode('cp1252') # encodes in latin1 --> decodes in cp1252
except Exception as ex:
LOG.error(str(ex))
LOG.error(str(ex)) # logging the error

return title
except Exception:
# Handler for early APOD entries
text = soup.title.text.split(' - ')[-1]
title = text.strip()
text = soup.title.text.split(' - ')[-1] # gets title tag from HTML doc
title = text.strip() # removes whitespace
try:
title = title.encode('latin1').decode('cp1252')
title = title.encode('latin1').decode('cp1252') # encodes in latin1 --> decodes in cp1252
except Exception as ex:
LOG.error(str(ex))
LOG.error(str(ex)) # logging the error

return title
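The encode('latin1').decode('cp1252') dance above repairs mojibake, presumably because APOD pages are really cp1252 and sometimes get decoded as latin1 first. A tiny sketch with a hypothetical title:

broken = 'Hubble\x92s View'  # 0x92 came through a latin1 decode
fixed = broken.encode('latin1').decode('cp1252')  # 0x92 is ’ in cp1252
print(fixed)  # Hubble’s View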

@@ -176,45 +179,45 @@ def _copyright(soup):

copyright_text = None
use_next = False
for element in soup.findAll('a', text=True):
for element in soup.findAll('a', text=True): # searching for a tags
# LOG.debug("TEXT: "+element.text)

if use_next:
copyright_text = element.text.strip(' ')
copyright_text = element.text.strip(' ') # stripping copyright
break

if 'Copyright' in element.text:
LOG.debug('Found Copyright text:' + str(element.text))
LOG.debug('Found Copyright text:' + str(element.text)) # logging the found copyright marker
use_next = True

if not copyright_text:

for element in soup.findAll(['b', 'a'], text=True):
for element in soup.findAll(['b', 'a'], text=True): # searching for b and a tags
# search text for explicit match
if 'Copyright' in element.text:
LOG.debug('Found Copyright text:' + str(element.text))
LOG.debug('Found Copyright text:' + str(element.text)) # logging the found copyright text
# pull the copyright from the link text which follows
sibling = element.next_sibling
stuff = ""
while sibling:
try:
stuff = stuff + sibling.text
stuff = stuff + sibling.text # accumulating copyright text from following siblings
except Exception:
pass
sibling = sibling.next_sibling

if stuff:
copyright_text = stuff.strip(' ')
copyright_text = stuff.strip(' ') # stripping text
try:
copyright_text = copyright_text.encode('latin1').decode('cp1252')
copyright_text = copyright_text.encode('latin1').decode('cp1252') # encoding copyright in latin1 and decoding in cp1252
except Exception as ex:
LOG.error(str(ex))
LOG.error(str(ex)) # logging error

return copyright_text

except Exception as ex:
LOG.error(str(ex))
raise ValueError('Unsupported schema for given date.')
raise ValueError('Unsupported schema for given date.') # raises exception
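A minimal sketch of the sibling walk in this hunk, on a made-up credit line; the try/except mirrors the original, since bare strings between tags may not expose .text:

from bs4 import BeautifulSoup

html = '<b>Copyright:</b> <a href="#">Jane Doe</a>'  # hypothetical fragment
soup = BeautifulSoup(html, 'html.parser')
for element in soup.find_all(['b', 'a'], text=True):
    if 'Copyright' in element.text:
        sibling, stuff = element.next_sibling, ''
        while sibling:
            try:
                stuff = stuff + sibling.text
            except Exception:
                pass
            sibling = sibling.next_sibling
        print(stuff.strip(' '))  # Jane Doe
        break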


def _explanation(soup):
@@ -224,33 +227,33 @@ def _explanation(soup):
"""
# Handler for later APOD entries
LOG.debug('getting the explanation')
s = soup.find_all('p')[2].text
s = soup.find_all('p')[2].text # taking the text of the third <p> tag
s = s.replace('\n', ' ')
s = s.replace(' ', ' ')
s = s.strip(' ').strip('Explanation: ')
s = s.strip(' ').strip('Explanation: ') # stripping the 'Explanation:' prefix
s = s.split(' Tomorrow\'s picture')[0]
s = s.strip(' ')
if s == '':
# Handler for earlier APOD entries
texts = [x.strip() for x in soup.text.split('\n')]
try:
begin_idx = texts.index('Explanation:') + 1
begin_idx = texts.index('Explanation:') + 1 # grabbing explanation
except ValueError as e:
# Rare case where "Explanation:" is not on its own line
explanation_line = [x for x in texts if "Explanation:" in x]
if len(explanation_line) == 1:
begin_idx = texts.index(explanation_line[0])
begin_idx = texts.index(explanation_line[0]) # grabbing explanation
texts[begin_idx] = texts[begin_idx][12:].strip()
else:
raise e

idx = texts[begin_idx:].index('')
s = ' '.join(texts[begin_idx:begin_idx + idx])
s = ' '.join(texts[begin_idx:begin_idx + idx]) # joining explanation text

try:
s = s.encode('latin1').decode('cp1252')
s = s.encode('latin1').decode('cp1252') # encoding latin1 decoding cp1252
except Exception as ex:
LOG.error(str(ex))
LOG.error(str(ex)) # logging the error

return s
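One caveat worth noting on the .strip('Explanation: ') call above: str.strip treats its argument as a set of characters, not a prefix, so explanations beginning with letters from that set can lose characters. A quick demonstration with made-up text:

s = '  Explanation:  A nearby star...'
print(s.strip(' ').strip('Explanation: '))  # 'A nearby star...'
print('Explanation:  Exoplanets are common'.strip('Explanation: '))  # 'ets are comm'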

@@ -260,11 +263,11 @@ def _date(soup):
Accepts a BeautifulSoup object for the APOD HTML page and returns the
date of the APOD image.
"""
LOG.debug('getting the date from soup data.')
_today = datetime.date.today()
LOG.debug('getting the date from soup data.') # logging message getting date
_today = datetime.date.today() # today's date
for line in soup.text.split('\n'):
today_year = str(_today.year)
yesterday_year = str((_today-datetime.timedelta(days=1)).year)
today_year = str(_today.year) # getting year
yesterday_year = str((_today-datetime.timedelta(days=1)).year) # getting yesterday's year
# Looks for the first line that starts with the current year.
# This also checks yesterday's year so it doesn't break on January 1st at 00:00 UTC
# before apod.nasa.gov uploads a new image.
@@ -279,10 +282,10 @@
'september', 'october', 'november', 'december'
].index(month.lower()) + 1
day = int(day)
return datetime.date(year=year, month=month, day=day).strftime('%Y-%m-%d')
return datetime.date(year=year, month=month, day=day).strftime('%Y-%m-%d') # returning the date formatted as YYYY-MM-DD
except:
LOG.debug('unable to retrieve date from line: ' + line)
raise Exception('Date not found in soup data.')
LOG.debug('unable to retrieve date from line: ' + line) # logging the line that failed to parse
raise Exception('Date not found in soup data.') # raising exception
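A sketch of the month-name lookup above, assuming the matched line splits as 'YYYY Month D' (the lines that actually produce year, month, and day are collapsed in this hunk):

import datetime

line = '2021 June 1'  # hypothetical header line
year, month, day = line.split()
months = ['january', 'february', 'march', 'april', 'may', 'june', 'july',
          'august', 'september', 'october', 'november', 'december']
month = months.index(month.lower()) + 1
print(datetime.date(year=int(year), month=month, day=int(day)).strftime('%Y-%m-%d'))
# -> 2021-06-01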


def parse_apod(dt, use_default_today_date=False, thumbs=False):
@@ -291,10 +294,10 @@ def parse_apod(dt, use_default_today_date=False, thumbs=False):
of that day, noting that
"""

LOG.debug('apod chars called date:' + str(dt))
LOG.debug('apod chars called date:' + str(dt)) # logging the requested date

try:
return _get_apod_chars(dt, thumbs)
return _get_apod_chars(dt, thumbs) # returning the APOD characteristics

except Exception as ex:

@@ -320,18 +323,18 @@ def get_concepts(request, text, apikey):
"""
cbase = 'http://access.alchemyapi.com/calls/text/TextGetRankedConcepts'

params = dict(
params = dict( # concept params
outputMode='json',
apikey=apikey,
text=text
)

try:

LOG.debug('Getting response')
response = json.loads(request.get(cbase, fields=params))
clist = [concept['text'] for concept in response['concepts']]
LOG.debug('Getting response') # logging message getting response
response = json.loads(request.get(cbase, fields=params)) # parsing the JSON response
clist = [concept['text'] for concept in response['concepts']] # concept list
return {k: v for k, v in zip(range(len(clist)), clist)}

except Exception as ex:
raise ValueError(ex)
raise ValueError(ex) # raising exception
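The final comprehension just builds an index-keyed dict from the concept list; for illustration (hypothetical values):

clist = ['astronomy', 'galaxy', 'telescope']
print({k: v for k, v in zip(range(len(clist)), clist)})
# {0: 'astronomy', 1: 'galaxy', 2: 'telescope'}
print(dict(enumerate(clist)))  # equivalent and more idiomatic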