Skip to content

Commit

Permalink
Updates to NASA Auth plugins and examples (#1042)
Browse files Browse the repository at this point in the history
  • Loading branch information
reidsunderland authored May 10, 2024
1 parent 828e155 commit ddb228c
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 34 deletions.
2 changes: 2 additions & 0 deletions sarracenia/examples/poll/nasa_cmr_opendap.conf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ sleep 15m
# Use the poll_NASA_CMR plugin
callback poll.poll_NASA_CMR

pollUrl https://cmr.earthdata.nasa.gov/search/granules.umm_json

# Collection Concept IDs - there can be multiple

# https://podaac.jpl.nasa.gov/dataset/AVHRRMTB_G-NAVO-L2P-v2.0
Expand Down
2 changes: 2 additions & 0 deletions sarracenia/examples/poll/nasa_cmr_other.conf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ sleep 15m
# Use the poll_NASA_CMR plugin
callback poll.poll_NASA_CMR

pollUrl https://cmr.earthdata.nasa.gov/search/granules.umm_json

# Collection Concept IDs - there can be multiple

# OMI/Aura Ozone (O3) Total Column 1-Orbit L2 Swath 13x24 km V003 (OMTO3)
Expand Down
2 changes: 2 additions & 0 deletions sarracenia/examples/poll/nasa_cmr_podaac.conf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ sleep 15m
# Use the poll_NASA_CMR plugin
callback poll.poll_NASA_CMR

pollUrl https://cmr.earthdata.nasa.gov/search/granules.umm_json

# Collection Concept IDs - there can be multiple

# https://podaac.jpl.nasa.gov/dataset/AVHRRMTB_G-NAVO-L2P-v2.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ exchange xs_NASA-TEST
# no broker-side filtering, get all files posted to xs_NASA-TEST
subtopic #

# use the auth_NASA_Earthdata plugin
callback accept.auth_NASA_Earthdata
# use the NASA Earthdata authentication plugin
callback authenticate.nasa_earthdata

# smaller batch size makes the logs easier to read for debugging
batch 5
Expand All @@ -21,4 +21,4 @@ acceptUnmatched True
directory /tmp/NASA_DL/

# enable DEBUG log messages that come from the authenticate.nasa_earthdata plugin
set accept.auth_NASA_Earthdata.logLevel debug
set authenticate.nasa_earthdata.logLevel debug
7 changes: 7 additions & 0 deletions sarracenia/flowcb/authenticate/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""
Authentication plugins.
Work in progress. See https://github.com/MetPX/sarracenia/issues/565
"""

pass
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,22 @@
How to set up your download config:
--------------------------------
Add ``callback accept.auth_NASA_Earthdata``, in your subscribe, sarra or other download config.
Add ``callback authenticate.nasa_earthdata``, in your subscribe, sarra or other download config.
Add ``https://username:password@urs.earthdata.nasa.gov/`` to your ``credentials.conf`` file.
Optional: set ``acceptSizeWrong True`` in the sarra/subscribe config to suppress the WARNING message
about a file being downloaded with no length given.
For examples, see https://github.com/MetPX/sarracenia/tree/main/sarracenia/examples/subscribe files
For examples, see https://github.com/MetPX/sarracenia/tree/stable/sarracenia/examples/subscribe files
named ``*nasa_earthdata*.conf``.
Change log:
-----------
- 2023-10-10: first attempt at this plugin. The old v2 code re-implemented downloading, using a session with
stored cookies. This should be better, because it uses the native sr3 download code.
- 2024-05-09: refactored so that the plugin can easily be called from poll plugins and elsewhere.
"""

import sarracenia
Expand All @@ -55,16 +56,16 @@

logger = logging.getLogger(__name__)

class Auth_nasa_earthdata(sarracenia.flowcb.FlowCB):
class Nasa_earthdata(sarracenia.flowcb.FlowCB):
def __init__(self, options):
super().__init__(options, logger)

# Allow setting a logLevel *only* for this plugin in the config file:
# set accept.auth_NASA_Earthdata.logLevel debug
# set authenticate.nasa_earthdata.logLevel debug
if hasattr(self.o, 'logLevel'):
logger.setLevel(self.o.logLevel.upper())

logger.debug("plugin: Download_NASA_Earthdata __init__")
logger.debug("plugin: NASA_Earthdata __init__")

self.o.add_option('earthdataUrl', kind='str', default_value='https://urs.earthdata.nasa.gov')

Expand All @@ -82,6 +83,12 @@ def after_accept(self, worklist):
Then adds the bearer token to Sarracenia's credentials DB for the message's baseUrl. This will allow
the file to be downloaded from msg['baseUrl']+msg['relPath'] using the bearer token.
"""
for msg in worklist.incoming:
self.add_token_for_url(msg['baseUrl'])

def add_token_for_url(self, url):
""" For the given URL, add the token to the in memory credentials database.
"""

# It's not clear what time the token expires on the expiry date. If today = expiry date, then try to get a
# token every time this runs. If it's not expired yet, we'll get the same token from the API and can try
Expand All @@ -95,30 +102,26 @@ def after_accept(self, worklist):
# Get a token from the NASA API, if there isn't one already
if not self._token:
# Try to get a new token
if not self.get_earthdata_token():
logger.error(f"Failed to retrieve Bearer token from {self.o.earthdataUrl}. " +
f"Can't download {msg['baseUrl']}{msg['relPath']}")

for msg in worklist.incoming:

# If the credential already exists and the bearer_token matches, don't need to do anything
ok, details = self.o.credentials.get(msg['baseUrl'])
if not self.get_bearer_token():
logger.error(f"Failed to retrieve bearer token from {self.o.earthdataUrl}")

# If the credential already exists and the bearer_token matches, don't need to do anything
ok, details = self.o.credentials.get(url)
token_already_in_creds = False
try:
token_already_in_creds = (ok and details.bearer_token == self._token)
if token_already_in_creds:
logger.debug(f"Token for {url} already in credentials database")
except:
token_already_in_creds = False
try:
token_already_in_creds = (ok and details.bearer_token == self._token)
if token_already_in_creds:
logger.debug(f"Token for {msg['baseUrl']} already in credentials database")
except:
token_already_in_creds = False

if not token_already_in_creds:
logger.info(f"Token for {msg['baseUrl']} not in credentials database. Adding it!")
# Add the new bearer token to the internal credentials db. If the credential is already in the db, it will
# be replaced which is desirable.
cred = sarracenia.credentials.Credential(urlstr=msg['baseUrl'])
cred.bearer_token = self._token
self.o.credentials.add(msg['baseUrl'], details=cred)

if not token_already_in_creds:
logger.info(f"Token for {url} not in credentials database. Adding it!")
# Add the new bearer token to the internal credentials db. If the credential is already in the db, it will
# be replaced which is desirable.
cred = sarracenia.credentials.Credential(urlstr=url)
cred.bearer_token = self._token
self.o.credentials.add(url, details=cred)

def create_earthdata_token(self, auth: requests.auth.HTTPBasicAuth) -> bool:
""" Create a new Earthdata token.
Expand Down Expand Up @@ -148,7 +151,7 @@ def create_earthdata_token(self, auth: requests.auth.HTTPBasicAuth) -> bool:
logger.debug("details:", exc_info=True)
return False

def get_earthdata_token(self) -> bool:
def get_bearer_token(self) -> bool:
""" Try to retrieve a token from the Earthdata account. If there is no token, it will create a new one.
https://urs.earthdata.nasa.gov/documentation/for_users/user_token
"""
Expand All @@ -173,8 +176,9 @@ def get_earthdata_token(self) -> bool:
# Try to get an existing token
resp = requests.get(self.o.earthdataUrl + "/api/users/tokens", auth=auth)
if resp.status_code != 200:
logger.error(f"Failed to login to NASA Earthdata. Code: {resp.status_code} Info: {resp.text}")
return False
logger.error(f"Failed to login to NASA Earthdata ({self.o.earthdataUrl})." +
f" Code: {resp.status_code} Info: {resp.text} Username: {username}")
return False

# If we got 200, we either have an empty response (user has 0 tokens), or we have a token
resp_j = resp.json()
Expand Down
6 changes: 5 additions & 1 deletion sarracenia/flowcb/poll/poll_NASA_CMR.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,8 +350,12 @@ def poll(self) -> list:
if data_url:
# The message is created using the post_baseUrl and relative path
url = urlparse(data_url)
self.o.post_baseUrl = url.scheme + "://" + url.netloc + "/"
baseUrl = url.scheme + "://" + url.netloc + "/"
self.o.post_baseUrl = baseUrl
m = sarracenia.Message.fromFileInfo(url.path, self.o)
# When Sarracenia runs updatePaths again later, from sarracenia.Flow, self.o.post_baseUrl will be
# different, so set msg['post_baseUrl'] here to override whatever setting it has at that point.
m['post_baseUrl'] = baseUrl
if m:
if sumstr:
logger.info(f"md5sum is available for {data_url}. Changing identity from {m['identity']} to {sumstr}")
Expand Down

0 comments on commit ddb228c

Please sign in to comment.