Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to NASA Auth plugins and examples #1042

Merged
merged 1 commit into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sarracenia/examples/poll/nasa_cmr_opendap.conf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ sleep 15m
# Use the poll_NASA_CMR plugin
callback poll.poll_NASA_CMR

pollUrl https://cmr.earthdata.nasa.gov/search/granules.umm_json

# Collection Concept IDs - there can be multiple

# https://podaac.jpl.nasa.gov/dataset/AVHRRMTB_G-NAVO-L2P-v2.0
Expand Down
2 changes: 2 additions & 0 deletions sarracenia/examples/poll/nasa_cmr_other.conf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ sleep 15m
# Use the poll_NASA_CMR plugin
callback poll.poll_NASA_CMR

pollUrl https://cmr.earthdata.nasa.gov/search/granules.umm_json

# Collection Concept IDs - there can be multiple

# OMI/Aura Ozone (O3) Total Column 1-Orbit L2 Swath 13x24 km V003 (OMTO3)
Expand Down
2 changes: 2 additions & 0 deletions sarracenia/examples/poll/nasa_cmr_podaac.conf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ sleep 15m
# Use the poll_NASA_CMR plugin
callback poll.poll_NASA_CMR

pollUrl https://cmr.earthdata.nasa.gov/search/granules.umm_json

# Collection Concept IDs - there can be multiple

# https://podaac.jpl.nasa.gov/dataset/AVHRRMTB_G-NAVO-L2P-v2.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ exchange xs_NASA-TEST
# no broker-side filtering, get all files posted to xs_NASA-TEST
subtopic #

# use the auth_NASA_Earthdata plugin
callback accept.auth_NASA_Earthdata
# use the NASA Earthdata authentication plugin
callback authenticate.nasa_earthdata

# smaller batch size makes the logs easier to read for debugging
batch 5
Expand All @@ -21,4 +21,4 @@ acceptUnmatched True
directory /tmp/NASA_DL/

# enable DEBUG log messages that come from the auth_NASA_Earthdata plugin
set accept.auth_NASA_Earthdata.logLevel debug
set authenticate.nasa_earthdata.logLevel debug
7 changes: 7 additions & 0 deletions sarracenia/flowcb/authenticate/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""
Authentication plugins.

Work in progress. See https://github.com/MetPX/sarracenia/issues/565
"""

pass
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,22 @@
How to set up your download config:
--------------------------------

Add ``callback accept.auth_NASA_Earthdata``, in your subscribe, sarra or other download config.
Add ``callback authenticate.nasa_earthdata``, in your subscribe, sarra or other download config.

Add ``https://username:[email protected]/`` to your ``credentials.conf`` file.

Optional: set ``acceptSizeWrong True`` in the sarra/subscribe config to suppress the WARNING message
about a file being downloaded with no length given.

For examples, see https://github.com/MetPX/sarracenia/tree/main/sarracenia/examples/subscribe files
For examples, see https://github.com/MetPX/sarracenia/tree/stable/sarracenia/examples/subscribe files
named ``*nasa_earthdata*.conf``.

Change log:
-----------

- 2023-10-10: first attempt at this plugin. The old v2 code re-implemented downloading, using a session with
stored cookies. This should be better, because it uses the native sr3 download code.
- 2024-05-09: refactoring, to be able to be easily called from poll plugins, and elsewhere.
"""

import sarracenia
Expand All @@ -55,16 +56,16 @@

logger = logging.getLogger(__name__)

class Auth_nasa_earthdata(sarracenia.flowcb.FlowCB):
class Nasa_earthdata(sarracenia.flowcb.FlowCB):
def __init__(self, options):
super().__init__(options, logger)

# Allow setting a logLevel *only* for this plugin in the config file:
# set accept.auth_NASA_Earthdata.logLevel debug
# set authenticate.nasa_earthdata.logLevel debug
if hasattr(self.o, 'logLevel'):
logger.setLevel(self.o.logLevel.upper())

logger.debug("plugin: Download_NASA_Earthdata __init__")
logger.debug("plugin: NASA_Earthdata __init__")

self.o.add_option('earthdataUrl', kind='str', default_value='https://urs.earthdata.nasa.gov')

Expand All @@ -82,6 +83,12 @@ def after_accept(self, worklist):
Then adds the bearer token to Sarracenia's credentials DB for the message's baseUrl. This will allow
the file to be downloaded from msg['baseUrl']+msg['relPath'] using the bearer token.
"""
for msg in worklist.incoming:
self.add_token_for_url(msg['baseUrl'])

def add_token_for_url(self, url):
""" For the given URL, add the token to the in memory credentials database.
"""

# It's not clear what time the token expires on the expiry date. If today = expiry date, then try to get a
# token every time this runs. If it's not expired yet, we'll get the same token from the API and can try
Expand All @@ -95,30 +102,26 @@ def after_accept(self, worklist):
# Get a token from the NASA API, if there isn't one already
if not self._token:
# Try to get a new token
if not self.get_earthdata_token():
logger.error(f"Failed to retrieve Bearer token from {self.o.earthdataUrl}. " +
f"Can't download {msg['baseUrl']}{msg['relPath']}")

for msg in worklist.incoming:

# If the credential already exists and the bearer_token matches, don't need to do anything
ok, details = self.o.credentials.get(msg['baseUrl'])
if not self.get_bearer_token():
logger.error(f"Failed to retrieve bearer token from {self.o.earthdataUrl}")

# If the credential already exists and the bearer_token matches, don't need to do anything
ok, details = self.o.credentials.get(url)
token_already_in_creds = False
try:
token_already_in_creds = (ok and details.bearer_token == self._token)
if token_already_in_creds:
logger.debug(f"Token for {url} already in credentials database")
except:
token_already_in_creds = False
try:
token_already_in_creds = (ok and details.bearer_token == self._token)
if token_already_in_creds:
logger.debug(f"Token for {msg['baseUrl']} already in credentials database")
except:
token_already_in_creds = False

if not token_already_in_creds:
logger.info(f"Token for {msg['baseUrl']} not in credentials database. Adding it!")
# Add the new bearer token to the internal credentials db. If the credential is already in the db, it will
# be replaced which is desirable.
cred = sarracenia.credentials.Credential(urlstr=msg['baseUrl'])
cred.bearer_token = self._token
self.o.credentials.add(msg['baseUrl'], details=cred)

if not token_already_in_creds:
logger.info(f"Token for {url} not in credentials database. Adding it!")
# Add the new bearer token to the internal credentials db. If the credential is already in the db, it will
# be replaced which is desirable.
cred = sarracenia.credentials.Credential(urlstr=url)
cred.bearer_token = self._token
self.o.credentials.add(url, details=cred)

def create_earthdata_token(self, auth: requests.auth.HTTPBasicAuth) -> bool:
""" Create a new Earthdata token.
Expand Down Expand Up @@ -148,7 +151,7 @@ def create_earthdata_token(self, auth: requests.auth.HTTPBasicAuth) -> bool:
logger.debug("details:", exc_info=True)
return False

def get_earthdata_token(self) -> bool:
def get_bearer_token(self) -> bool:
""" Try to retrieve a token from the Earthdata account. If there is no token, it will create a new one.
https://urs.earthdata.nasa.gov/documentation/for_users/user_token
"""
Expand All @@ -173,8 +176,9 @@ def get_earthdata_token(self) -> bool:
# Try to get an existing token
resp = requests.get(self.o.earthdataUrl + "/api/users/tokens", auth=auth)
if resp.status_code != 200:
logger.error(f"Failed to login to NASA Earthdata. Code: {resp.status_code} Info: {resp.text}")
return False
logger.error(f"Failed to login to NASA Earthdata ({self.o.earthdataUrl})." +
f" Code: {resp.status_code} Info: {resp.text} Username: {username}")
return False

# If we got 200, we either have an empty response (user has 0 tokens), or we have a token
resp_j = resp.json()
Expand Down
6 changes: 5 additions & 1 deletion sarracenia/flowcb/poll/poll_NASA_CMR.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,8 +350,12 @@ def poll(self) -> list:
if data_url:
# The message is created using the post_baseUrl and relative path
url = urlparse(data_url)
self.o.post_baseUrl = url.scheme + "://" + url.netloc + "/"
baseUrl = url.scheme + "://" + url.netloc + "/"
self.o.post_baseUrl = baseUrl
m = sarracenia.Message.fromFileInfo(url.path, self.o)
# When Sarracenia runs updatePaths again later, from sarracenia.Flow, self.o.post_baseUrl will be
# different, so set msg['post_baseUrl'] here to override whatever setting it has at that point.
m['post_baseUrl'] = baseUrl
if m:
if sumstr:
logger.info(f"md5sum is available for {data_url}. Changing identity from {m['identity']} to {sumstr}")
Expand Down
Loading