Merge pull request #60 from nsidc/cryo-184
Cryo 184
betolink committed Sep 26, 2023
2 parents d53ee91 + ebc848c commit 28c72fa
Showing 11 changed files with 3,613 additions and 1 deletion.
4 changes: 3 additions & 1 deletion binder/environment.yml
@@ -16,7 +16,7 @@ dependencies:
- netcdf4=1.6.2
- geopy
- pyresample
- earthaccess>=0.5.2
- earthaccess>=0.6.1
- fiona
- zarr
- ipympl
@@ -35,9 +35,11 @@ dependencies:
- pipreqsnb
- conda-lock>=1.2.1
- mamba>=1.0
- coiled>=0.9.30
- pip
- pip:
- git+https://github.com/icesat2py/icepyx.git
- git+https://github.com/ICESat2-SlideRule/h5coro.git@main
- awscliv2
platforms:
- linux-64
893 changes: 893 additions & 0 deletions notebooks/ICESat-2_Cloud_Access/ATL10-h5coro.ipynb

Large diffs are not rendered by default.

1,733 changes: 1,733 additions & 0 deletions notebooks/ICESat-2_Cloud_Access/ATL10-h5coro_rendered.ipynb

Large diffs are not rendered by default.

115 changes: 115 additions & 0 deletions notebooks/ICESat-2_Cloud_Access/h5cloud/read_atl10.py
@@ -0,0 +1,115 @@
#!/usr/bin/env python

#import coiled

import geopandas as gpd
import numpy as np
import pandas as pd
from rich import print as rprint
from itertools import product
from pqdm.threads import pqdm


import earthaccess
from h5coro import s3driver, webdriver
import h5coro




def get_strong_beams(f):
    """Returns ground track for strong beams based on IS2 orientation"""
    orient = f['orbit_info/sc_orient'][0]

    if orient == 0:
        return [f"gt{i}l" for i in [1, 2, 3]]
    elif orient == 1:
        return [f"gt{i}r" for i in [1, 2, 3]]
    else:
        raise KeyError("Spacecraft orientation neither forward nor backward")






def read_atl10(files, bounding_box=None, executors=4, environment="local", credentials=None):
    """Returns a consolidated GeoPandas dataframe for a set of ATL10 file pointers.

    Parameters:
        files (list): ATL10 granule references, either HTTPS/S3 links or authenticated
            fsspec file objects returned by earthaccess
        bounding_box (str): optional "min_lon,min_lat,max_lon,max_lat" string used to clip the tracks
        executors (int): number of threads
        environment (str): "local" to read over HTTPS, anything else to read directly from S3
        credentials: EDL bearer token (local) or AWS credentials dict (direct S3 access)
    """
    if environment == "local":
        driver = webdriver.HTTPDriver
    else:
        driver = s3driver.S3Driver

    GPS_EPOCH = pd.to_datetime('1980-01-06 00:00:00')

    def read_h5coro(file):
        """Reads datasets required for creating gridded freeboard from a single ATL10 file.

        file: an authenticated fsspec file reference on S3 (returned by earthaccess)
        returns: a geopandas dataframe with the strong-beam freeboard segments
        """
        # Open file object
        h5 = h5coro.H5Coro(file, driver, credentials=credentials)

        # Get strong beams based on orientation
        ancillary_datasets = ["orbit_info/sc_orient", "ancillary_data/atlas_sdp_gps_epoch"]
        f = h5.readDatasets(datasets=ancillary_datasets, block=True)
        strong_beams = get_strong_beams(f)
        atlas_sdp_gps_epoch = f["ancillary_data/atlas_sdp_gps_epoch"][:]

        # Create list of datasets to load
        datasets = ["freeboard_segment/latitude",
                    "freeboard_segment/longitude",
                    "freeboard_segment/delta_time",
                    "freeboard_segment/seg_dist_x",
                    "freeboard_segment/heights/height_segment_length_seg",
                    "freeboard_segment/beam_fb_height",
                    "freeboard_segment/heights/height_segment_type"]
        ds_list = ["/".join(p) for p in list(product(strong_beams, datasets))]
        # Load datasets
        f = h5.readDatasets(datasets=ds_list, block=True)
        # rprint(f["gt2l/freeboard_segment/latitude"], type(f["gt2l/freeboard_segment/latitude"]))

        # Create a list of geopandas.DataFrames containing beams
        tracks = []
        for beam in strong_beams:
            ds = {dataset.split("/")[-1]: f[dataset][:] for dataset in ds_list if dataset.startswith(beam)}

            # Convert delta_time to datetime
            ds["delta_time"] = GPS_EPOCH + pd.to_timedelta(ds["delta_time"] + atlas_sdp_gps_epoch, unit='s')
            # we don't need nanoseconds to grid daily let alone weekly
            ds["delta_time"] = ds["delta_time"].astype('datetime64[s]')

            # Add beam identifier
            ds["beam"] = beam

            # Set fill values to NaN - assume 100 m as threshold
            ds["beam_fb_height"] = np.where(ds["beam_fb_height"] > 100, np.nan, ds["beam_fb_height"])

            geometry = gpd.points_from_xy(ds["longitude"], ds["latitude"])
            del ds["longitude"]
            del ds["latitude"]

            gdf = gpd.GeoDataFrame(ds, geometry=geometry, crs="EPSG:4326")
            gdf.dropna(axis=0, inplace=True)
            if bounding_box is not None:
                bbox = [float(coord) for coord in bounding_box.split(",")]
                gdf = gdf.cx[bbox[0]:bbox[2], bbox[1]:bbox[3]]
            tracks.append(gdf)

        df = pd.concat(tracks)
        return df

    dfs = pqdm(files, read_h5coro, n_jobs=executors)
    combined = pd.concat(dfs)

    return combined


30 changes: 30 additions & 0 deletions notebooks/ICESat-2_Cloud_Access/h5cloud/readme.md
@@ -0,0 +1,30 @@
## Running and scaling Python with [Coiled serverless functions](https://docs.coiled.io/user_guide/usage/functions/index.html).

This script contains the same code to read ATL10 data as the notebook; the one difference is that we use a function decorator from Coiled that lets us execute the function in the cloud with no modifications whatsoever.
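
As a rough, hedged sketch of that pattern (the function name, region, and inputs below are illustrative placeholders, not the actual workflow code):

```python
import coiled

# Illustrative sketch: decorating an ordinary function so Coiled provisions a
# cloud VM and runs it there; drop the decorator and the same code runs locally.
@coiled.function(region="us-west-2", memory="4 GB")
def process(granule_links):
    # ... the same ATL10 reading code would go here ...
    return len(granule_links)

# Calling it looks exactly like a local call (requires `coiled login` first).
n = process(["ATL10_example_granule.h5"])  # hypothetical input
```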

The only requirement for this workflow is to have an active account with Coiled and execute this from our terminal:

```bash
coiled login
```

This will open a browser tab to authenticate ourselves with Coiled's API.

> Note: If you would like to test this, ask us to include you with Openscapes!

Our functions can be parallelized, scaling the computation to hundreds of nodes if needed, in much the same way we could use AWS Lambda functions (a minimal sketch of this pattern appears at the end of this readme). Once we install and activate [`nsidc-tutorials`](../../binder/environment.yml), we can run the script with the following Python command:

```bash
python workflow.py --bbox="-180, -90, 180, -60" --year=2023 --out="test-2023-local" --env=local

```

This will run the code locally. If we want to run the code in the cloud, we pass `--env=cloud`:

```bash
python workflow.py --bbox="-180, -90, 180, -60" --year=2023 --out="test-2023-local" --env=cloud

```

The first time we execute this function, provisioning will take a couple of minutes while Coiled syncs our current Python environment with the cloud instances executing our code.
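
For fanning out over many granules, Coiled's serverless-functions documentation also describes a `.map()` helper on decorated functions; here is a hedged sketch reusing the illustrative `process` function from the earlier sketch (inputs are placeholders):

```python
# Hedged sketch: each element of the iterable is handled on its own cloud worker.
granule_batches = [["granule_1.h5"], ["granule_2.h5"], ["granule_3.h5"]]  # placeholders
results = list(process.map(granule_batches))
```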
74 changes: 74 additions & 0 deletions notebooks/ICESat-2_Cloud_Access/h5cloud/workflow.py
@@ -0,0 +1,74 @@
#!/usr/bin/env python

import coiled

import geopandas as gpd
import numpy as np
import pandas as pd
from rich import print as rprint
from itertools import product
import argparse

import earthaccess
from h5coro import h5coro, s3driver

from read_atl10 import read_atl10

if __name__ == "__main__":

    rprint("executing locally")
    parser = argparse.ArgumentParser()
    parser.add_argument('--bbox', help='bounding box as "min_lon, min_lat, max_lon, max_lat"')
    parser.add_argument('--year', help='year to process')
    parser.add_argument('--env', help='execute in the cloud or local, default: local')
    parser.add_argument('--out', help='output file name')
    args = parser.parse_args()

    auth = earthaccess.login()

    # ross_sea = (-180, -78, -160, -74)
    # antarctic = (-180, -90, 180, -60)

    year = int(args.year)
    bbox = tuple([float(c) for c in args.bbox.split(",")])

    print(f"Searching ATL10 data for year {year} ...")
    granules = earthaccess.search_data(
        short_name='ATL10',
        version='006',
        cloud_hosted=True,
        bounding_box=bbox,
        temporal=(f'{args.year}-06-01', f'{args.year}-09-30'),
        count=4,
        debug=True
    )

    if args.env == "local":
        # Local: stream the granules over HTTPS using the EDL bearer token
        files = [g.data_links(access="out_of_region")[0] for g in granules]
        credentials = earthaccess.__auth__.token["access_token"]

        df = read_atl10(files, bounding_box=args.bbox, environment="local", credentials=credentials)
    else:
        # Cloud: read the granules directly from S3 using temporary NSIDC credentials
        files = [g.data_links(access="direct")[0].replace("s3://", "") for g in granules]
        aws_credentials = earthaccess.get_s3_credentials("NSIDC")
        credentials = {
            "aws_access_key_id": aws_credentials["accessKeyId"],
            "aws_secret_access_key": aws_credentials["secretAccessKey"],
            "aws_session_token": aws_credentials["sessionToken"]
        }

        @coiled.function(region="us-west-2",
                         memory="4 GB",
                         keepalive="1 HOUR")
        def cloud_runner(files, bounding_box, credentials):
            df = read_atl10(files, bounding_box=bounding_box, environment="cloud", credentials=credentials)
            return df

        df = cloud_runner(files, args.bbox, credentials=credentials)

    df.to_parquet(f"{args.out}.parquet")
    rprint(df)

81 changes: 81 additions & 0 deletions notebooks/ICESat-2_Cloud_Access/ross_sea.json
@@ -0,0 +1,81 @@
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {},
"geometry": {
"coordinates": [
[
[
-191.09608556432593,
-73.84648052492354
],
[
-196.32896148365322,
-76.01486891873441
],
[
-201.3333475059275,
-79.45419249238772
],
[
-195.62738051734928,
-82.20681871096693
],
[
-189.41756278781764,
-84.1511348270979
],
[
-167.0795373869447,
-84.71222453066771
],
[
-154.94650971884352,
-84.47077199426083
],
[
-147.87987772139172,
-83.76551904624706
],
[
-138.89031336546202,
-83.16126208208007
],
[
-139.89760391715487,
-81.81509135152459
],
[
-145.07462138020958,
-75.8454713912678
],
[
-145.2859453568654,
-73.60545521193768
],
[
-155.7529050321871,
-71.77435794070743
],
[
-173.60352774698885,
-71.50777786832501
],
[
-187.08441940129651,
-71.32576778967325
],
[
-191.09608556432593,
-73.84648052492354
]
]
],
"type": "Polygon"
},
"id": 0
}
]
}
