Skip to content

Commit

Permalink
Merge branch 'main' into feature_18
Browse files Browse the repository at this point in the history
  • Loading branch information
gtramonte committed May 13, 2024
2 parents 4de5d7c + 5eef173 commit 1084516
Show file tree
Hide file tree
Showing 17 changed files with 219 additions and 22 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/build_on_pull_request.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
build
--user
- name: Build a binary wheel and a source for drivers
run: python3 -m build ./drivers
run: python3 -m build ./drivers
- name: Set Docker image tag name
run: echo "TAG=$(date +'%Y.%m.%d.%H.%M')" >> $GITHUB_ENV
- name: TAG ECHO
Expand All @@ -32,7 +32,7 @@ jobs:
password: ${{ secrets.DOCKER_PASSWORD }}
registry: ${{ vars.DOCKER_REGISTRY }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
uses: docker/setup-buildx-action@v2
- name: Build and push drivers
uses: docker/build-push-action@v5
with:
Expand All @@ -45,7 +45,7 @@ jobs:
REGISTRY=${{ vars.GEOKUBE_REGISTRY }}
tags: |
${{ vars.DOCKER_REGISTRY }}/geolake-drivers:${{ env.TAG }}
${{ vars.DOCKER_REGISTRY }}/geolake-drivers:latest
${{ vars.DOCKER_REGISTRY }}/geolake-drivers:latest
- name: Build and push datastore component
uses: docker/build-push-action@v5
with:
Expand All @@ -58,7 +58,7 @@ jobs:
cache-to: type=gha,mode=max
tags: |
${{ vars.DOCKER_REGISTRY }}/geolake-datastore:${{ env.TAG }}
${{ vars.DOCKER_REGISTRY }}/geolake-datastore:latest
${{ vars.DOCKER_REGISTRY }}/geolake-datastore:latest
- name: Build and push api component
uses: docker/build-push-action@v5
with:
Expand All @@ -71,7 +71,7 @@ jobs:
cache-to: type=gha,mode=max
tags: |
${{ vars.DOCKER_REGISTRY }}/geolake-api:${{ env.TAG }}
${{ vars.DOCKER_REGISTRY }}/geolake-api:latest
${{ vars.DOCKER_REGISTRY }}/geolake-api:latest
- name: Build and push executor component
uses: docker/build-push-action@v5
with:
Expand All @@ -84,4 +84,4 @@ jobs:
cache-to: type=gha,mode=max
tags: |
${{ vars.DOCKER_REGISTRY }}/geolake-executor:${{ env.TAG }}
${{ vars.DOCKER_REGISTRY }}/geolake-executor:latest
${{ vars.DOCKER_REGISTRY }}/geolake-executor:latest
17 changes: 13 additions & 4 deletions .github/workflows/build_on_release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,45 +32,54 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: Build and push drivers
uses: docker/build-push-action@v4
uses: docker/build-push-action@v5
with:
context: ./drivers
file: ./drivers/Dockerfile
push: true
build-args: |
REGISTRY=${{ vars.GEOKUBE_REGISTRY }}
TAG=v0.2a6
cache-from: type=gha
cache-to: type=gha,mode=max
tags: |
${{ vars.GEOLAKE_REGISTRY }}/geolake-drivers:${{ env.RELEASE_TAG }}
- name: Build and push datastore component
uses: docker/build-push-action@v4
uses: docker/build-push-action@v5
with:
context: ./datastore
file: ./datastore/Dockerfile
push: true
build-args: |
REGISTRY=${{ vars.GEOLAKE_REGISTRY }}
TAG=${{ env.RELEASE_TAG }}
cache-from: type=gha
cache-to: type=gha,mode=max
tags: |
${{ vars.GEOLAKE_REGISTRY }}/geolake-datastore:${{ env.RELEASE_TAG }}
- name: Build and push api component
uses: docker/build-push-action@v4
uses: docker/build-push-action@v5
with:
context: ./api
file: ./api/Dockerfile
push: true
build-args: |
REGISTRY=${{ vars.GEOLAKE_REGISTRY }}
TAG=${{ env.RELEASE_TAG }}
cache-from: type=gha
cache-to: type=gha,mode=max
tags: |
${{ vars.GEOLAKE_REGISTRY }}/geolake-api:${{ env.RELEASE_TAG }}
- name: Build and push executor component
uses: docker/build-push-action@v4
uses: docker/build-push-action@v5
with:
context: ./executor
file: ./executor/Dockerfile
push: true
build-args: |
REGISTRY=${{ vars.GEOLAKE_REGISTRY }}
TAG=${{ env.RELEASE_TAG }}
cache-from: type=gha
cache-to: type=gha,mode=max
tags: |
${{ vars.GEOLAKE_REGISTRY }}/geolake-executor:${{ env.RELEASE_TAG }}
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -112,5 +112,3 @@ venv.bak/
_catalogs/
_old/

# Netcdf files
*.nc
51 changes: 51 additions & 0 deletions CITATION.cff
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# This CITATION.cff file was generated with cffinit.
# Visit https://bit.ly/cffinit to generate yours today!

# Citation metadata for the geolake software (Citation File Format 1.2.0).
cff-version: 1.2.0
title: geolake
message: >-
  If you use this software, please cite it using the
  metadata from this file.
type: software
authors:
  - given-names: Marco
    family-names: Mancini
    orcid: 'https://orcid.org/0000-0002-9150-943X'
  - given-names: Jakub
    family-names: Walczak
    orcid: 'https://orcid.org/0000-0002-5632-9484'
  # No ORCID is listed for this author.
  - given-names: Mirko
    family-names: Stojiljković
  - given-names: Valentina
    family-names: Scardigno
    orcid: 'https://orcid.org/0000-0002-0123-5368'
identifiers:
  - type: doi
    value: 10.5281/zenodo.10598417
repository-code: 'https://github.com/CMCC-Foundation/geolake'
# '>+' folds the lines into one paragraph and keeps the trailing newline(s).
abstract: >+
  geolake is an open source framework for management,
  storage, and analytics of Earth Science data. geolake
  implements the concept of a data lake as a central
  location that holds a large amount of data in its native
  and raw format. geolake does not impose any schema when
  ingesting the data, however it provides a unified Data
  Model and API for geoscientific datasets. The data is kept
  in the original format and storage, and the in-memory data
  structure is built on-the-fly for the processing analysis.
  The system has been designed using a cloud-native
  architecture, based on containerized microservices, that
  facilitates the development, deployment and maintenance of
  the system itself. It has been implemented by integrating
  different open source frameworks, tools and libraries and
  can be easily deployed using the Kubernetes platform and
  related tools such as kubectl.
keywords:
  - python framework
  - earth science
  - data analytics
license: Apache-2.0
version: 0.1.0
date-released: '2024-01-29'
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10598417.svg)](https://doi.org/10.5281/zenodo.10598417)

# geolake

## Description
Expand Down
18 changes: 9 additions & 9 deletions api/app/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Main module with dekube-dds API endpoints defined"""
__version__ = "2.0"
"""Main module with geolake API endpoints defined"""
__version__ = "0.1.0"
import os
from typing import Optional, Dict
from datetime import datetime
Expand Down Expand Up @@ -66,12 +66,12 @@ def map_to_geoquery(
extend_json_encoders()

app = FastAPI(
title="geokube-dds API",
description="REST API for geokube-dds",
title="geolake API",
description="REST API for geolake",
version=__version__,
contact={
"name": "geokube Contributors",
"email": "geokube@googlegroups.com",
"name": "geolake Contributors",
"email": "geolake@googlegroups.com",
},
license_info={
"name": "Apache 2.0",
Expand Down Expand Up @@ -118,9 +118,9 @@ def map_to_geoquery(

# ======== Endpoints definitions ========= #
@app.get("/", tags=[tags.BASIC])
async def dds_info():
"""Return current version of the DDS API"""
return f"DDS API {__version__}"
async def geolake_info():
"""Return current version of the geolake API"""
return f"geolake API {__version__}"


@app.get("/datasets", tags=[tags.DATASET])
Expand Down
2 changes: 2 additions & 0 deletions catalog/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# geolake-sample-catalog
geolake Catalog Sample
40 changes: 40 additions & 0 deletions catalog/RS_indices.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Dataset-level metadata for the Remote Sensing Indices dataset.
metadata:
  description: >-
    Remote Sensing Indices derived from SENTINEL S2A data
  contact:
    name: Data Deliver System Support Team
    # NOTE(review): the address is a redaction placeholder in this view.
    # It is quoted so it loads as a string instead of a one-item flow
    # sequence; restore the real contact address before use.
    email: '[email protected]'
    webpage: https://www.cmcc.it/research-organization/research-divisions/advanced-scientific-computing-division#1553329820238-2055494b-9aa6
  label: Remote Sensing Indices from Sentinel S2A
  image: null
  doi: null
  update_frequency: null
  license: null
  # NOTE(review): an unquoted ISO date is loaded as a date object by
  # YAML 1.1 loaders; quote it if the consumer expects a string.
  publication_date: 2023-11-22
  related_data: null

# One entry per product of the dataset.
sources:
  10m:
    description: Remote Sensing Indices at 10m
    metadata:
      role: public
      filters:
        - name: pasture
          # NOTE(review): plain T loads as the string "T", not a boolean;
          # confirm the consumer expects that before changing it.
          user_defined: T
          label: Pasture
    driver: geokube_netcdf
    args:
      # 'path' is the glob of files to open; 'pattern' names the directory
      # level that supplies the {pasture} filter value.
      path: '{{ CATALOG_DIR }}/datasets/RS_indices/*/10m/*.nc'
      pattern: '{{ CATALOG_DIR }}/datasets/RS_indices/{pasture}/10m/{}.nc'
      field_id: '{standard_name}'
      mapping:
        NDVI: {'name': 'NDVI', 'description': 'Normalized Difference Vegetation Index'}
        NDWI: {'name': 'NDWI', 'description': 'Normalized Difference Water Index'}
        GLI: {'name': 'GLI', 'description': 'Green Leaf Index'}
        GCI: {'name': 'GCI', 'description': 'Green Chlorophyll Index'}
        RGR: {'name': 'RGR', 'description': 'Red-Green Ratio'}
      metadata_caching: false
      metadata_cache_path: '{{ CACHE_DIR }}/s2-indices-10m.cache'
      xarray_kwargs:
        parallel: true
        decode_coords: 'all'
22 changes: 22 additions & 0 deletions catalog/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import argparse
import intake

# Command-line interface: the only option is the directory that is passed
# to the catalog as its CACHE_DIR parameter.
parser = argparse.ArgumentParser(
    prog="Cache generator",
    description="The script generating cache for the catalog",
)
parser.add_argument(
    "--cachedir",
    type=str,
    help="Directory where the cache should be saved. Default: .cache",
    default=".cache",
)

if __name__ == "__main__":
    args = parser.parse_args()
    # Bind CACHE_DIR once, before iterating: the call does not depend on the
    # loop variables, so re-binding the catalog for every product was
    # redundant work.
    catalog = intake.open_catalog("catalog.yaml")(CACHE_DIR=args.cachedir)
    for ds in list(catalog):
        for p in list(catalog[ds]):
            print(f"dataset: {ds} product: {p}:")
            # The returned object is not needed; read() is called for its
            # side effect only.
            # NOTE(review): presumably this is what writes the metadata
            # cache for products with metadata_caching enabled - confirm.
            catalog[ds][p].read()
23 changes: 23 additions & 0 deletions catalog/catalog.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Root catalog: declares the shared CACHE_DIR parameter and points to one
# nested YAML catalog per dataset.
metadata:
  # NOTE(review): unquoted 0.1 loads as a float; confirm the value and type
  # the catalog loader expects here.
  version: 0.1
  parameters:
    CACHE_DIR:
      type: str
      description: folder to store metadata cache files
      default: .cache

sources:
  era5-downscaled:
    driver: yaml_file_cat
    args:
      path: '{{ CATALOG_DIR }}/era5_downscaled.yaml'

  thi:
    driver: yaml_file_cat
    args:
      path: '{{ CATALOG_DIR }}/thi.yaml'

  rs-indices:
    driver: yaml_file_cat
    args:
      path: '{{ CATALOG_DIR }}/RS_indices.yaml'
Binary file not shown.
Binary file added catalog/datasets/THI/20240101.nc
Binary file not shown.
Binary file added catalog/datasets/era5_downscaled.nc
Binary file not shown.
12 changes: 12 additions & 0 deletions catalog/era5_downscaled.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Catalog for the ERA5 downscaled dataset: a single 'hourly' product read
# from one NetCDF file, with metadata caching enabled.
metadata:
  description: >-
    This dataset is related to ERA5 downscaled over Italy at 2km.
sources:
  hourly:
    description: ERA5 downscaled at 2km over italy hourly.
    driver: geokube_netcdf
    args:
      path: '{{ CATALOG_DIR }}/datasets/era5_downscaled.nc'
      metadata_caching: true
      # CACHE_DIR is the parameter declared in the root catalog.yaml.
      metadata_cache_path: '{{ CACHE_DIR }}/era5_downscaled.cache'
37 changes: 37 additions & 0 deletions catalog/thi.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Dataset-level metadata for the Thermohygrometric Indices dataset.
metadata:
  description: >-
    Thermohygrometric Indices derived from MISTRAL COSMO-2I data
  contact:
    name: Data Deliver System Support Team
    # NOTE(review): the address is a redaction placeholder in this view.
    # It is quoted so it loads as a string instead of a one-item flow
    # sequence; restore the real contact address before use.
    email: '[email protected]'
    webpage: https://www.cmcc.it/research-organization/research-divisions/advanced-scientific-computing-division#1553329820238-2055494b-9aa6
  label: Thermohygrometric Indices over Italy
  image: null
  doi: null
  update_frequency: null
  license: null
  # NOTE(review): an unquoted ISO date is loaded as a date object by
  # YAML 1.1 loaders; quote it if the consumer expects a string.
  publication_date: 2023-06-19
  related_data: null

# One entry per product of the dataset.
sources:
  hourly:
    description: Hourly Thermohygrometric Indices
    metadata:
      role: public
      filters:
        - name: date
          # NOTE(review): plain T loads as the string "T", not a boolean;
          # confirm the consumer expects that before changing it.
          user_defined: T
          label: Date
    driver: geokube_netcdf
    args:
      # 'path' is the glob of files to open; 'pattern' names the file-name
      # part that supplies the {date} filter value.
      path: '{{ CATALOG_DIR }}/datasets/THI/*.nc'
      pattern: '{{ CATALOG_DIR }}/datasets/THI/{date}.nc'
      field_id: '{standard_name}'
      mapping:
        THI_ext: {'name': 'external_thermohygrometric_index', 'description': 'External Thermohygrometric Index'}
        THI_int: {'name': 'internal_thermohygrometric_index', 'description': 'Internal Thermohygrometric Index'}
      metadata_caching: false
      metadata_cache_path: '{{ CACHE_DIR }}/thi-hourly.cache'
      xarray_kwargs:
        parallel: true
        decode_coords: 'all'
2 changes: 1 addition & 1 deletion datastore/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ ARG TAG=latest
FROM $REGISTRY/geolake-drivers:$TAG

COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt
COPY ./datastore /app/datastore
COPY ./workflow /app/workflow
COPY ./dbmanager /app/dbmanager
Expand Down
1 change: 1 addition & 0 deletions drivers/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ ARG TAG=latest
FROM $REGISTRY/geokube:$TAG

COPY dist/intake_geokube-0.1a0-py3-none-any.whl /

RUN pip3.10 install /intake_geokube-0.1a0-py3-none-any.whl
RUN rm /intake_geokube-0.1a0-py3-none-any.whl

0 comments on commit 1084516

Please sign in to comment.