Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update notebooks #203

Merged
merged 11 commits into from
Oct 17, 2024
34 changes: 34 additions & 0 deletions notebooks/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import ast
import copy
import logging
import pickle
import random
from calendar import monthrange
from datetime import datetime, timedelta
Expand Down Expand Up @@ -849,6 +850,39 @@ def prepare_visualization(results):
return final_paths


def _results_to_pickle(results, output_dir):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should not be a private method, and it should probably be named differently, e.g. save_job_results. Can you also add the required type hints for the arguments and the return value of the method (-> None)?

I also don't get why you specify an output_dir that then still needs to be created. Why not pass the path to the exact file, constructed the same way as the job result name? I wouldn't silently create directories here.

"""Save the results of an openeo inference run to a pickle file.

Parameters
----------
results : WorldCereal InferenceResults object
Results object containing the results to save.
output_dir : Path
Directory where the results will be saved.
"""
output_dir.mkdir(parents=True, exist_ok=True)
with open(output_dir / "results.pkl", "wb") as f:
pickle.dump(results, f)


def _results_from_pickle(output_dir):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename this to load_job_results; the same comments as above apply.

"""Load the results from a pickle file.

Parameters
----------
output_dir : Path
Path to the output directory containing the results.

Returns
-------
WorldCereal InferenceResults
Results object containing the loaded results from an openeo inference run.
"""
with open(output_dir / "results.pkl", "rb") as f:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you build in resilience for the case where the file is not there? I would also start from a plain file path, not a directory.

results = pickle.load(f)
return results


############# PRODUCT VISUALIZATION #############


Expand Down
28 changes: 26 additions & 2 deletions notebooks/worldcereal_v1_demo_custom_cropland.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
"source": [
"### Before you start\n",
"\n",
"In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo."
"In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the [Copernicus Data Space Ecosystem](https://dataspace.copernicus.eu/).\n",
"This is free of charge and will grant you a number of free openEO processing credits to continue this demo."
]
},
{
Expand Down Expand Up @@ -308,6 +309,7 @@
"outputs": [],
"source": [
"from worldcereal.job import generate_map, CropLandParameters, PostprocessParameters\n",
"from utils import _results_to_pickle\n",
"\n",
"# Initializes default parameters\n",
"parameters = CropLandParameters()\n",
Expand All @@ -326,7 +328,29 @@
" output_dir=output_dir,\n",
" cropland_parameters=parameters,\n",
" postprocess_parameters=PostprocessParameters(),\n",
")"
")\n",
"# Write results to pickle file\n",
"_results_to_pickle(job_results, output_dir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The results contain the openeo job id...\n",
"print(f\"Job id: {job_results.job_id}\")\n",
"print(f\"Location of metadata: {job_results.metadata}\")\n",
"#... a list of products that were downloaded...\n",
"print(f\"Products: {job_results.products.keys()}\")\n",
"# ... for each product:\n",
"print('-- For each product --')\n",
"print(f\"Type: {job_results.products['cropland']['type']}\")\n",
"print(f\"Temporal extent: {job_results.products['cropland']['temporal_extent']}\")\n",
"print(f\"Look-up table: {job_results.products['cropland']['lut']}\")\n",
"print(f\"URL: {job_results.products['cropland']['url']}\")\n",
"print(f\"Local path: {job_results.products['cropland']['path']}\")"
]
},
{
Expand Down
28 changes: 26 additions & 2 deletions notebooks/worldcereal_v1_demo_custom_cropland_extended.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
"source": [
"### Before you start\n",
"\n",
"In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo."
"In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the [Copernicus Data Space Ecosystem](https://dataspace.copernicus.eu/).\n",
"This is free of charge and will grant you a number of free openEO processing credits to continue this demo."
]
},
{
Expand Down Expand Up @@ -330,6 +331,7 @@
"outputs": [],
"source": [
"from worldcereal.job import generate_map, CropLandParameters\n",
"from utils import _results_to_pickle\n",
"\n",
"# Initializes default parameters\n",
"parameters = CropLandParameters()\n",
Expand All @@ -348,7 +350,29 @@
" output_dir=output_dir,\n",
" cropland_parameters=parameters,\n",
" postprocess_parameters=postprocess_parameters,\n",
")"
")\n",
"# Write results to pickle file\n",
"_results_to_pickle(job_results, output_dir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The results contain the openeo job id...\n",
"print(f\"Job id: {job_results.job_id}\")\n",
"print(f\"Location of metadata: {job_results.metadata}\")\n",
"#... a list of products that were downloaded...\n",
"print(f\"Products: {job_results.products.keys()}\")\n",
"# ... for each product:\n",
"print('-- For each product --')\n",
"print(f\"Type: {job_results.products['cropland']['type']}\")\n",
"print(f\"Temporal extent: {job_results.products['cropland']['temporal_extent']}\")\n",
"print(f\"Look-up table: {job_results.products['cropland']['lut']}\")\n",
"print(f\"URL: {job_results.products['cropland']['url']}\")\n",
"print(f\"Local path: {job_results.products['cropland']['path']}\")"
]
},
{
Expand Down
145 changes: 88 additions & 57 deletions notebooks/worldcereal_v1_demo_custom_croptype.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,13 @@
" \n",
"- [Before you start](#Before-you-start)\n",
"- [1. Define your region of interest](#1.-Define-your-region-of-interest)\n",
"- [2. Extract public reference data](#2.-Extract-public-reference-data)\n",
"- [3. Select your desired crop types](#3.-Select-your-desired-crop-types)\n",
"- [4. Prepare training features](#4.-Prepare-training-features)\n",
"- [5. Train custom classification model](#5.-Train-custom-classification-model)\n",
"- [6. Deploy your custom model](#6.-Deploy-your-custom-model)\n",
"- [7. Generate a map](#7.-Generate-a-map)\n"
"- [2. Define your temporal extent](#2.-Define-your-temporal-extent)\n",
"- [3. Extract public reference data](#3.-Extract-public-reference-data)\n",
"- [4. Select your desired crop types](#4.-Select-your-desired-crop-types)\n",
"- [5. Prepare training features](#5.-Prepare-training-features)\n",
"- [6. Train custom classification model](#6.-Train-custom-classification-model)\n",
"- [7. Deploy your custom model](#7.-Deploy-your-custom-model)\n",
"- [8. Generate a map](#8.-Generate-a-map)\n"
]
},
{
Expand All @@ -38,7 +39,8 @@
"source": [
"### Before you start\n",
"\n",
"In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo."
"In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the [Copernicus Data Space Ecosystem](https://dataspace.copernicus.eu/).\n",
"This is free of charge and will grant you a number of free openEO processing credits to continue this demo."
]
},
{
Expand Down Expand Up @@ -80,7 +82,48 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2. Extract public reference data\n",
"### 2. Define your temporal extent\n",
"\n",
"To determine your season of interest, you can consult the WorldCereal crop calendars (by executing the next cell), or check out the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from utils import retrieve_worldcereal_seasons\n",
"\n",
"spatial_extent = map.get_processing_extent()\n",
"seasons = retrieve_worldcereal_seasons(spatial_extent)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now use the slider to select your processing period. Note that the length of the period is always fixed to a year.\n",
"Just make sure your season of interest is fully captured within the period you select."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from utils import date_slider\n",
"\n",
"slider = date_slider()\n",
"slider.show_slider()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3. Extract public reference data\n",
"\n",
"Here we query existing reference data that have already been processed by WorldCereal and are ready to use.\n",
"To increase the number of hits, we expand the search area by 250 km in all directions.\n",
Expand All @@ -96,19 +139,22 @@
"source": [
"from worldcereal.utils.refdata import query_public_extractions\n",
"\n",
"# retrieve the polygon you just drew\n",
"# Retrieve the polygon you just drew\n",
"polygon = map.get_polygon_latlon()\n",
"\n",
"# Retrieve the date range you just selected\n",
"processing_period = slider.get_processing_period()\n",
"\n",
"# Query our public database of training data\n",
"public_df = query_public_extractions(polygon)\n",
"public_df = query_public_extractions(polygon, processing_period=processing_period)\n",
"public_df.year.value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3. Select your desired crop types\n",
"### 4. Select your desired crop types\n",
"\n",
"Run the next cell and select all crop types you wish to include in your model. All the crops that are not selected will be grouped under the \"other\" category."
]
Expand Down Expand Up @@ -149,9 +195,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### 4. Prepare training features\n",
"### 5. Prepare training features\n",
"\n",
"Using a deep learning framework (Presto), we derive classification features for each sample. The resulting `encodings` and `targets` will be used for model training."
"Using a deep learning framework (Presto), we derive classification features for each sample in the dataframe resulting from your query. Presto was pre-trained on millions of unlabeled samples around the world and fine-tuned on global labeled land cover and crop type data from the WorldCereal reference database. The resulting *embeddings* and the *target* labels to train on will be returned as a training dataframe, which we will use for downstream model training."
]
},
{
Expand All @@ -169,8 +215,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### 5. Train custom classification model\n",
"We train a catboost model for the selected crop types. Class weights are automatically determined to balance the individual classes."
"### 6. Train custom classification model\n",
"We train a catboost model for the selected crop types. By default, no class weighting is applied. You can enable it by setting `balance_classes=True`; however, depending on the class distribution, this may lead to undesired results. There is no golden rule here."
]
},
{
Expand All @@ -181,7 +227,7 @@
"source": [
"from utils import train_classifier\n",
"\n",
"custom_model, report, confusion_matrix = train_classifier(training_dataframe)"
"custom_model, report, confusion_matrix = train_classifier(training_dataframe, balance_classes=False)"
]
},
{
Expand All @@ -205,7 +251,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6. Deploy your custom model\n",
"### 7. Deploy your custom model\n",
"\n",
"Once the model is trained, we have to upload it to the cloud so it can be used by OpenEO for inference. Note that these models are only kept in cloud storage for a limited amount of time.\n"
]
Expand All @@ -229,48 +275,10 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### 7. Generate a map\n",
"### 8. Generate a map\n",
"\n",
"Using our custom model, we generate a map for our region and season of interest.\n",
"To determine your season of interest, you can consult the WorldCereal crop calendars (by executing the next cell), or check out the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from utils import retrieve_worldcereal_seasons\n",
"\n",
"spatial_extent = map.get_processing_extent()\n",
"seasons = retrieve_worldcereal_seasons(spatial_extent)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now use the slider to select your processing period. Note that the length of the period is always fixed to a year.\n",
"Just make sure your season of interest is fully captured within the period you select."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from utils import date_slider\n",
"\n",
"slider = date_slider()\n",
"slider.show_slider()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Set some other customization options:"
]
},
Expand Down Expand Up @@ -306,6 +314,7 @@
"outputs": [],
"source": [
"from worldcereal.job import PostprocessParameters, WorldCerealProductType, generate_map, CropTypeParameters\n",
"from utils import _results_to_pickle\n",
"\n",
"# Initializes default parameters\n",
"parameters = CropTypeParameters()\n",
Expand All @@ -326,7 +335,29 @@
" product_type=WorldCerealProductType.CROPTYPE,\n",
" croptype_parameters=parameters,\n",
" postprocess_parameters=PostprocessParameters(),\n",
")"
")\n",
"# Write results to pickle file\n",
"_results_to_pickle(job_results, output_dir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The results contain the openeo job id...\n",
"print(f\"Job id: {job_results.job_id}\")\n",
"print(f\"Location of metadata: {job_results.metadata}\")\n",
"#... a list of products that were downloaded...\n",
"print(f\"Products: {job_results.products.keys()}\")\n",
"# ... for each product:\n",
"print('-- For each product --')\n",
"print(f\"Type: {job_results.products['croptype']['type']}\")\n",
"print(f\"Temporal extent: {job_results.products['croptype']['temporal_extent']}\")\n",
"print(f\"Look-up table: {job_results.products['croptype']['lut']}\")\n",
"print(f\"URL: {job_results.products['croptype']['url']}\")\n",
"print(f\"Local path: {job_results.products['croptype']['path']}\")"
]
},
{
Expand Down
Loading
Loading