WorldCereal · kvantricht · Oct 17, 2024 · Oct 16, 2024 · Oct 16, 2024 · Oct 16, 2024
diff --git a/notebooks/utils.py b/notebooks/utils.py
@@ -1,6 +1,7 @@
 import ast
 import copy
 import logging
+import pickle
 import random
 from calendar import monthrange
 from datetime import datetime, timedelta
@@ -849,6 +850,39 @@ def prepare_visualization(results):
     return final_paths
 
 
+def _results_to_pickle(results, output_dir):
+    """Pickle the results of an openeo inference run to ``results.pkl``.
+
+    Parameters
+    ----------
+    results : WorldCereal InferenceResults object
+        Results object to serialize with ``pickle.dump``.
+    output_dir : Path
+        Destination directory; created (with parents) if it is missing.
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+    with (output_dir / "results.pkl").open("wb") as pickle_file:
+        pickle.dump(results, pickle_file)
+
+
+def _results_from_pickle(output_dir):
+    """Restore a pickled results object from ``results.pkl``.
+
+    Parameters
+    ----------
+    output_dir : Path
+        Directory holding the ``results.pkl`` file to read.
+
+    Returns
+    -------
+    WorldCereal InferenceResults
+        Object rebuilt by ``pickle.load``; only load files you trust.
+    """
+    pickle_path = output_dir / "results.pkl"
+    with pickle_path.open("rb") as stream:
+        return pickle.load(stream)
+
+
 ############# PRODUCT VISUALIZATION #############
 
 

diff --git a/notebooks/worldcereal_v1_demo_custom_cropland.ipynb b/notebooks/worldcereal_v1_demo_custom_cropland.ipynb
@@ -38,7 +38,8 @@
    "source": [
     "### Before you start\n",
     "\n",
-    "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo."
+    "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the [Copernicus Data Space Ecosystem](https://dataspace.copernicus.eu/).\n",
+    "This is free of charge and will grant you a number of free openEO processing credits to continue this demo."
    ]
   },
   {
@@ -308,6 +309,7 @@
    "outputs": [],
    "source": [
     "from worldcereal.job import generate_map, CropLandParameters, PostprocessParameters\n",
+    "from utils import _results_to_pickle\n",
     "\n",
     "# Initializes default parameters\n",
     "parameters = CropLandParameters()\n",
@@ -326,7 +328,29 @@
     "    output_dir=output_dir,\n",
     "    cropland_parameters=parameters,\n",
     "    postprocess_parameters=PostprocessParameters(),\n",
-    ")"
+    ")\n",
+    "# Write results to pickle file\n",
+    "_results_to_pickle(job_results, output_dir)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The results contain the openeo job id...\n",
+    "print(f\"Job id: {job_results.job_id}\")\n",
+    "print(f\"Location of metadata: {job_results.metadata}\")\n",
+    "# ... a list of products that were downloaded...\n",
+    "print(f\"Products: {job_results.products.keys()}\")\n",
+    "# ... for each product:\n",
+    "print('-- For each product --')\n",
+    "print(f\"Type: {job_results.products['cropland']['type']}\")\n",
+    "print(f\"Temporal extent: {job_results.products['cropland']['temporal_extent']}\")\n",
+    "print(f\"Look-up table: {job_results.products['cropland']['lut']}\")\n",
+    "print(f\"URL: {job_results.products['cropland']['url']}\")\n",
+    "print(f\"Local path: {job_results.products['cropland']['path']}\")"
    ]
   },
   {

diff --git a/notebooks/worldcereal_v1_demo_custom_cropland_extended.ipynb b/notebooks/worldcereal_v1_demo_custom_cropland_extended.ipynb
@@ -38,7 +38,8 @@
    "source": [
     "### Before you start\n",
     "\n",
-    "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo."
+    "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the [Copernicus Data Space Ecosystem](https://dataspace.copernicus.eu/).\n",
+    "This is free of charge and will grant you a number of free openEO processing credits to continue this demo."
    ]
   },
   {
@@ -330,6 +331,7 @@
    "outputs": [],
    "source": [
     "from worldcereal.job import generate_map, CropLandParameters\n",
+    "from utils import _results_to_pickle\n",
     "\n",
     "# Initializes default parameters\n",
     "parameters = CropLandParameters()\n",
@@ -348,7 +350,29 @@
     "    output_dir=output_dir,\n",
     "    cropland_parameters=parameters,\n",
     "    postprocess_parameters=postprocess_parameters,\n",
-    ")"
+    ")\n",
+    "# Write results to pickle file\n",
+    "_results_to_pickle(job_results, output_dir)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The results contain the openeo job id...\n",
+    "print(f\"Job id: {job_results.job_id}\")\n",
+    "print(f\"Location of metadata: {job_results.metadata}\")\n",
+    "# ... a list of products that were downloaded...\n",
+    "print(f\"Products: {job_results.products.keys()}\")\n",
+    "# ... for each product:\n",
+    "print('-- For each product --')\n",
+    "print(f\"Type: {job_results.products['cropland']['type']}\")\n",
+    "print(f\"Temporal extent: {job_results.products['cropland']['temporal_extent']}\")\n",
+    "print(f\"Look-up table: {job_results.products['cropland']['lut']}\")\n",
+    "print(f\"URL: {job_results.products['cropland']['url']}\")\n",
+    "print(f\"Local path: {job_results.products['cropland']['path']}\")"
    ]
   },
   {

diff --git a/notebooks/worldcereal_v1_demo_custom_croptype.ipynb b/notebooks/worldcereal_v1_demo_custom_croptype.ipynb
@@ -24,12 +24,13 @@
     "  \n",
     "- [Before you start](###-Before-you-start)\n",
     "- [1. Define your region of interest](#1.-Define-your-region-of-interest)\n",
-    "- [2. Extract public reference data](#2.-Extract-public-reference-data)\n",
-    "- [3. Select your desired crop types](#3.-Select-your-desired-crop-types)\n",
-    "- [4. Prepare training features](#4.-Prepare-training-features)\n",
-    "- [5. Train custom classification model](#5.-Train-custom-classification-model)\n",
-    "- [6. Deploy your custom model](#6.-Deploy-your-custom-model)\n",
-    "- [7. Generate a map](#7.-Generate-a-map)\n"
+    "- [2. Define your temporal extent](#2.-Define-your-temporal-extent)\n",
+    "- [3. Extract public reference data](#3.-Extract-public-reference-data)\n",
+    "- [4. Select your desired crop types](#4.-Select-your-desired-crop-types)\n",
+    "- [5. Prepare training features](#5.-Prepare-training-features)\n",
+    "- [6. Train custom classification model](#6.-Train-custom-classification-model)\n",
+    "- [7. Deploy your custom model](#7.-Deploy-your-custom-model)\n",
+    "- [8. Generate a map](#8.-Generate-a-map)\n"
    ]
   },
   {
@@ -38,7 +39,8 @@
    "source": [
     "### Before you start\n",
     "\n",
-    "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo."
+    "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the [Copernicus Data Space Ecosystem](https://dataspace.copernicus.eu/).\n",
+    "This is free of charge and will grant you a number of free openEO processing credits to continue this demo."
    ]
   },
   {
@@ -80,7 +82,48 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 2. Extract public reference data\n",
+    "### 2. Define your temporal extent\n",
+    "\n",
+    "To determine your season of interest, you can consult the WorldCereal crop calendars (by executing the next cell), or check out the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from utils import retrieve_worldcereal_seasons\n",
+    "\n",
+    "spatial_extent = map.get_processing_extent()\n",
+    "seasons = retrieve_worldcereal_seasons(spatial_extent)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now use the slider to select your processing period. Note that the length of the period is always fixed to a year.\n",
+    "Just make sure your season of interest is fully captured within the period you select."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from utils import date_slider\n",
+    "\n",
+    "slider = date_slider()\n",
+    "slider.show_slider()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Extract public reference data\n",
     "\n",
     "Here we query existing reference data that have already been processed by WorldCereal and are ready to use.\n",
     "To increase the number of hits, we expand the search area by 250 km in all directions.\n",
@@ -96,19 +139,22 @@
    "source": [
     "from worldcereal.utils.refdata import query_public_extractions\n",
     "\n",
-    "# retrieve the polygon you just drew\n",
+    "# Retrieve the polygon you just drew\n",
     "polygon = map.get_polygon_latlon()\n",
     "\n",
+    "# Retrieve the date range you just selected\n",
+    "processing_period = slider.get_processing_period()\n",
+    "\n",
     "# Query our public database of training data\n",
-    "public_df = query_public_extractions(polygon)\n",
+    "public_df = query_public_extractions(polygon, processing_period=processing_period)\n",
     "public_df.year.value_counts()"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 3. Select your desired crop types\n",
+    "### 4. Select your desired crop types\n",
     "\n",
     "Run the next cell and select all crop types you wish to include in your model. All the crops that are not selected will be grouped under the \"other\" category."
    ]
@@ -149,9 +195,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 4. Prepare training features\n",
+    "### 5. Prepare training features\n",
     "\n",
-    "Using a deep learning framework (Presto), we derive classification features for each sample. The resulting `encodings` and `targets` will be used for model training."
+    "Using a deep learning framework (Presto), we derive classification features for each sample in the dataframe resulting from your query. Presto was pre-trained on millions of unlabeled samples around the world and fine-tuned on global labeled land cover and crop type data from the WorldCereal reference database. The resulting *embeddings* and the *target* labels to train on will be returned as a training dataframe, which we will use for downstream model training."
    ]
   },
   {
@@ -169,8 +215,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 5. Train custom classification model\n",
-    "We train a catboost model for the selected crop types. Class weights are automatically determined to balance the individual classes."
+    "### 6. Train custom classification model\n",
+    "We train a CatBoost model for the selected crop types. By default, no class weighting is done. You can enable it by setting `balance_classes=True`; however, depending on the class distribution, this may lead to undesired results. There is no golden rule here."
    ]
   },
   {
@@ -181,7 +227,7 @@
    "source": [
     "from utils import train_classifier\n",
     "\n",
-    "custom_model, report, confusion_matrix = train_classifier(training_dataframe)"
+    "custom_model, report, confusion_matrix = train_classifier(training_dataframe, balance_classes=False)"
    ]
   },
   {
@@ -205,7 +251,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 6. Deploy your custom model\n",
+    "### 7. Deploy your custom model\n",
     "\n",
     "Once trained, we have to upload our model to the cloud so it can be used by OpenEO for inference. Note that these models are only kept in cloud storage for a limited amount of time.\n"
    ]
@@ -229,48 +275,10 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 7. Generate a map\n",
+    "### 8. Generate a map\n",
     "\n",
     "Using our custom model, we generate a map for our region and season of interest.\n",
-    "To determine your season of interest, you can consult the WorldCereal crop calendars (by executing the next cell), or check out the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from utils import retrieve_worldcereal_seasons\n",
-    "\n",
-    "spatial_extent = map.get_processing_extent()\n",
-    "seasons = retrieve_worldcereal_seasons(spatial_extent)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Now use the slider to select your processing period. Note that the length of the period is always fixed to a year.\n",
-    "Just make sure your season of interest is fully captured within the period you select."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from utils import date_slider\n",
     "\n",
-    "slider = date_slider()\n",
-    "slider.show_slider()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
     "Set some other customization options:"
    ]
   },
@@ -306,6 +314,7 @@
    "outputs": [],
    "source": [
     "from worldcereal.job import PostprocessParameters, WorldCerealProductType, generate_map, CropTypeParameters\n",
+    "from utils import _results_to_pickle\n",
     "\n",
     "# Initializes default parameters\n",
     "parameters = CropTypeParameters()\n",
@@ -326,7 +335,29 @@
     "    product_type=WorldCerealProductType.CROPTYPE,\n",
     "    croptype_parameters=parameters,\n",
     "    postprocess_parameters=PostprocessParameters(),\n",
-    ")"
+    ")\n",
+    "# Write results to pickle file\n",
+    "_results_to_pickle(job_results, output_dir)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The results contain the openeo job id...\n",
+    "print(f\"Job id: {job_results.job_id}\")\n",
+    "print(f\"Location of metadata: {job_results.metadata}\")\n",
+    "# ... a list of products that were downloaded...\n",
+    "print(f\"Products: {job_results.products.keys()}\")\n",
+    "# ... for each product:\n",
+    "print('-- For each product --')\n",
+    "print(f\"Type: {job_results.products['croptype']['type']}\")\n",
+    "print(f\"Temporal extent: {job_results.products['croptype']['temporal_extent']}\")\n",
+    "print(f\"Look-up table: {job_results.products['croptype']['lut']}\")\n",
+    "print(f\"URL: {job_results.products['croptype']['url']}\")\n",
+    "print(f\"Local path: {job_results.products['croptype']['path']}\")"
    ]
   },
   {