diff --git a/notebooks/worldcereal_v1_demo_custom_cropland.ipynb b/notebooks/worldcereal_v1_demo_custom_cropland.ipynb index c5732469..f8ad2fff 100644 --- a/notebooks/worldcereal_v1_demo_custom_cropland.ipynb +++ b/notebooks/worldcereal_v1_demo_custom_cropland.ipynb @@ -15,7 +15,7 @@ "\n", "This notebook guides you through the process of training a custom cropland classification model using publicly available and harmonized in-situ reference data for your area of interest. Afterwards, the model can be applied to your area and season of interest to generate a cropland extent map.\n", "\n", - "Please note that for the purpose of this demo, the processing area is currently limited to 250 km² per model run." + "Please note that for the purpose of this demo, the processing area is currently limited to 250 km² per model run. On average, one such run consumes xxx credits on the Copernicus Data Space Ecosystem." ] }, { @@ -53,7 +53,12 @@ "Click the Rectangle button on the left hand side of the map to start drawing your region of interest.\n", "\n", "Currently, there is a maximum size of 250 km² for your area within this demo. Upon exceeding this limit, an error will be shown.\n", - "The widget will automatically store the coordinates of the last rectangle you drew on the map.\n" + "You can bypass this limit by altering the code below to:
\n", + "*map = ui_map(area_limit=750)*
\n", + "\n", + "Processing areas beyond 750 km² are currently not supported.\n", + "\n", + "The widget will automatically store the coordinates of the last rectangle you drew on the map." ] }, { @@ -212,8 +217,9 @@ "source": [ "from worldcereal.utils.upload import deploy_model\n", "from openeo_gfmap.backend import cdse_connection\n", + "from utils import get_input\n", "\n", - "modelname = \n", + "modelname = get_input(\"model\")\n", "model_url = deploy_model(cdse_connection(), custom_model, pattern=modelname)" ] }, @@ -276,25 +282,14 @@ "import os\n", "from pathlib import Path\n", "\n", - "# Choose whether or not you want to spatially clean the classification results (default = True)\n", + "# Choose whether or not you want to spatially clean the classification results\n", "postprocess_result = True\n", - "# Choose the postprocessing method you want to use [\"smooth_probabilities\", \"majority_vote\"] (default = \"smooth_probabilities\")\n", - "postprocess_method = \"majority_vote\"\n", - "# Additiona parameters for the majority vote method:\n", - "kernel_size = 3 # default = 5\n", - "# Do you want to save the intermediate results (before applying the postprocessing)\n", - "save_intermediate = True #default is False\n", - "# Do you want to save all class probabilities in the final product? 
(default is False)\n", - "keep_class_probs = True\n", - "\n", - "postprocess_parameters = PostprocessParameters(enable=postprocess_result,\n", - " method=postprocess_method,\n", - " kernel_size=kernel_size,\n", - " save_intermediate=save_intermediate,\n", - " keep_class_probs=keep_class_probs)\n", + "\n", + "postprocess_parameters = PostprocessParameters(enable=postprocess_result)\n", "\n", "# Specify the local directory where the resulting maps should be downloaded to.\n", - "output_dir = Path(os.getcwd()) / 'CROPLAND_custom_test'\n", + "run = get_input(\"model run\")\n", + "output_dir = Path(os.getcwd()) / f'CROPLAND_{modelname}_{run}'\n", "print(f\"Output directory: {output_dir}\")" ] }, @@ -366,31 +361,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The resulting raster files can be visualized in QGIS.\n", - "\n", - "In case you are running this script on your local environment, you can alternatively use the following cells to visualize the outputs directly in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import visualize_products\n", - "\n", - "visualize_products(rasters, port=8887)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import show_color_legend\n", - "\n", - "show_color_legend(rasters, \"cropland\")" + "The resulting raster files can be visualized in QGIS." 
] } ], diff --git a/notebooks/worldcereal_v1_demo_custom_cropland_extended.ipynb b/notebooks/worldcereal_v1_demo_custom_cropland_extended.ipynb new file mode 100644 index 00000000..65a903ff --- /dev/null +++ b/notebooks/worldcereal_v1_demo_custom_cropland_extended.ipynb @@ -0,0 +1,429 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](./resources/System_v1_custom_cropland.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Introduction\n", + "\n", + "This notebook guides you through the process of training a custom cropland classification model using publicly available and harmonized in-situ reference data for your area of interest. Afterwards, the model can be applied to your area and season of interest to generate a cropland extent map.\n", + "\n", + "Please note that for the purpose of this demo, the processing area is currently limited to 250 km² per model run. On average, one such run consumes xxx credits on the Copernicus Data Space Ecosystem." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Content\n", + " \n", + "- [Before you start](#Before-you-start)\n", + "- [1. Define your region of interest](#1.-Define-your-region-of-interest)\n", + "- [2. Extract public reference data](#2.-Extract-public-reference-data)\n", + "- [3. Create your custom cropland class](#3.-Create-your-custom-cropland-class)\n", + "- [4. Prepare training features](#4.-Prepare-training-features)\n", + "- [5. Train custom classification model](#5.-Train-custom-classification-model)\n", + "- [6. Deploy your custom model](#6.-Deploy-your-custom-model)\n", + "- [7. 
Generate a map](#7.-Generate-a-map)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Before you start\n", + "\n", + "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Define your region of interest\n", + "\n", + "When running the code snippet below, an interactive map will be visualized.\n", + "Click the Rectangle button on the left hand side of the map to start drawing your region of interest.\n", + "\n", + "Currently, there is a maximum size of 250 km² for your area within this demo. Upon exceeding this limit, an error will be shown.\n", + "You can bypass this limit by altering the code below to:
\n", + "*map = ui_map(area_limit=750)*
\n", + "\n", + "Processing areas beyond 750 km² are currently not supported.\n", + "\n", + "The widget will automatically store the coordinates of the last rectangle you drew on the map." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.utils.map import ui_map\n", + "\n", + "map = ui_map()\n", + "map.show_map()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Extract public reference data\n", + "\n", + "Here we query existing reference data that have already been processed by WorldCereal and are ready to use.\n", + "To increase the number of hits, we expand the search area by 250 km in all directions.\n", + "\n", + "We print the number of training samples retrieved per year." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.utils.refdata import query_public_extractions\n", + "\n", + "# retrieve the polygon you just drew\n", + "polygon = map.get_polygon_latlon()\n", + "\n", + "# Query our public database of training data\n", + "public_df = query_public_extractions(polygon, filter_cropland=False)\n", + "public_df.year.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Create your custom cropland class\n", + "\n", + "Run the next cell and select all land cover classes you would like to include in your \"cropland\" class. All classes that are not selected will be grouped under the \"other\" category. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# from utils import pick_croptypes\n", + "from utils import select_landcover\n", + "from IPython.display import display\n", + "\n", + "checkbox, checkbox_widgets = select_landcover(public_df)\n", + "display(checkbox)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Based on your selection, a custom target label is now generated for each sample. Verify that only land cover classes of your choice are appearing in the `downstream_class`, all others will fall under `other`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import get_custom_cropland_labels\n", + "\n", + "public_df = get_custom_cropland_labels(public_df, checkbox_widgets)\n", + "public_df[\"downstream_class\"].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. Prepare training features\n", + "\n", + "Using a deep learning framework (Presto), we derive classification features for each sample. The resulting `encodings` and `targets` will be used for model training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import prepare_training_dataframe\n", + "\n", + "training_dataframe = prepare_training_dataframe(public_df, task_type=\"cropland\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. Train custom classification model\n", + "We train a catboost model for the selected land cover classes. Class weights are automatically determined to balance the individual classes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import train_cropland_classifier\n", + "\n", + "custom_model, report, confusion_matrix = train_cropland_classifier(training_dataframe)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before training, the available training data has been automatically split into a calibration and validation part. By executing the next cell, you get an idea of how well the model performs on the independent validation set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Print the classification report\n", + "print(report)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6. Deploy your custom model\n", + "\n", + "Once trained, we have to upload our model to the cloud so it can be used by OpenEO for inference. Note that these models are only kept in cloud storage for a limited amount of time.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.utils.upload import deploy_model\n", + "from openeo_gfmap.backend import cdse_connection\n", + "from utils import get_input\n", + "\n", + "modelname = get_input(\"model\")\n", + "model_url = deploy_model(cdse_connection(), custom_model, pattern=modelname)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 7. Generate a map\n", + "\n", + "Using our custom model, we generate a map for our region and season of interest.\n", + "To determine your season of interest, you can consult the WorldCereal crop calendars (by executing the next cell), or check out the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import retrieve_worldcereal_seasons\n", + "\n", + "spatial_extent = map.get_processing_extent()\n", + "seasons = retrieve_worldcereal_seasons(spatial_extent)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now use the slider to select your processing period. Note that the length of the period is always fixed to a year.\n", + "Just make sure your season of interest is fully captured within the period you select." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import date_slider\n", + "\n", + "slider = date_slider()\n", + "slider.show_slider()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set some other customization options:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.job import PostprocessParameters\n", + "import os\n", + "from pathlib import Path\n", + "\n", + "# Choose whether or not you want to spatially clean the classification results\n", + "postprocess_result = True\n", + "# Choose the postprocessing method you want to use [\"smooth_probabilities\", \"majority_vote\"]\n", + "# (\"smooth_probabilities\" will do limited spatial cleaning,\n", + "# while \"majority_vote\" will do more aggressive spatial cleaning, depending on the value of kernel_size)\n", + "postprocess_method = \"majority_vote\"\n", + "# Additional parameter for the majority vote method \n", + "# (the higher the value, the more aggressive the spatial cleaning,\n", + "# should be an odd number, not larger than 25, default = 5)\n", + "kernel_size = 5\n", + "# Do you want to save the intermediate results (before applying the postprocessing)\n", + "save_intermediate = True\n", + "# Do you want to save all class probabilities in the final product?\n", + 
"keep_class_probs = True\n", + "\n", + "postprocess_parameters = PostprocessParameters(enable=postprocess_result,\n", + " method=postprocess_method,\n", + " kernel_size=kernel_size,\n", + " save_intermediate=save_intermediate,\n", + " keep_class_probs=keep_class_probs)\n", + "\n", + "# Specify the local directory where the resulting maps should be downloaded to.\n", + "run = get_input(\"model run\")\n", + "output_dir = Path(os.getcwd()) / f'CROPLAND_{modelname}_{run}'\n", + "print(f\"Output directory: {output_dir}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now have all information we need to generate our map!
\n", + "The next cell will submit a map inference job on CDSE through OpenEO.
\n", + "The first time you run this, you will be asked to authenticate with your CDSE account by clicking the link provided below the cell.
\n", + "Then sit back and wait until your map is ready..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.job import generate_map, CropLandParameters\n", + "\n", + "# Initializes default parameters\n", + "parameters = CropLandParameters()\n", + "\n", + "# Change the URL to your custom classification model\n", + "parameters.classifier_parameters.classifier_url = model_url\n", + "\n", + "# Get processing period and area\n", + "processing_period = slider.get_processing_period()\n", + "processing_extent = map.get_processing_extent()\n", + "\n", + "# Launch the job\n", + "job_results = generate_map(\n", + " processing_extent,\n", + " processing_period,\n", + " output_dir=output_dir,\n", + " cropland_parameters=parameters,\n", + " postprocess_parameters=postprocess_parameters,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The classification results will be automatically downloaded to your *output_dir* in .tif format.\n", + "By default, OpenEO stores the class labels, confidence score and class probabilities in one file.\n", + "\n", + "Using the function below, we split this information into separate .tif files, thereby adding metadata and a color map, to ease interpretation and visualization:\n", + "- \"xxx_classification_start-date_end-date.tif\" --> contains the classification labels. A class look-up table is included in the .tif metadata.\n", + "- \"xxx_confidence_start-date_end-date.tif\" --> contains the probability associated to the prediction [0 - 100]\n", + "\n", + "In case you chose to store the original per-class probabilities, these are NOT written to a separate file and need to be consulted in the original result downloaded from OpenEO." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import prepare_visualization\n", + "\n", + "rasters = prepare_visualization(job_results)\n", + "print(rasters)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The resulting raster files can be visualized in QGIS.\n", + "\n", + "In case you are running this script on your local environment, you can alternatively use the following cells to visualize the outputs directly in this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import visualize_products\n", + "\n", + "visualize_products(rasters, port=8887)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import show_color_legend\n", + "\n", + "show_color_legend(rasters, \"cropland\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "worldcereal", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/worldcereal_v1_demo_custom_croptype.ipynb b/notebooks/worldcereal_v1_demo_custom_croptype.ipynb index 48aee550..78e358c7 100644 --- a/notebooks/worldcereal_v1_demo_custom_croptype.ipynb +++ b/notebooks/worldcereal_v1_demo_custom_croptype.ipynb @@ -15,7 +15,7 @@ "\n", "This notebook guides you through the process of training a custom crop type classification model using publicly available and harmonized in-situ reference data for your area and crop types of interest. 
Afterwards, the model can be applied to your season of interest to generate a crop type map.\n", "\n", - "Please note that for the purpose of this demo, the processing area is currently limited to 250 km² per model run." + "Please note that for the purpose of this demo, the processing area is limited to 250 km² per model run. On average, one such run consumes xxx credits on the Copernicus Data Space Ecosystem." ] }, { @@ -53,6 +53,11 @@ "Click the Rectangle button on the left hand side of the map to start drawing your region of interest.\n", "\n", "Currently, there is a maximum size of 250 km² for your area within this demo. Upon exceeding this limit, an error will be shown.\n", + "You can bypass this limit by altering the code below to:
\n", + "*map = ui_map(area_limit=750)*
\n", + "\n", + "Processing areas beyond 750 km² are currently not supported.\n", + "\n", "The widget will automatically store the coordinates of the last rectangle you drew on the map." ] }, @@ -210,8 +215,9 @@ "source": [ "from worldcereal.utils.upload import deploy_model\n", "from openeo_gfmap.backend import cdse_connection\n", + "from utils import get_input\n", "\n", - "modelname = \n", + "modelname = get_input(\"model\")\n", "model_url = deploy_model(cdse_connection(), custom_model, pattern=modelname)" ] }, @@ -274,28 +280,14 @@ "import os\n", "from pathlib import Path\n", "\n", - "# Choose whether you want to store the cropland mask as separate output file (default = False)\n", - "save_mask = True\n", - "\n", - "# Choose whether or not you want to spatially clean the classification results (default = True)\n", + "# Choose whether or not you want to spatially clean the classification results\n", "postprocess_result = True\n", - "# Choose the postprocessing method you want to use [\"smooth_probabilities\", \"majority_vote\"] (default = \"smooth_probabilities\")\n", - "postprocess_method = \"majority_vote\"\n", - "# Additiona parameters for the majority vote method:\n", - "kernel_size = 5 # default = 5\n", - "# Do you want to save the intermediate results (before applying the postprocessing)\n", - "save_intermediate = True #default is False\n", - "# Do you want to save all class probabilities in the final product? 
(default is False)\n", - "keep_class_probs = True\n", - "\n", - "postprocess_parameters = PostprocessParameters(enable=postprocess_result,\n", - " method=postprocess_method,\n", - " kernel_size=kernel_size,\n", - " save_intermediate=save_intermediate,\n", - " keep_class_probs=keep_class_probs)\n", + "\n", + "postprocess_parameters = PostprocessParameters(enable=postprocess_result)\n", "\n", "# Specify the local directory where the resulting maps should be downloaded to.\n", - "output_dir = Path(os.getcwd()) / 'CROPTYPE_test'\n", + "run = get_input(\"model run\")\n", + "output_dir = Path(os.getcwd()) / f'CROPTYPE_{modelname}_{run}'\n", "print(f\"Output directory: {output_dir}\")" ] }, @@ -322,7 +314,7 @@ "\n", "# Change the URL to your custom classification model\n", "parameters.classifier_parameters.classifier_url = model_url\n", - "parameters.save_mask = save_mask\n", + "parameters.save_mask = True\n", "\n", "# Get processing period and area\n", "processing_period = slider.get_processing_period()\n", @@ -369,31 +361,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The resulting raster files can be visualized in QGIS.\n", - "\n", - "In case you are running this script on your local environment, you can alternatively use the following cells to visualize the outputs directly in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import visualize_products\n", - "\n", - "visualize_products(rasters, port=8887)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import show_color_legend\n", - "\n", - "show_color_legend(rasters, \"croptype\")" + "The resulting raster files can be visualized in QGIS." 
] } ], diff --git a/notebooks/worldcereal_v1_demo_custom_croptype_extended.ipynb b/notebooks/worldcereal_v1_demo_custom_croptype_extended.ipynb new file mode 100644 index 00000000..f38b8ea5 --- /dev/null +++ b/notebooks/worldcereal_v1_demo_custom_croptype_extended.ipynb @@ -0,0 +1,432 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](./resources/System_v1_custom_croptype.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Introduction\n", + "\n", + "This notebook guides you through the process of training a custom crop type classification model using publicly available and harmonized in-situ reference data for your area and crop types of interest. Afterwards, the model can be applied to your season of interest to generate a crop type map.\n", + "\n", + "Please note that for the purpose of this demo, the processing area is limited to 250 km² per model run. On average, one such run consumes xxx credits on the Copernicus Data Space Ecosystem." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Content\n", + " \n", + "- [Before you start](#Before-you-start)\n", + "- [1. Define your region of interest](#1.-Define-your-region-of-interest)\n", + "- [2. Extract public reference data](#2.-Extract-public-reference-data)\n", + "- [3. Select your desired crop types](#3.-Select-your-desired-crop-types)\n", + "- [4. Prepare training features](#4.-Prepare-training-features)\n", + "- [5. Train custom classification model](#5.-Train-custom-classification-model)\n", + "- [6. Deploy your custom model](#6.-Deploy-your-custom-model)\n", + "- [7. Generate a map](#7.-Generate-a-map)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Before you start\n", + "\n", + "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) registering [here](https://dataspace.copernicus.eu/). 
This is free of charge and will grant you a number of free openEO processing credits to continue this demo." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Define your region of interest\n", + "\n", + "When running the code snippet below, an interactive map will be visualized.\n", + "Click the Rectangle button on the left hand side of the map to start drawing your region of interest.\n", + "\n", + "Currently, there is a maximum size of 250 km² for your area within this demo. Upon exceeding this limit, an error will be shown.\n", + "You can bypass this limit by altering the code below to:
\n", + "*map = ui_map(area_limit=750)*
\n", + "\n", + "Processing areas beyond 750 km² are currently not supported.\n", + "\n", + "The widget will automatically store the coordinates of the last rectangle you drew on the map." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.utils.map import ui_map\n", + "\n", + "map = ui_map()\n", + "map.show_map()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Extract public reference data\n", + "\n", + "Here we query existing reference data that have already been processed by WorldCereal and are ready to use.\n", + "To increase the number of hits, we expand the search area by 250 km in all directions.\n", + "\n", + "We print the number of training samples retrieved per year." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.utils.refdata import query_public_extractions\n", + "\n", + "# retrieve the polygon you just drew\n", + "polygon = map.get_polygon_latlon()\n", + "\n", + "# Query our public database of training data\n", + "public_df = query_public_extractions(polygon)\n", + "public_df.year.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Select your desired crop types\n", + "\n", + "Run the next cell and select all crop types you wish to include in your model. All the crops that are not selected will be grouped under the \"other\" category." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import pick_croptypes\n", + "from IPython.display import display\n", + "\n", + "checkbox, checkbox_widgets = pick_croptypes(public_df, samples_threshold=100)\n", + "display(checkbox)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Based on your selection, a custom target label is now generated for each sample. 
Verify that only crops of your choice are appearing in the `downstream_class`, all others will fall under `other`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import get_custom_croptype_labels\n", + "\n", + "public_df = get_custom_croptype_labels(public_df, checkbox_widgets)\n", + "public_df[\"downstream_class\"].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. Prepare training features\n", + "\n", + "Using a deep learning framework (Presto), we derive classification features for each sample. The resulting `encodings` and `targets` will be used for model training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import prepare_training_dataframe\n", + "\n", + "training_dataframe = prepare_training_dataframe(public_df, task_type=\"croptype\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. Train custom classification model\n", + "We train a catboost model for the selected crop types. Class weights are automatically determined to balance the individual classes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import train_classifier\n", + "\n", + "custom_model, report, confusion_matrix = train_classifier(training_dataframe)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before training, the available training data has been automatically split into a calibration and validation part. By executing the next cell, you get an idea of how well the model performs on the independent validation set." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Print the classification report\n", + "print(report)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6. Deploy your custom model\n", + "\n", + "Once trained, we have to upload our model to the cloud so it can be used by OpenEO for inference. Note that these models are only kept in cloud storage for a limited amount of time.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.utils.upload import deploy_model\n", + "from openeo_gfmap.backend import cdse_connection\n", + "from utils import get_input\n", + "\n", + "modelname = get_input(\"model\")\n", + "model_url = deploy_model(cdse_connection(), custom_model, pattern=modelname)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 7. Generate a map\n", + "\n", + "Using our custom model, we generate a map for our region and season of interest.\n", + "To determine your season of interest, you can consult the WorldCereal crop calendars (by executing the next cell), or check out the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import retrieve_worldcereal_seasons\n", + "\n", + "spatial_extent = map.get_processing_extent()\n", + "seasons = retrieve_worldcereal_seasons(spatial_extent)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now use the slider to select your processing period. Note that the length of the period is always fixed to a year.\n", + "Just make sure your season of interest is fully captured within the period you select." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import date_slider\n", + "\n", + "slider = date_slider()\n", + "slider.show_slider()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set some other customization options:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.job import PostprocessParameters\n", + "import os\n", + "from pathlib import Path\n", + "\n", + "# Choose whether you want to store the cropland mask as separate output file\n", + "save_mask = True\n", + "\n", + "# Choose whether or not you want to spatially clean the classification results\n", + "postprocess_result = True\n", + "# Choose the postprocessing method you want to use [\"smooth_probabilities\", \"majority_vote\"]\n", + "# (\"smooth_probabilities\" will do limited spatial cleaning,\n", + "# while \"majority_vote\" will do more aggressive spatial cleaning, depending on the value of kernel_size)\n", + "postprocess_method = \"majority_vote\"\n", + "# Additional parameter for the majority vote method \n", + "# (the higher the value, the more aggressive the spatial cleaning,\n", + "# should be an odd number, not larger than 25, default = 5)\n", + "kernel_size = 5\n", + "# Do you want to save the intermediate results (before applying the postprocessing)\n", + "save_intermediate = True\n", + "# Do you want to save all class probabilities in the final product? 
(default is False)\n", + "keep_class_probs = True\n", + "\n", + "postprocess_parameters = PostprocessParameters(enable=postprocess_result,\n", + " method=postprocess_method,\n", + " kernel_size=kernel_size,\n", + " save_intermediate=save_intermediate,\n", + " keep_class_probs=keep_class_probs)\n", + "\n", + "# Specify the local directory where the resulting maps should be downloaded to.\n", + "run = get_input(\"model run\")\n", + "output_dir = Path(os.getcwd()) / f'CROPTYPE_{modelname}_{run}'\n", + "print(f\"Output directory: {output_dir}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now have all information we need to generate our map!
\n", + "The next cell will submit a map inference job on CDSE through OpenEO.
\n", + "The first time you run this, you will be asked to authenticate with your CDSE account by clicking the link provided below the cell.
\n", + "Then sit back and wait until your map is ready..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from worldcereal.job import WorldCerealProductType, generate_map, CropTypeParameters\n", + "\n", + "# Initializes default parameters\n", + "parameters = CropTypeParameters()\n", + "\n", + "# Change the URL to your custom classification model\n", + "parameters.classifier_parameters.classifier_url = model_url\n", + "parameters.save_mask = save_mask\n", + "\n", + "# Get processing period and area\n", + "processing_period = slider.get_processing_period()\n", + "processing_extent = map.get_processing_extent()\n", + "\n", + "# Launch the job\n", + "job_results = generate_map(\n", + " processing_extent,\n", + " processing_period,\n", + " output_dir=output_dir,\n", + " product_type=WorldCerealProductType.CROPTYPE,\n", + " croptype_parameters=parameters,\n", + " postprocess_parameters=postprocess_parameters,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The classification results will be automatically downloaded to your *output_dir* in .tif format.\n", + "By default, OpenEO stores the class labels, confidence score and class probabilities in one file.\n", + "\n", + "Using the function below, we split this information into separate .tif files, thereby adding metadata and a color map, to ease interpretation and visualization:\n", + "- \"xxx_classification_start-date_end-date.tif\" --> contains the classification labels. A class look-up table is included in the .tif metadata.\n", + "- \"xxx_confidence_start-date_end-date.tif\" --> contains the probability associated with the prediction [0 - 100]\n", + "\n", + "In case you chose to store the original per-class probabilities, these are NOT written to a separate file and need to be consulted in the original result downloaded from OpenEO." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import prepare_visualization\n", + "\n", + "rasters = prepare_visualization(job_results)\n", + "print(rasters)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The resulting raster files can be visualized in QGIS.\n", + "\n", + "In case you are running this script on your local environment, you can alternatively use the following cells to visualize the outputs directly in this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import visualize_products\n", + "\n", + "visualize_products(rasters, port=8887)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import show_color_legend\n", + "\n", + "show_color_legend(rasters, \"croptype\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "worldcereal", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/worldcereal_v1_demo_default_cropland.ipynb b/notebooks/worldcereal_v1_demo_default_cropland.ipynb index 406f3e62..f812ee8d 100644 --- a/notebooks/worldcereal_v1_demo_default_cropland.ipynb +++ b/notebooks/worldcereal_v1_demo_default_cropland.ipynb @@ -16,7 +16,8 @@ "This notebook contains a short demo on how to use the WorldCereal system to generate a cropland extent map for your area and season of interest.
\n", "The map is generated using a default model trained by the WorldCereal consortium to distinguish cropland from all other land use.\n", "\n", - "Please note that for the purpose of this demo, the processing area is currently limited to 250 km² per model run.\n", + "Please note that for the purpose of this demo, the processing area is currently limited to 250 km² per model run. On average, one such run consumes xxx credits on the Copernicus Data Space Ecosystem.\n", + "\n", "\n", "
\n", "Cropland definition: \n", @@ -55,9 +56,15 @@ "source": [ "### 1. Define your region of interest\n", "\n", - "When running the code snippet below, an interactive map will be visualized. Click the Rectangle button on the left hand side of the map to start drawing your region of interest.\n", + "When running the code snippet below, an interactive map will be visualized.\n", + "Click the Rectangle button on the left hand side of the map to start drawing your region of interest.\n", + "\n", + "Currently, there is a maximum size of 250 km² for your area within this demo. Upon exceeding this limit, an error will be shown.\n", + "You can bypass this limit by altering the code below to:
\n", + "*map = ui_map(area_limit=750)*
\n", + "\n", + "Processing areas beyond 750 km² are currently not supported.\n", "\n", - "In case your region exceeds the 250 km² limit, you will be asked to draw again.\n", "The widget will automatically store the coordinates of the last rectangle you drew on the map." ] }, @@ -146,6 +153,7 @@ "from worldcereal.job import PostprocessParameters\n", "import os\n", "from pathlib import Path\n", + "from utils import get_input\n", "\n", "# Choose whether or not you want to spatially clean the classification results\n", "postprocess_result = True\n", @@ -153,7 +161,8 @@ "postprocess_parameters = PostprocessParameters(enable=postprocess_result)\n", "\n", "# Specify the local directory where the resulting maps should be downloaded to.\n", - "output_dir = Path(os.getcwd()) / 'CROPLAND_test'\n", + "run = get_input('model run')\n", + "output_dir = Path(os.getcwd()) / f'CROPLAND_default_{run}'\n", "print(f\"Output directory: {output_dir}\")" ] }, diff --git a/notebooks/worldcereal_v1_demo_default_cropland_extended.ipynb b/notebooks/worldcereal_v1_demo_default_cropland_extended.ipynb index 69ece2fb..da1d6555 100644 --- a/notebooks/worldcereal_v1_demo_default_cropland_extended.ipynb +++ b/notebooks/worldcereal_v1_demo_default_cropland_extended.ipynb @@ -16,7 +16,7 @@ "This notebook contains a short demo on how to use the WorldCereal system to generate a cropland extent map for your area and season of interest.
\n", "The map is generated using a default model trained by the WorldCereal consortium to distinguish cropland from all other land use.\n", "\n", - "Please note that for the purpose of this demo, the processing area is currently limited to 250 km² per model run.\n", + "Please note that for the purpose of this demo, the processing area is currently limited to 250 km² per model run. On average, one such run consumes xxx credits on the Copernicus Data Space Ecosystem.\n", "\n", "
\n", "Cropland definition: \n", @@ -55,9 +55,15 @@ "source": [ "### 1. Define your region of interest\n", "\n", - "When running the code snippet below, an interactive map will be visualized. Click the Rectangle button on the left hand side of the map to start drawing your region of interest.\n", + "When running the code snippet below, an interactive map will be visualized.\n", + "Click the Rectangle button on the left hand side of the map to start drawing your region of interest.\n", + "\n", + "Currently, there is a maximum size of 250 km² for your area within this demo. Upon exceeding this limit, an error will be shown.\n", + "You can bypass this limit by altering the code below to:
\n", + "*map = ui_map(area_limit=750)*
\n", + "\n", + "Processing areas beyond 750 km² are currently not supported.\n", "\n", - "In case your region exceeds the 250 km² limit, you will be asked to draw again.\n", "The widget will automatically store the coordinates of the last rectangle you drew on the map." ] }, @@ -146,17 +152,22 @@ "from worldcereal.job import PostprocessParameters\n", "import os\n", "from pathlib import Path\n", + "from utils import get_input\n", "\n", - "# Choose whether or not you want to spatially clean the classification results (default = True)\n", + "# Choose whether or not you want to spatially clean the classification results\n", "postprocess_result = True\n", - "# Choose the postprocessing method you want to use [\"smooth_probabilities\", \"majority_vote\"] (default = \"smooth_probabilities\")\n", + "# Choose the postprocessing method you want to use [\"smooth_probabilities\", \"majority_vote\"]\n", + "# (\"smooth_probabilities\" will do limited spatial cleaning,\n", + "# while \"majority_vote\" will do more aggressive spatial cleaning, depending on the value of kernel_size)\n", "postprocess_method = \"majority_vote\"\n", - "# Additiona parameters for the majority vote method:\n", - "kernel_size = 3 # default = 5\n", + "# Additional parameter for the majority vote method \n", + "# (the higher the value, the more aggressive the spatial cleaning,\n", + "# should be an odd number, not larger than 25, default = 5)\n", + "kernel_size = 5\n", "# Do you want to save the intermediate results (before applying the postprocessing)\n", - "save_intermediate = True # default is False\n", + "save_intermediate = True\n", "# Do you want to save all class probabilities in the final product? 
\n", - "keep_class_probs = True # default is False\n", + "keep_class_probs = True\n", "\n", "postprocess_parameters = PostprocessParameters(enable=postprocess_result,\n", " method=postprocess_method,\n", @@ -165,7 +176,8 @@ " keep_class_probs=keep_class_probs)\n", "\n", "# Specify the local directory where the resulting maps should be downloaded to.\n", - "output_dir = Path(os.getcwd()) / 'CROPLAND_test'\n", + "run = get_input('model run')\n", + "output_dir = Path(os.getcwd()) / f'CROPLAND_default_{run}'\n", "print(f\"Output directory: {output_dir}\")" ] },