Skip to content

Commit 83b04db

Browse files
authored
Merge pull request #97 from nasaharvest/mali-and-tanzania
Mali and Tanzania
2 parents 6d38e58 + cee9935 commit 83b04db

12 files changed

+3839
-5
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ celerybeat-schedule
8484
# virtualenv
8585
venv/
8686
ENV/
87+
harvest-env/
8788

8889
# Spyder project settings
8990
.spyderproject

cropharvest/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
EXPORT_END_MONTH = 2
1717
EXPORT_END_DAY = 1
1818

19-
DATASET_VERSION_ID = 5828893
19+
DATASET_VERSION_ID = 6855066
2020
DATASET_URL = f"https://zenodo.org/record/{DATASET_VERSION_ID}"
2121
LABELS_FILENAME = "labels.geojson"
2222
FEATURES_DIR = "features"

cropharvest/engineer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,7 @@ def create_h5_dataset(self, checkpoint: bool = True) -> None:
574574
skipped_files: int = 0
575575
num_new_files: int = 0
576576
for file_path in tqdm(list(self.eo_files.glob("*.tif"))):
577-
file_index, dataset = self.process_filename(file_path.name)
577+
file_index, dataset = self.process_filename(file_path.stem)
578578
file_name = f"{file_index}_{dataset}.h5"
579579
if (checkpoint) & ((arrays_dir / file_name).exists()):
580580
# we check if the file has already been written

datasets.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,12 @@
1919
|[[9]](#9)| Tanzania | 392 | Multi-class| CC BY-4.0|
2020
|[[10]](#10)| Kenya | 319 | Multi-class| CC BY-SA-4.0|
2121
|[[11]](#11)| Uganda | 233 | Multi-class| CC BY-4.0|
22+
|[[12]](#12)| Tanzania | 800 | Multi-class| CC BY-SA-4.0|
2223
| Harvest Partner | Mali | 148 | Multi-class| CC BY-4.0|
24+
| Harvest Partner | Mali | 1506 | Multi-class| CC BY-4.0|
2325
| FEWS NET| Zimbabwe | 49 | Multi-class| CC BY-SA-4.0|
2426

27+
2528
## References
2629
<a id="1">[1]</a> Hannah Kerner, Gabriel Tseng, Inbal Becker-Reshef, Catherine Nakalembe, Brian Barker, Blake Munshell, Madhava Paliyam, and Mehdi Hosseini. Rapid response crop maps in data sparse regions. In ACM SIGKDD Conference on Data Mining and Knowledge Discovery Workshops, 2020.
2730

@@ -45,3 +48,5 @@ https://doi.org/10.34911/RDNT.5VX40R, 2019.
4548
<a id="10">[10]</a> Annalyse Kehs, Peter McCloskey, John Chelal, Derek Morr, Stellah Amakove, Bismark Plimo, John Mayieka, Gladys Ntango, Kelvin Nyongesa, Lawrence Pamba, Melodine Jeptoo, James Mugo, Mercyline Tsuma, Winnie Onyango, and David Hughes. From village to globe: A dynamic real-time map of african fields through plantvillage. bioRxiv, 2019
4649

4750
<a id="11">[11]</a> Christophe Bocquet. Dalberg data insights uganda crop classification. https://doi.org/10.34911/RDNT.EII04X, 2019.
51+
52+
<a id="12">[12]</a> Catherine Nakalembe, Andreas Schlueter, Sixbert Maurice, & Taryn Devereux. (2022). 2022 Rice Crop-type Data for Western Tanzania (Version 1). https://doi.org/10.5281/zenodo.6824200

process_labels/datasets.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,14 @@
165165
"https://github.com/lukaskondmann/DENETHOR"
166166
),
167167
},
168+
"mali-helmets-labelling-crops": {
169+
"function": loading_funcs.load_mali_hlc,
170+
"description": ("2022 data collected as part of the Helmets Labelling Crops project"),
171+
},
172+
"tanzania-rice-ecaas": {
173+
"function": loading_funcs.load_tanzania_ecaas,
174+
"description": ("Tanzania Rice ECAAS campaign"),
175+
},
168176
}
169177

170178

process_labels/loading_funcs/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
from .rwanda import load_rwanda_ceo
88
from .kenya import load_kenya, load_kenya_non_crop
99
from .uganda import load_uganda
10-
from .tanzania import load_tanzania
10+
from .tanzania import load_tanzania, load_tanzania_ecaas
1111
from .croplands import load_croplands
1212
from .zimbabwe import load_zimbabwe
13-
from .mali import load_mali, load_mali_crop_noncrop
13+
from .mali import load_mali, load_mali_crop_noncrop, load_mali_hlc
1414
from .france import load_ile_de_france, load_reunion, load_martinique
1515
from .canada import load_canada
1616
from .germany import load_germany
@@ -39,4 +39,6 @@
3939
"load_martinique",
4040
"load_canada",
4141
"load_germany",
42+
"load_mali_hlc",
43+
"load_tanzania_ecaas",
4244
]

process_labels/loading_funcs/mali.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import geopandas
22
import pandas as pd
3+
from shapely.geometry import Point
34

45
from datetime import datetime
56

@@ -14,6 +15,10 @@
1415
"sorghum": "cereals",
1516
"millet": "cereals",
1617
"rice": "cereals",
18+
"sesame": "oilseeds",
19+
"groundnuts": "oilseeds",
20+
"beans": "leguminous",
21+
"cotton": "other",
1722
}
1823

1924

@@ -55,3 +60,35 @@ def load_mali():
5560
df[RequiredColumns.INDEX] = df.index
5661

5762
return df
63+
64+
65+
def load_mali_hlc():
    """Load the 2022 Mali "Helmets Labelling Crops" survey as a GeoDataFrame.

    Reads the helmets crop-type CSV under ``DATASET_PATH / "mali"``, keeps only
    rows whose ``multiple_crops`` value is ``"no"`` and fills in the
    ``RequiredColumns`` / ``NullableColumns`` fields from the survey columns.

    Returns:
        A ``geopandas.GeoDataFrame`` whose geometry column is
        ``RequiredColumns.GEOMETRY`` (one ``Point(lon, lat)`` per row), with a
        fresh 0-based index mirrored into ``RequiredColumns.INDEX``.

    Raises:
        KeyError: if a crop name in the CSV has no entry in
            ``LABEL_TO_CLASSIFICATION``.
    """
    df = pd.read_csv(
        DATASET_PATH / "mali/helmets_crop_type_mapping_2022_04_06_16_20_56_356161.csv"
    )

    # currently don't include intercropped crops.
    # .copy() makes the filtered rows an independent frame, so the column
    # assignments below never write into a slice of the frame read from disk.
    df = df[df["multiple_crops"] == "no"].copy()

    df[RequiredColumns.LON] = df[
        "field_specification_assessment/_geopoint_widget_placementmap_longitude"
    ]
    df[RequiredColumns.LAT] = df[
        "field_specification_assessment/_geopoint_widget_placementmap_latitude"
    ]
    df[RequiredColumns.COLLECTION_DATE] = pd.to_datetime(df["today"])
    df[RequiredColumns.IS_CROP] = 1

    df[NullableColumns.LABEL] = df["current_season_crop/current_season_current_crop"]
    # Element-wise lookup on the label column: still raises KeyError for an
    # unmapped crop, but (unlike a row-wise DataFrame.apply) also yields a
    # Series when no rows survive the filter above.
    df[NullableColumns.CLASSIFICATION_LABEL] = df[NullableColumns.LABEL].apply(
        lambda label: LABEL_TO_CLASSIFICATION[label]
    )
    df[RequiredColumns.EXPORT_END_DATE] = datetime(2022, EXPORT_END_MONTH, EXPORT_END_DAY)
    df[RequiredColumns.GEOMETRY] = [
        Point(lon, lat) for lon, lat in zip(df[RequiredColumns.LON], df[RequiredColumns.LAT])
    ]

    df = df.reset_index(drop=True)
    df[RequiredColumns.INDEX] = df.index

    return geopandas.GeoDataFrame(df, geometry=RequiredColumns.GEOMETRY)

process_labels/loading_funcs/tanzania.py

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
from pathlib import Path
22
import json
33
import geopandas
4+
import pandas as pd
45
from datetime import datetime
5-
from shapely.geometry import Polygon
6+
from shapely.geometry import Polygon, Point
67
from cropharvest.columns import RequiredColumns, NullableColumns
8+
from cropharvest.config import EXPORT_END_MONTH, EXPORT_END_DAY
79

810
from .utils import export_date_from_row
911
from ..utils import DATASET_PATH
@@ -18,9 +20,20 @@
1820
"Safflower": "oilseeds",
1921
"White Sorghum": "cereals",
2022
"Yellow Maize": "cereals",
23+
"rice": "cereals",
24+
"maize": "cereals",
2125
}
2226

2327

28+
def convert_date(date_str: str) -> datetime:
    """Parse the calendar date out of an ISO-8601-style timestamp string.

    Everything from the first ``"T"`` onwards (time of day, UTC offset) is
    discarded, so the result is a naive ``datetime`` at midnight.

    Args:
        date_str: A string such as ``"2022-01-20T00:00:00.000+03:00"`` or a
            bare ``"2022-01-20"``.

    Returns:
        ``datetime(year, month, day)`` built from the first three
        ``-``-separated fields of the date part.

    Raises:
        ValueError: if the date part has fewer than three fields, a field is
            not an integer, or the fields do not form a valid date.
    """
    date_part, _, _ = date_str.partition("T")
    # Only the first three fields are used; anything after them is ignored.
    year, month, day = (int(piece) for piece in date_part.split("-")[:3])
    return datetime(year, month, day)
35+
36+
2437
def _load_single_stac(path_to_stac: Path) -> List[Tuple[Polygon, str, datetime, datetime]]:
2538
with (path_to_stac / "labels.geojson").open("r") as f:
2639
label_json = json.load(f)
@@ -85,3 +98,73 @@ def load_tanzania():
8598
df = df.reset_index(drop=True)
8699
df[RequiredColumns.INDEX] = df.index
87100
return df
101+
102+
103+
def load_tanzania_ecaas():
    """Load the Tanzania rice ECAAS survey CSVs as one de-duplicated table.

    Every ``*.csv`` in ``DATASET_PATH / "tanzania" / "tanzania_rice_ecaas"`` is
    read and mapped onto the ``RequiredColumns`` / ``NullableColumns`` fields.
    Missing crop names are filled with ``"rice"`` and missing planting /
    harvesting dates with fixed 2022 defaults. Rows are then grouped by
    (lon, lat) and the first value of each column is kept per location.

    Returns:
        The aggregated frame, one row per distinct (lon, lat) pair, with a
        fresh 0-based index mirrored into ``RequiredColumns.INDEX``.

    Raises:
        ValueError: if the directory contains no CSV files.
        KeyError: if a crop name has no entry in ``LABEL_TO_CLASSIFICATION``.
    """
    crop_col = "consent_given/field_planted/primary_crop"
    planting_col = "consent_given/field_planted/planting_date"
    harvesting_col = "consent_given/field_planted/harvesting_date"

    ecaas_dir = DATASET_PATH / "tanzania" / "tanzania_rice_ecaas"

    gdfs: List[geopandas.GeoDataFrame] = []
    for file_path in ecaas_dir.glob("*.csv"):
        df = pd.read_csv(file_path)

        # Fill the gaps by assigning the result back. Calling
        # fillna(inplace=True) on a column selection acts on an intermediate
        # object and does not reliably update ``df`` itself.
        # replace NaN with Rice
        df[crop_col] = df[crop_col].fillna("rice")
        # fill the NANs in the harvest and planting date columns with one of their values
        df[planting_col] = df[planting_col].fillna("2022-01-20T00:00:00.000+03:00")
        df[harvesting_col] = df[harvesting_col].fillna("2022-05-01T00:00:00.000+03:00")

        # lat and long
        frame = pd.DataFrame(
            {
                RequiredColumns.LAT: df["consent_given/_field_center_latitude"],
                RequiredColumns.LON: df["consent_given/_field_center_longitude"],
            }
        )
        frame[RequiredColumns.GEOMETRY] = [
            Point(lon, lat)
            for lon, lat in zip(frame[RequiredColumns.LON], frame[RequiredColumns.LAT])
        ]
        # collection date
        frame[RequiredColumns.COLLECTION_DATE] = df["end"].apply(convert_date)

        # export date
        frame[RequiredColumns.EXPORT_END_DATE] = datetime(
            2022, EXPORT_END_MONTH, EXPORT_END_DAY
        )

        # label and classification label
        frame[NullableColumns.LABEL] = df[crop_col]
        frame[NullableColumns.CLASSIFICATION_LABEL] = frame[NullableColumns.LABEL].apply(
            lambda label: LABEL_TO_CLASSIFICATION[label]
        )
        # manual inputs
        frame[RequiredColumns.IS_CROP] = 1
        frame[NullableColumns.HARVEST_DATE] = df[harvesting_col].apply(convert_date)
        frame[NullableColumns.PLANTING_DATE] = df[planting_col].apply(convert_date)

        # Attach the CRS together with the geometry column; a CRS cannot be
        # given to a GeoDataFrame that has no geometry yet.
        gdfs.append(
            geopandas.GeoDataFrame(
                frame, geometry=RequiredColumns.GEOMETRY, crs="EPSG:4326"
            )
        )

    if not gdfs:
        raise ValueError(f"No ECAAS csv files found in {ecaas_dir}")

    df = pd.concat(gdfs)

    # Several surveys can cover the same field centre: keep the first record
    # seen for each (lon, lat) pair.
    df = df.groupby([RequiredColumns.LON, RequiredColumns.LAT]).agg(
        {
            RequiredColumns.LAT: "first",
            RequiredColumns.LON: "first",
            RequiredColumns.GEOMETRY: "first",
            RequiredColumns.COLLECTION_DATE: "first",
            RequiredColumns.EXPORT_END_DATE: "first",
            NullableColumns.LABEL: "first",
            NullableColumns.CLASSIFICATION_LABEL: "first",
            RequiredColumns.IS_CROP: "first",
            NullableColumns.HARVEST_DATE: "first",
            NullableColumns.PLANTING_DATE: "first",
        }
    )

    df = df.reset_index(drop=True)
    df[RequiredColumns.INDEX] = df.index

    return df

0 commit comments

Comments
 (0)