Skip to content

Commit

Permalink
Merge pull request #100 from nasaharvest/contributed
Browse files Browse the repository at this point in the history
Add an 'externally contributed' column to the datasets
  • Loading branch information
gabrieltseng authored Aug 12, 2022
2 parents 763b03f + 40d478c commit fa80888
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 12 deletions.
2 changes: 1 addition & 1 deletion benchmarks/dl/maml.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ def _make_tasks(
if task.k >= min_task_k:
label_to_task[task.id] = task

for label in labels.classes_in_bbox(country_bbox):
for label in labels.classes_in_bbox(country_bbox, True):
if country in test_countries_to_crops:
if label in test_countries_to_crops[country]:
continue
Expand Down
1 change: 1 addition & 0 deletions cropharvest/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class RequiredColumns(Columns):
EXPORT_END_DATE = "export_end_date"
GEOMETRY = "geometry"
IS_TEST = "is_test"
EXTERNALLY_CONTRIBUTED_DATASET = "externally_contributed_dataset"

@classmethod
def date_columns(cls) -> List[str]:
Expand Down
2 changes: 1 addition & 1 deletion cropharvest/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
EXPORT_END_MONTH = 2
EXPORT_END_DAY = 1

DATASET_VERSION_ID = 6855066
DATASET_VERSION_ID = 6985649
DATASET_URL = f"https://zenodo.org/record/{DATASET_VERSION_ID}"
LABELS_FILENAME = "labels.geojson"
FEATURES_DIR = "features"
Expand Down
27 changes: 20 additions & 7 deletions cropharvest/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class Task:
balance_negative_crops: bool = False
test_identifier: Optional[str] = None
normalize: bool = True
include_externally_contributed_labels: bool = True

def __post_init__(self):
if self.target_label is None:
Expand Down Expand Up @@ -90,17 +91,27 @@ def as_geojson(self) -> geopandas.GeoDataFrame:
return self._labels

@staticmethod
def filter_geojson(
    gpdf: geopandas.GeoDataFrame,
    bounding_box: BBox,
    include_external_contributions: bool = True,
) -> geopandas.GeoDataFrame:
    """Return the rows of ``gpdf`` that fall inside ``bounding_box``.

    Args:
        gpdf: Labels to filter. Must contain the ``RequiredColumns.LAT`` and
            ``RequiredColumns.LON`` columns, and the
            ``RequiredColumns.EXTERNALLY_CONTRIBUTED_DATASET`` column when
            ``include_external_contributions`` is False.
        bounding_box: Object whose ``contains(lat, lon)`` is evaluated for
            every row (assumed to return a truthy/falsy value).
        include_external_contributions: When False, rows whose
            externally-contributed flag is not ``False`` are dropped as well.
            Defaults to True so callers written against the previous
            two-argument signature keep their behaviour.

    Returns:
        The subset of ``gpdf`` satisfying every requested condition.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # warning: invalid value encountered in ? (vectorized)
        # otypes is set explicitly so an empty frame yields an empty boolean
        # mask instead of np.vectorize raising on size-0 input.
        include_condition = np.vectorize(bounding_box.contains, otypes=[bool])(
            gpdf[RequiredColumns.LAT], gpdf[RequiredColumns.LON]
        )
    if not include_external_contributions:
        # Combine with the boolean column itself. Indexing gpdf with the
        # comparison would produce a filtered frame, not a row mask.
        not_external = gpdf[RequiredColumns.EXTERNALLY_CONTRIBUTED_DATASET].eq(False)
        include_condition &= not_external.to_numpy()
    return gpdf[include_condition]

def classes_in_bbox(
    self, bounding_box: BBox, include_external_contributions: bool = True
) -> List[str]:
    """List the distinct non-None labels found inside ``bounding_box``.

    Args:
        bounding_box: Region passed through to ``filter_geojson``.
        include_external_contributions: Forwarded to ``filter_geojson``.
            Defaults to True so existing single-argument callers keep
            working.

    Returns:
        The unique values of the ``label`` column for the filtered rows,
        with ``None`` entries removed.
    """
    bbox_geojson = self.filter_geojson(
        self.as_geojson(), bounding_box, include_external_contributions
    )
    return [label for label in bbox_geojson.label.unique() if label is not None]

Expand All @@ -117,7 +128,9 @@ def construct_positive_and_negative_labels(
if filter_test:
gpdf = gpdf[gpdf[RequiredColumns.IS_TEST] == False]
if task.bounding_box is not None:
gpdf = self.filter_geojson(gpdf, task.bounding_box)
gpdf = self.filter_geojson(
gpdf, task.bounding_box, task.include_externally_contributed_labels
)

if len(gpdf) == 0:
raise NoDataForBoundingBoxError
Expand Down
36 changes: 33 additions & 3 deletions process_labels/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,25 @@
"ethiopia": {
"function": loading_funcs.load_ethiopia,
"description": "Hand-labelled crop / non-crop labels in Ethiopia",
"externally_contributed": False,
},
"sudan": {
"function": loading_funcs.load_sudan,
"description": "Hand-labelled crop / non crop labels in Sudan",
"externally_contributed": False,
},
"togo": {
"function": loading_funcs.load_togo,
"description": "Hand-labelled crop / non crop labels in Togo",
"externally_contributed": False,
},
"togo-eval": {
"function": loading_funcs.load_togo_eval,
"description": (
"Hand-labelled crop / non crop labels in Togo. "
"These labels are a consensus set collected from 4 labellers."
),
"externally_contributed": False,
},
"lem-brazil": {
"function": loading_funcs.load_lem_brazil,
Expand All @@ -38,6 +42,7 @@
"For more information, please refer to "
"https://www.sciencedirect.com/science/article/pii/S2352340920314359"
),
"externally_contributed": False,
},
"geowiki-landcover-2017": {
"function": loading_funcs.load_geowiki_landcover_2017,
Expand All @@ -46,6 +51,7 @@
"GeoWiki. For more information, please refer to "
"https://doi.pangaea.de/10.1594/PANGAEA.873912"
),
"externally_contributed": False,
},
"central-asia": {
"function": loading_funcs.load_central_asia,
Expand All @@ -54,6 +60,7 @@
"For more information, please refer to "
"https://www.nature.com/articles/s41597-020-00591-2.pdf"
),
"externally_contributed": False,
},
"kenya": {
"function": loading_funcs.load_kenya,
Expand All @@ -62,10 +69,12 @@
"information, please refer to "
"https://doi.org/10.34911/rdnt.u41j87"
),
"externally_contributed": False,
},
"kenya-non-crop": {
"function": loading_funcs.load_kenya_non_crop,
"description": "Hand-labelled non crop labels in Kenya",
"externally_contributed": False,
},
"uganda": {
"function": loading_funcs.load_uganda,
Expand All @@ -74,6 +83,7 @@
"information, please refer to "
"https://registry.mlhub.earth/10.34911/rdnt.eii04x/"
),
"externally_contributed": False,
},
"tanzania": {
"function": loading_funcs.load_tanzania,
Expand All @@ -82,6 +92,7 @@
"more information, please refer to "
"https://doi.org/10.34911/rdnt.5vx40r"
),
"externally_contributed": False,
},
"croplands": {
"function": loading_funcs.load_croplands,
Expand All @@ -91,10 +102,12 @@
"project (https://croplands.org/home) retrieved from "
"https://croplands.org/app/data/search?page=1&page_size=200 "
),
"externally_contributed": False,
},
"zimbabwe": {
"function": loading_funcs.load_zimbabwe,
"description": "Maize labels collected by the FEWS NET",
"externally_contributed": False,
},
"mali": {
"function": loading_funcs.load_mali,
Expand All @@ -103,10 +116,12 @@
"collected as part of the Relief to Resistance in the Sahel "
"(R2R)"
),
"externally_contributed": False,
},
"mali-non-crop": {
"function": loading_funcs.load_mali_crop_noncrop,
"description": "Hand labelled non-crop labels in Mali",
"externally_contributed": False,
},
"ile-de-france": {
"function": loading_funcs.load_ile_de_france,
Expand All @@ -117,10 +132,12 @@
"on May 4th 2021. When loaded from the raw data, the dataset size is significantly "
"reduced (i.e. we take a small subset of the total available labels) "
),
"externally_contributed": False,
},
"brazil-non-crop": {
"function": loading_funcs.load_brazil_noncrop,
"description": {"Hand labelled non-crop labels in Brazil"},
"externally_contributed": False,
},
"reunion-france": {
"function": loading_funcs.load_reunion,
Expand All @@ -132,6 +149,7 @@
"on June 2nd 2021. When loaded from the raw data, the dataset size is significantly "
"reduced (i.e. we take a small subset of the total available labels) "
),
"externally_contributed": False,
},
"martinique-france": {
"function": loading_funcs.load_martinique,
Expand All @@ -143,10 +161,12 @@
"on June 2nd 2021. When loaded from the raw data, the dataset size is significantly "
"reduced (i.e. we take a small subset of the total available labels) "
),
"externally_contributed": False,
},
"rwanda-ceo": {
"function": loading_funcs.load_rwanda_ceo,
"description": "Hand-labelled crop / non crop labels in Rwanda",
"externally_contributed": False,
},
"canada": {
"function": loading_funcs.load_canada,
Expand All @@ -156,6 +176,7 @@
"https://open.canada.ca/data/en/dataset/503a3113-e435-49f4-850c-d70056788632. "
"Contains information licensed under the Open Government Licence – Canada."
),
"externally_contributed": False,
},
"germany": {
"function": loading_funcs.load_germany,
Expand All @@ -164,14 +185,17 @@
" of the European Union, and processed in "
"https://github.com/lukaskondmann/DENETHOR"
),
"externally_contributed": False,
},
"mali-helmets-labelling-crops": {
"function": loading_funcs.load_mali_hlc,
"description": ("2022 data collected as part of the Helmets Labelling Crops project"),
"externally_contributed": False,
},
"tanzania-rice-ecaas": {
"function": loading_funcs.load_tanzania_ecaas,
"description": ("Tanzania Rice ECAAS campaign"),
"description": "Tanzania Rice ECAAS campaign",
"externally_contributed": False,
},
}

Expand Down Expand Up @@ -200,8 +224,14 @@ def combine_datasets(datasets: Optional[List[str]] = None) -> geopandas.GeoDataF

for dataset_name in datasets:
dataset = load(dataset_name)
dataset = dataset.assign(dataset=dataset_name)

dataset = dataset.assign(
**{
RequiredColumns.DATASET: dataset_name,
RequiredColumns.EXTERNALLY_CONTRIBUTED_DATASET: DATASETS[dataset_name][
"externally_contributed"
],
}
)
for column in NullableColumns.tolist():
if column not in dataset:
dataset = dataset.assign(
Expand Down

0 comments on commit fa80888

Please sign in to comment.