Skip to content

Commit

Permalink
#346 Prevent double nesting of features field
Browse files Browse the repository at this point in the history
  • Loading branch information
twinkarma committed May 25, 2023
1 parent f7f8b22 commit 416083c
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 3 deletions.
9 changes: 6 additions & 3 deletions backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -971,17 +971,20 @@ def get_doc_annotation_dict(self, json_format="raw", anonymize=True):
if json_format == "raw" or json_format == "csv":
doc_dict = self.data
elif json_format == "gate":
# Documents in GATE JSON format are expected to have an existing "features" field
features_dict = self.data["features"] if "features" in self.data and isinstance(self.data["features"], dict) else {}

ignore_keys = {"text", self.project.document_id_field}
features_dict = {key: value for key, value in self.data.items() if key not in ignore_keys}
# Add any non-compliant top-level fields into the "features" field instead
ignore_keys = {"text", "features", self.project.document_id_field}
features_dict.update({key: value for key, value in self.data.items() if key not in ignore_keys})

doc_dict = {
"text": self.data["text"],
"features": features_dict,
"offset_type": "p",
"name": get_value_from_key_path(self.data, self.project.document_id_field)
}
pass


# Insert annotation sets into the doc dict
annotations = self.annotations.filter(status=Annotation.COMPLETED)
Expand Down
18 changes: 18 additions & 0 deletions backend/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1111,6 +1111,11 @@ def setUp(self):
"feature1": "Testvalue 1",
"feature2": "Testvalue 1",
"feature3": "Testvalue 1",
"features": {
"gate_format_feature1": "Gate feature test value",
"gate_format_feature2": "Gate feature test value",
"gate_format_feature3": "Gate feature test value",
}

}
)
Expand Down Expand Up @@ -1146,19 +1151,28 @@ def setUp(self):
def test_export_raw(self):
    """Check that the "raw" export keeps the uploaded document fields.

    Every document of ``self.project`` is exported with
    ``get_doc_annotation_dict("raw")``. The expected top-level keys and the
    keys of the nested ``features`` mapping must all be present in the
    result, which is then passed to
    ``check_raw_gate_annotation_formatting``.
    """
    top_level_keys = ("id", "text", "feature1", "feature2", "feature3", "features")
    nested_feature_keys = (
        "gate_format_feature1",
        "gate_format_feature2",
        "gate_format_feature3",
    )

    for document in self.project.documents.all():
        # Fields should remain exactly the same as what's been uploaded
        # aside from annotation_sets
        doc_dict = document.get_doc_annotation_dict("raw")
        print(doc_dict)

        # assertIn names the missing key and shows the container on failure,
        # whereas assertTrue(key in container) only reports "False is not true".
        for key in top_level_keys:
            self.assertIn(key, doc_dict)

        # "features" was asserted above, so this lookup cannot raise KeyError.
        doc_features = doc_dict["features"]
        for key in nested_feature_keys:
            self.assertIn(key, doc_features)

        self.check_raw_gate_annotation_formatting(doc_dict)

def test_export_gate(self):

for document in self.project.documents.all():
# All top-level fields apart from name, text, features and annotation_sets should be
# nested inside the features field
doc_dict = document.get_doc_annotation_dict("gate")
print(doc_dict)

Expand All @@ -1169,6 +1183,10 @@ def test_export_gate(self):
self.assertTrue("feature1" in doc_features)
self.assertTrue("feature2" in doc_features)
self.assertTrue("feature3" in doc_features)
self.assertFalse("features" in doc_features, "Double nesting of features field")
self.assertTrue("gate_format_feature1" in doc_features)
self.assertTrue("gate_format_feature2" in doc_features)
self.assertTrue("gate_format_feature3" in doc_features)

self.check_raw_gate_annotation_formatting(doc_dict)

Expand Down

0 comments on commit 416083c

Please sign in to comment.