Skip to content

Commit

Permalink
#346 Prevent double nesting of features field
Browse files Browse the repository at this point in the history
  • Loading branch information
twinkarma authored and ianroberts committed Feb 26, 2024
1 parent d91a554 commit f650d87
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 2 deletions.
7 changes: 5 additions & 2 deletions backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,9 +980,12 @@ def get_doc_annotation_dict(self, json_format="raw", anonymize=True):
if json_format == "raw" or json_format == "csv":
doc_dict = self.data.copy()
elif json_format == "gate":
# Documents in GATE JSON format are expected to have an existing "features" field
features_dict = self.data["features"] if "features" in self.data and isinstance(self.data["features"], dict) else {}

ignore_keys = {"text", self.project.document_id_field}
features_dict = {key: value for key, value in self.data.items() if key not in ignore_keys}
# Add any non-compliant top-level fields into the "features" field instead
ignore_keys = {"text", "features", self.project.document_id_field}
features_dict.update({key: value for key, value in self.data.items() if key not in ignore_keys})

doc_dict = {
"text": self.data["text"],
Expand Down
18 changes: 18 additions & 0 deletions backend/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1113,6 +1113,11 @@ def setUp(self):
"feature1": "Testvalue 1",
"feature2": "Testvalue 1",
"feature3": "Testvalue 1",
"features": {
"gate_format_feature1": "Gate feature test value",
"gate_format_feature2": "Gate feature test value",
"gate_format_feature3": "Gate feature test value",
}

}
)
Expand Down Expand Up @@ -1148,20 +1153,29 @@ def setUp(self):
def test_export_raw(self):
    """Check that the "raw" export leaves the uploaded fields in place.

    For every document of ``self.project`` the exported dictionary must
    still expose the uploaded top-level keys, and the uploaded "features"
    dictionary must still hold its own keys under "features".
    """
    top_level_keys = ("id", "text", "feature1", "feature2", "feature3", "features")
    nested_feature_keys = (
        "gate_format_feature1",
        "gate_format_feature2",
        "gate_format_feature3",
    )

    for document in self.project.documents.all():
        # Fields should remain exactly the same as what's been uploaded
        # aside from annotation_sets
        doc_dict = document.get_doc_annotation_dict("raw")
        print(doc_dict)

        # assertIn names the missing key and the container on failure,
        # whereas assertTrue(key in d) only reports "False is not true".
        for key in top_level_keys:
            self.assertIn(key, doc_dict)

        doc_features = doc_dict["features"]
        for key in nested_feature_keys:
            self.assertIn(key, doc_features)

        self.check_raw_gate_annotation_formatting(doc_dict)
        self.check_teamware_status(doc_dict, self.anon_annotator_names)

def test_export_gate(self):

for document in self.project.documents.all():
# All top-level fields apart from name, text, features and annotation_sets should be
# nested inside the features field
doc_dict = document.get_doc_annotation_dict("gate")
print(doc_dict)

Expand All @@ -1172,6 +1186,10 @@ def test_export_gate(self):
self.assertTrue("feature1" in doc_features)
self.assertTrue("feature2" in doc_features)
self.assertTrue("feature3" in doc_features)
self.assertFalse("features" in doc_features, "Double nesting of features field")
self.assertTrue("gate_format_feature1" in doc_features)
self.assertTrue("gate_format_feature2" in doc_features)
self.assertTrue("gate_format_feature3" in doc_features)

self.check_raw_gate_annotation_formatting(doc_dict)
self.check_teamware_status(doc_features, self.anon_annotator_names)
Expand Down

0 comments on commit f650d87

Please sign in to comment.