diff --git a/backend/models.py b/backend/models.py index 65fd07a8..8b99975e 100644 --- a/backend/models.py +++ b/backend/models.py @@ -971,9 +971,12 @@ def get_doc_annotation_dict(self, json_format="raw", anonymize=True): if json_format == "raw" or json_format == "csv": doc_dict = self.data elif json_format == "gate": + # GATE json format are expected to have an existing "features" field + features_dict = self.data["features"] if "features" in self.data and isinstance(self.data["features"], dict) else {} - ignore_keys = {"text", self.project.document_id_field} - features_dict = {key: value for key, value in self.data.items() if key not in ignore_keys} + # Add any non-compliant top-level fields into the "features" field instead + ignore_keys = {"text", "features", self.project.document_id_field} + features_dict.update({key: value for key, value in self.data.items() if key not in ignore_keys}) doc_dict = { "text": self.data["text"], @@ -981,7 +984,7 @@ def get_doc_annotation_dict(self, json_format="raw", anonymize=True): "offset_type": "p", "name": get_value_from_key_path(self.data, self.project.document_id_field) } - pass + # Insert annotation sets into the doc dict annotations = self.annotations.filter(status=Annotation.COMPLETED) diff --git a/backend/tests/test_models.py b/backend/tests/test_models.py index 8469323f..1b1cd08d 100644 --- a/backend/tests/test_models.py +++ b/backend/tests/test_models.py @@ -1111,6 +1111,11 @@ def setUp(self): "feature1": "Testvalue 1", "feature2": "Testvalue 1", "feature3": "Testvalue 1", + "features": { + "gate_format_feature1": "Gate feature test value", + "gate_format_feature2": "Gate feature test value", + "gate_format_feature3": "Gate feature test value", + } } ) @@ -1146,6 +1151,8 @@ def setUp(self): def test_export_raw(self): for document in self.project.documents.all(): + # Fields should remain exactly the same as what's been uploaded + # aside from annotation_sets doc_dict = document.get_doc_annotation_dict("raw") print(doc_dict) self.assertTrue("id" in doc_dict) @@ -1153,12 +1160,19 @@ def test_export_raw(self): self.assertTrue("feature1" in doc_dict) self.assertTrue("feature2" in doc_dict) self.assertTrue("feature3" in doc_dict) + self.assertTrue("features" in doc_dict) + doc_features = doc_dict["features"] + self.assertTrue("gate_format_feature1" in doc_features) + self.assertTrue("gate_format_feature2" in doc_features) + self.assertTrue("gate_format_feature3" in doc_features) self.check_raw_gate_annotation_formatting(doc_dict) def test_export_gate(self): for document in self.project.documents.all(): + # All top-level fields apart from name, text, features and annotation_sets should be + # nested inside the features field doc_dict = document.get_doc_annotation_dict("gate") print(doc_dict) @@ -1169,6 +1183,10 @@ def test_export_gate(self): self.assertTrue("feature1" in doc_features) self.assertTrue("feature2" in doc_features) self.assertTrue("feature3" in doc_features) + self.assertFalse("features" in doc_features, "Double nesting of features field") + self.assertTrue("gate_format_feature1" in doc_features) + self.assertTrue("gate_format_feature2" in doc_features) + self.assertTrue("gate_format_feature3" in doc_features) self.check_raw_gate_annotation_formatting(doc_dict)