Aliases now also work for nested fields; Only retrieve data required for constructing a response from the database. #1304

Open · wants to merge 28 commits into base: main · Changes from 22 commits

Commits (28)
b949945
Allow aliases to be nested fields.
JPBergsma Aug 17, 2022
7adbf4a
Added a line to docs to explain how to alias nested fields.
JPBergsma Aug 17, 2022
8c05579
Merge branch 'master' into JPBergsma/allow_nested_aliases
JPBergsma Aug 17, 2022
6c8294b
Merge branch 'master' into JPBergsma/allow_nested_aliases
JPBergsma Aug 27, 2022
0ebc724
Merge branch 'master' into JPBergsma/allow_nested_aliases
JPBergsma Sep 14, 2022
4388c9d
Merge branch 'Materials-Consortia:master' into JPBergsma/allow_nested…
JPBergsma Oct 19, 2022
c29cdbb
Merge branch 'master' into JPBergsma/allow_nested_aliases
JPBergsma Nov 1, 2022
368792a
1. Added support for more versatile aliasing allowing nested fields.…
JPBergsma Nov 11, 2022
38fcde7
Merge branch 'master' into JPBergsma/allow_nested_aliases
JPBergsma Nov 11, 2022
5cec6d6
Added type hint to alias_and_prefix function.
JPBergsma Nov 14, 2022
cbba008
Added missing..
JPBergsma Nov 14, 2022
67f2dea
Fixed bug in elastic search where the requested fields were determine…
JPBergsma Nov 15, 2022
775009d
Use https://github.com/pycqa/flake8 instead of https://gitlab.com/pyc…
JPBergsma Nov 15, 2022
1358b9d
Added pyyaml to install requirements and added python 3.11 classifier.
JPBergsma Nov 16, 2022
bb228ee
Made a small change to the descriptions of the nested dict functions …
JPBergsma Nov 16, 2022
8245884
Removed pyyaml from serverdeps as I placed it already under installre…
JPBergsma Nov 16, 2022
7b2320b
Removed get_value function from entries.py as it was no longer needed…
JPBergsma Nov 16, 2022
d87bc1b
Removed get_value function from entries.py as it was no longer needed…
JPBergsma Nov 16, 2022
6b99822
Simplified set_field_to_none_if_missing_in_dict and moved it out of t…
JPBergsma Nov 16, 2022
841520d
Moved functions related to handling nested dicts to utils.py.
JPBergsma Nov 16, 2022
c22089f
Updated docstring remove_exclude_fields and removed unnecessary brack…
JPBergsma Nov 16, 2022
1146273
solved merge conflict.
JPBergsma Nov 16, 2022
4c46f55
Update of the explanation of the handling of nested fields
JPBergsma Nov 17, 2022
3a908bc
Merged changes from master.
JPBergsma Nov 30, 2022
1e49fb8
fix bug introduced by merge with master.
JPBergsma Nov 30, 2022
596bb73
Made changes to satisfy mypy.
JPBergsma Nov 30, 2022
703a9df
Added option to automatically set missing but required fields to the …
JPBergsma Nov 30, 2022
f174850
Removed get_non_optional_fields and get_schema as they are no longer …
JPBergsma Nov 30, 2022
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Expand Up @@ -23,7 +23,7 @@ repos:
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]

- repo: https://gitlab.com/pycqa/flake8
- repo: https://github.com/pycqa/flake8
rev: '3.9.2'
hooks:
- id: flake8
1 change: 1 addition & 0 deletions docs/getting_started/setting_up_an_api.md
Expand Up @@ -37,6 +37,7 @@ Instead, if you are storing chemical formulae as an unreduced count per simulati
This would then instead require option 2 above, namely either the addition of auxiliary fields that store the correct (or mappable) OPTIMADE format in the database, or the creation of a secondary database that returns the pre-converted structures.

In the simplest case, the mapper classes can be used to define aliases between fields in the database and the OPTIMADE field name; these can be configured via the [`aliases`][optimade.server.config.ServerConfig.aliases] option as a dictionary mapping stored in a dictionary under the appropriate endpoint name, e.g. `"aliases": {"structures": {"chemical_formula_reduced": "my_chem_form"}}`, or defined as part of a custom mapper class.
If the alias refers to a nested field, the individual field names should be separated by `.`, e.g. `"aliases": {"structures": {"OPTIMADE_field": "field.nested_field"}}`.
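As a rough illustration of how such a `.`-separated alias could be resolved against a stored document (a hypothetical helper for illustration only, not the actual mapper implementation):

```python
def resolve_dotted_alias(document: dict, dotted_field: str):
    """Follow a "."-separated alias like "field.nested_field" into a nested document."""
    value = document
    for key in dotted_field.split("."):
        value = value[key]
    return value

# A backend document whose OPTIMADE field is stored one level deep:
doc = {"field": {"nested_field": "C6H12Ag2ClN3"}}
resolve_dotted_alias(doc, "field.nested_field")  # "C6H12Ag2ClN3"
```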

In either option, you should now be able to insert your data into the corresponding MongoDB (or otherwise) collection.

4 changes: 2 additions & 2 deletions optimade/models/links.py
Expand Up @@ -39,11 +39,11 @@ class Aggregate(Enum):
class LinksResourceAttributes(Attributes):
"""Links endpoint resource object attributes"""

name: str = StrictField(
name: Optional[str] = StrictField(
...,
description="Human-readable name for the OPTIMADE API implementation, e.g., for use in clients to show the name to the end-user.",
)
description: str = StrictField(
description: Optional[str] = StrictField(
...,
description="Human-readable description for the OPTIMADE API implementation, e.g., for use in clients to show a description to the end-user.",
)
7 changes: 0 additions & 7 deletions optimade/models/references.py
Expand Up @@ -2,7 +2,6 @@
from pydantic import ( # pylint: disable=no-name-in-module
BaseModel,
AnyUrl,
validator,
)
from typing import List, Optional

Expand Down Expand Up @@ -264,9 +263,3 @@ class ReferenceResource(EntryResource):
queryable=SupportLevel.MUST,
)
attributes: ReferenceResourceAttributes

@validator("attributes")
def validate_attributes(cls, v):
if not any(prop[1] is not None for prop in v):
raise ValueError("reference object must have at least one field defined")
return v
8 changes: 8 additions & 0 deletions optimade/server/data/test_structures.json
Expand Up @@ -5,6 +5,7 @@
},
"assemblies": null,
"chemsys": "Ac",
"dichtheid": 10.07,
"cartesian_site_positions": [
[
0.17570227444196573,
Expand Down Expand Up @@ -1222,6 +1223,7 @@
"nelements": 5,
"nsites": 24,
"pretty_formula": "Ag2C6ClH12N3",
"fancy_formulas": {"hill": "C6H12Ag2ClN3"},
"species": [
{
"chemical_symbols": [
Expand Down Expand Up @@ -1475,6 +1477,9 @@
"nelements": 5,
"nsites": 25,
"pretty_formula": "Ag2C2H2N6O13",
"fancy_formulas" : {
"hill": "C2H2Ag2N6O13"
},
"species": [
{
"chemical_symbols": [
Expand Down Expand Up @@ -1723,6 +1728,7 @@
"nelements": 7,
"nsites": 23,
"pretty_formula": "Ag2C2ClH8N5O3S2",
"fancy_formulas": {"hill": "C2H8Ag2ClN5O3S2"},
"species": [
{
"chemical_symbols": [
Expand Down Expand Up @@ -2467,6 +2473,7 @@
"nelements": 8,
"nsites": 74,
"pretty_formula": "AgB10C15Cl2H40NO3P2",
"fancy_formulas": {"hill": "C15H40AgB10Cl2NO3P2"},
"species": [
{
"chemical_symbols": [
Expand Down Expand Up @@ -2821,6 +2828,7 @@
"nelements": 7,
"nsites": 29,
"pretty_formula": "AgC3ClH14N6OS3",
"fancy_formulas":{"hill": "C3H14AgClN6OS3"},
"species": [
{
"chemical_symbols": [
4 changes: 1 addition & 3 deletions optimade/server/entry_collections/elasticsearch.py
Expand Up @@ -169,9 +169,7 @@ def _run_db_query(
page_offset = criteria.get("skip", 0)
limit = criteria.get("limit", CONFIG.page_limit)

all_aliased_fields = [
self.resource_mapper.get_backend_field(field) for field in self.all_fields
]
all_aliased_fields = [field for field in criteria.get("projection", [])]
search = search.source(includes=all_aliased_fields)

elastic_sort = [
75 changes: 52 additions & 23 deletions optimade/server/entry_collections/entry_collections.py
Expand Up @@ -4,7 +4,7 @@
import re

from lark import Transformer

from functools import lru_cache
from optimade.filterparser import LarkParser
from optimade.models import EntryResource
from optimade.server.config import CONFIG, SupportedBackend
Expand All @@ -16,6 +16,7 @@
UnknownProviderProperty,
QueryParamNotUsed,
)
from optimade.utils import set_field_to_none_if_missing_in_dict


def create_collection(
Expand Down Expand Up @@ -119,9 +120,7 @@ def count(self, **kwargs: Any) -> int:

def find(
self, params: Union[EntryListingQueryParams, SingleEntryQueryParams]
) -> Tuple[
Union[List[EntryResource], EntryResource, None], int, bool, Set[str], Set[str]
]:
) -> Tuple[Union[List[EntryResource], EntryResource, None], int, bool, Set[str]]:
"""
Fetches results and indicates if more data is available.

Expand All @@ -145,6 +144,14 @@ def find(
criteria, single_entry
)

exclude_fields = self.all_fields - response_fields

results = [self.resource_mapper.map_back(doc) for doc in results]
self.check_and_add_missing_fields(results, response_fields)

if results:
results = self.resource_mapper.deserialize(results)

if single_entry:
results = results[0] if results else None

Expand All @@ -153,10 +160,20 @@ def find(
detail=f"Instead of a single entry, {data_returned} entries were found",
)

exclude_fields = self.all_fields - response_fields
return results, data_returned, more_data_available, exclude_fields

def check_and_add_missing_fields(self, results: List[dict], response_fields: set):
"""Checks whether the response_fields and mandatory fields are present.
If they are not present, the values are set to None so that deserialization works correctly.
It also checks whether all fields in the response have been defined either in the model or in the config file.
If not, it raises an appropriate error or warning."""
include_fields = (
response_fields - self.resource_mapper.TOP_LEVEL_NON_ATTRIBUTES_FIELDS
)
) | set(self.get_non_optional_fields())
# Include missing fields
for result in results:
for field in include_fields:
set_field_to_none_if_missing_in_dict(result["attributes"], field)
Member commented on lines +181 to +183:
This is going to incur a significant performance overhead, but I guess you want to do it so that you don't have to pull e.g., entire trajectories from the database each time, yet you still want to deserialize the JSON into your classes? I think I would suggest we instead have a per-collection deserialization flag, as presumably you only want to deserialize trajectories once (on database insertion) anyway. Does that make sense?

If you want to retain this approach, it might be cleaner to do it at the pydantic level, e.g., a root_validator that explicitly sets all missing fields to None (see also https://pydantic-docs.helpmanual.io/usage/models/#creating-models-without-validation as an option).

Contributor (Author) replied:
I do not think this is particularly heavy compared to all the other things we do in the code.
It is what we previously did in handle_response_fields. I only moved it here, so we can do it before the deserialization and added code to handle nested fields.

For biomolecular data, a structure can easily have 10,000 atoms, so retrieving them from the database and putting them in the model would take some time. This way we can avoid that if the species_at_sites and cartesian_site_positions are not in the response_fields. (I also made a patch in the code for Barcelona that allowed them to specify the default response fields, so they can choose to not have these fields in the response by default.)

I did not want to make the change even bigger by bypassing the rest of the validator (as in your second suggestion).
But from a performance viewpoint, bypassing the validation would be good.
Do you want me to add this to this PR or to a future PR?

I tried the root validator idea, but it seems I already get an error before the root_validator is executed, so I do not think this solution will work.
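The approach under discussion — filling in requested-but-absent fields before deserialization — can be sketched in isolation. This is a guess at the helper's behaviour for nested ("."-separated) fields, not the actual `set_field_to_none_if_missing_in_dict` implementation:

```python
def set_missing_to_none(attributes: dict, field: str) -> None:
    # Walk down a "."-separated path, creating intermediate dicts as needed,
    # and insert None where the requested leaf field is absent.
    # Fields that already hold a value are left untouched.
    keys = field.split(".")
    for key in keys[:-1]:
        attributes = attributes.setdefault(key, {})
    attributes.setdefault(keys[-1], None)

attributes = {"nsites": 24}
set_missing_to_none(attributes, "fancy_formulas.hill")
# attributes is now {"nsites": 24, "fancy_formulas": {"hill": None}}
```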

Member replied:
Hmmm, fair enough, just looks a bit scarier as a double for loop alongside the recursive descent into dictionaries to get the nested aliases. It's quite hard to reason about this, so I might set up a separate repo for measuring performance in the extreme limits (1 structure of 10000 atoms vs 10000 structures of a few atoms -- i.e., what we have now, ignoring pagination of course).

I tried the root validator idea, but it seems I already get an error before the root_validator is executed, so I do not think this solution will work.

Did you use @root_validator(pre=True) to make sure it gets run before anything else? Perhaps bypassing the validation altogether can wait for another PR, as you suggest, but I'd like to make the performance tests first at least (doesn't necessarily hold up this PR but it might hold up the next release).

Contributor (Author) replied:
I just noticed I made a mistake in my test script.
So it may be possible to do this with a root_validator after all.

JPBergsma (Contributor, Author) replied on Dec 1, 2022:
I have added a root_validator to the attributes class that, if a flag is set, checks whether all required fields are present and if not adds them and sets them to 0.

I'll try to put the handling of the other include fields back to the place where it happened originally so the code changes less.


bad_optimade_fields = set()
bad_provider_fields = set()
Expand All @@ -183,17 +200,6 @@ def find(
detail=f"Unrecognised OPTIMADE field(s) in requested `response_fields`: {bad_optimade_fields}."
)

if results:
results = self.resource_mapper.deserialize(results)

return (
results,
data_returned,
more_data_available,
exclude_fields,
include_fields,
)

@abstractmethod
def _run_db_query(
self, criteria: Dict[str, Any], single_entry: bool = False
Expand Down Expand Up @@ -236,6 +242,26 @@ def all_fields(self) -> Set[str]:

return self._all_fields

@lru_cache(maxsize=4)
def get_non_optional_fields(self) -> List[str]:
"""
Returns those fields that should be set before a response class can be initialized.

Returns:
Property names.
"""

schema = self.get_schema()
attributes = schema["properties"]["attributes"]
if "$ref" in attributes:
path = attributes["$ref"].split("/")[1:]
attributes = schema.copy()
while path:
next_key = path.pop(0)
attributes = attributes[next_key]
return attributes["required"]
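The `$ref` handling in `get_non_optional_fields` above can be illustrated standalone with a toy schema (a sketch of the same walk, with made-up field names, not the optimade code itself):

```python
def required_attribute_fields(schema: dict) -> list:
    # If "attributes" is a local JSON-schema reference such as
    # "#/definitions/Attributes", follow its path segments from the schema
    # root, then read the referenced object's "required" list.
    attributes = schema["properties"]["attributes"]
    if "$ref" in attributes:
        node = schema
        for key in attributes["$ref"].split("/")[1:]:
            node = node[key]
        attributes = node
    return attributes["required"]

toy_schema = {
    "properties": {"attributes": {"$ref": "#/definitions/Attributes"}},
    "definitions": {"Attributes": {"required": ["chemical_formula_reduced"]}},
}
required_attribute_fields(toy_schema)  # ["chemical_formula_reduced"]
```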

@lru_cache(maxsize=4)
def get_attribute_fields(self) -> Set[str]:
"""Get the set of attribute fields

Expand All @@ -252,7 +278,7 @@ def get_attribute_fields(self) -> Set[str]:

"""

schema = self.resource_cls.schema()
schema = self.get_schema()
attributes = schema["properties"]["attributes"]
if "allOf" in attributes:
allOf = attributes.pop("allOf")
Expand All @@ -266,6 +292,10 @@ def get_attribute_fields(self) -> Set[str]:
attributes = attributes[next_key]
return set(attributes["properties"].keys())

@lru_cache(maxsize=4)
def get_schema(self):
return self.resource_cls.schema()

def handle_query_params(
self, params: Union[EntryListingQueryParams, SingleEntryQueryParams]
) -> Dict[str, Any]:
Expand Down Expand Up @@ -319,16 +349,15 @@ def handle_query_params(
cursor_kwargs["limit"] = CONFIG.page_limit

# response_fields
cursor_kwargs["projection"] = {
f"{self.resource_mapper.get_backend_field(f)}": True
for f in self.all_fields
}

if getattr(params, "response_fields", False):
response_fields = set(params.response_fields.split(","))
response_fields |= self.resource_mapper.get_required_fields()
else:
response_fields = self.all_fields.copy()
cursor_kwargs["projection"] = {
f"{self.resource_mapper.get_backend_field(f)}": True
for f in response_fields
}

cursor_kwargs["fields"] = response_fields
