Add aggregation coverage
dbutenhof committed Dec 10, 2024
1 parent 40887bd commit 1446089
Showing 2 changed files with 148 additions and 50 deletions.
63 changes: 30 additions & 33 deletions backend/tests/fake_elastic.py
@@ -22,53 +22,50 @@ def __init__(self, hosts: Union[str, list[str]], **kwargs):

# Testing helpers to manage fake searches
def set_query(
self, root_index: str, data: list[dict[str, Any]], version: int = 7
self,
root_index: str,
hit_list: Optional[list[dict[str, Any]]] = None,
aggregation_list: Optional[dict[str, Any]] = None,
version: int = 7,
):
ver = f"v{version:d}dev"
index = f"cdm{ver}-{root_index}"
hits = []
for d in data:
source = d
source["cdm"] = {"ver": ver}
hits.append(
{
"_index": index,
"_id": "random_string",
"_score": 1.0,
"_source": source,
aggregations = None
if hit_list:
for d in hit_list:
source = d
source["cdm"] = {"ver": ver}
hits.append(
{
"_index": index,
"_id": "random_string",
"_score": 1.0,
"_source": source,
}
)
if aggregation_list:
aggregations = {
k: {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": v,
}
)
for k, v in aggregation_list.items()
}
self.data[index] = {
"took": 1,
"timed_out": False,
"_shards": {"total": 1, "successful": 1, "skipped": 0, "failed": 0},
"hits": {
"total": {"value": len(data), "relation": "eq"},
"total": {"value": len(hits), "relation": "eq"},
"max_score": 1.0,
"hits": hits,
},
}

# Testing helpers to manage fake aggregations
#
# TODO: how much Opensearch boilerplate (score, etc) can reasonably be
# factored out into this method?
def set_aggregate(self, index: str, data: dict[str, Any]):
self.data[index] = {
"took": 1,
"timed_out": False,
"_shards": {"total": 1, "successful": 1, "skipped": 0, "failed": 0},
"hits": {
"total": {"value": len(data), "relation": "eq"},
"max_score": 1.0,
"hits": {
"total": {"value": 10000, "relation": "gte"},
"max_score": None,
"hits": [],
},
},
"aggregations": data,
}
if aggregations:
print(f"AGGREGATIONS => {aggregations}")
self.data[index]["aggregations"] = aggregations

# Faked AsyncElasticsearch methods
async def close(self):
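With this change, a single set_query call can queue both document hits and aggregations. As a rough usage sketch (the class name FakeAsyncElasticsearch, module path, and host URL are assumptions for illustration; only set_query's behavior comes from the diff above):

    # Hypothetical driver for the extended set_query helper; only the
    # set_query signature and storage behavior are shown in the diff.
    from tests.fake_elastic import FakeAsyncElasticsearch  # assumed path

    elastic = FakeAsyncElasticsearch("http://localhost:9200")

    # Queue a response for the "tag" index with no document hits and one
    # terms aggregation named "key". set_query wraps each bucket list in
    # the Opensearch boilerplate (doc_count_error_upper_bound, etc.) and
    # stores the canned response under the versioned index name
    # "cdmv7dev-tag" (version defaults to 7).
    elastic.set_query(
        "tag",
        aggregation_list={"key": [{"key": "topology", "doc_count": 25}]},
    )

    # A later search against "cdmv7dev-tag" then returns hits.total == 0
    # and an "aggregations" section, which is attached only because
    # aggregation_list was supplied.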
135 changes: 118 additions & 17 deletions backend/tests/test_crucible.py
@@ -26,6 +26,13 @@ def fake_elastic(monkeypatch, fake_config):
     )
 
 
+@pytest.fixture
+async def fake_crucible(fake_elastic):
+    crucible = CrucibleService("TEST")
+    yield crucible
+    await crucible.close()
+
+
 class TestParser:
 
     def test_parse_normal(self):
@@ -80,17 +87,16 @@ def test_one(self):
 
 class TestCrucible:
 
-    async def test_create(self, fake_elastic):
+    async def test_create(self, fake_crucible):
         """Create and close a CrucibleService instance"""
 
-        crucible = CrucibleService("TEST")
-        assert crucible
-        assert isinstance(crucible, CrucibleService)
-        assert isinstance(crucible.elastic, AsyncElasticsearch)
-        assert app.config.get_config().get("TEST.url") == crucible.url
-        elastic = crucible.elastic
-        await crucible.close()
-        assert crucible.elastic is None
+        assert fake_crucible
+        assert isinstance(fake_crucible, CrucibleService)
+        assert isinstance(fake_crucible.elastic, AsyncElasticsearch)
+        assert app.config.get_config().get("TEST.url") == fake_crucible.url
+        elastic = fake_crucible.elastic
+        await fake_crucible.close()
+        assert fake_crucible.elastic is None
         assert elastic.closed
 
     def test_no_hits(self):
@@ -124,13 +130,12 @@ def test_hits_fields(self):
             CrucibleService._hits({"hits": {"hits": payload}}, ["f"])
         )
 
-    async def test_metric_ids_none(self, fake_elastic):
+    async def test_metric_ids_none(self, fake_crucible):
         """A simple query for failure matching metric IDs"""
 
-        crucible = CrucibleService("TEST")
-        crucible.elastic.set_query("metric_desc", [])
+        fake_crucible.elastic.set_query("metric_desc", [])
         with pytest.raises(HTTPException) as e:
-            await crucible._get_metric_ids("runid", "source::type")
+            await fake_crucible._get_metric_ids("runid", "source::type")
         assert 400 == e.value.status_code
         assert "No matches for source::type" == e.value.detail
 
@@ -152,13 +157,109 @@ async def test_metric_ids_none(self, fake_elastic):
             ),
         ),
     )
-    async def test_metric_ids(self, fake_elastic, found, expected):
+    async def test_metric_ids(self, fake_crucible, found, expected):
         """A simple query for matching metric IDs"""
 
-        crucible = CrucibleService("TEST")
-        crucible.elastic.set_query("metric_desc", found)
-        assert expected == await crucible._get_metric_ids(
+        fake_crucible.elastic.set_query("metric_desc", found)
+        assert expected == await fake_crucible._get_metric_ids(
             "runid",
             "source::type",
             aggregate=len(expected) > 1,
         )
+
+    async def test_run_filters(self, fake_crucible):
+        """Test aggregations
+        This is the "simplest" aggregation-based query, but we need to define
+        fake aggregations for the tag, param, and run indices.
+        """
+
+        fake_crucible.elastic.set_query(
+            "tag",
+            aggregation_list={
+                "key": [
+                    {
+                        "key": "topology",
+                        "doc_count": 25,
+                        "values": {
+                            "doc_count_error_upper_bound": 0,
+                            "sum_other_doc_count": 0,
+                            "buckets": [],
+                        },
+                    },
+                    {
+                        "key": "accelerator",
+                        "doc_count": 19,
+                        "values": {
+                            "doc_count_error_upper_bound": 0,
+                            "sum_other_doc_count": 0,
+                            "buckets": [
+                                {"key": "A100", "doc_count": 5},
+                                {"key": "L40S", "doc_count": 2},
+                            ],
+                        },
+                    },
+                    {
+                        "key": "project",
+                        "doc_count": 19,
+                        "values": {
+                            "doc_count_error_upper_bound": 0,
+                            "sum_other_doc_count": 0,
+                            "buckets": [
+                                {"key": "rhelai", "doc_count": 1},
+                                {"key": "rhosai", "doc_count": 2},
+                            ],
+                        },
+                    },
+                ]
+            },
+        )
+        fake_crucible.elastic.set_query(
+            "param",
+            aggregation_list={
+                "key": [
+                    {
+                        "key": "bucket",
+                        "doc_count": 25,
+                        "values": {
+                            "doc_count_error_upper_bound": 0,
+                            "sum_other_doc_count": 0,
+                            "buckets": [{"key": 200, "doc_count": 30}],
+                        },
+                    },
+                ]
+            },
+        )
+        fake_crucible.elastic.set_query(
+            "run",
+            aggregation_list={
+                "begin": [{"key": 123456789, "doc_count": 1}],
+                "benchmark": [{"key": "ilab", "doc_count": 25}],
+                "desc": [],
+                "email": [
+                    {"key": "[email protected]", "doc_count": 10},
+                    {"key": "[email protected]", "doc_count": 15},
+                ],
+                "end": [{"key": 1234, "doc_count": 10}],
+                "harness": [],
+                "host": [
+                    {"key": "one.example.com", "doc_count": 5},
+                    {"key": "two.example.com", "doc_count": 20},
+                ],
+                "id": [],
+                "name": [],
+                "source": [],
+            },
+        )
+        filters = await fake_crucible.get_run_filters()
+
+        # Array ordering is not reliable, so we need to sort
+        assert sorted(filters.keys()) == ["param", "run", "tag"]
+        assert sorted(filters["tag"].keys()) == ["accelerator", "project"]
+        assert sorted(filters["param"].keys()) == ["bucket"]
+        assert sorted(filters["run"].keys()) == ["benchmark", "email", "host"]
+        assert sorted(filters["tag"]["accelerator"]) == ["A100", "L40S"]
+        assert sorted(filters["param"]["bucket"]) == [200]
+        assert sorted(filters["run"]["benchmark"]) == ["ilab"]
+        assert sorted(filters["run"]["email"]) == ["[email protected]", "[email protected]"]
+        assert sorted(filters["run"]["host"]) == ["one.example.com", "two.example.com"]

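The assertions imply that get_run_filters flattens each terms aggregation into a bare list of bucket keys, drops aggregations that returned no buckets (desc, harness, id, name, source), and also omits the begin/end timestamp fields even though they have buckets. A standalone sketch of that collapsing step, inferred from the test expectations rather than taken from the service code:

    from typing import Any

    # Fields excluded even when non-empty; begin/end appear to carry run
    # timestamps rather than filterable values (inferred from the
    # assertions above, not from the service implementation).
    EXCLUDED = {"begin", "end"}

    def collapse_buckets(
        aggregations: dict[str, list[dict[str, Any]]]
    ) -> dict[str, list[Any]]:
        """Reduce terms-aggregation buckets to their keys, dropping empty
        or excluded aggregations."""
        return {
            name: [b["key"] for b in buckets]
            for name, buckets in aggregations.items()
            if buckets and name not in EXCLUDED
        }

    # Applied to the fake "run" aggregations, this leaves exactly the
    # benchmark, email, and host fields, matching the test's expectation
    # that sorted(filters["run"].keys()) == ["benchmark", "email", "host"].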