Add aggregation coverage
dbutenhof committed Dec 10, 2024
1 parent 40887bd commit 1446089
Showing 2 changed files with 148 additions and 50 deletions.
63 changes: 30 additions & 33 deletions backend/tests/fake_elastic.py
@@ -22,53 +22,50 @@ def __init__(self, hosts: Union[str, list[str]], **kwargs):

# Testing helpers to manage fake searches
def set_query(
self, root_index: str, data: list[dict[str, Any]], version: int = 7
self,
root_index: str,
hit_list: Optional[list[dict[str, Any]]] = None,
aggregation_list: Optional[dict[str, Any]] = None,
version: int = 7,
):
ver = f"v{version:d}dev"
index = f"cdm{ver}-{root_index}"
hits = []
for d in data:
source = d
source["cdm"] = {"ver": ver}
hits.append(
{
"_index": index,
"_id": "random_string",
"_score": 1.0,
"_source": source,
aggregations = None
if hit_list:
for d in hit_list:
source = d
source["cdm"] = {"ver": ver}
hits.append(
{
"_index": index,
"_id": "random_string",
"_score": 1.0,
"_source": source,
}
)
if aggregation_list:
aggregations = {
k: {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": v,
}
)
for k, v in aggregation_list.items()
}
self.data[index] = {
"took": 1,
"timed_out": False,
"_shards": {"total": 1, "successful": 1, "skipped": 0, "failed": 0},
"hits": {
"total": {"value": len(data), "relation": "eq"},
"total": {"value": len(hits), "relation": "eq"},
"max_score": 1.0,
"hits": hits,
},
}

# Testing helpers to manage fake aggregations
#
# TODO: how much Opensearch boilerplate (score, etc) can reasonably be
# factored out into this method?
def set_aggregate(self, index: str, data: dict[str, Any]):
self.data[index] = {
"took": 1,
"timed_out": False,
"_shards": {"total": 1, "successful": 1, "skipped": 0, "failed": 0},
"hits": {
"total": {"value": len(data), "relation": "eq"},
"max_score": 1.0,
"hits": {
"total": {"value": 10000, "relation": "gte"},
"max_score": None,
"hits": [],
},
},
"aggregations": data,
}
if aggregations:
print(f"AGGREGATIONS => {aggregations}")
self.data[index]["aggregations"] = aggregations

# Faked AsyncElasticsearch methods
async def close(self):
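With this change, a single set_query call can queue both document hits and aggregations. As a rough usage sketch (the class name FakeAsyncElasticsearch, module path, and host URL are assumptions for illustration; only set_query's behavior comes from the diff above):

    # Hypothetical driver for the extended set_query helper; only the
    # set_query signature and storage behavior are shown in the diff.
    from tests.fake_elastic import FakeAsyncElasticsearch  # assumed path

    elastic = FakeAsyncElasticsearch("http://localhost:9200")

    # Queue a response for the "tag" index with no document hits and one
    # terms aggregation named "key". set_query wraps each bucket list in
    # the Opensearch boilerplate (doc_count_error_upper_bound, etc.) and
    # stores the canned response under the versioned index name
    # "cdmv7dev-tag" (version defaults to 7).
    elastic.set_query(
        "tag",
        aggregation_list={"key": [{"key": "topology", "doc_count": 25}]},
    )

    # A later search against "cdmv7dev-tag" then returns hits.total == 0
    # and an "aggregations" section, which is attached only because
    # aggregation_list was supplied.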
135 changes: 118 additions & 17 deletions backend/tests/test_crucible.py
@@ -26,6 +26,13 @@ def fake_elastic(monkeypatch, fake_config):
     )
 
 
+@pytest.fixture
+async def fake_crucible(fake_elastic):
+    crucible = CrucibleService("TEST")
+    yield crucible
+    await crucible.close()
+
+
 class TestParser:
 
     def test_parse_normal(self):
@@ -80,17 +87,16 @@ def test_one(self):
 
 class TestCrucible:
 
-    async def test_create(self, fake_elastic):
+    async def test_create(self, fake_crucible):
         """Create and close a CrucibleService instance"""
 
-        crucible = CrucibleService("TEST")
-        assert crucible
-        assert isinstance(crucible, CrucibleService)
-        assert isinstance(crucible.elastic, AsyncElasticsearch)
-        assert app.config.get_config().get("TEST.url") == crucible.url
-        elastic = crucible.elastic
-        await crucible.close()
-        assert crucible.elastic is None
+        assert fake_crucible
+        assert isinstance(fake_crucible, CrucibleService)
+        assert isinstance(fake_crucible.elastic, AsyncElasticsearch)
+        assert app.config.get_config().get("TEST.url") == fake_crucible.url
+        elastic = fake_crucible.elastic
+        await fake_crucible.close()
+        assert fake_crucible.elastic is None
         assert elastic.closed
 
     def test_no_hits(self):
@@ -124,13 +130,12 @@ def test_hits_fields(self):
             CrucibleService._hits({"hits": {"hits": payload}}, ["f"])
         )
 
-    async def test_metric_ids_none(self, fake_elastic):
+    async def test_metric_ids_none(self, fake_crucible):
         """A simple query for failure matching metric IDs"""
 
-        crucible = CrucibleService("TEST")
-        crucible.elastic.set_query("metric_desc", [])
+        fake_crucible.elastic.set_query("metric_desc", [])
         with pytest.raises(HTTPException) as e:
-            await crucible._get_metric_ids("runid", "source::type")
+            await fake_crucible._get_metric_ids("runid", "source::type")
         assert 400 == e.value.status_code
         assert "No matches for source::type" == e.value.detail
 
@@ -152,13 +157,109 @@ async def test_metric_ids_none(self, fake_elastic):
             ),
         ),
     )
-    async def test_metric_ids(self, fake_elastic, found, expected):
+    async def test_metric_ids(self, fake_crucible, found, expected):
         """A simple query for matching metric IDs"""
 
-        crucible = CrucibleService("TEST")
-        crucible.elastic.set_query("metric_desc", found)
-        assert expected == await crucible._get_metric_ids(
+        fake_crucible.elastic.set_query("metric_desc", found)
+        assert expected == await fake_crucible._get_metric_ids(
             "runid",
             "source::type",
             aggregate=len(expected) > 1,
         )
+
+    async def test_run_filters(self, fake_crucible):
+        """Test aggregations
+        This is the "simplest" aggregation-based query, but we need to define
+        fake aggregations for the tag, param, and run indices.
+        """
+
+        fake_crucible.elastic.set_query(
+            "tag",
+            aggregation_list={
+                "key": [
+                    {
+                        "key": "topology",
+                        "doc_count": 25,
+                        "values": {
+                            "doc_count_error_upper_bound": 0,
+                            "sum_other_doc_count": 0,
+                            "buckets": [],
+                        },
+                    },
+                    {
+                        "key": "accelerator",
+                        "doc_count": 19,
+                        "values": {
+                            "doc_count_error_upper_bound": 0,
+                            "sum_other_doc_count": 0,
+                            "buckets": [
+                                {"key": "A100", "doc_count": 5},
+                                {"key": "L40S", "doc_count": 2},
+                            ],
+                        },
+                    },
+                    {
+                        "key": "project",
+                        "doc_count": 19,
+                        "values": {
+                            "doc_count_error_upper_bound": 0,
+                            "sum_other_doc_count": 0,
+                            "buckets": [
+                                {"key": "rhelai", "doc_count": 1},
+                                {"key": "rhosai", "doc_count": 2},
+                            ],
+                        },
+                    },
+                ]
+            },
+        )
+        fake_crucible.elastic.set_query(
+            "param",
+            aggregation_list={
+                "key": [
+                    {
+                        "key": "bucket",
+                        "doc_count": 25,
+                        "values": {
+                            "doc_count_error_upper_bound": 0,
+                            "sum_other_doc_count": 0,
+                            "buckets": [{"key": 200, "doc_count": 30}],
+                        },
+                    },
+                ]
+            },
+        )
+        fake_crucible.elastic.set_query(
+            "run",
+            aggregation_list={
+                "begin": [{"key": 123456789, "doc_count": 1}],
+                "benchmark": [{"key": "ilab", "doc_count": 25}],
+                "desc": [],
+                "email": [
+                    {"key": "[email protected]", "doc_count": 10},
+                    {"key": "[email protected]", "doc_count": 15},
+                ],
+                "end": [{"key": 1234, "doc_count": 10}],
+                "harness": [],
+                "host": [
+                    {"key": "one.example.com", "doc_count": 5},
+                    {"key": "two.example.com", "doc_count": 20},
+                ],
+                "id": [],
+                "name": [],
+                "source": [],
+            },
+        )
+        filters = await fake_crucible.get_run_filters()
+
+        # Array ordering is not reliable, so we need to sort
+        assert sorted(filters.keys()) == ["param", "run", "tag"]
+        assert sorted(filters["tag"].keys()) == ["accelerator", "project"]
+        assert sorted(filters["param"].keys()) == ["bucket"]
+        assert sorted(filters["run"].keys()) == ["benchmark", "email", "host"]
+        assert sorted(filters["tag"]["accelerator"]) == ["A100", "L40S"]
+        assert sorted(filters["param"]["bucket"]) == [200]
+        assert sorted(filters["run"]["benchmark"]) == ["ilab"]
+        assert sorted(filters["run"]["email"]) == ["[email protected]", "[email protected]"]
+        assert sorted(filters["run"]["host"]) == ["one.example.com", "two.example.com"]

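The assertions imply that get_run_filters flattens each terms aggregation into a bare list of bucket keys, drops aggregations that returned no buckets (desc, harness, id, name, source), and also omits the begin/end timestamp fields even though they have buckets. A standalone sketch of that collapsing step, inferred from the test expectations rather than taken from the service code:

    from typing import Any

    # Fields excluded even when non-empty; begin/end appear to carry run
    # timestamps rather than filterable values (inferred from the
    # assertions above, not from the service implementation).
    EXCLUDED = {"begin", "end"}

    def collapse_buckets(
        aggregations: dict[str, list[dict[str, Any]]]
    ) -> dict[str, list[Any]]:
        """Reduce terms-aggregation buckets to their keys, dropping empty
        or excluded aggregations."""
        return {
            name: [b["key"] for b in buckets]
            for name, buckets in aggregations.items()
            if buckets and name not in EXCLUDED
        }

    # Applied to the fake "run" aggregations, this leaves exactly the
    # benchmark, email, and host fields, matching the test's expectation
    # that sorted(filters["run"].keys()) == ["benchmark", "email", "host"].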