Skip to content

Commit a19f83e

Browse files
committed
test: align metrics imports with csb rename
1 parent b80d064 commit a19f83e

File tree

3 files changed

+8
-8
lines changed

3 files changed

+8
-8
lines changed

tests/test_extract_task_metrics.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
88

99
from scripts.extract_task_metrics import process_task_dir
10-
from scripts.ccb_metrics.models import TaskMetrics
10+
from scripts.csb_metrics.models import TaskMetrics
1111

1212

1313
class ExtractTaskMetricsEmitterTests(unittest.TestCase):

tests/test_judge_engine.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@
1616
# Make scripts/ importable
1717
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts"))
1818

19-
from ccb_metrics.judge import (
19+
from csb_metrics.judge import (
2020
JudgeInput,
2121
JudgeResult,
2222
LLMJudge,
2323
normalize_score,
2424
)
25-
from ccb_metrics.judge.backends import AnthropicBackend, JudgeBackendError
25+
from csb_metrics.judge.backends import AnthropicBackend, JudgeBackendError, _parse_json
2626

2727
# ---------------------------------------------------------------------------
2828
# Shared fixtures / helpers
@@ -70,7 +70,7 @@ def _make_judge() -> LLMJudge:
7070

7171

7272
class TestNormalizeScore:
73-
"""Tests for ccb_metrics.judge.models.normalize_score."""
73+
"""Tests for csb_metrics.judge.models.normalize_score."""
7474

7575
def test_string_pass(self):
7676
assert normalize_score("pass") == 1.0
@@ -292,7 +292,7 @@ def flaky_raw_call(system_prompt: str, user_prompt: str) -> str:
292292
return valid_json_str
293293

294294
with patch.object(AnthropicBackend, "_raw_call", side_effect=flaky_raw_call):
295-
with patch("ccb_metrics.judge.backends.time.sleep"): # skip actual sleep
295+
with patch("csb_metrics.judge.backends.time.sleep"): # skip actual sleep
296296
result = backend.call("system", "user")
297297

298298
assert call_count == 2
@@ -314,5 +314,5 @@ def bad_raw_call(system_prompt: str, user_prompt: str) -> str:
314314
def test_parse_json_from_markdown_code_block(self):
315315
"""_parse_json handles responses wrapped in ```json``` code blocks."""
316316
wrapped = f"```json\n{json.dumps(_MOCK_BACKEND_RESPONSE)}\n```"
317-
result = AnthropicBackend._parse_json(wrapped)
317+
result = _parse_json(wrapped)
318318
assert result["reasoning"] == _MOCK_BACKEND_RESPONSE["reasoning"]

tests/test_oracle_checks_tiered.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Unit tests for tiered scoring in scripts/ccb_metrics/oracle_checks.py.
1+
"""Unit tests for tiered scoring in scripts/csb_metrics/oracle_checks.py.
22
33
Covers the two-tier weighted scoring added to check_file_set_match and the
44
_get_primary_score preference for weighted_f1 over plain f1.
@@ -9,7 +9,7 @@
99

1010
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
1111

12-
from scripts.ccb_metrics.oracle_checks import check_file_set_match, _get_primary_score
12+
from scripts.csb_metrics.oracle_checks import check_file_set_match, _get_primary_score
1313

1414

1515
def _file(repo, path, tier=None):

0 commit comments

Comments (0)