Skip to content

Commit ca08d46

Browse files
jucorclaude
andcommitted
Add warning for name collisions and test for include_local
- Warn when local dataset shadows a committed dataset with same name - Add test for include_local=True behavior - Add test for name collision warning 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <[email protected]>
1 parent b4aab54 commit ca08d46

File tree

2 files changed

+54
-1
lines changed

2 files changed

+54
-1
lines changed

delphi/polismath/regression/datasets.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,17 @@ def discover_datasets(include_local: bool = False) -> Dict[str, DatasetInfo]:
8989
"""Auto-discover datasets from real_data/ and optionally .local/"""
9090
datasets = _discover_in_dir(get_real_data_dir(), is_local=False)
9191
if include_local:
92-
datasets.update(_discover_in_dir(get_local_data_dir(), is_local=True))
92+
local_datasets = _discover_in_dir(get_local_data_dir(), is_local=True)
93+
# Warn about name collisions (local would shadow committed)
94+
collisions = set(datasets.keys()) & set(local_datasets.keys())
95+
if collisions:
96+
import warnings
97+
warnings.warn(
98+
f"Local datasets shadow committed datasets with same name: {', '.join(sorted(collisions))}. "
99+
f"Local versions will be used.",
100+
UserWarning
101+
)
102+
datasets.update(local_datasets)
93103
return datasets
94104

95105

delphi/tests/test_datasets.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,16 @@ def test_discover_real_data(self):
8888
result = discover_datasets(include_local=False)
8989
assert len(result) > 0
9090

91+
def test_discover_with_include_local(self):
92+
"""Should include local datasets when flag is set."""
93+
without_local = discover_datasets(include_local=False)
94+
with_local = discover_datasets(include_local=True)
95+
# With local should have >= datasets (may have more if .local/ has data)
96+
assert len(with_local) >= len(without_local)
97+
# All committed datasets should still be present
98+
for name in without_local:
99+
assert name in with_local
100+
91101
def test_list_regression_datasets(self):
92102
result = list_regression_datasets()
93103
assert isinstance(result, list)
@@ -101,3 +111,36 @@ def test_list_available_datasets(self):
101111
def test_paths_exist(self):
102112
assert get_real_data_dir().exists()
103113
assert get_local_data_dir().parent.exists()
114+
115+
116+
class TestNameCollision:
117+
def test_warns_on_name_collision(self, tmp_path, monkeypatch):
118+
"""Should warn when local dataset shadows committed dataset."""
119+
import warnings
120+
from polismath.regression import datasets
121+
122+
# Create mock directories
123+
real_data = tmp_path / "real_data"
124+
local_data = real_data / ".local"
125+
real_data.mkdir()
126+
local_data.mkdir()
127+
128+
# Create dataset with same name in both locations
129+
(real_data / "rabc123-test").mkdir()
130+
(local_data / "rdef456-test").mkdir() # Same name "test", different report_id
131+
132+
# Patch the directory functions
133+
monkeypatch.setattr(datasets, "get_real_data_dir", lambda: real_data)
134+
monkeypatch.setattr(datasets, "get_local_data_dir", lambda: local_data)
135+
136+
# Should warn about collision
137+
with warnings.catch_warnings(record=True) as w:
138+
warnings.simplefilter("always")
139+
result = datasets.discover_datasets(include_local=True)
140+
assert len(w) == 1
141+
assert "shadow" in str(w[0].message)
142+
assert "test" in str(w[0].message)
143+
144+
# Local version should win
145+
assert result["test"].report_id == "rdef456"
146+
assert result["test"].is_local

0 commit comments

Comments
 (0)