Skip to content

Commit 5bd3226

Browse files
committed
test cov
1 parent b90cfc6 commit 5bd3226

File tree

2 files changed

+111
-0
lines changed

2 files changed

+111
-0
lines changed

tests/test_regex_annotator.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,17 @@ def test_zip_regex(zip_code: str, should_match: bool):
318318
), f"Incorrectly detected invalid ZIP: {zip_code}"
319319

320320

321+
def test_annotate_with_spans_empty_text():
    """Check that annotating an empty string yields empty results.

    Both return values of ``annotate_with_spans`` are inspected: the
    per-label dictionary and the structured annotation result.
    """
    regex_annotator = RegexAnnotator()
    by_label, structured = regex_annotator.annotate_with_spans("")

    # Every label the annotator declares should map to an empty list.
    expected_by_label = {label: [] for label in regex_annotator.LABELS}
    assert by_label == expected_by_label

    # The structured result echoes the (empty) input and carries no spans.
    assert structured.text == ""
    assert len(structured.spans) == 0
330+
331+
321332
def test_annotation_result_format():
322333
"""Test the structured AnnotationResult format."""
323334
annotator = RegexAnnotator()

tests/test_text_service.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,17 @@ def mock_regex_annotator():
1919
"EMAIL": ["john@example.com"],
2020
"PHONE": ["555-555-5555"],
2121
}
22+
23+
# Add mock for annotate_with_spans method
24+
from datafog.processing.text_processing.regex_annotator import AnnotationResult, Span
25+
spans = [
26+
Span(label="EMAIL", start=0, end=15, text="john@example.com"),
27+
Span(label="PHONE", start=20, end=32, text="555-555-5555")
28+
]
29+
mock.annotate_with_spans.return_value = (
30+
{"EMAIL": ["john@example.com"], "PHONE": ["555-555-5555"]},
31+
AnnotationResult(text="test", spans=spans)
32+
)
2233
return mock
2334

2435

@@ -242,3 +253,92 @@ def test_auto_engine_with_fallback(
242253
assert mock_annotator.annotate.called
243254

244255
assert result == {"PER": ["John Doe"], "ORG": ["Acme Inc"]}
256+
257+
258+
def test_structured_output_regex_engine(text_service_with_engine, mock_regex_annotator):
    """Check structured output when the service is built with the regex engine.

    The service is expected to go through the regex annotator's
    ``annotate_with_spans`` method and return its spans as a list.
    """
    regex_service = text_service_with_engine(engine="regex")
    # Raise the chunk length so the short input is not split (intended to
    # avoid more than one annotator call).
    regex_service.text_chunk_length = 1000

    spans = regex_service.annotate_text_sync("john@example.com", structured=True)

    # The span-producing method of the regex annotator must have been used.
    assert mock_regex_annotator.annotate_with_spans.called

    # Result shape: a list holding exactly two entries.
    assert isinstance(spans, list)
    assert len(spans) == 2

    # Entries come back in the order EMAIL, then PHONE.
    email_span, phone_span = spans
    assert email_span.label == "EMAIL"
    assert email_span.text == "john@example.com"
    assert phone_span.label == "PHONE"
    assert phone_span.text == "555-555-5555"
275+
276+
277+
def test_structured_output_spacy_engine(text_service_with_engine, mock_annotator):
    """Check structured output when the service is built with the spaCy engine.

    The mocked annotator returns plain entity strings; the test verifies that
    each one comes back as a span whose offsets match its position in the
    input text.
    """
    spacy_service = text_service_with_engine(engine="spacy")
    # Raise the chunk length so the short input is not split (intended to
    # avoid more than one annotator call).
    spacy_service.text_chunk_length = 1000

    # The mocked entities are substrings of the sample text, so their offsets
    # can be located with str.find below.
    sample = "John Doe works at Acme Inc"
    mock_annotator.annotate.return_value = {"PER": ["John Doe"], "ORG": ["Acme Inc"]}

    spans = spacy_service.annotate_text_sync(sample, structured=True)

    # The spaCy annotator must have been used.
    assert mock_annotator.annotate.called

    # Result shape: a list holding exactly two entries.
    assert isinstance(spans, list)
    assert len(spans) == 2

    # Each label must appear exactly once, with text and offsets that agree
    # with where the entity sits in the sample.
    for label, entity in (("PER", "John Doe"), ("ORG", "Acme Inc")):
        matching = [span for span in spans if span.label == label]
        offset = sample.find(entity)

        assert len(matching) == 1
        assert matching[0].text == entity
        assert matching[0].start == offset
        assert matching[0].end == offset + len(entity)
312+
313+
314+
def test_structured_output_auto_engine(
    text_service_with_engine, mock_regex_annotator, mock_annotator
):
    """Check structured output when the service is built with the auto engine.

    The regex annotator is mocked to find nothing, and the test then asserts
    that both annotators were invoked and that two entries are returned.
    """
    from datafog.processing.text_processing.regex_annotator import AnnotationResult

    # Make the regex pass come back empty: no matches per label, no spans.
    no_regex_hits = (
        {"EMAIL": [], "PHONE": []},
        AnnotationResult(text="test", spans=[]),
    )
    mock_regex_annotator.annotate_with_spans.return_value = no_regex_hits

    auto_service = text_service_with_engine(engine="auto")
    # Raise the chunk length so the short input is not split (intended to
    # avoid more than one annotator call).
    auto_service.text_chunk_length = 1000

    # The mocked entities are substrings of the sample text.
    sample = "John Doe works at Acme Inc"
    mock_annotator.annotate.return_value = {"PER": ["John Doe"], "ORG": ["Acme Inc"]}

    spans = auto_service.annotate_text_sync(sample, structured=True)

    # Both the regex annotator and the other annotator must have been used.
    assert mock_regex_annotator.annotate_with_spans.called
    assert mock_annotator.annotate.called

    # Result shape: a list holding exactly two entries.
    assert isinstance(spans, list)
    assert len(spans) == 2

0 commit comments

Comments
 (0)