@@ -19,6 +19,17 @@ def mock_regex_annotator():
1919 "EMAIL" : ["john@example.com" ],
2020 "PHONE" : ["555-555-5555" ],
2121 }
22+
23+ # Add mock for annotate_with_spans method
24+ from datafog .processing .text_processing .regex_annotator import AnnotationResult , Span
25+ spans = [
26+ Span (label = "EMAIL" , start = 0 , end = 15 , text = "john@example.com" ),
27+ Span (label = "PHONE" , start = 20 , end = 32 , text = "555-555-5555" )
28+ ]
29+ mock .annotate_with_spans .return_value = (
30+ {"EMAIL" : ["john@example.com" ], "PHONE" : ["555-555-5555" ]},
31+ AnnotationResult (text = "test" , spans = spans )
32+ )
2233 return mock
2334
2435
@@ -242,3 +253,92 @@ def test_auto_engine_with_fallback(
242253 assert mock_annotator .annotate .called
243254
244255 assert result == {"PER" : ["John Doe" ], "ORG" : ["Acme Inc" ]}
256+
257+
def test_structured_output_regex_engine(
    text_service_with_engine, mock_regex_annotator
):
    """Structured mode with the regex engine yields a list of spans.

    A service is built through the ``text_service_with_engine`` factory
    fixture with ``engine="regex"``; the text is annotated with
    ``structured=True`` and the returned spans are compared with the ones
    the ``mock_regex_annotator`` fixture is configured to hand back.
    """
    regex_service = text_service_with_engine(engine="regex")
    # Raise the chunk length so the annotator is not invoked multiple times.
    regex_service.text_chunk_length = 1000

    spans = regex_service.annotate_text_sync("john@example.com", structured=True)

    # The span-producing entry point of the regex annotator must be used.
    assert mock_regex_annotator.annotate_with_spans.called

    # The outcome has to be a plain list holding exactly two spans.
    assert isinstance(spans, list)
    assert len(spans) == 2

    # Order matters: the e-mail span comes first, the phone span second.
    email_span, phone_span = spans
    assert email_span.label == "EMAIL"
    assert email_span.text == "john@example.com"
    assert phone_span.label == "PHONE"
    assert phone_span.text == "555-555-5555"
275+
276+
def test_structured_output_spacy_engine(text_service_with_engine, mock_annotator):
    """Structured mode with the spaCy engine yields spans with text offsets.

    The mocked annotator reports one ``PER`` and one ``ORG`` entity, both of
    which occur in the input sentence; the test checks that each comes back
    as a span whose ``start``/``end`` match the entity's position in the text.
    """
    spacy_service = text_service_with_engine(engine="spacy")
    # Raise the chunk length so the annotator is not invoked multiple times.
    spacy_service.text_chunk_length = 1000

    # Entities returned by the mock must be locatable in the sentence below.
    test_text = "John Doe works at Acme Inc"
    mock_annotator.annotate.return_value = {"PER": ["John Doe"], "ORG": ["Acme Inc"]}

    spans = spacy_service.annotate_text_sync(test_text, structured=True)

    # The spaCy annotator has to be consulted.
    assert mock_annotator.annotate.called

    # The outcome has to be a plain list holding exactly two spans.
    assert isinstance(spans, list)
    assert len(spans) == 2

    # Split the spans by label before inspecting them individually.
    people = [item for item in spans if item.label == "PER"]
    organisations = [item for item in spans if item.label == "ORG"]

    assert len(people) == 1
    person = people[0]
    person_offset = test_text.find("John Doe")
    assert person.text == "John Doe"
    assert person.start == person_offset
    assert person.end == person_offset + len("John Doe")

    assert len(organisations) == 1
    organisation = organisations[0]
    organisation_offset = test_text.find("Acme Inc")
    assert organisation.text == "Acme Inc"
    assert organisation.start == organisation_offset
    assert organisation.end == organisation_offset + len("Acme Inc")
312+
313+
def test_structured_output_auto_engine(
    text_service_with_engine, mock_regex_annotator, mock_annotator
):
    """Structured mode with the auto engine consults both annotators.

    The regex annotator mock is reconfigured to report no spans, while the
    other annotator mock reports two entities present in the input; the test
    then checks that both mocks were called and that two spans are returned.
    """
    from datafog.processing.text_processing.regex_annotator import AnnotationResult

    # Make the regex annotator come back empty-handed.
    no_regex_hits = AnnotationResult(text="test", spans=[])
    mock_regex_annotator.annotate_with_spans.return_value = (
        {"EMAIL": [], "PHONE": []},
        no_regex_hits,
    )

    auto_service = text_service_with_engine(engine="auto")
    # Raise the chunk length so the annotators are not invoked multiple times.
    auto_service.text_chunk_length = 1000

    # Entities returned by the mock must be locatable in the sentence below.
    test_text = "John Doe works at Acme Inc"
    mock_annotator.annotate.return_value = {"PER": ["John Doe"], "ORG": ["Acme Inc"]}

    spans = auto_service.annotate_text_sync(test_text, structured=True)

    # Both annotators are expected to have been called.
    assert mock_regex_annotator.annotate_with_spans.called
    assert mock_annotator.annotate.called

    # The outcome has to be a plain list holding exactly two spans.
    assert isinstance(spans, list)
    assert len(spans) == 2
0 commit comments