Skip to content

Commit f947d9c

Browse files
Copilot authored and wannaphong committed
Add comprehensive test suite and GitHub Actions workflow for testing
Co-authored-by: wannaphong <8536487+wannaphong@users.noreply.github.com>
1 parent 08b1b86 commit f947d9c

3 files changed

Lines changed: 283 additions & 0 deletions

File tree

.github/workflows/test.yml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
# CI workflow: run the pytest suite across all supported CPython versions
# on every push / pull request targeting the main development branches.
name: Run Tests

on:
  push:
    branches: [ main, master, dev, copilot/** ]
  pull_request:
    branches: [ main, master, dev ]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']

    steps:
      # v4 of checkout / v5 of setup-python run on Node 20; the v3/v4
      # releases are deprecated (Node 16 runtime retired by GitHub).
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install pytest pytest-cov

      - name: Run tests
        run: |
          pytest tests/ -v --cov=spacy_pythainlp --cov-report=term-missing

      # Upload coverage once per run (single matrix leg) to avoid duplicates.
      # NOTE(review): codecov-action v4+ requires a CODECOV_TOKEN secret for
      # most repos, so v3 is kept deliberately; never fail CI on upload errors.
      - name: Upload coverage reports
        if: matrix.python-version == '3.11'
        uses: codecov/codecov-action@v3
        with:
          fail_ci_if_error: false

tests/__init__.py

Whitespace-only changes.

tests/test_dependency_parsing.py

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
"""
2+
Tests for spacy-pythainlp dependency parsing functionality.
3+
4+
This test suite verifies the fix for handling variable-length CoNLL-U format
5+
output from PyThaiNLP's dependency_parsing function.
6+
"""
7+
8+
import unittest
9+
from unittest.mock import Mock, patch
10+
import spacy
11+
from spacy.tokens import Doc
12+
13+
14+
class TestDependencyParsing(unittest.TestCase):
    """Test cases for dependency parsing with variable-length field tuples.

    PyThaiNLP's ``dependency_parsing`` yields CoNLL-U rows.  The standard
    format has exactly 10 columns, but some engines append extra columns
    (e.g. ``SpaceAfter=...``).  These tests verify that rows with 10 or
    more columns are accepted and that rows with fewer raise ``ValueError``.
    """

    # Pipeline configuration with everything disabled except dependency
    # parsing -- shared by the parsing tests below instead of being
    # copy-pasted into each one.
    _DEP_PARSING_CONFIG = {
        "pos": False,
        "sent": False,
        "ner": False,
        "tokenize": False,
        "dependency_parsing": True,
        "dependency_parsing_engine": "esupar",
        "word_vector": False,
    }

    def setUp(self):
        """Set up test fixtures."""
        self.nlp = spacy.blank('th')

    @classmethod
    def _make_dep_nlp(cls):
        """Return a blank Thai pipeline with only dependency parsing enabled."""
        import spacy_pythainlp.core  # noqa: F401  (registers the "pythainlp" factory)
        nlp = spacy.blank('th')
        # Copy the shared config so a test cannot mutate the class attribute.
        nlp.add_pipe("pythainlp", config=dict(cls._DEP_PARSING_CONFIG))
        return nlp

    def test_import_spacy_pythainlp(self):
        """Test that spacy_pythainlp can be imported."""
        import spacy_pythainlp.core
        self.assertIsNotNone(spacy_pythainlp.core)

    def test_add_pythainlp_pipe(self):
        """Test that pythainlp pipeline can be added."""
        import spacy_pythainlp.core  # noqa: F401

        # Add pipeline with minimal configuration (every feature disabled).
        self.nlp.add_pipe(
            "pythainlp",
            config={
                "pos": False,
                "sent": False,
                "ner": False,
                "tokenize": False,
                "dependency_parsing": False,
                "word_vector": False,
            }
        )
        self.assertIn("pythainlp", self.nlp.pipe_names)

    @patch('pythainlp.parse.dependency_parsing')
    def test_dependency_parsing_with_10_fields(self, mock_dep_parsing):
        """Test dependency parsing with exactly 10 fields (standard CoNLL-U)."""
        # Head indices: token 0 points to token 1 (head=1), token 1 is root (head=0).
        mock_dep_parsing.return_value = [
            ['1', 'ฉัน', 'ฉัน', 'PRON', 'PRON', '_', '1', 'nsubj', '_', '_'],
            ['2', 'ชอบ', 'ชอบ', 'VERB', 'VERB', '_', '0', 'root', '_', '_'],
        ]

        doc = self._make_dep_nlp()("ฉันชอบ")

        # Verify the document was processed.
        self.assertIsInstance(doc, Doc)
        self.assertEqual(len(doc), 2)
        self.assertEqual(doc[0].text, 'ฉัน')
        self.assertEqual(doc[1].text, 'ชอบ')

    @patch('pythainlp.parse.dependency_parsing')
    def test_dependency_parsing_with_11_fields(self, mock_dep_parsing):
        """Test dependency parsing with 11 fields (extra field beyond standard)."""
        # Simulates the reported bug: an 11th column such as SpaceAfter.
        mock_dep_parsing.return_value = [
            ['1', 'ฉัน', 'ฉัน', 'PRON', 'PRON', '_', '1', 'nsubj', '_', '_', 'SpaceAfter=No'],
            ['2', 'ชอบ', 'ชอบ', 'VERB', 'VERB', '_', '0', 'root', '_', '_', 'SpaceAfter=Yes'],
        ]

        # This should NOT raise ValueError anymore.
        doc = self._make_dep_nlp()("ฉันชอบ")

        # Verify the document was processed correctly.
        self.assertIsInstance(doc, Doc)
        self.assertEqual(len(doc), 2)
        self.assertEqual(doc[0].text, 'ฉัน')
        self.assertEqual(doc[1].text, 'ชอบ')

    @patch('pythainlp.parse.dependency_parsing')
    def test_dependency_parsing_with_12_fields(self, mock_dep_parsing):
        """Test dependency parsing with 12 fields (multiple extra fields)."""
        mock_dep_parsing.return_value = [
            ['1', 'ฉัน', 'ฉัน', 'PRON', 'PRON', '_', '0', 'root', '_', '_', 'extra1', 'extra2'],
        ]

        # Should handle extra fields gracefully.
        doc = self._make_dep_nlp()("ฉัน")
        self.assertIsInstance(doc, Doc)
        self.assertEqual(len(doc), 1)

    @patch('pythainlp.parse.dependency_parsing')
    def test_dependency_parsing_with_insufficient_fields(self, mock_dep_parsing):
        """Test that dependency parsing raises error with fewer than 10 fields."""
        # Only 9 fields: below the CoNLL-U minimum.
        mock_dep_parsing.return_value = [
            ['1', 'ฉัน', 'ฉัน', 'PRON', 'PRON', '_', '2', 'nsubj', '_'],
        ]

        nlp = self._make_dep_nlp()

        # Should raise ValueError with a clear message.
        with self.assertRaises(ValueError) as context:
            nlp("ฉัน")

        self.assertIn("Expected at least 10 fields", str(context.exception))

    @patch('pythainlp.parse.dependency_parsing')
    def test_dependency_parsing_pos_and_dep_tags(self, mock_dep_parsing):
        """Test that POS tags and dependency relations are correctly extracted."""
        # Head indices: tokens 0 and 2 point to token 1, token 1 is root.
        mock_dep_parsing.return_value = [
            ['1', 'ฉัน', 'ฉัน', 'PRON', 'PRON', '_', '1', 'nsubj', '_', '_'],
            ['2', 'ชอบ', 'ชอบ', 'VERB', 'VERB', '_', '0', 'root', '_', '_'],
            ['3', 'แมว', 'แมว', 'NOUN', 'NOUN', '_', '1', 'obj', '_', '_'],
        ]

        doc = self._make_dep_nlp()("ฉันชอบแมว")

        # Check POS tags.
        self.assertEqual(doc[0].pos_, 'PRON')
        self.assertEqual(doc[1].pos_, 'VERB')
        self.assertEqual(doc[2].pos_, 'NOUN')

        # Check dependency relations.
        self.assertEqual(doc[0].dep_, 'nsubj')
        self.assertEqual(doc[1].dep_, 'root')
        self.assertEqual(doc[2].dep_, 'obj')
211+
212+
class TestBasicFunctionality(unittest.TestCase):
    """Test basic spacy-pythainlp functionality."""

    def test_blank_model_creation(self):
        """Test that a blank Thai model can be created."""
        thai_nlp = spacy.blank('th')
        self.assertIsNotNone(thai_nlp)
        self.assertEqual(thai_nlp.lang, 'th')

    def test_pipeline_with_tokenization(self):
        """Test pythainlp pipeline with tokenization enabled."""
        import spacy_pythainlp.core  # noqa: F401 -- registers the "pythainlp" factory

        # Only word segmentation is switched on; every other feature stays off.
        tokenizer_config = {
            "pos": False,
            "sent": False,
            "ner": False,
            "tokenize": True,
            "tokenize_engine": "newmm",
            "dependency_parsing": False,
            "word_vector": False,
        }
        pipeline = spacy.blank('th')
        pipeline.add_pipe("pythainlp", config=tokenizer_config)

        parsed = pipeline("ผมเป็นนักศึกษา")
        self.assertIsInstance(parsed, Doc)
        self.assertGreater(len(parsed), 0)
242+
243+
244+
if __name__ == '__main__':
    # Allow running the suite directly: ``python tests/test_dependency_parsing.py``.
    unittest.main()

0 commit comments

Comments
 (0)