Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/deploy-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ jobs:
- name: Install build tools and doc build tools
run: pip install --upgrade pip
- name: Install PyThaiNLP
run: pip install ".[docs]"
run: |
pip install .
pip install --group docs
- name: Build sphinx documentation
run: |
cd docs && make html
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/pypi-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ jobs:
SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True
run: |
python -m pip install --upgrade pip
python -m pip install ".[compact]"
python -m pip install .
python -m pip install --group test-compact
python -m nltk.downloader omw-1.4

# Download the sdist from PyPI, extract it, and run the tests inside it
Expand Down
31 changes: 7 additions & 24 deletions .github/workflows/unittest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ jobs:

runs-on: ${{ matrix.os }}
env:
PYICU_WIN_VER: 2.15 # 2.15 is the last version that supports Python 3.9
INSTALL_FULL_DEPS: false
PYTHON_VERSION_LATEST: "3.14"
PYTHON_VERSION_LATEST_2: "3.13" # Second-latest supported version
Expand All @@ -95,31 +94,13 @@ jobs:
pip install --upgrade pip
pip install coverage coveralls

- name: Install ICU (macOS)
if: startsWith(matrix.os, 'macos-')
run: |
brew install icu4c
PKG_CONFIG_PATH=$(brew --prefix)/opt/icu4c/lib/pkgconfig
echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}" >> "${GITHUB_ENV}"
ICU_VER=$(pkg-config --modversion icu-i18n)
echo "ICU_VER=${ICU_VER}"
echo "ICU_VER=${ICU_VER}" >> "${GITHUB_ENV}"

- name: Install PyICU (Windows)
if: startsWith(matrix.os, 'windows-') && (matrix.python-version == env.PYTHON_VERSION_LATEST_2 || matrix.python-version == env.PYTHON_VERSION_EARLIEST)
shell: powershell
# Get the wheel URL from https://github.com/cgohlke/pyicu-build/releases
run: |
$PYTHON_WIN_VER = "${{ matrix.python-version }}"
$CP_VER = "cp" + $PYTHON_WIN_VER.Replace(".", "")
$WHEEL_URL = "https://github.com/cgohlke/pyicu-build/releases/download/v${{ env.PYICU_WIN_VER }}/PyICU-${{ env.PYICU_WIN_VER }}-${CP_VER}-${CP_VER}-win_amd64.whl"
pip install "$WHEEL_URL"

- name: Install PyThaiNLP + full testing dependencies
if: env.INSTALL_FULL_DEPS == 'true'
env:
SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True
run: pip install ".[full]"
run: |
pip install .
pip install --group test-full

- name: Test (core)
if: ${{ (matrix.os == 'ubuntu-latest' && matrix.python-version != env.PYTHON_VERSION_LATEST_2 && matrix.python-version != env.PYTHON_VERSION_EARLIEST) || (matrix.os != 'ubuntu-latest' && matrix.python-version == env.PYTHON_VERSION_LATEST) }}
Expand All @@ -138,15 +119,17 @@ jobs:
env:
PYTHONIOENCODING: utf-8
run: |
pip install ".[compact]"
pip install .
pip install --group test-compact
python -m unittest tests.core tests.compact

- name: Test (extra + compact + core)
if: matrix.os == 'ubuntu-latest' && matrix.python-version == env.PYTHON_VERSION_LATEST_2
env:
PYTHONIOENCODING: utf-8
run: |
pip install ".[compact,extra]"
pip install .
pip install --group test-extra
coverage run -m unittest tests.core tests.compact tests.extra

# Only submit a report from the "extra" run, to get maximum coverage
Expand Down
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,15 @@ and this project adheres to

This release focuses on security issues related to corpus file loading.

### Changed

- Migrate development, documentation, and testing dependencies from
`[project.optional-dependencies]` to PEP 735 `[dependency-groups]`
in `pyproject.toml`; use `pip install --group <name>` to install (#1329)
- Use `pyicu-wheels` in test dependency groups for pre-built ICU binary
wheels, simplifying local development and CI on Windows and macOS (#1329)
- Simplify CI by removing platform-specific ICU compilation steps (#1329)

### Security

- thai2fit: Use JSON model instead of pickle (#1325)
Expand Down
109 changes: 60 additions & 49 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,28 +68,7 @@ dependencies = [

[project.optional-dependencies]

## 1) Development ########################################

dev = [
"black>=25.11.0",
"build>=1.0.0",
"bump-my-version>=1.2.6",
"coverage>=7.10.7",
"flake8>=7.0.0",
"flake8-type-checking>=3.2.0",
"mypy>=1.19.1",
"pylint>=4.0.0",
"ruff>=0.14.14",
"tox>=4.30.3",
]

docs = [
"Sphinx>=6.2",
"sphinx-copybutton>=0.5.2",
"sphinx-rtd-theme>=3.1.0",
]

## 2) Feature sets #######################################
## Feature sets ###########################################

abbreviation = ["khamyo>=0.2.0"]

Expand All @@ -116,7 +95,7 @@ esupar = ["esupar>=1.3.8", "numpy>=1.22", "transformers>=4.22.1"]

generate = ["fastai<2.0"]

icu = ["pyicu>=2.3"]
icu = ["pyicu>=2.14"] # PyICU 2.14 is the last version to support Python 3.9

ipa = ["epitran>=1.1"]

Expand Down Expand Up @@ -174,23 +153,55 @@ wtp = ["transformers>=4.22.1", "wtpsplit>=1.0.1"]

wunsen = ["wunsen>=0.0.3"]

## 3) Testing ############################################
## Dependency groups (PEP 735) ############################

# Below are dependency groups for each defined test suite.
# Use `pip install --group <group-name>` to install a group.
# For test groups, pyicu-wheels (https://pypi.org/project/pyicu-wheels/)
# provides pre-built ICU binary wheels for all platforms without
# local compilation. Note: pyicu-wheels is from a third-party source
# (Init7, not the official PyICU project) and is used for testing only.
# Distribution dependencies still use pyicu
# (see icu = ["pyicu>=2.14"] in optional-dependencies above).

# Compact dependencies - for tests.compact modules
# Safe small set of optional dependencies
# for common tasks like tokenization and tagging.
compact = [
[dependency-groups]

# Development tools
dev = [
"black>=25.11.0",
"build>=1.0.0",
"bump-my-version>=1.2.6",
"coverage>=7.10.7",
"flake8>=7.0.0",
"flake8-type-checking>=3.2.0",
"mypy>=1.19.1",
"pylint>=4.0.0",
"ruff>=0.14.14",
"tox>=4.30.3",
]

# Documentation build tools
docs = [
"Sphinx>=6.2",
"sphinx-copybutton>=0.5.2",
"sphinx-rtd-theme>=3.1.0",
]

# Dependencies for tests.core
# No extra optional dependencies needed beyond the base package.
test-core = []

# Dependencies for tests.compact
test-compact = [
"nlpo3>=1.4.0",
"numpy>=1.26.0",
"pyicu>=2.3",
"pyicu-wheels>=2.14",
"python-crfsuite>=0.9.7",
"PyYAML>=5.4.1",
]

# Extra dependencies - for tests.extra modules
extra = [
# Dependencies for tests.extra (includes compact dependencies)
test-extra = [
{include-group = "test-compact"},
"bpemb>=0.3.2",
"budoux>=0.7.0",
"gensim>=4.0.0",
Expand All @@ -201,11 +212,8 @@ extra = [
"tltk>=1.10",
]

# Noauto test dependencies - for tests.noauto_* modules
# These are further grouped by dependency framework to avoid conflicts

# PyTorch-based dependencies - for tests.noauto_torch
noauto-torch = [
# Dependencies for tests.noauto_torch
test-noauto-torch = [
"attacut>=1.0.6",
"numpy>=1.26.0",
"sentencepiece>=0.1.91",
Expand All @@ -216,33 +224,36 @@ noauto-torch = [
"wtpsplit>=1.0.1",
]

# TensorFlow-based dependencies - for tests.noauto_tensorflow
noauto-tensorflow = [
# Dependencies for tests.noauto_tensorflow
test-noauto-tensorflow = [
"deepcut>=0.7.0",
"numpy>=1.26.0",
]

# ONNX Runtime-based dependencies - for tests.noauto_onnx
noauto-onnx = [
# Dependencies for tests.noauto_onnx
test-noauto-onnx = [
"numpy>=1.26.0",
"onnxruntime>=1.10.0",
"oskut>=1.3",
"sefr_cut>=1.1",
]

# Cython-based dependencies - for tests.noauto_cython
noauto-cython = [
# Dependencies for tests.noauto_cython
test-noauto-cython = [
"phunspell>=0.1.6",
]

# Network-dependent tests - for tests.noauto_network
# These tests require network access but minimal dependencies
noauto-network = [
# Dependencies for tests.noauto_network
test-noauto-network = [
"huggingface-hub>=0.16.0",
]

# Full dependencies - pinned where available
full = [
# Dependencies for tests.corpus
# No extra optional dependencies needed beyond the base package.
test-corpus = []

# Full pinned dependencies for all features (for reproducible testing)
test-full = [
"attacut==1.0.6",
"attaparse==1.0.0",
"bpemb>=0.3.6,<0.4",
Expand All @@ -266,7 +277,7 @@ full = [
"pandas>=2.2.0,<3",
"panphon==0.22.2",
"phunspell==0.1.6",
"pyicu>=2.15.2,<3",
"pyicu-wheels>=2.14",
"python-crfsuite==0.9.12",
"PyYAML>=5.4.1,<6.0.2",
"sacremoses==0.1.1",
Expand Down
63 changes: 55 additions & 8 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,20 @@ The CI/CD test workflow is at
## Compact tests (testc_*.py)

- Run `python -m unittest tests.compact`
- Need dependencies from `pip install "pythainlp[compact]"`
- Need dependencies:

```shell
pip install pythainlp
pip install --group test-compact
```

The `test-compact` group uses `pyicu-wheels` for pre-built ICU binary
wheels (easier to install on all platforms without local compilation).
Note: `pyicu-wheels` is from a third-party source (Init7) and is used
for testing convenience only; distribution dependencies use `pyicu`.
- Test a limited set of functionalities that rely on a stable
and small set of dependencies.
- These dependencies are `PyYAML`, `nlpo3`, `numpy`, `pyicu`,
- These dependencies are `PyYAML`, `nlpo3`, `numpy`, `pyicu-wheels`,
and `python-crfsuite`.
- Includes corpus download/remove tests (may require network access).
- Tested on:
Expand All @@ -74,7 +84,14 @@ The CI/CD test workflow is at
## Extra tests (testx_*.py)

- Run `python -m unittest tests.extra`
- Need dependencies from `pip install "pythainlp[compact,extra]"`
- Need dependencies:

```shell
pip install pythainlp
pip install --group test-extra
```

The `test-extra` group includes all `test-compact` dependencies.
- Test more functionalities that rely on a larger set of dependencies
or ones that require more time or computation.
- Only tested on Ubuntu using the second-latest Python version.
Expand Down Expand Up @@ -116,7 +133,13 @@ By separating tests by dependency group, we can:
##### PyTorch-based: tests.noauto_torch

- Run `python -m unittest tests.noauto_torch`
- Need dependencies from `pip install "pythainlp[noauto-torch]"`
- Need dependencies:

```shell
pip install pythainlp
pip install --group test-noauto-torch
```

- Tests requiring PyTorch and its ecosystem:
- torch, transformers (PyTorch backend)
- attacut, thai-nner, wtpsplit, tltk
Expand All @@ -129,7 +152,13 @@ By separating tests by dependency group, we can:
##### TensorFlow-based: tests.noauto_tensorflow

- Run `python -m unittest tests.noauto_tensorflow`
- Need dependencies from `pip install "pythainlp[noauto-tensorflow]"`
- Need dependencies:

```shell
pip install pythainlp
pip install --group test-noauto-tensorflow
```

- Tests requiring TensorFlow:
- deepcut tokenizer
- Dependencies: ~1-2 GB
Expand All @@ -139,7 +168,13 @@ By separating tests by dependency group, we can:
##### ONNX Runtime-based: tests.noauto_onnx

- Run `python -m unittest tests.noauto_onnx`
- Need dependencies from `pip install "pythainlp[noauto-onnx]"`
- Need dependencies:

```shell
pip install pythainlp
pip install --group test-noauto-onnx
```

- Tests requiring ONNX Runtime:
- oskut, sefr_cut tokenizers
- Dependencies: ~200-500 MB
Expand All @@ -148,7 +183,13 @@ By separating tests by dependency group, we can:
##### Cython-compiled: tests.noauto_cython

- Run `python -m unittest tests.noauto_cython`
- Need dependencies from `pip install "pythainlp[noauto-cython]"`
- Need dependencies:

```shell
pip install pythainlp
pip install --group test-noauto-cython
```

- Tests requiring Cython-compiled packages:
- phunspell spell checker
- Requires: Cython, C compiler, system libraries (hunspell)
Expand All @@ -158,7 +199,13 @@ By separating tests by dependency group, we can:
##### Network-dependent: tests.noauto_network

- Run `python -m unittest tests.noauto_network`
- Need dependencies from `pip install "pythainlp[noauto-network]"`
- Need dependencies:

```shell
pip install pythainlp
pip install --group test-noauto-network
```

- Tests requiring network access:
- Hugging Face Hub model downloads
- External API calls
Expand Down
Loading