Skip to content

Commit 7bbc29f

Browse files
authored
Merge pull request #310 from vchamarthi/asv-bench-pr
ASV Benchmarks Integration
2 parents d27abb2 + 1640998 commit 7bbc29f

10 files changed

Lines changed: 842 additions & 0 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,6 @@ mkl_fft/_pydfti.c
99
mkl_fft/_pydfti.cpython*.so
1010
mkl_fft/_pydfti.*-win_amd64.pyd
1111
mkl_fft/src/mklfft.c
12+
13+
# ASV benchmark artifacts
14+
.asv/

benchmarks/README.md

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# mkl_fft ASV Benchmarks
2+
3+
Performance benchmarks for [mkl_fft](https://github.com/IntelPython/mkl_fft) using
4+
[Airspeed Velocity (ASV)](https://asv.readthedocs.io/en/stable/).
5+
6+
## Coverage
7+
8+
| File | API | Transforms | Dtypes | Sizes/Shapes |
9+
|------|-----|-----------|--------|-------------|
10+
| `bench_fft1d.py` | `mkl_fft` | `fft`, `ifft`, `rfft`, `irfft` | float32, float64, complex64, complex128 | power-of-two and non-power-of-two |
11+
| `bench_fftnd.py` | `mkl_fft` | `fft2`, `ifft2`, `rfft2`, `irfft2`, `fftn`, `ifftn`, `rfftn`, `irfftn` | float32, float64, complex64, complex128 | square and non-square/non-cubic |
12+
| `bench_interfaces.py` | `mkl_fft.interfaces.{numpy_fft, scipy_fft}` | All exported functions; selected by a `module` parameter. Hermitian 2-D/N-D (`hfft2`, `hfftn`) are scipy-only. | float32, float64, complex64, complex128 | power-of-two and cubic |
13+
| `bench_memory.py` | `mkl_fft` | Peak RSS for 1-D, 2-D, and 3-D transforms | float32, float64, complex128 | power-of-two |
14+
15+
## Threading
16+
17+
Set `MKL_NUM_THREADS` in the environment before running ASV to control the
18+
thread count used by MKL:
19+
20+
```bash
21+
MKL_NUM_THREADS=8 asv run --python=same --quick HEAD^!
22+
```
23+
24+
If `MKL_NUM_THREADS` is not set, `__init__.py` applies a default: **4** threads
25+
when the machine has 4 or more physical cores, or **1** (single-threaded)
26+
otherwise. This keeps results comparable across CI machines in the shared pool
27+
regardless of their total core count. Physical cores are detected via
28+
`psutil.cpu_count(logical=False)` — hyperthreads are excluded per MKL
29+
recommendation.
30+
31+
## Notes on Measurement
32+
33+
### DFTI descriptor warmup
34+
35+
MKL creates a DFTI descriptor on the first FFT call for a given (size, dtype,
36+
strides) combination and reuses it on subsequent calls. To avoid charging
37+
that one-time cost to the first measured iteration, each benchmark's `setup`
38+
performs an explicit warmup call after preparing the input array. ASV's
39+
default `warmup_time` (0.1s) already amortizes this for sub-millisecond
40+
transforms, but the explicit warmup makes the intent visible.
41+
42+
## Running Benchmarks
43+
44+
Prerequisites:
45+
46+
```bash
47+
pip install asv psutil
48+
```
49+
50+
Run benchmarks against the current environment:
51+
52+
```bash
53+
asv run --python=same --quick HEAD^!
54+
```
55+
56+
Compare two commits:
57+
58+
```bash
59+
asv continuous --python=same HEAD~1 HEAD
60+
```
61+
62+
View results in a browser:
63+
64+
```bash
65+
asv publish
66+
asv preview
67+
```

benchmarks/asv.conf.json

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{
2+
"version": 1,
3+
"project": "mkl_fft",
4+
"project_url": "https://github.com/IntelPython/mkl_fft",
5+
"show_commit_url": "https://github.com/IntelPython/mkl_fft/commit/",
6+
"repo": "..",
7+
"branches": [
8+
"master"
9+
],
10+
"environment_type": "conda",
11+
"conda_channels": [
12+
"https://software.repos.intel.com/python/conda/",
13+
"conda-forge"
14+
],
15+
"benchmark_dir": "benchmarks",
16+
"env_dir": ".asv/env",
17+
"results_dir": ".asv/results",
18+
"html_dir": ".asv/html",
19+
"build_cache_size": 2,
20+
"default_benchmark_timeout": 500,
21+
"regressions_thresholds": {
22+
".*": 0.3
23+
}
24+
}

benchmarks/benchmarks/__init__.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
"""ASV benchmarks for mkl_fft"""
2+
3+
import os
4+
5+
import psutil
6+
7+
# Physical-core threshold for multi-threaded mode.  The same value is also
# the MKL thread count that _thread_count() returns once the threshold is met.
_MIN_THREADS = 4
8+
9+
10+
def _physical_cores():
    """Return the number of physical CPU cores, defaulting to 1.

    The count comes from ``psutil.cpu_count(logical=False)``.  A falsy
    result (such as ``None``) is replaced by the conservative value 1.
    """
    detected = psutil.cpu_count(logical=False)
    if not detected:
        return 1
    return detected
13+
14+
15+
def _thread_count():
    """Return the default MKL thread count as a string.

    The result is ``str(_MIN_THREADS)`` when at least ``_MIN_THREADS``
    physical cores are available, and ``"1"`` otherwise.
    """
    if _physical_cores() < _MIN_THREADS:
        return "1"
    return str(_MIN_THREADS)
18+
19+
20+
# Keep a caller-supplied MKL_NUM_THREADS untouched; otherwise publish the
# computed default into the process environment.  _THREADS holds the value
# that ends up in effect either way.
_THREADS = os.environ.setdefault("MKL_NUM_THREADS", _thread_count())

benchmarks/benchmarks/_utils.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
"""Shared utilities for mkl_fft benchmarks."""
2+
3+
import numpy as np
4+
5+
# Fixed seed handed to np.random.default_rng() in every benchmark setup, so
# each run is fed identical input data.
_RNG_SEED = 42
6+
7+
8+
def _make_input(rng, shape, dtype):
    """Draw a standard-normal array with the requested shape and dtype.

    Parameters
    ----------
    rng
        Random generator exposing ``standard_normal`` (for example the
        object returned by ``numpy.random.default_rng``).
    shape
        Either an ``int`` (treated as a 1-D length) or a shape tuple.
    dtype
        Anything accepted by ``numpy.dtype``.

    Returns
    -------
    numpy.ndarray
        Array of the given shape cast to ``dtype``.  For complex dtypes the
        real and imaginary parts are drawn separately (real part first), so
        the imaginary part is non-zero.
    """
    target = np.dtype(dtype)
    dims = shape if not isinstance(shape, int) else (shape,)
    values = rng.standard_normal(dims)
    if target.kind == "c":
        # Second draw supplies the imaginary component.
        values = values + 1j * rng.standard_normal(dims)
    return values.astype(target)
19+
20+
21+
class BenchC2C:
    """Shared ``setup`` for complex-to-complex benchmark classes.

    Concrete subclasses provide ``params``, ``param_names`` and the
    ``time_*`` / ``peakmem_*`` methods.  Only the first two benchmark
    parameters (shape and dtype) are used here; any further positional
    parameters are discarded.
    """

    def setup(self, shape, dtype, *_):
        # A freshly seeded generator on every call keeps self.x identical
        # from one run to the next.
        self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype)
31+
32+
33+
# dtype axes
_DTYPES_REAL = ["float32", "float64"]
_DTYPES_ALL = [*_DTYPES_REAL, "complex64", "complex128"]
_DTYPES_REDUCED = ["float64", "complex128"]

# shape/size axes shared across multiple files
_SHAPES_2D = [(n, n) for n in (64, 128, 256, 512)]
_SHAPES_2D_IFACE = [(n, n) for n in (64, 256, 512)]
_SHAPES_3D = [(n, n, n) for n in (16, 32, 64)]
42+
43+
44+
class BenchR2C:
    """Base ``setup`` for real-to-complex, complex-to-real and Hermitian benchmarks.

    ``setup`` builds two inputs from a freshly seeded generator:

    ``self.x_real``
        Real array of the full ``shape`` (rfft / ihfft input).
    ``self.x_complex``
        Complex half-spectrum array (irfft / hfft input).  Its last axis has
        length ``n // 2 + 1``, where ``n`` is the full last-axis length.

    The imaginary part of the DC bin (index 0 of the last axis) of
    ``x_complex`` is zeroed, and when ``n`` is even the imaginary part of
    the Nyquist bin (last index of the last axis) is zeroed as well.  Only
    these two last-axis bins are adjusted; no other symmetry is imposed.

    Parameters of ``setup``
    -----------------------
    shape
        ``int`` (1-D length) or shape tuple of the real array.
    dtype
        Real dtype specifier.  It is normalised through ``numpy.dtype``, so
        ``"float32"``, ``"f4"`` and ``numpy.float32`` all select a
        ``complex64`` spectrum; every other dtype selects ``complex128``.
    *_
        Extra positional benchmark parameters; accepted and ignored.
    """

    def setup(self, shape, dtype, *_):
        rng = np.random.default_rng(_RNG_SEED)

        # Compare dtypes, not strings: a plain ``dtype == "float32"`` test
        # would pair np.float32 or "f4" with a complex128 spectrum.
        real_dtype = np.dtype(dtype)
        cdtype = np.complex64 if real_dtype == np.float32 else np.complex128

        if isinstance(shape, int):
            n_last = shape
            half_shape = shape // 2 + 1
        else:
            n_last = shape[-1]
            half_shape = shape[:-1] + (shape[-1] // 2 + 1,)

        # Draw order (real input, spectrum real part, spectrum imaginary
        # part) determines the generated values; keep it stable.
        self.x_real = rng.standard_normal(shape).astype(real_dtype)
        self.x_complex = (
            rng.standard_normal(half_shape)
            + 1j * rng.standard_normal(half_shape)
        ).astype(cdtype)

        # Make the DC bin purely real.
        self.x_complex[..., 0] = self.x_complex[..., 0].real
        if n_last % 2 == 0:
            # For an even full length the last half-spectrum bin is the
            # Nyquist bin; make it purely real too.
            self.x_complex[..., -1] = self.x_complex[..., -1].real
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
"""Benchmarks for 1-D FFT operations using the mkl_fft root API."""
2+
3+
import mkl_fft
4+
5+
from ._utils import _DTYPES_ALL, _DTYPES_REAL, BenchC2C, BenchR2C
6+
7+
# 2**6, 2**8, ..., 2**16
_SIZES_POW2 = [1 << exponent for exponent in range(6, 17, 2)]
# Lengths that are not powers of two: four primes plus the composite 1000.
_SIZES_NONPOW2 = [127, 509, 1000, 4001, 10007]
9+
10+
11+
# ---------------------------------------------------------------------------
12+
# Complex-to-complex 1-D (power-of-two sizes)
13+
# ---------------------------------------------------------------------------
14+
15+
16+
class BenchFFT1D(BenchC2C):
    """Time ``mkl_fft.fft`` and ``mkl_fft.ifft`` on power-of-two lengths.

    Parameterised over ``_SIZES_POW2`` (``n``) and ``_DTYPES_ALL``
    (``dtype``); the input array ``self.x`` is prepared by the base class.
    """

    param_names = ["n", "dtype"]
    params = [_SIZES_POW2, _DTYPES_ALL]

    def setup(self, n, dtype):
        super().setup(n, dtype)
        # prime MKL DFTI descriptor cache: one untimed call per transform
        for transform in (mkl_fft.fft, mkl_fft.ifft):
            transform(self.x)

    def time_fft(self, n, dtype):
        # Forward transform of the prepared input.
        mkl_fft.fft(self.x)

    def time_ifft(self, n, dtype):
        # Inverse transform of the same prepared input.
        mkl_fft.ifft(self.x)
33+
34+
35+
# ---------------------------------------------------------------------------
36+
# Real-to-complex / complex-to-real 1-D (power-of-two sizes)
37+
# ---------------------------------------------------------------------------
38+
39+
40+
class BenchRFFT1D(BenchR2C):
    """Time ``mkl_fft.rfft`` and ``mkl_fft.irfft`` on power-of-two lengths.

    Parameterised over ``_SIZES_POW2`` (``n``) and ``_DTYPES_REAL``
    (``dtype``); ``self.x_real`` and ``self.x_complex`` are prepared by the
    base class.
    """

    param_names = ["n", "dtype"]
    params = [_SIZES_POW2, _DTYPES_REAL]

    def setup(self, n, dtype):
        super().setup(n, dtype)
        # Untimed warmup calls, issued with the same arguments as the timed
        # methods below.
        mkl_fft.rfft(self.x_real)
        mkl_fft.irfft(self.x_complex, n=n)

    def time_rfft(self, n, dtype):
        # Real-to-complex transform of the real input.
        mkl_fft.rfft(self.x_real)

    def time_irfft(self, n, dtype):
        # Complex-to-real transform back to length n.
        mkl_fft.irfft(self.x_complex, n=n)
56+
57+
58+
# ---------------------------------------------------------------------------
59+
# Complex-to-complex 1-D (non-power-of-two sizes)
60+
# ---------------------------------------------------------------------------
61+
62+
63+
class BenchFFT1DNonPow2(BenchC2C):
    """Time ``mkl_fft.fft`` and ``mkl_fft.ifft`` on non-power-of-two lengths.

    MKL uses a different code path for non-power-of-two transforms;
    this suite catches regressions in that path.

    Parameterised over ``_SIZES_NONPOW2`` (``n``) and ``_DTYPES_ALL``
    (``dtype``).
    """

    param_names = ["n", "dtype"]
    params = [_SIZES_NONPOW2, _DTYPES_ALL]

    def setup(self, n, dtype):
        super().setup(n, dtype)
        # One untimed call per transform before measurement starts.
        for transform in (mkl_fft.fft, mkl_fft.ifft):
            transform(self.x)

    def time_fft(self, n, dtype):
        # Forward transform of the prepared input.
        mkl_fft.fft(self.x)

    def time_ifft(self, n, dtype):
        # Inverse transform of the same prepared input.
        mkl_fft.ifft(self.x)
83+
84+
85+
# ---------------------------------------------------------------------------
86+
# Real-to-complex / complex-to-real 1-D (non-power-of-two sizes)
87+
# ---------------------------------------------------------------------------
88+
89+
90+
class BenchRFFT1DNonPow2(BenchR2C):
    """Time ``mkl_fft.rfft`` and ``mkl_fft.irfft`` on non-power-of-two lengths.

    Parameterised over ``_SIZES_NONPOW2`` (``n``) and ``_DTYPES_REAL``
    (``dtype``); ``self.x_real`` and ``self.x_complex`` are prepared by the
    base class.
    """

    param_names = ["n", "dtype"]
    params = [_SIZES_NONPOW2, _DTYPES_REAL]

    def setup(self, n, dtype):
        super().setup(n, dtype)
        # Untimed warmup calls, issued with the same arguments as the timed
        # methods below.
        mkl_fft.rfft(self.x_real)
        mkl_fft.irfft(self.x_complex, n=n)

    def time_rfft(self, n, dtype):
        # Real-to-complex transform of the real input.
        mkl_fft.rfft(self.x_real)

    def time_irfft(self, n, dtype):
        # Complex-to-real transform back to length n.
        mkl_fft.irfft(self.x_complex, n=n)

0 commit comments

Comments
 (0)