9 changes: 9 additions & 0 deletions .gitignore
@@ -42,3 +42,12 @@ uv.lock

# AI rules
WARP.md

# perf
*.svg
**/.benchmarks/**
*.html

# Cython
*.so
*.c
18 changes: 18 additions & 0 deletions pyproject.toml
@@ -5,6 +5,7 @@ requires = [
"wheel",
"gitpython",
"setuptools_scm[toml]>=6.2",
"Cython>=3.0.0",
]
build-backend = "setuptools.build_meta"

@@ -73,6 +74,8 @@ dev = [
"pytest-cov>=5.0.0",
"pytest-timeout>=1.3.4",
"pytest-asyncio",
"pytest-benchmark[histogram]",
"Cython>=3.0.0",
"ruff>=0.12.9,<1",
"black",
# develop bulk_writer
@@ -215,3 +218,18 @@ builtins-ignorelist = [
"filter",
]
builtins-allowed-modules = ["types"]

[tool.cibuildwheel]
build = ["cp38-*", "cp39-*", "cp310-*", "cp311-*", "cp312-*", "cp313-*"]
skip = ["*-musllinux_*", "pp*"]
test-requires = "pytest"
test-command = "pytest {package}/tests -k 'not (test_hybrid_search or test_milvus_client)' -x --tb=short || true"

[tool.cibuildwheel.linux]
before-all = "yum install -y gcc || apt-get update && apt-get install -y gcc"

[tool.cibuildwheel.macos]
before-all = "brew install gcc || true"

[tool.cibuildwheel.windows]
before-build = "pip install Cython>=3.0.0"
161 changes: 161 additions & 0 deletions tests/benchmark/README.md
@@ -0,0 +1,161 @@
# pymilvus MilvusClient Benchmarking Suite

This benchmark suite measures client-side performance of pymilvus MilvusClient API operations (search, query, hybrid search) without requiring a running Milvus server.

## Overview

We benchmark **client-side code only** by mocking gRPC calls:
- ✅ Request preparation (parameter validation, serialization)
- ✅ Response parsing (deserialization, type conversion)
- ❌ Network I/O (excluded via mocking)
- ❌ Server-side processing (excluded via mocking)
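
In practice this boundary is implemented by patching the generated gRPC stub so that RPC methods return canned protobuf messages. A minimal sketch of the idea (the real wiring, which also mocks channel creation and `Connect`, lives in `conftest.py`):

```python
from unittest.mock import MagicMock, patch

from pymilvus.grpc_gen import milvus_pb2

# Sketch only: an empty SearchResults message; the suite builds populated responses in mock_responses.py.
canned = milvus_pb2.SearchResults()

with patch("pymilvus.grpc_gen.milvus_pb2_grpc.MilvusServiceStub") as stub_cls:
    stub = MagicMock()
    stub.Search.return_value = canned
    stub_cls.return_value = stub
    # A MilvusClient constructed inside this block would send Search RPCs to the mock,
    # so only request preparation and response parsing run on the client side.
```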

## Directory Structure

```
tests/benchmark/
├── README.md # This file - complete guide
├── conftest.py # Mock gRPC stubs & shared fixtures
├── mock_responses.py # Fake protobuf response builders
├── test_search_bench.py # Search timing benchmarks
├── test_query_bench.py # Query timing benchmarks
├── test_hybrid_bench.py # Hybrid search timing benchmarks
└── scripts/
├── profile_cpu.sh # CPU profiling wrapper
└── profile_memory.sh # Memory profiling wrapper
```
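
For orientation, a response builder in `mock_responses.py` might look roughly like this (a sketch assuming the upstream milvus proto field names; the real module also fills `fields_data` for the requested output fields):

```python
from pymilvus.grpc_gen import common_pb2, milvus_pb2


def create_search_results(num_queries: int = 1, top_k: int = 10) -> milvus_pb2.SearchResults:
    """Build a fake SearchResults proto with sequential IDs and constant scores."""
    resp = milvus_pb2.SearchResults()
    resp.status.error_code = common_pb2.ErrorCode.Success
    resp.results.num_queries = num_queries
    resp.results.top_k = top_k
    resp.results.topks.extend([top_k] * num_queries)
    resp.results.ids.int_id.data.extend(range(num_queries * top_k))
    resp.results.scores.extend([0.5] * (num_queries * top_k))
    return resp
```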

## Installation

```bash
pip install -r requirements.txt
```

---

## 1. Timing Benchmarks (pytest-benchmark)
### Usage

```bash
# Run all benchmarks
pytest tests/benchmark/ --benchmark-only

# Run specific benchmark
pytest tests/benchmark/test_search_bench.py::TestSearchBench::test_search_float32 --benchmark-only

# Save baseline for comparison
pytest tests/benchmark/ --benchmark-only --benchmark-save=baseline

# Compare against baseline
pytest tests/benchmark/ --benchmark-only --benchmark-compare=baseline

# Generate histogram
pytest tests/benchmark/ --benchmark-only --benchmark-histogram
```
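
A timing benchmark is just a pytest test that pushes a call through the mocked client via pytest-benchmark's `benchmark` fixture. A minimal sketch (the `mocked_milvus_client` fixture comes from `conftest.py`; the actual test bodies in `test_search_bench.py` may differ):

```python
import random


class TestSearchBench:
    def test_search_float32(self, benchmark, mocked_milvus_client):
        # One 128-dim query vector; the gRPC stub returns a canned response,
        # so only client-side prepare/parse code is measured.
        query = [[random.random() for _ in range(128)]]
        benchmark(
            mocked_milvus_client.search,
            collection_name="test_collection",
            data=query,
            limit=10,
            output_fields=["id", "age", "score", "name"],
        )
```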

## 2. CPU Profiling (py-spy)
### Usage

#### Option A: Profile entire benchmark run

```bash
# Generate flamegraph (SVG)
py-spy record -o cpu_profile.svg --native -- pytest tests/benchmark/test_search_bench.py::TestSearchBench::test_search_float32 -v

# Generate speedscope format (interactive viewer)
py-spy record -o cpu_profile.speedscope.json -f speedscope -- pytest tests/benchmark/test_search_bench.py::TestSearchBench::test_search_float32 -v

# View speedscope: Upload to https://www.speedscope.app/
```

#### Option B: Use helper script

```bash
./tests/benchmark/scripts/profile_cpu.sh test_search_bench.py::test_search_float32
```

#### Option C: Profile specific function

```bash
# Top functions by CPU time
py-spy top -- python -m pytest tests/benchmark/test_search_bench.py::TestSearchBench::test_search_float32 -v
```

## 3. Memory Profiling (memray)

### What it Measures
- Memory allocation over time
- Peak memory usage
- Allocation flamegraphs
- Memory leaks
- Allocation call stacks

### Usage

#### Option A: Profile and generate reports

```bash
# Run with memray
memray run -o search_bench.bin -m pytest tests/benchmark/test_search_bench.py::TestSearchBench::test_search_float32 -v

# Generate flamegraph (HTML)
memray flamegraph search_bench.bin

# Generate table view (top allocators)
memray table search_bench.bin

# Generate tree view (call stack)
memray tree search_bench.bin

# Generate summary stats
memray summary search_bench.bin
```

#### Option B: Live monitoring

```bash
# Real-time memory usage in terminal
memray run --live -m pytest tests/benchmark/test_search_bench.py::TestSearchBench::test_search_float32 -v
```

#### Option C: Use helper script

```bash
./tests/benchmark/scripts/profile_memory.sh test_search_bench.py::test_search_float32
```

## 4. Complete Workflow

```bash
# Step 1: Install dependencies
pip install -e ".[dev]"

# Step 2: Run timing benchmarks (fast, ~minutes) and save a baseline for later comparison
pytest tests/benchmark/ --benchmark-only --benchmark-save=baseline

# Step 3: Identify slow tests from benchmark results

# Step 4: CPU profile specific slow tests
py-spy record -o cpu_slow_test.svg -- pytest tests/benchmark/test_search_bench.py::test_slow_one -v

# Step 5: Memory profile tests with large results
memray run -o mem_large.bin -m pytest tests/benchmark/test_search_bench.py::test_large_results -v
memray flamegraph mem_large.bin

# Step 6: Analyze results and fix bottlenecks

# Step 7: Re-run benchmarks and compare with baseline
pytest tests/benchmark/ --benchmark-only --benchmark-compare=baseline
```
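
Saved runs can also be inspected programmatically when comparing results by hand; a small sketch, assuming pytest-benchmark's default JSON layout under `.benchmarks/`:

```python
import glob
import json

# Pick the most recent saved run (pytest-benchmark writes one JSON file per --benchmark-save).
latest = sorted(glob.glob(".benchmarks/*/*.json"))[-1]
with open(latest) as f:
    run = json.load(f)

for bench in run["benchmarks"]:
    stats = bench["stats"]
    print(f'{bench["name"]}: mean={stats["mean"]:.6f}s stddev={stats["stddev"]:.6f}s')
```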

## Expected Bottlenecks

Based on code analysis, we expect to find:

1. **Protobuf deserialization** - Large responses with many fields
2. **Vector data conversion** - Bytes → numpy arrays
3. **Type conversions** - Protobuf types → Python types
4. **Field iteration** - Processing many output fields
5. **Memory copies** - Unnecessary data duplication

These benchmarks will help us validate and quantify these hypotheses.
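
For example, hypothesis 2 can be checked in isolation with a stand-alone micro-benchmark (a sketch, not part of the suite) that times the raw bytes-to-numpy conversion for a batch of float32 vectors:

```python
import numpy as np


def test_bytes_to_numpy_conversion(benchmark):
    # 10,000 vectors of dim 128, as the little-endian float32 bytes a gRPC response would carry.
    dim, num = 128, 10_000
    raw = np.random.rand(num, dim).astype(np.float32).tobytes()

    def convert():
        return np.frombuffer(raw, dtype=np.float32).reshape(num, dim)

    benchmark(convert)
```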
Empty file added tests/benchmark/__init__.py
Empty file.
105 changes: 105 additions & 0 deletions tests/benchmark/conftest.py
@@ -0,0 +1,105 @@
from unittest.mock import MagicMock, patch
import pytest

from pymilvus import MilvusClient
from . import mock_responses
from pymilvus.grpc_gen import common_pb2, milvus_pb2


@pytest.fixture
def mock_search_stub():
    """Return a callable mimicking MilvusServiceStub.Search with a canned SearchResults response."""
def _mock_search(request, timeout=None, metadata=None):
return mock_responses.create_search_results(
num_queries=1,
top_k=10,
output_fields=["id", "age", "score", "name"]
)
return _mock_search


@pytest.fixture
def mock_query_stub():
    """Return a callable mimicking MilvusServiceStub.Query with a canned QueryResults response."""
def _mock_query(request, timeout=None, metadata=None):
return mock_responses.create_query_results(
num_rows=100,
output_fields=["id", "age", "score", "name", "active", "metadata"]
)
return _mock_query


@pytest.fixture
def mocked_milvus_client(mock_search_stub, mock_query_stub):
    """MilvusClient whose gRPC layer is fully mocked, so no running Milvus server is required."""
with patch('grpc.insecure_channel') as mock_channel_func, \
patch('grpc.secure_channel') as mock_secure_channel_func, \
patch('grpc.channel_ready_future') as mock_ready_future, \
patch('pymilvus.grpc_gen.milvus_pb2_grpc.MilvusServiceStub') as mock_stub_class:

mock_channel = MagicMock()
mock_channel_func.return_value = mock_channel
mock_secure_channel_func.return_value = mock_channel

mock_future = MagicMock()
mock_future.result = MagicMock(return_value=None)
mock_ready_future.return_value = mock_future

        mock_stub = MagicMock()

        # Minimal successful ConnectResponse so MilvusClient initialization completes.
        mock_connect_response = milvus_pb2.ConnectResponse()
mock_connect_response.status.error_code = common_pb2.ErrorCode.Success
mock_connect_response.status.code = 0
mock_connect_response.identifier = 12345
mock_stub.Connect = MagicMock(return_value=mock_connect_response)

mock_stub.Search = MagicMock(side_effect=mock_search_stub)
mock_stub.Query = MagicMock(side_effect=mock_query_stub)
mock_stub.HybridSearch = MagicMock(side_effect=mock_search_stub)
mock_stub.DescribeCollection = MagicMock(return_value=_create_describe_collection_response())

mock_stub_class.return_value = mock_stub

client = MilvusClient(uri="http://localhost:19530")

yield client


def _create_describe_collection_response():
    """Build a DescribeCollectionResponse describing a small test schema (id, embedding, age, score, name)."""
    from pymilvus.grpc_gen import milvus_pb2, schema_pb2, common_pb2

response = milvus_pb2.DescribeCollectionResponse()
response.status.error_code = common_pb2.ErrorCode.Success

schema = response.schema
schema.name = "test_collection"

id_field = schema.fields.add()
id_field.fieldID = 1
id_field.name = "id"
id_field.data_type = schema_pb2.DataType.Int64
id_field.is_primary_key = True

embedding_field = schema.fields.add()
embedding_field.fieldID = 2
embedding_field.name = "embedding"
embedding_field.data_type = schema_pb2.DataType.FloatVector

dim_param = embedding_field.type_params.add()
dim_param.key = "dim"
dim_param.value = "128"

age_field = schema.fields.add()
age_field.fieldID = 3
age_field.name = "age"
age_field.data_type = schema_pb2.DataType.Int32

score_field = schema.fields.add()
score_field.fieldID = 4
score_field.name = "score"
score_field.data_type = schema_pb2.DataType.Float

name_field = schema.fields.add()
name_field.fieldID = 5
name_field.name = "name"
name_field.data_type = schema_pb2.DataType.VarChar

return response