Skip to content

Commit 4cce734

Browse files
committed
Attempt sharding tests
1 parent 3f55be7 commit 4cce734

File tree

5 files changed

+392
-26
lines changed

5 files changed

+392
-26
lines changed

.github/actions/run-integration-test/action.yaml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ inputs:
1414
PINECONE_ADDITIONAL_HEADERS:
1515
description: 'Additional headers to send with the request'
1616
required: false
17-
default: '{"sdk-test-suite": "pinecone-python-client"}'
17+
default: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}'
1818
use_grpc:
1919
description: 'Whether to use gRPC or REST'
2020
required: false
@@ -31,6 +31,12 @@ inputs:
3131
INDEX_HOST_SPARSE:
3232
description: 'The host of the sparse index for db data tests'
3333
required: false
34+
pytest_splits:
35+
description: 'Number of shards to split tests into (for test sharding)'
36+
required: false
37+
pytest_group:
38+
description: 'Which shard to run (1-indexed, for test sharding)'
39+
required: false
3440

3541
runs:
3642
using: 'composite'
@@ -46,7 +52,12 @@ runs:
4652
id: run-tests
4753
shell: bash
4854
run: |
55+
PYTEST_ARGS=""
56+
if [ -n "${{ inputs.pytest_splits }}" ] && [ -n "${{ inputs.pytest_group }}" ]; then
57+
PYTEST_ARGS="--splits=${{ inputs.pytest_splits }} --group=${{ inputs.pytest_group }}"
58+
fi
4959
poetry run pytest ${{ inputs.test_suite }} \
60+
$PYTEST_ARGS \
5061
--retries 2 \
5162
--retry-delay 35 \
5263
--log-cli-level=DEBUG \

.github/workflows/testing-integration.yaml

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -21,25 +21,14 @@ permissions: {}
2121

2222
jobs:
2323
rest_sync:
24-
name: rest ${{ matrix.python_version }} ${{ matrix.test_suite }}
24+
name: rest ${{ matrix.python_version }} shard ${{ matrix.shard }}/${{ matrix.total_shards }}
2525
runs-on: ubuntu-latest
2626
strategy:
2727
fail-fast: false
2828
matrix:
2929
python_version: ${{ fromJson(inputs.python_versions_json) }}
30-
test_suite: [
31-
# Quick tests we can run together
32-
'tests/integration/rest_sync/admin tests/integration/rest_sync/inference tests/integration/rest_sync/plugins',
33-
34-
# Control plane tests
35-
'tests/integration/rest_sync/db/control/pod',
36-
'tests/integration/rest_sync/db/control/serverless',
37-
'tests/integration/rest_sync/db/control/resources/index',
38-
# 'tests/integration/rest_sync/db/control/resources/collections', # These tests are slow, so disable for now
39-
40-
# Data plane tests
41-
'tests/integration/rest_sync/db/data',
42-
]
30+
shard: [1, 2, 3, 4, 5]
31+
total_shards: [5]
4332
steps:
4433
- uses: actions/checkout@v4
4534
- name: Setup Poetry
@@ -52,24 +41,23 @@ jobs:
5241
with:
5342
encrypted_project_api_key: '${{ inputs.encrypted_project_api_key }}'
5443
encryption_key: '${{ secrets.FERNET_ENCRYPTION_KEY }}'
55-
test_suite: '${{ matrix.test_suite }}'
44+
test_suite: 'tests/integration/rest_sync'
45+
pytest_splits: '${{ matrix.total_shards }}'
46+
pytest_group: '${{ matrix.shard }}'
5647
PINECONE_CLIENT_ID: ${{ secrets.PINECONE_SERVICE_ACCOUNT_CLIENT_ID }}
5748
PINECONE_CLIENT_SECRET: ${{ secrets.PINECONE_SERVICE_ACCOUNT_CLIENT_SECRET }}
5849
INDEX_HOST_DENSE: '${{ inputs.dense_index_host }}'
5950
INDEX_HOST_SPARSE: '${{ inputs.sparse_index_host }}'
6051

6152
rest_asyncio:
62-
name: rest_asyncio ${{ matrix.python_version }} ${{ matrix.test_suite }}
53+
name: rest_asyncio ${{ matrix.python_version }} shard ${{ matrix.shard }}/${{ matrix.total_shards }}
6354
runs-on: ubuntu-latest
6455
strategy:
6556
fail-fast: false
6657
matrix:
6758
python_version: ${{ fromJson(inputs.python_versions_json) }}
68-
test_suite: [
69-
'tests/integration/rest_asyncio/inference',
70-
'tests/integration/rest_asyncio/db/data',
71-
'tests/integration/rest_asyncio/db/control',
72-
]
59+
shard: [1, 2, 3, 4, 5]
60+
total_shards: [5]
7361
steps:
7462
- uses: actions/checkout@v4
7563
- name: Setup Poetry
@@ -82,20 +70,21 @@ jobs:
8270
with:
8371
encrypted_project_api_key: '${{ inputs.encrypted_project_api_key }}'
8472
encryption_key: '${{ secrets.FERNET_ENCRYPTION_KEY }}'
85-
test_suite: '${{ matrix.test_suite }}'
73+
test_suite: 'tests/integration/rest_asyncio'
74+
pytest_splits: '${{ matrix.total_shards }}'
75+
pytest_group: '${{ matrix.shard }}'
8676
PINECONE_CLIENT_ID: ${{ secrets.PINECONE_SERVICE_ACCOUNT_CLIENT_ID }}
8777
PINECONE_CLIENT_SECRET: ${{ secrets.PINECONE_SERVICE_ACCOUNT_CLIENT_SECRET }}
8878
INDEX_HOST_DENSE: '${{ inputs.dense_index_host }}'
8979
INDEX_HOST_SPARSE: '${{ inputs.sparse_index_host }}'
9080

9181
grpc:
92-
name: grpc ${{ matrix.python_version }} ${{ matrix.test_suite }}
82+
name: grpc ${{ matrix.python_version }}
9383
runs-on: ubuntu-latest
9484
strategy:
9585
fail-fast: false
9686
matrix:
9787
python_version: ${{ fromJson(inputs.python_versions_json) }}
98-
test_suite: ['tests/integration/grpc']
9988
steps:
10089
- uses: actions/checkout@v4
10190
- name: Setup Poetry
@@ -108,7 +97,8 @@ jobs:
10897
with:
10998
encrypted_project_api_key: '${{ inputs.encrypted_project_api_key }}'
11099
encryption_key: '${{ secrets.FERNET_ENCRYPTION_KEY }}'
111-
test_suite: '${{ matrix.test_suite }}'
100+
test_suite: 'tests/integration/grpc tests/integration/rest_sync/db/data'
101+
use_grpc: 'true'
112102
PINECONE_CLIENT_ID: ${{ secrets.PINECONE_SERVICE_ACCOUNT_CLIENT_ID }}
113103
PINECONE_CLIENT_SECRET: ${{ secrets.PINECONE_SERVICE_ACCOUNT_CLIENT_SECRET }}
114104
INDEX_HOST_DENSE: '${{ inputs.dense_index_host }}'

tests/conftest.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
"""
2+
Root-level conftest.py for the tests directory.
3+
4+
This file registers pytest plugins that should be available for all tests.
5+
"""
6+
7+
# Register pytest shard plugin globally
8+
# pytest reads this module-level variable from conftest.py and imports each
# listed module as a plugin; this is how the hooks in tests/pytest_shard.py
# (the --splits/--group sharding options) get registered for the test run.
pytest_plugins = ["tests.pytest_shard"]

tests/pytest_shard.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
"""
2+
Pytest plugin for sharding tests across multiple CI builds.
3+
4+
This plugin allows splitting the test suite into N shards and running only
5+
the tests in a specified shard. This is useful for parallelizing test runs
6+
across multiple CI jobs.
7+
8+
Usage:
9+
pytest --splits=3 --group=1 # Run shard 1 of 3
10+
pytest --splits=3 --group=2 # Run shard 2 of 3
11+
pytest --splits=3 --group=3 # Run shard 3 of 3
12+
13+
Environment variables:
14+
PYTEST_SPLITS: Number of shards (alternative to --splits)
15+
PYTEST_GROUP: Shard number to run (alternative to --group, 1-indexed)
16+
"""
17+
18+
import hashlib
19+
import os
20+
from typing import List
21+
22+
import pytest
23+
24+
25+
def pytest_addoption(parser: pytest.Parser) -> None:
    """Register the ``--splits`` and ``--group`` sharding flags.

    Both flags are parsed as integers and default to ``None`` so the
    collection hook can tell "not given on the command line" apart from an
    explicit value.
    """
    shard_options = parser.getgroup("shard", "test sharding options")

    # Flag -> help text; dict order is the registration order.
    flag_help = {
        "--splits": "Total number of shards to split tests into",
        "--group": "Which shard to run (1-indexed, must be between 1 and --splits)",
    }
    for flag, help_text in flag_help.items():
        shard_options.addoption(flag, type=int, default=None, help=help_text)
37+
38+
39+
def _resolve_shard_setting(config: pytest.Config, option: str, env_var: str) -> int:
    """Return the integer for *option*, falling back to *env_var*, else 0.

    A value given on the command line always wins (including an explicit 0).
    An unset or empty environment variable counts as "not provided".

    Raises:
        pytest.UsageError: if the environment variable is set to something
            that is not an integer.
    """
    cli_value = config.getoption(option)
    if cli_value is not None:
        return cli_value

    raw_value = os.environ.get(env_var, "").strip()
    if not raw_value:
        return 0
    try:
        return int(raw_value)
    except ValueError:
        # Surface a clean usage error instead of a traceback from inside the hook.
        raise pytest.UsageError(f"{env_var} must be an integer, got {raw_value!r}") from None


def pytest_collection_modifyitems(config: pytest.Config, items: List[pytest.Item]) -> None:
    """Keep only the collected tests that belong to the requested shard.

    The shard count comes from ``--splits`` (or ``PYTEST_SPLITS``) and the
    shard to run from ``--group`` (or ``PYTEST_GROUP``, 1-indexed). When no
    shard count is provided the hook does nothing.

    Args:
        config: The pytest config object holding the parsed options.
        items: The collected test items; filtered in place.

    Raises:
        pytest.UsageError: if the shard count is negative, the group is
            missing or outside ``1..splits``, or an environment variable is
            not an integer.

    Side effects:
        Reports the removed items through the ``pytest_deselected`` hook and
        stores a summary dict on ``config._shard_info``.
    """
    splits = _resolve_shard_setting(config, "--splits", "PYTEST_SPLITS")
    group = _resolve_shard_setting(config, "--group", "PYTEST_GROUP")

    # Only activate if splits is provided
    if splits == 0:
        return

    # Validate arguments
    if splits < 1:
        raise pytest.UsageError("--splits must be a positive integer (or set PYTEST_SPLITS)")

    if group < 1:
        raise pytest.UsageError(
            "--group must be a positive integer between 1 and --splits (or set PYTEST_GROUP)"
        )

    if group > splits:
        raise pytest.UsageError(f"--group ({group}) must be between 1 and --splits ({splits})")

    # Assign each test to a shard from a content hash of its node ID
    # (format: "path/to/test_file.py::TestClass::test_method"). md5 is used
    # only for a stable, evenly spread distribution, not for security; the
    # built-in hash() is salted per process and would differ between CI jobs.
    shard_items: List[pytest.Item] = []
    deselected_items: List[pytest.Item] = []
    for item in items:
        digest = hashlib.md5(item.nodeid.encode("utf-8")).hexdigest()
        assigned_shard = (int(digest, 16) % splits) + 1  # 1-indexed
        if assigned_shard == group:
            shard_items.append(item)
        else:
            deselected_items.append(item)

    # Replace items with only those in the current shard
    original_count = len(items)
    items[:] = shard_items

    # Tell pytest about the tests we dropped so the summary shows "N deselected".
    if deselected_items:
        config.hook.pytest_deselected(items=deselected_items)

    # NOTE(review): a shard that ends up with no tests makes pytest exit with
    # code 5 ("no tests collected") — confirm the CI job tolerates that.

    # Store shard info for later reporting
    config._shard_info = {
        "group": group,
        "splits": splits,
        "shard_count": len(shard_items),
        "total_count": original_count,
    }

0 commit comments

Comments
 (0)