Skip to content

Commit 9bef39b

Browse files
authored
Merge pull request #398 from sam-herman/index-construction-jmh
add index construction benchmark
2 parents 356c490 + 32335b4 commit 9bef39b

File tree

3 files changed

+192
-0
lines changed

3 files changed

+192
-0
lines changed

benchmarks-jmh/README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,26 @@ Common JMH command line options you can use in the configuration or command line
3333
- `-t <num>` - Number of threads
3434
- `-p <param>=<value>` - Benchmark parameters
3535
- `-prof <profiler>` - Add profiler
36+
37+
38+
2. Focus on specific benchmarks
39+
40+
For example, the commands below run only `IndexConstructionWithStaticSetBenchmark`:
41+
```shell
42+
mvn clean install -DskipTests=true
43+
BENCHMARK_NAME="IndexConstructionWithStaticSetBenchmark"
44+
java --enable-native-access=ALL-UNNAMED \
45+
--add-modules=jdk.incubator.vector \
46+
-XX:+HeapDumpOnOutOfMemoryError \
47+
-Xmx14G -Djvector.experimental.enable_native_vectorization=true \
48+
-jar benchmarks-jmh/target/benchmarks-jmh-4.0.0-beta.2-SNAPSHOT.jar $BENCHMARK_NAME
49+
```
50+
51+
If you want to rerun a specific benchmark without testing the entire grid of scenarios defined in the benchmark,
52+
you can set `M` and `beamWidth` explicitly with `-p`, as follows:
53+
```shell
54+
java -jar target/benchmarks.jar IndexConstructionWithStaticSetBenchmark -p M=32 -p beamWidth=100
55+
```
56+
57+
58+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Copyright DataStax, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package io.github.jbellis.jvector.bench;
17+
18+
import io.github.jbellis.jvector.example.SiftSmall;
19+
import io.github.jbellis.jvector.example.util.SiftLoader;
20+
import io.github.jbellis.jvector.graph.*;
21+
import io.github.jbellis.jvector.graph.similarity.BuildScoreProvider;
22+
import io.github.jbellis.jvector.util.Bits;
23+
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
24+
import io.github.jbellis.jvector.vector.types.VectorFloat;
25+
import org.openjdk.jmh.annotations.*;
26+
import org.openjdk.jmh.infra.Blackhole;
27+
import org.slf4j.Logger;
28+
import org.slf4j.LoggerFactory;
29+
30+
import java.io.IOException;
31+
import java.util.ArrayList;
32+
import java.util.Set;
33+
import java.util.concurrent.TimeUnit;
34+
35+
@BenchmarkMode(Mode.AverageTime)
36+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
37+
@State(Scope.Thread)
38+
@Fork(1)
39+
@Warmup(iterations = 2)
40+
@Measurement(iterations = 5)
41+
@Threads(1)
42+
public class IndexConstructionWithStaticSetBenchmark {
43+
private static final Logger log = LoggerFactory.getLogger(IndexConstructionWithStaticSetBenchmark.class);
44+
private RandomAccessVectorValues ravv;
45+
private ArrayList<VectorFloat<?>> baseVectors;
46+
private ArrayList<VectorFloat<?>> queryVectors;
47+
private ArrayList<Set<Integer>> groundTruth;
48+
private BuildScoreProvider bsp;
49+
@Param({"16", "32", "64"})
50+
private int M; // graph degree
51+
@Param({"10", "100"})
52+
private int beamWidth;
53+
int originalDimension;
54+
55+
@Setup
56+
public void setup() throws IOException {
57+
var siftPath = "siftsmall";
58+
baseVectors = SiftLoader.readFvecs(String.format("%s/siftsmall_base.fvecs", siftPath));
59+
queryVectors = SiftLoader.readFvecs(String.format("%s/siftsmall_query.fvecs", siftPath));
60+
groundTruth = SiftLoader.readIvecs(String.format("%s/siftsmall_groundtruth.ivecs", siftPath));
61+
log.info("base vectors size: {}, query vectors size: {}, loaded, dimensions {}",
62+
baseVectors.size(), queryVectors.size(), baseVectors.get(0).length());
63+
originalDimension = baseVectors.get(0).length();
64+
// wrap the raw vectors in a RandomAccessVectorValues
65+
ravv = new ListRandomAccessVectorValues(baseVectors, originalDimension);
66+
67+
// score provider using the raw, in-memory vectors
68+
bsp = BuildScoreProvider.randomAccessScoreProvider(ravv, VectorSimilarityFunction.EUCLIDEAN);
69+
}
70+
71+
@TearDown
72+
public void tearDown() throws IOException {
73+
baseVectors.clear();
74+
queryVectors.clear();
75+
groundTruth.clear();
76+
}
77+
78+
@Benchmark
79+
public void buildIndexBenchmark(Blackhole blackhole) throws IOException {
80+
// score provider using the raw, in-memory vectors
81+
try (final var graphIndexBuilder = new GraphIndexBuilder(bsp, ravv.dimension(), M, beamWidth, 1.2f, 1.2f)) {
82+
final var graphIndex = graphIndexBuilder.build(ravv);
83+
blackhole.consume(graphIndex);
84+
}
85+
}
86+
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/*
2+
* Copyright DataStax, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package io.github.jbellis.jvector.bench;
17+
18+
import io.github.jbellis.jvector.example.util.SiftLoader;
19+
import io.github.jbellis.jvector.graph.GraphIndexBuilder;
20+
import io.github.jbellis.jvector.graph.ListRandomAccessVectorValues;
21+
import io.github.jbellis.jvector.graph.RandomAccessVectorValues;
22+
import io.github.jbellis.jvector.graph.similarity.BuildScoreProvider;
23+
import io.github.jbellis.jvector.quantization.ProductQuantization;
24+
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
25+
import io.github.jbellis.jvector.vector.types.VectorFloat;
26+
import org.openjdk.jmh.annotations.*;
27+
import org.openjdk.jmh.infra.Blackhole;
28+
import org.slf4j.Logger;
29+
import org.slf4j.LoggerFactory;
30+
31+
import java.io.IOException;
32+
import java.util.ArrayList;
33+
import java.util.Set;
34+
import java.util.concurrent.TimeUnit;
35+
36+
@BenchmarkMode(Mode.AverageTime)
37+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
38+
@State(Scope.Thread)
39+
@Fork(1)
40+
@Warmup(iterations = 2)
41+
@Measurement(iterations = 5)
42+
@Threads(1)
43+
public class PQBenchmark {
44+
private static final Logger log = LoggerFactory.getLogger(PQBenchmark.class);
45+
private RandomAccessVectorValues ravv;
46+
private ArrayList<VectorFloat<?>> baseVectors;
47+
private ArrayList<VectorFloat<?>> queryVectors;
48+
private ArrayList<Set<Integer>> groundTruth;
49+
@Param({"16", "32", "64"})
50+
private int M; // Number of subspaces
51+
int originalDimension;
52+
53+
@Setup
54+
public void setup() throws IOException {
55+
var siftPath = "siftsmall";
56+
baseVectors = SiftLoader.readFvecs(String.format("%s/siftsmall_base.fvecs", siftPath));
57+
queryVectors = SiftLoader.readFvecs(String.format("%s/siftsmall_query.fvecs", siftPath));
58+
groundTruth = SiftLoader.readIvecs(String.format("%s/siftsmall_groundtruth.ivecs", siftPath));
59+
log.info("base vectors size: {}, query vectors size: {}, loaded, dimensions {}",
60+
baseVectors.size(), queryVectors.size(), baseVectors.get(0).length());
61+
originalDimension = baseVectors.get(0).length();
62+
// wrap the raw vectors in a RandomAccessVectorValues
63+
ravv = new ListRandomAccessVectorValues(baseVectors, originalDimension);
64+
}
65+
66+
@TearDown
67+
public void tearDown() throws IOException {
68+
baseVectors.clear();
69+
queryVectors.clear();
70+
groundTruth.clear();
71+
}
72+
73+
@Benchmark
74+
public void productQuantizationComputeBenchmark(Blackhole blackhole) throws IOException {
75+
// Compress the original vectors using PQ. this represents a compression ratio of 128 * 4 / 16 = 32x
76+
ProductQuantization pq = ProductQuantization.compute(ravv,
77+
M, // number of subspaces
78+
256, // number of centroids per subspace
79+
true); // center the dataset
80+
81+
blackhole.consume(pq);
82+
}
83+
}

0 commit comments

Comments
 (0)