From 51fc219e7ec1b1a9ef6fd458a91caaeb2c792ebf Mon Sep 17 00:00:00 2001 From: lintool Date: Wed, 27 Nov 2024 10:55:07 -0500 Subject: [PATCH 01/14] MSMARCO parquet --- ...ge.bge-base-en-v1.5.parquet.flat.cached.md | 103 ++++++++++++++++++ ...-embed-english-v3.0.parquet.flat.cached.md | 101 +++++++++++++++++ ...sage.cos-dpr-distil.parquet.flat.cached.md | 103 ++++++++++++++++++ ...passage.openai-ada2.parquet.flat.cached.md | 103 ++++++++++++++++++ ...-base-en-v1.5.parquet.flat.cached.template | 81 ++++++++++++++ ...-english-v3.0.parquet.flat.cached.template | 79 ++++++++++++++ ...os-dpr-distil.parquet.flat.cached.template | 81 ++++++++++++++ ...e.openai-ada2.parquet.flat.cached.template | 81 ++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 65 +++++++++++ ...mbed-english-v3.0.parquet.flat.cached.yaml | 65 +++++++++++ ...ge.cos-dpr-distil.parquet.flat.cached.yaml | 65 +++++++++++ ...ssage.openai-ada2.parquet.flat.cached.yaml | 65 +++++++++++ 12 files changed, 992 insertions(+) create mode 100644 docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat.cached.md create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.flat.cached.template create mode 100644 
src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.md new file mode 100644 index 000000000..b0266597f --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.md @@ -0,0 +1,103 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with BGE-base-en-v1.5.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar -P collections/
+tar xvf collections/msmarco-passage-bge-base-en-v1.5.tar -C collections/
+```
+
+To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`.
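To check the download before unpacking, a small helper along these lines can compare the file's digest against the expected value (the `verify_md5` name is purely illustrative, and GNU `md5sum` is assumed to be available; it is not part of the regression scripts):

```shell
# Illustrative helper (not part of the regression framework): compare a
# file's MD5 digest against an expected value before unpacking.
verify_md5() {
  local file="$1" expected="$2" actual
  actual=$(md5sum "$file" | awk '{print $1}')
  if [ "$actual" = "$expected" ]; then
    echo "OK: $file"
  else
    echo "FAIL: $file (got $actual, expected $expected)" >&2
    return 1
  fi
}

# For the corpus above:
# verify_md5 collections/msmarco-passage-bge-base-en-v1.5.tar 353d2c9e72e858897ad479cca4ea0db1
```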
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5 +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5 \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ + >& logs/log.msmarco-passage-bge-base-en-v1.5 & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. 
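The cached queries ship as a gzipped JSONL file with one pre-encoded query per line, so the dev-set size is easy to sanity-check; a sketch (the `count_jsonl` helper is illustrative, assuming `zcat` is available):

```shell
# Illustrative helper: count records in a gzipped JSONL file.
count_jsonl() { zcat "$1" | wc -l; }

# Against the cached BGE queries (after initializing the tools submodule),
# this should report one line per dev-set question, i.e., 6980:
# count_jsonl tools/topics-and-qrels/topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.gz
```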
+
+After indexing has completed, you should be able to perform retrieval as follows using flat indexes:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \
+  -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.gz \
+  -topicReader JsonIntVector \
+  -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt \
+  -hits 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt
+bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt
+bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt
+bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **BGE-base-en-v1.5**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3641 |
+| **RR@10** | **BGE-base-en-v1.5**|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3583 |
+| **R@100** | **BGE-base-en-v1.5**|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9006 |
+| **R@1000** | **BGE-base-en-v1.5**|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9811 |
+
+Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.md
new file mode 100644
index 000000000..3f1436028
--- /dev/null
+++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.md
@@ -0,0 +1,101 @@
+# Anserini Regressions: MS MARCO Passage Ranking
+
+**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with flat indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking).
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar -P collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.tar -C collections/ +``` + +To confirm, `msmarco-passage-cohere-embed-english-v3.0.tar` is 38 GB and has MD5 checksum `06a6e38a0522850c6aa504db7b2617f5`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached \ + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0 +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0 \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + >& logs/log.msmarco-passage-cohere-embed-english-v3.0 & +``` + +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. 
+
+After indexing has completed, you should be able to perform retrieval as follows using flat indexes:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ \
+  -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.gz \
+  -topicReader JsonIntVector \
+  -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \
+  -hits 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt
+bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt
+bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt
+bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **cohere-embed-english-v3.0**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3716 |
+| **RR@10** | **cohere-embed-english-v3.0**|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3658 |
+| **R@100** | **cohere-embed-english-v3.0**|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.8935 |
+| **R@1000** | **cohere-embed-english-v3.0**|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9786 |
+
+Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.md
new file mode 100644
index 000000000..2053a5493
--- /dev/null
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.md
@@ -0,0 +1,103 @@
+# Anserini Regressions: MS MARCO Passage Ranking
+
+**Model**: cosDPR-distil with flat indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper:
+
+> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached \ + --corpus-path collections/msmarco-passage-cos-dpr-distil +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ + >& logs/log.msmarco-passage-cos-dpr-distil & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. 
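Since `trec_eval` reports scores in a three-column format (metric, query id, value), a tiny filter makes it easy to compare a single value against the effectiveness tables on these pages from a script; a sketch (the `metric` helper name is illustrative):

```shell
# Illustrative filter: pull one metric's value out of trec_eval output,
# which is formatted as "metric <tab> query <tab> value".
metric() { awk -v m="$1" '$1 == m { print $3 }'; }

# Usage sketch, piping trec_eval through the filter:
# bin/trec_eval -c -m map <qrels> <run> | metric map
```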
+
+After indexing has completed, you should be able to perform retrieval as follows using flat indexes:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \
+  -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.gz \
+  -topicReader JsonIntVector \
+  -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt \
+  -hits 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt
+bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt
+bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt
+bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **cosDPR-distil**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3942 |
+| **RR@10** | **cosDPR-distil**|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3896 |
+| **R@100** | **cosDPR-distil**|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9075 |
+| **R@1000** | **cosDPR-distil**|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9796 |
+
+Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat.cached.md
new file mode 100644
index 000000000..4e1f2cc01
--- /dev/null
+++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat.cached.md
@@ -0,0 +1,103 @@
+# Anserini Regressions: MS MARCO Passage Ranking
+
+**Model**: OpenAI-ada2 embeddings with flat indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper:
+
+> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.flat.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.parquet.flat.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.openai-ada2.parquet.flat.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar -P collections/ +tar xvf collections/msmarco-passage-openai-ada2.tar -C collections/ +``` + +To confirm, `msmarco-passage-openai-ada2.tar` is 109 GB and has MD5 checksum `a4d843d522ff3a3af7edbee789a63402`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.parquet.flat.cached \ + --corpus-path collections/msmarco-passage-openai-ada2 +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-openai-ada2 \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ \ + >& logs/log.msmarco-passage-openai-ada2 & +``` + +The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. 
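The run files produced by the retrieval commands below are in standard TREC format (`qid Q0 docid rank score tag`), so it is straightforward to sanity-check that no topic received more than the requested `-hits 1000` results; a sketch (`max_hits` is an illustrative name):

```shell
# Illustrative check: report the largest number of hits any topic received
# in a TREC-format run file (the first field is the topic id).
max_hits() { awk '{ c[$1]++ } END { m = 0; for (q in c) if (c[q] > m) m = c[q]; print m }' "$1"; }

# After retrieval with -hits 1000, this should print at most 1000:
# max_hits runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt
```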
+
+After indexing has completed, you should be able to perform retrieval as follows using flat indexes:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ \
+  -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.openai-ada2.jsonl.gz \
+  -topicReader JsonIntVector \
+  -output runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \
+  -hits 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt
+bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt
+bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt
+bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **OpenAI-ada2**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3505 |
+| **RR@10** | **OpenAI-ada2**|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3434 |
+| **R@100** | **OpenAI-ada2**|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.8996 |
+| **R@1000** | **OpenAI-ada2**|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9858 |
+
+Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.template
new file mode 100644
index 000000000..c89aecbad
--- /dev/null
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.template
@@ -0,0 +1,81 @@
+# Anserini Regressions: MS MARCO Passage Ranking
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with flat indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with BGE-base-en-v1.5.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget ${download_url} -P collections/
+tar xvf collections/${corpus}.tar -C collections/
+```
+
+To confirm, `${corpus}.tar` is 59 GB and has MD5 checksum `${download_checksum}`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building flat indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details.
+
+After indexing has completed, you should be able to perform retrieval as follows using flat indexes:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.template
new file mode 100644
index 000000000..65f403567
--- /dev/null
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.template
@@ -0,0 +1,79 @@
+# Anserini Regressions: MS MARCO Passage Ranking
+
+**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with flat indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking).
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details. 
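A "flat" index means retrieval is exhaustive: every cached query vector is scored against every document vector by inner product, with no approximation, which is why these runs reproduce exactly. A toy sketch of the idea (synthetic ids and vectors; the real implementation is Lucene's flat vector search inside Anserini):

```python
def flat_search(query, doc_vectors, k=10):
    """Brute-force top-k retrieval: score every document by inner product
    with the query, then sort. Exact by construction -- no approximation."""
    scores = [
        (docid, sum(q * d for q, d in zip(query, vec)))
        for docid, vec in doc_vectors.items()
    ]
    scores.sort(key=lambda pair: -pair[1])
    return scores[:k]

# Toy example with made-up 3-dimensional "embeddings":
docs = {"d1": [1.0, 0.0, 0.0], "d2": [1.0, 1.0, 0.0], "d3": [0.0, 1.0, 0.0]}
print(flat_search([2.0, 1.0, 0.0], docs, k=2))  # -> [('d2', 3.0), ('d1', 2.0)]
```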
+ +After indexing has completed, you should be able to perform retrieval as follows using flat indexes: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.template new file mode 100644 index 000000000..a0852590e --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.template @@ -0,0 +1,81 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: cosDPR-distil with flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 57 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details. + +After indexing has completed, you should be able to perform retrieval as follows using flat indexes: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.flat.cached.template new file mode 100644 index 000000000..e6c01ac39 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.flat.cached.template @@ -0,0 +1,81 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: OpenAI-ada2 embeddings with flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 109 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details. + +After indexing has completed, you should be able to perform retrieval as follows using flat indexes: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_. diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..e798ddc1c --- /dev/null +++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,65 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5 +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar +download_checksum: 353d2c9e72e858897ad479cca4ea0db1 + +index_path: indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -c -m map + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: RR@10 + command: bin/trec_eval + params: -c -M 10 -m recip_rank + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command:
bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" + id: dev + path: topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.gz + qrel: qrels.msmarco-passage.dev-subset.txt + +models: + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.3641 + RR@10: + - 0.3583 + R@100: + - 0.9006 + R@1000: + - 0.9811 diff --git a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml new file mode 100644 index 000000000..6e0d0e456 --- /dev/null +++ b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml @@ -0,0 +1,65 @@ +--- +corpus: msmarco-passage-cohere-embed-english-v3.0 +corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar +download_checksum: 06a6e38a0522850c6aa504db7b2617f5 + +index_path: indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -c -m map + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: RR@10 + command: bin/trec_eval + params: -c -M 10 -m recip_rank + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: 
bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" + id: dev + path: topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.gz + qrel: qrels.msmarco-passage.dev-subset.txt + +models: + - name: cohere-embed-english-v3.0-flat-cached + display: cohere-embed-english-v3.0 + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.3716 + RR@10: + - 0.3658 + R@100: + - 0.8935 + R@1000: + - 0.9786 diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml new file mode 100644 index 000000000..e00ab04a0 --- /dev/null +++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml @@ -0,0 +1,65 @@ +--- +corpus: msmarco-passage-cos-dpr-distil +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar +download_checksum: e20ffbc8b5e7f760af31298aefeaebbd + +index_path: indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -c -m map + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: RR@10 + command: bin/trec_eval + params: -c -M 10 -m recip_rank + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 
+ separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" + id: dev + path: topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.gz + qrel: qrels.msmarco-passage.dev-subset.txt + +models: + - name: cos-dpr-distil-flat-cached + display: cosDPR-distil + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.3942 + RR@10: + - 0.3896 + R@100: + - 0.9075 + R@1000: + - 0.9796 diff --git a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml new file mode 100644 index 000000000..da04e5b70 --- /dev/null +++ b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml @@ -0,0 +1,65 @@ +--- +corpus: msmarco-passage-openai-ada2 +corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar +download_checksum: a4d843d522ff3a3af7edbee789a63402 + +index_path: indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -c -m map + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: RR@10 + command: bin/trec_eval + params: -c -M 10 -m recip_rank + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: 
JsonIntVector +topics: + - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" + id: dev + path: topics.msmarco-passage.dev-subset.openai-ada2.jsonl.gz + qrel: qrels.msmarco-passage.dev-subset.txt + +models: + - name: openai-ada2-flat-cached + display: OpenAI-ada2 + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.3505 + RR@10: + - 0.3434 + R@100: + - 0.8996 + R@1000: + - 0.9858 From 9b044033fbd312199f0d7ef52b9e6466728157c6 Mon Sep 17 00:00:00 2001 From: jimmylin Date: Wed, 27 Nov 2024 19:33:03 -0500 Subject: [PATCH 02/14] Update corpus. --- bin/run.sh | 2 +- ...msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml | 2 +- .../msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml | 2 +- .../msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bin/run.sh b/bin/run.sh index 43bb461cc..d0d07d61b 100755 --- a/bin/run.sh +++ b/bin/run.sh @@ -1,3 +1,3 @@ #!/bin/sh -java -cp `ls target/*-fatjar.jar` -Xms512M -Xmx64G --add-modules jdk.incubator.vector $@ \ No newline at end of file +java -cp `ls target/*-fatjar.jar` -Xms512M -Xmx128G --add-modules jdk.incubator.vector $@ diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml index e798ddc1c..30dc5292e 100644 --- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,6 +1,6 @@ --- corpus: msmarco-passage-bge-base-en-v1.5 -corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5/ +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ download_url: 
https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar download_checksum: 353d2c9e72e858897ad479cca4ea0db1 diff --git a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml index 6e0d0e456..839238c2e 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml @@ -1,6 +1,6 @@ --- corpus: msmarco-passage-cohere-embed-english-v3.0 -corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0/ +corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0.parquet/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar download_checksum: 06a6e38a0522850c6aa504db7b2617f5 diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml index e00ab04a0..ccff33440 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml @@ -1,6 +1,6 @@ --- corpus: msmarco-passage-cos-dpr-distil -corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil/ +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar download_checksum: e20ffbc8b5e7f760af31298aefeaebbd diff --git a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml index da04e5b70..9d2ce7abe 100644 --- 
a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml @@ -1,6 +1,6 @@ --- corpus: msmarco-passage-openai-ada2 -corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ +corpus_path: collections/msmarco/msmarco-passage-openai-ada2.parquet/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar download_checksum: a4d843d522ff3a3af7edbee789a63402 From 494bbc6d8b33aff8bc213425327270d247256aa6 Mon Sep 17 00:00:00 2001 From: lintool Date: Thu, 28 Nov 2024 16:11:53 -0500 Subject: [PATCH 03/14] Tweaks, changed tarball. --- ...ssions-dl19-passage.bge-base-en-v1.5.flat-int8.cached.md | 2 +- ...ressions-dl19-passage.bge-base-en-v1.5.flat-int8.onnx.md | 2 +- ...regressions-dl19-passage.bge-base-en-v1.5.flat.cached.md | 2 +- .../regressions-dl19-passage.bge-base-en-v1.5.flat.onnx.md | 2 +- ...ssions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md | 2 +- ...ressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md | 2 +- ...regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md | 2 +- .../regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md | 2 +- ...ssions-dl20-passage.bge-base-en-v1.5.flat-int8.cached.md | 2 +- ...ressions-dl20-passage.bge-base-en-v1.5.flat-int8.onnx.md | 2 +- ...regressions-dl20-passage.bge-base-en-v1.5.flat.cached.md | 2 +- .../regressions-dl20-passage.bge-base-en-v1.5.flat.onnx.md | 2 +- ...ssions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md | 2 +- ...ressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md | 2 +- ...regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md | 2 +- .../regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md | 2 +- ...-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.md | 2 +- ...ns-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.md | 2 +- ...sions-msmarco-v1-passage.bge-base-en-v1.5.flat.cached.md | 2 +- 
...essions-msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.md | 2 +- ...-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md | 2 +- ...ns-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md | 2 +- ...sions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md | 2 +- ...essions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md | 2 +- ...marco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.md | 6 +++--- ...passage.cohere-embed-english-v3.0.parquet.flat.cached.md | 4 ++-- ...msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.md | 4 ++-- ...ns-msmarco-v1-passage.openai-ada2.parquet.flat.cached.md | 4 ++-- .../dl19-passage.bge-base-en-v1.5.flat-int8.cached.template | 2 +- .../dl19-passage.bge-base-en-v1.5.flat-int8.onnx.template | 2 +- .../dl19-passage.bge-base-en-v1.5.flat.cached.template | 2 +- .../dl19-passage.bge-base-en-v1.5.flat.onnx.template | 2 +- .../dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template | 2 +- .../dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.template | 2 +- .../dl19-passage.bge-base-en-v1.5.hnsw.cached.template | 2 +- .../dl19-passage.bge-base-en-v1.5.hnsw.onnx.template | 2 +- .../dl20-passage.bge-base-en-v1.5.flat-int8.cached.template | 2 +- .../dl20-passage.bge-base-en-v1.5.flat-int8.onnx.template | 2 +- .../dl20-passage.bge-base-en-v1.5.flat.cached.template | 2 +- .../dl20-passage.bge-base-en-v1.5.flat.onnx.template | 2 +- .../dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template | 2 +- .../dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.template | 2 +- .../dl20-passage.bge-base-en-v1.5.hnsw.cached.template | 2 +- .../dl20-passage.bge-base-en-v1.5.hnsw.onnx.template | 2 +- ...co-v1-passage.bge-base-en-v1.5.flat-int8.cached.template | 2 +- ...arco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.template | 2 +- ...msmarco-v1-passage.bge-base-en-v1.5.flat.cached.template | 2 +- .../msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.template | 2 +- ...co-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template | 2 +- 
...arco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.template | 2 +- ...msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template | 2 +- .../msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.template | 2 +- ...v1-passage.bge-base-en-v1.5.parquet.flat.cached.template | 4 ++-- ...e.cohere-embed-english-v3.0.parquet.flat.cached.template | 2 +- ...o-v1-passage.cos-dpr-distil.parquet.flat.cached.template | 2 +- ...arco-v1-passage.openai-ada2.parquet.flat.cached.template | 2 +- ...rco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml | 4 ++-- ...ssage.cohere-embed-english-v3.0.parquet.flat.cached.yaml | 4 ++-- ...marco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml | 4 ++-- .../msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml | 4 ++-- 60 files changed, 70 insertions(+), 70 deletions(-) diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.cached.md index cdb2df747..c7dd56722 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.cached.md @@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.flat-int8.cached ``` -We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. 
From any machine, the following command will download the corpus and perform the complete regression, end to end: diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.onnx.md index 991f94b8f..ebda2e35a 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.onnx.md @@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.flat-int8.onnx ``` -We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. From any machine, the following command will download the corpus and perform the complete regression, end to end: diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.cached.md index 5e66d4710..08fd9ce64 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.cached.md @@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.flat.cached ``` -We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. 
From any machine, the following command will download the corpus and perform the complete regression, end to end: diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.onnx.md index 46f12c267..0d1514439 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.onnx.md @@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.flat.onnx ``` -We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. From any machine, the following command will download the corpus and perform the complete regression, end to end: diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md index 13c9c2645..0a4448980 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md @@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw-int8.cached ``` -We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. 
From any machine, the following command will download the corpus and perform the complete regression, end to end: diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md index e464cadbf..2525bde01 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx ``` -We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. From any machine, the following command will download the corpus and perform the complete regression, end to end: diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md index d45864761..f9e744445 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md @@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw.cached ``` -We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. 
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md
index 3e4ce7de4..ab5e4e72e 100644
--- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md
+++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw.onnx
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.cached.md
index 8ab2c0abe..349abef6b 100644
--- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.cached.md
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.cached.md
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.flat-int8.cached
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.onnx.md
index e395e8305..c48b5919b 100644
--- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.onnx.md
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.onnx.md
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.flat-int8.onnx
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.cached.md
index 1dae4ced6..837b59cd3 100644
--- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.cached.md
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.cached.md
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.flat.cached
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.onnx.md
index a1a750976..6fba001d2 100644
--- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.onnx.md
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.onnx.md
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.flat.onnx
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md
index db28c5e83..bd0481622 100644
--- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8.cached
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
index b5fabdaf2..56a7391f0 100644
--- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md
index bd37bd12c..69acf4121 100644
--- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw.cached
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md
index 4d5252c21..8dbf86271 100644
--- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw.onnx
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.md
index f60ba513b..7266ed1e3 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.md
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.md
index 8321695b8..f5fe02950 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.md
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.cached.md
index ea88359b6..48a6af17b 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.cached.md
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.flat.cached
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.md
index 7a0a2b2a3..4d283d13e 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.md
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.flat.onnx
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md
index 221f82e61..b2ffd220b 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
index a2a59c221..1e7f03279 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md
index aa659b7e3..924b05350 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md
index 62a43cda7..4dbd0b5f3 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.md
index b0266597f..9ed35d9db 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.md
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
@@ -32,11 +32,11 @@ The `run_regression.py` script automates the following steps, but if you want to
 Download the corpus and unpack into `collections/`:
 
 ```bash
-wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar -P collections/
+wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/
 tar xvf collections/msmarco-passage-bge-base-en-v1.5.tar -C collections/
 ```
 
-To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`.
+To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`.
 With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.md
index 3f1436028..b52b87cf0 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.md
@@ -30,11 +30,11 @@ The `run_regression.py` script automates the following steps, but if you want to
 Download the corpus and unpack into `collections/`:
 
 ```bash
-wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar -P collections/
+wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -P collections/
 tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.tar -C collections/
 ```
 
-To confirm, `msmarco-passage-cohere-embed-english-v3.0.tar` is 38 GB and has MD5 checksum `06a6e38a0522850c6aa504db7b2617f5`.
+To confirm, `msmarco-passage-cohere-embed-english-v3.0.tar` is 16 GB and has MD5 checksum `40c5caf33476746e93ceeb75174b8d64`.
 With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.md
index 2053a5493..a3c78cf99 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.md
@@ -32,11 +32,11 @@ The `run_regression.py` script automates the following steps, but if you want to
 Download the corpus and unpack into `collections/`:
 
 ```bash
-wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar -P collections/
+wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/
 tar xvf collections/msmarco-passage-cos-dpr-distil.tar -C collections/
 ```
 
-To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`.
+To confirm, `msmarco-passage-cos-dpr-distil.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`.
 With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat.cached.md
index 4e1f2cc01..12acc7f37 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat.cached.md
@@ -32,11 +32,11 @@ The `run_regression.py` script automates the following steps, but if you want to
 Download the corpus and unpack into `collections/`:
 
 ```bash
-wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar -P collections/
+wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar -P collections/
 tar xvf collections/msmarco-passage-openai-ada2.tar -C collections/
 ```
 
-To confirm, `msmarco-passage-openai-ada2.tar` is 109 GB and has MD5 checksum `a4d843d522ff3a3af7edbee789a63402`.
+To confirm, `msmarco-passage-openai-ada2.tar` is 75 GB and has MD5 checksum `fa3637e9c4150b157270e19ef3a4f779`.
 With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.flat-int8.cached.template
index 6f1a137b7..62ef565ae 100644
--- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.flat-int8.cached.template
+++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.flat-int8.cached.template
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.flat-int8.onnx.template
index 50f6e7e05..d5c1eeb0d 100644
--- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.flat-int8.onnx.template
+++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.flat-int8.onnx.template
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.flat.cached.template
index 38ce98103..e0a404c16 100644
--- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.flat.cached.template
+++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.flat.cached.template
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.flat.onnx.template
index 97d2339fa..41c190ca8 100644
--- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.flat.onnx.template
+++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.flat.onnx.template
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template
index 63fdad31d..8aaeed299 100644
--- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
index 69afbfc03..9cca24c3d 100644
--- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
+++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.cached.template
index d011773dd..8b4f7d44d 100644
--- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.cached.template
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.onnx.template
index 89c9d8ee7..40f503992 100644
--- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.onnx.template
+++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.onnx.template
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.flat-int8.cached.template
index 16d2bf261..103072c5b 100644
--- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.flat-int8.cached.template
+++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.flat-int8.cached.template
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.flat-int8.onnx.template
index de5f36d97..de783b7f2 100644
--- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.flat-int8.onnx.template
+++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.flat-int8.onnx.template
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.flat.cached.template
index 76adbf78b..a89253257 100644
--- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.flat.cached.template
+++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.flat.cached.template
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.flat.onnx.template
index 80f5f169a..b4aa58e6d 100644
--- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.flat.onnx.template
+++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.flat.onnx.template
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template
index a67e15082..4a8af6f44 100644
--- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template
@@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. From any machine, the following command will download the corpus and perform the complete regression, end to end: diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.template index 957322a5b..109a8bae6 100644 --- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf python src/main/python/run_regression.py --index --verify --search --regression ${test_name} ``` -We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. From any machine, the following command will download the corpus and perform the complete regression, end to end: diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.cached.template index 9197b856b..773c46400 100644 --- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.cached.template @@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf python src/main/python/run_regression.py --index --verify --search --regression ${test_name} ``` -We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. 
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. From any machine, the following command will download the corpus and perform the complete regression, end to end: diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.onnx.template index 058502495..75d744e32 100644 --- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.onnx.template @@ -20,7 +20,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf python src/main/python/run_regression.py --index --verify --search --regression ${test_name} ``` -We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. From any machine, the following command will download the corpus and perform the complete regression, end to end: diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.template index 906d9c609..c800dea1a 100644 --- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.template +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.template @@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf python src/main/python/run_regression.py --index --verify --search --regression ${test_name} ``` -We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. 
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 From any machine, the following command will download the corpus and perform the complete regression, end to end:

diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.template
index eac1c724a..809a6374d 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.template
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 From any machine, the following command will download the corpus and perform the complete regression, end to end:

diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.flat.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.flat.cached.template
index c89aecbad..1ab8daf49 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.flat.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.flat.cached.template
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 From any machine, the following command will download the corpus and perform the complete regression, end to end:

diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.template
index d65193849..2ff10818c 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.template
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 From any machine, the following command will download the corpus and perform the complete regression, end to end:

diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template
index 486b4bc69..cd51e05e3 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 From any machine, the following command will download the corpus and perform the complete regression, end to end:

diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
index 5dc942633..b6d111e0d 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 From any machine, the following command will download the corpus and perform the complete regression, end to end:

diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template
index aba6e3262..ba3d1d405 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 From any machine, the following command will download the corpus and perform the complete regression, end to end:

diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.template
index fb6a69722..a627a8725 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.template
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 From any machine, the following command will download the corpus and perform the complete regression, end to end:

diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.template
index c89aecbad..358ba6162 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.template
@@ -17,7 +17,7 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
-We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
 From any machine, the following command will download the corpus and perform the complete regression, end to end:
@@ -36,7 +36,7 @@ wget ${download_url} -P collections/
 tar xvf collections/${corpus}.tar -C collections/
 ```
-To confirm, `${corpus}.tar` is 59 GB and has MD5 checksum `${download_checksum}`.
+To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`.
 With the corpus downloaded, the following command will perform the remaining steps below:
 ```bash

diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.template
index 65f403567..bc054fd38 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.template
@@ -34,7 +34,7 @@ wget ${download_url} -P collections/
 tar xvf collections/${corpus}.tar -C collections/
 ```
-To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`.
+To confirm, `${corpus}.tar` is 16 GB and has MD5 checksum `${download_checksum}`.
 With the corpus downloaded, the following command will perform the remaining steps below:
 ```bash

diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.template
index a0852590e..70ae6d1f1 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.template
@@ -36,7 +36,7 @@ wget ${download_url} -P collections/
 tar xvf collections/${corpus}.tar -C collections/
 ```
-To confirm, `${corpus}.tar` is 57 GB and has MD5 checksum `${download_checksum}`.
+To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`.
 With the corpus downloaded, the following command will perform the remaining steps below:
 ```bash

diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.flat.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.flat.cached.template
index e6c01ac39..ea9283a53 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.flat.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.flat.cached.template
@@ -36,7 +36,7 @@ wget ${download_url} -P collections/
 tar xvf collections/${corpus}.tar -C collections/
 ```
-To confirm, `${corpus}.tar` is 109 GB and has MD5 checksum `${download_checksum}`.
+To confirm, `${corpus}.tar` is 75 GB and has MD5 checksum `${download_checksum}`.
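The size and checksum lines changed above are meant to be verified after download. A minimal stdlib sketch of that check (the file path and expected digest in the usage comment are taken from the BGE config in this patch, but the helper itself is illustrative, not Anserini code):

```python
import hashlib


def md5_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream a (potentially very large) tarball through MD5 in 1 MB chunks."""
    digest = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


# Illustrative usage against the `download_checksum` value from the YAML config:
# assert md5_of("collections/msmarco-passage-bge-base-en-v1.5.parquet.tar") == "b235e19ec492c18a18057b30b8b23fd4"
```

Streaming in chunks matters here because the tarballs run from 16 GB to 75 GB, far too large to read into memory at once.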
 With the corpus downloaded, the following command will perform the remaining steps below:
 ```bash

diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml
index 30dc5292e..f47e8a05c 100644
--- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml
@@ -2,8 +2,8 @@ corpus: msmarco-passage-bge-base-en-v1.5
 corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/
-download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar
-download_checksum: 353d2c9e72e858897ad479cca4ea0db1
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar
+download_checksum: b235e19ec492c18a18057b30b8b23fd4
 index_path: indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/
 index_type: flat

diff --git a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml
index 839238c2e..7f87b146d 100644
--- a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml
@@ -2,8 +2,8 @@ corpus: msmarco-passage-cohere-embed-english-v3.0
 corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0.parquet/
-download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar
-download_checksum: 06a6e38a0522850c6aa504db7b2617f5
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar
+download_checksum: 40c5caf33476746e93ceeb75174b8d64
 index_path: indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/
 index_type: flat

diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml
index ccff33440..e4db94942 100644
--- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml
@@ -2,8 +2,8 @@ corpus: msmarco-passage-cos-dpr-distil
 corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/
-download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar
-download_checksum: e20ffbc8b5e7f760af31298aefeaebbd
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar
+download_checksum: c8a204fbc3ccda581aa375936af43a97
 index_path: indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/
 index_type: flat

diff --git a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml
index 9d2ce7abe..3c0aee7a3 100644
--- a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml
@@ -2,8 +2,8 @@ corpus: msmarco-passage-openai-ada2
 corpus_path: collections/msmarco/msmarco-passage-openai-ada2.parquet/
-download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar
-download_checksum: a4d843d522ff3a3af7edbee789a63402
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar
+download_checksum: fa3637e9c4150b157270e19ef3a4f779
 index_path: indexes/lucene-flat.msmarco-v1-passage.openai-ada2/
 index_type: flat

From 5a12c636b0ca61ed50e2695e3accbbe9f72371db Mon Sep 17 00:00:00 2001
From: jimmylin
Date: Fri, 29 Nov 2024 09:13:38 -0500
Subject: [PATCH 04/14] tweak docs.

---
 ...ge.bge-base-en-v1.5.parquet.flat.cached.md | 22 +++++++++----------
 ...-embed-english-v3.0.parquet.flat.cached.md | 22 +++++++++----------
 ...sage.cos-dpr-distil.parquet.flat.cached.md | 22 +++++++++----------
 ...passage.openai-ada2.parquet.flat.cached.md | 22 +++++++++----------
 ....bge-base-en-v1.5.parquet.flat.cached.yaml |  2 +-
 ...mbed-english-v3.0.parquet.flat.cached.yaml |  2 +-
 ...ge.cos-dpr-distil.parquet.flat.cached.yaml |  2 +-
 ...ssage.openai-ada2.parquet.flat.cached.yaml |  2 +-
 8 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.md
index 9ed35d9db..433219085 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.md
@@ -33,15 +33,15 @@ Download the corpus and unpack into `collections/`:
 ```bash
 wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/
-tar xvf collections/msmarco-passage-bge-base-en-v1.5.tar -C collections/
+tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/
 ```
-To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`.
+To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`.
 With the corpus downloaded, the following command will perform the remaining steps below:
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached \
-  --corpus-path collections/msmarco-passage-bge-base-en-v1.5
+  --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet
 ```
 ## Indexing
@@ -52,13 +52,13 @@ Sample indexing command, building flat indexes:
 bin/run.sh io.anserini.index.IndexFlatDenseVectors \
   -threads 16 \
   -collection ParquetDenseVectorCollection \
-  -input /path/to/msmarco-passage-bge-base-en-v1.5 \
+  -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \
   -generator ParquetDenseVectorDocumentGenerator \
   -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \
-  >& logs/log.msmarco-passage-bge-base-en-v1.5 &
+  >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet &
 ```
-The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above.
+The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
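The "flat" index that `IndexFlatDenseVectors` builds stores the raw vectors and answers queries by exact brute-force scoring, in contrast to the approximate HNSW indexes elsewhere in the patch series. A pure-Python sketch of that exact-search semantics over toy vectors (not Anserini's Lucene-backed implementation):

```python
def flat_search(query, corpus, k=10):
    """Exact top-k by inner product: score every stored vector, no approximation."""
    dot = lambda a, b: sum(x * y for x, y in zip(a, b))
    scored = [(dot(query, vec), doc_id) for doc_id, vec in corpus.items()]
    scored.sort(key=lambda t: (-t[0], t[1]))  # score descending, ties by doc id
    return [(doc_id, score) for score, doc_id in scored[:k]]


# Toy corpus of three 3-dimensional "passage embeddings":
corpus = {
    "d1": [1.0, 0.0, 0.0],
    "d2": [0.0, 1.0, 0.0],
    "d3": [0.7, 0.7, 0.0],
}
top = flat_search([1.0, 0.2, 0.0], corpus, k=2)
```

Because every document is scored, results are exact but cost grows linearly with corpus size, which is why these regressions use 16 search threads over the 8.8M-passage collection.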
 ## Retrieval
@@ -73,17 +73,17 @@ bin/run.sh io.anserini.search.SearchFlatDenseVectors \
   -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \
   -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.gz \
   -topicReader JsonIntVector \
-  -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt \
+  -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt \
   -hits 1000 -threads 16 &
 ```
 Evaluation can be performed using `trec_eval`:
 ```bash
-bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt
-bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt
-bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt
-bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt
+bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt
+bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt
+bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt
+bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt
 ```
 ## Effectiveness

diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.md
index b52b87cf0..715597e3b 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.md
@@ -31,15 +31,15 @@ Download the corpus and unpack into `collections/`:
 ```bash
 wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -P collections/
-tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.tar -C collections/
+tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -C collections/
 ```
-To confirm, `msmarco-passage-cohere-embed-english-v3.0.tar` is 16 GB and has MD5 checksum `40c5caf33476746e93ceeb75174b8d64`.
+To confirm, `msmarco-passage-cohere-embed-english-v3.0.parquet.tar` is 16 GB and has MD5 checksum `40c5caf33476746e93ceeb75174b8d64`.
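The `trec_eval` invocations in these regression pages compute MAP, MRR@10 (`-M 10 -m recip_rank` caps each ranking at depth 10 before taking the reciprocal rank), and recall. A stdlib sketch of the MRR@10 computation over already-parsed run and qrels data (toy inputs, not the official trec_eval code):

```python
def mrr_at_k(run, qrels, k=10):
    """run: qid -> ranked doc ids; qrels: set of relevant (qid, docid) pairs."""
    total = 0.0
    for qid, ranking in run.items():
        for rank, docid in enumerate(ranking[:k], start=1):
            if (qid, docid) in qrels:
                total += 1.0 / rank  # reciprocal rank of first relevant hit
                break  # only the first relevant document counts
    return total / len(run)


run = {"q1": ["d3", "d7", "d1"], "q2": ["d9", "d2"]}
qrels = {("q1", "d7"), ("q2", "d4")}
print(mrr_at_k(run, qrels))  # q1 hits at rank 2 -> 0.5; q2 never -> 0; mean = 0.25
```

Queries with no relevant document inside the depth-k cutoff contribute zero, which is why the capped `-M 10` score can sit well below uncapped reciprocal rank.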
 With the corpus downloaded, the following command will perform the remaining steps below:
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached \
-  --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0
+  --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0.parquet
 ```
 ## Indexing
@@ -50,13 +50,13 @@ Sample indexing command, building flat indexes:
 bin/run.sh io.anserini.index.IndexFlatDenseVectors \
   -threads 16 \
   -collection ParquetDenseVectorCollection \
-  -input /path/to/msmarco-passage-cohere-embed-english-v3.0 \
+  -input /path/to/msmarco-passage-cohere-embed-english-v3.0.parquet \
   -generator ParquetDenseVectorDocumentGenerator \
   -index indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ \
-  >& logs/log.msmarco-passage-cohere-embed-english-v3.0 &
+  >& logs/log.msmarco-passage-cohere-embed-english-v3.0.parquet &
 ```
-The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above.
+The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 ## Retrieval
@@ -71,17 +71,17 @@ bin/run.sh io.anserini.search.SearchFlatDenseVectors \
   -index indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ \
   -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.gz \
   -topicReader JsonIntVector \
-  -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \
+  -output runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \
   -hits 1000 -threads 16 &
 ```
 Evaluation can be performed using `trec_eval`:
 ```bash
-bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt
-bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt
-bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt
-bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt
+bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt
+bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt
+bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt
+bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt
 ```
 ## Effectiveness

diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.md
index a3c78cf99..672ea8056 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.md
@@ -33,15 +33,15 @@ Download the corpus and unpack into `collections/`:
 ```bash
 wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/
-tar xvf collections/msmarco-passage-cos-dpr-distil.tar -C collections/
+tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/
 ```
-To confirm, `msmarco-passage-cos-dpr-distil.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`.
+To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`.
 With the corpus downloaded, the following command will perform the remaining steps below:
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached \
-  --corpus-path collections/msmarco-passage-cos-dpr-distil
+  --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet
 ```
 ## Indexing
@@ -52,13 +52,13 @@ Sample indexing command, building flat indexes:
 bin/run.sh io.anserini.index.IndexFlatDenseVectors \
   -threads 16 \
   -collection ParquetDenseVectorCollection \
-  -input /path/to/msmarco-passage-cos-dpr-distil \
+  -input /path/to/msmarco-passage-cos-dpr-distil.parquet \
   -generator ParquetDenseVectorDocumentGenerator \
   -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \
-  >& logs/log.msmarco-passage-cos-dpr-distil &
+  >& logs/log.msmarco-passage-cos-dpr-distil.parquet &
 ```
-The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above.
+The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 ## Retrieval
@@ -73,17 +73,17 @@ bin/run.sh io.anserini.search.SearchFlatDenseVectors \
   -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \
   -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.gz \
   -topicReader JsonIntVector \
-  -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt \
+  -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt \
   -hits 1000 -threads 16 &
 ```
 Evaluation can be performed using `trec_eval`:
 ```bash
-bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt
-bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt
-bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt
-bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt
+bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt
+bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt
+bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt
+bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt
 ```
 ## Effectiveness

diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat.cached.md
index 12acc7f37..8ea76e5f1 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat.cached.md
@@ -33,15 +33,15 @@ Download the corpus and unpack into `collections/`:
 ```bash
 wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar -P collections/
-tar xvf collections/msmarco-passage-openai-ada2.tar -C collections/
+tar xvf collections/msmarco-passage-openai-ada2.parquet.tar -C collections/
 ```
-To confirm, `msmarco-passage-openai-ada2.tar` is 75 GB and has MD5 checksum `fa3637e9c4150b157270e19ef3a4f779`.
+To confirm, `msmarco-passage-openai-ada2.parquet.tar` is 75 GB and has MD5 checksum `fa3637e9c4150b157270e19ef3a4f779`.
 With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.parquet.flat.cached \
-  --corpus-path collections/msmarco-passage-openai-ada2
+  --corpus-path collections/msmarco-passage-openai-ada2.parquet
 ```
 
 ## Indexing
@@ -52,13 +52,13 @@ Sample indexing command, building flat indexes:
 bin/run.sh io.anserini.index.IndexFlatDenseVectors \
   -threads 16 \
   -collection ParquetDenseVectorCollection \
-  -input /path/to/msmarco-passage-openai-ada2 \
+  -input /path/to/msmarco-passage-openai-ada2.parquet \
   -generator ParquetDenseVectorDocumentGenerator \
   -index indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ \
-  >& logs/log.msmarco-passage-openai-ada2 &
+  >& logs/log.msmarco-passage-openai-ada2.parquet &
 ```
-The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above.
+The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above.
 
 Upon completion, we should have an index with 8,841,823 documents.
 
 ## Retrieval
@@ -73,17 +73,17 @@ bin/run.sh io.anserini.search.SearchFlatDenseVectors \
   -index indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ \
   -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.openai-ada2.jsonl.gz \
   -topicReader JsonIntVector \
-  -output runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \
+  -output runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \
   -hits 1000 -threads 16 &
 ```
 
 Evaluation can be performed using `trec_eval`:
 
 ```bash
-bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt
-bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt
-bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt
-bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt
+bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt
+bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt
+bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt
+bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt
 ```
 
 ## Effectiveness
diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml
index f47e8a05c..6b72f68ed 100644
--- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.yaml
@@ -1,5 +1,5 @@
 ---
-corpus: msmarco-passage-bge-base-en-v1.5
+corpus: msmarco-passage-bge-base-en-v1.5.parquet
 corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/
 
 download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar
diff --git a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml
index 7f87b146d..194f553d6 100644
--- a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml
@@ -1,5 +1,5 @@
 ---
-corpus: msmarco-passage-cohere-embed-english-v3.0
+corpus: msmarco-passage-cohere-embed-english-v3.0.parquet
 corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0.parquet/
 
 download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar
diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml
index e4db94942..553e5dbd9 100644
--- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.yaml
@@ -1,5 +1,5 @@
 ---
-corpus: msmarco-passage-cos-dpr-distil
+corpus: msmarco-passage-cos-dpr-distil.parquet
 corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/
 
 download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar
diff --git a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml
index 3c0aee7a3..e2872aee8 100644
--- a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat.cached.yaml
@@ -1,5 +1,5 @@
 ---
-corpus: msmarco-passage-openai-ada2
+corpus: msmarco-passage-openai-ada2.parquet
 corpus_path: collections/msmarco/msmarco-passage-openai-ada2.parquet/
 
 download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar

From 68a8df81f81df5301535693718b08d521b42bc21 Mon Sep 17 00:00:00 2001
From: lintool <jimmylin@uwaterloo.ca>
Date: Wed, 4 Dec 2024 07:18:09 -0500
Subject: [PATCH 05/14] Added all MS MARCO v1 passage conditions.
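The YAML conditions added in this patch all scrape `trec_eval` output using `separator: "\t"` and `parse_index: 2`; a minimal sketch of what that parsing amounts to (illustrative only, not the framework's actual code):

```shell
# trec_eval emits tab-separated lines like "map<TAB>all<TAB>0.3942";
# the YAML's separator "\t" with parse_index 2 (0-indexed) selects
# the third field, i.e. the metric value.
printf 'map\tall\t0.3942\n' | cut -f3
# prints: 0.3942
```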
---
 ...base-en-v1.5.parquet.flat-int8.cached.yaml | 74 +++++++++++++++++++
 ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 74 +++++++++++++++++++
 ...ge.bge-base-en-v1.5.parquet.flat.onnx.yaml | 74 +++++++++++++++++++
 ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 74 +++++++++++++++++++
 ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 74 +++++++++++++++++++
 ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 74 +++++++++++++++++++
 ...ge.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 74 +++++++++++++++++++
 ...english-v3.0.parquet.flat-int8.cached.yaml | 74 +++++++++++++++++++
 ...english-v3.0.parquet.hnsw-int8.cached.yaml | 74 +++++++++++++++++++
 ...mbed-english-v3.0.parquet.hnsw.cached.yaml | 74 +++++++++++++++++++
 ...s-dpr-distil.parquet.flat-int8.cached.yaml | 74 +++++++++++++++++++
 ...cos-dpr-distil.parquet.flat-int8.onnx.yaml | 74 +++++++++++++++++++
 ...sage.cos-dpr-distil.parquet.flat.onnx.yaml | 74 +++++++++++++++++++
 ...s-dpr-distil.parquet.hnsw-int8.cached.yaml | 74 +++++++++++++++++++
 ...cos-dpr-distil.parquet.hnsw-int8.onnx.yaml | 74 +++++++++++++++++++
 ...ge.cos-dpr-distil.parquet.hnsw.cached.yaml | 74 +++++++++++++++++++
 ...sage.cos-dpr-distil.parquet.hnsw.onnx.yaml | 74 +++++++++++++++++++
 ....openai-ada2.parquet.flat-int8.cached.yaml | 74 +++++++++++++++++++
 ....openai-ada2.parquet.hnsw-int8.cached.yaml | 74 +++++++++++++++++++
 ...ssage.openai-ada2.parquet.hnsw.cached.yaml | 74 +++++++++++++++++++
 20 files changed, 1480 insertions(+)
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.yaml
 create mode 100644 src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.yaml

diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached.yaml
new file mode 100644
index 000000000..653539586
--- /dev/null
+++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached.yaml
@@ -0,0 +1,74 @@
+---
+corpus: msmarco-passage-bge-base-en-v1.5.parquet
+corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/
+
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar
+download_checksum: b235e19ec492c18a18057b30b8b23fd4
+
+index_path: indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/
+index_type: flat
+collection_class: ParquetDenseVectorCollection
+generator_class: ParquetDenseVectorDocumentGenerator
+index_threads: 16
+index_options: -quantize.int8
+
+metrics:
+  - metric: AP@1000
+    command: bin/trec_eval
+    params: -c -m map
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: RR@10
+    command: bin/trec_eval
+    params: -c -M 10 -m recip_rank
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@100
+    command: bin/trec_eval
+    params: -c -m recall.100
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@1000
+    command: bin/trec_eval
+    params: -c -m recall.1000
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+
+topic_reader: JsonIntVector
+topics:
+  - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)"
+    id: dev
+    path: topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.gz
+    qrel: qrels.msmarco-passage.dev-subset.txt
+
+models:
+  - name: bge-flat-int8-cached
+    display: BGE-base-en-v1.5
+    type: flat
+    params: -hits 1000 -threads 16
+    results:
+      AP@1000:
+        - 0.3641
+      RR@10:
+        - 0.3583
+      R@100:
+        - 0.9006
+      R@1000:
+        - 0.9811
+    tolerance:
+      AP@1000:
+        - 0.002
+      RR@10:
+        - 0.002
+      R@100:
+        - 0.002
+      R@1000:
+        - 0.001
diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml
new file mode 100644
index 000000000..acaff4559
--- /dev/null
+++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml
@@ -0,0 +1,74 @@
+---
+corpus: msmarco-passage-bge-base-en-v1.5.parquet
+corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/
+
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar
+download_checksum: b235e19ec492c18a18057b30b8b23fd4
+
+index_path: indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/
+index_type: flat
+collection_class: ParquetDenseVectorCollection
+generator_class: ParquetDenseVectorDocumentGenerator
+index_threads: 16
+index_options: -quantize.int8
+
+metrics:
+  - metric: AP@1000
+    command: bin/trec_eval
+    params: -c -m map
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: RR@10
+    command: bin/trec_eval
+    params: -c -M 10 -m recip_rank
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@100
+    command: bin/trec_eval
+    params: -c -m recall.100
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@1000
+    command: bin/trec_eval
+    params: -c -m recall.1000
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+
+topic_reader: TsvInt
+topics:
+  - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)"
+    id: dev
+    path: topics.msmarco-passage.dev-subset.txt
+    qrel: qrels.msmarco-passage.dev-subset.txt
+
+models:
+  - name: bge-flat-int8-onnx
+    display: BGE-base-en-v1.5
+    type: flat
+    params: -encoder BgeBaseEn15 -hits 1000 -threads 16
+    results:
+      AP@1000:
+        - 0.3641
+      RR@10:
+        - 0.3583
+      R@100:
+        - 0.9006
+      R@1000:
+        - 0.9811
+    tolerance:
+      AP@1000:
+        - 0.002
+      RR@10:
+        - 0.002
+      R@100:
+        - 0.002
+      R@1000:
+        - 0.001
diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx.yaml
new file mode 100644
index 000000000..acbbccc3d
--- /dev/null
+++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx.yaml
@@ -0,0 +1,74 @@
+---
+corpus: msmarco-passage-bge-base-en-v1.5.parquet
+corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/
+
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar
+download_checksum: b235e19ec492c18a18057b30b8b23fd4
+
+index_path: indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/
+index_type: flat
+collection_class: ParquetDenseVectorCollection
+generator_class: ParquetDenseVectorDocumentGenerator
+index_threads: 16
+index_options: ""
+
+metrics:
+  - metric: AP@1000
+    command: bin/trec_eval
+    params: -c -m map
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: RR@10
+    command: bin/trec_eval
+    params: -c -M 10 -m recip_rank
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@100
+    command: bin/trec_eval
+    params: -c -m recall.100
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@1000
+    command: bin/trec_eval
+    params: -c -m recall.1000
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+
+topic_reader: TsvInt
+topics:
+  - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)"
+    id: dev
+    path: topics.msmarco-passage.dev-subset.txt
+    qrel: qrels.msmarco-passage.dev-subset.txt
+
+models:
+  - name: bge-flat-onnx
+    display: BGE-base-en-v1.5
+    type: flat
+    params: -encoder BgeBaseEn15 -hits 1000 -threads 16
+    results:
+      AP@1000:
+        - 0.3641
+      RR@10:
+        - 0.3583
+      R@100:
+        - 0.9006
+      R@1000:
+        - 0.9811
+    tolerance:
+      AP@1000:
+        - 0.001
+      RR@10:
+        - 0.001
+      R@100:
+        - 0.001
+      R@1000:
+        - 0.001
diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml
new file mode 100644
index 000000000..1c112716d
--- /dev/null
+++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml
@@ -0,0 +1,74 @@
+---
+corpus: msmarco-passage-bge-base-en-v1.5.parquet
+corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/
+
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar
+download_checksum: b235e19ec492c18a18057b30b8b23fd4
+
+index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/
+index_type: hnsw
+collection_class: ParquetDenseVectorCollection
+generator_class: ParquetDenseVectorDocumentGenerator
+index_threads: 16
+index_options: -M 16 -efC 100 -quantize.int8
+
+metrics:
+  - metric: AP@1000
+    command: bin/trec_eval
+    params: -c -m map
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: RR@10
+    command: bin/trec_eval
+    params: -c -M 10 -m recip_rank
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@100
+    command: bin/trec_eval
+    params: -c -m recall.100
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@1000
+    command: bin/trec_eval
+    params: -c -m recall.1000
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+
+topic_reader: JsonIntVector
+topics:
+  - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)"
+    id: dev
+    path: topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.gz
+    qrel: qrels.msmarco-passage.dev-subset.txt
+
+models:
+  - name: bge-hnsw-int8-cached
+    display: BGE-base-en-v1.5
+    type: hnsw
+    params: -hits 1000 -efSearch 1000 -threads 16
+    results:
+      AP@1000:
+        - 0.3641
+      RR@10:
+        - 0.3583
+      R@100:
+        - 0.9006
+      R@1000:
+        - 0.9811
+    tolerance:
+      AP@1000:
+        - 0.004
+      RR@10:
+        - 0.004
+      R@100:
+        - 0.01
+      R@1000:
+        - 0.015
diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml
new file mode 100644
index 000000000..f19d31534
--- /dev/null
+++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml
@@ -0,0 +1,74 @@
+---
+corpus: msmarco-passage-bge-base-en-v1.5.parquet
+corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/
+
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar
+download_checksum: b235e19ec492c18a18057b30b8b23fd4
+
+index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/
+index_type: hnsw
+collection_class: ParquetDenseVectorCollection
+generator_class: ParquetDenseVectorDocumentGenerator
+index_threads: 16
+index_options: -M 16 -efC 100 -quantize.int8
+
+metrics:
+  - metric: AP@1000
+    command: bin/trec_eval
+    params: -c -m map
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: RR@10
+    command: bin/trec_eval
+    params: -c -M 10 -m recip_rank
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@100
+    command: bin/trec_eval
+    params: -c -m recall.100
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@1000
+    command: bin/trec_eval
+    params: -c -m recall.1000
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+
+topic_reader: TsvInt
+topics:
+  - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)"
+    id: dev
+    path: topics.msmarco-passage.dev-subset.txt
+    qrel: qrels.msmarco-passage.dev-subset.txt
+
+models:
+  - name: bge-hnsw-int8-onnx
+    display: BGE-base-en-v1.5
+    type: hnsw
+    params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -threads 16
+    results:
+      AP@1000:
+        - 0.3641
+      RR@10:
+        - 0.3583
+      R@100:
+        - 0.9006
+      R@1000:
+        - 0.9811
+    tolerance:
+      AP@1000:
+        - 0.004
+      RR@10:
+        - 0.004
+      R@100:
+        - 0.01
+      R@1000:
+        - 0.015
diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.yaml
new file mode 100644
index 000000000..7a1a6fb79
--- /dev/null
+++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.yaml
@@ -0,0 +1,74 @@
+---
+corpus: msmarco-passage-bge-base-en-v1.5.parquet
+corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/
+
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar
+download_checksum: b235e19ec492c18a18057b30b8b23fd4
+
+index_path: indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/
+index_type: hnsw
+collection_class: ParquetDenseVectorCollection
+generator_class: ParquetDenseVectorDocumentGenerator
+index_threads: 16
+index_options: -M 16 -efC 100
+
+metrics:
+  - metric: AP@1000
+    command: bin/trec_eval
+    params: -c -m map
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: RR@10
+    command: bin/trec_eval
+    params: -c -M 10 -m recip_rank
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@100
+    command: bin/trec_eval
+    params: -c -m recall.100
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@1000
+    command: bin/trec_eval
+    params: -c -m recall.1000
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+
+topic_reader: JsonIntVector
+topics:
+  - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)"
+    id: dev
+    path: topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.gz
+    qrel: qrels.msmarco-passage.dev-subset.txt
+
+models:
+  - name: bge-hnsw-cached
+    display: BGE-base-en-v1.5
+    type: hnsw
+    params: -hits 1000 -efSearch 1000 -threads 16
+    results:
+      AP@1000:
+        - 0.3641
+      RR@10:
+        - 0.3583
+      R@100:
+        - 0.9006
+      R@1000:
+        - 0.9811
+    tolerance:
+      AP@1000:
+        - 0.002
+      RR@10:
+        - 0.002
+      R@100:
+        - 0.008
+      R@1000:
+        - 0.01
diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.yaml
new file mode 100644
index 000000000..7c1839958
--- /dev/null
+++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.yaml
@@ -0,0 +1,74 @@
+---
+corpus: msmarco-passage-bge-base-en-v1.5.parquet
+corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/
+
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar
+download_checksum: b235e19ec492c18a18057b30b8b23fd4
+
+index_path: indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/
+index_type: hnsw
+collection_class: ParquetDenseVectorCollection
+generator_class: ParquetDenseVectorDocumentGenerator
+index_threads: 16
+index_options: -M 16 -efC 100
+
+metrics:
+  - metric: AP@1000
+    command: bin/trec_eval
+    params: -c -m map
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: RR@10
+    command: bin/trec_eval
+    params: -c -M 10 -m recip_rank
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@100
+    command: bin/trec_eval
+    params: -c -m recall.100
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@1000
+    command: bin/trec_eval
+    params: -c -m recall.1000
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+
+topic_reader: TsvInt
+topics:
+  - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)"
+    id: dev
+    path: topics.msmarco-passage.dev-subset.txt
+    qrel: qrels.msmarco-passage.dev-subset.txt
+
+models:
+  - name: bge-hnsw-onnx
+    display: BGE-base-en-v1.5
+    type: hnsw
+    params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -threads 16
+    results:
+      AP@1000:
+        - 0.3641
+      RR@10:
+        - 0.3583
+      R@100:
+        - 0.9006
+      R@1000:
+        - 0.9811
+    tolerance:
+      AP@1000:
+        - 0.002
+      RR@10:
+        - 0.002
+      R@100:
+        - 0.008
+      R@1000:
+        - 0.01
diff --git a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.yaml
new file mode 100644
index 000000000..6eeeaf464
--- /dev/null
+++ b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.yaml
@@ -0,0 +1,74 @@
+---
+corpus: msmarco-passage-cohere-embed-english-v3.0.parquet
+corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0.parquet/
+
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar
+download_checksum: 40c5caf33476746e93ceeb75174b8d64
+
+index_path: indexes/lucene-flat-int8.msmarco-v1-passage.cohere-embed-english-v3.0/
+index_type: flat
+collection_class: ParquetDenseVectorCollection
+generator_class: ParquetDenseVectorDocumentGenerator
+index_threads: 16
+index_options: -quantize.int8
+
+metrics:
+  - metric: AP@1000
+    command: bin/trec_eval
+    params: -c -m map
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: RR@10
+    command: bin/trec_eval
+    params: -c -M 10 -m recip_rank
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@100
+    command: bin/trec_eval
+    params: -c -m recall.100
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@1000
+    command: bin/trec_eval
+    params: -c -m recall.1000
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+
+topic_reader: JsonIntVector
+topics:
+  - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)"
+    id: dev
+    path: topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.gz
+    qrel: qrels.msmarco-passage.dev-subset.txt
+
+models:
+  - name: cohere-embed-english-v3.0-flat-int8-cached
+    display: cohere-embed-english-v3.0
+    type: flat
+    params: -hits 1000 -threads 16
+    results:
+      AP@1000:
+        - 0.3716
+      RR@10:
+        - 0.3658
+      R@100:
+        - 0.8935
+      R@1000:
+        - 0.9786
+    tolerance:
+      AP@1000:
+        - 0.003
+      RR@10:
+        - 0.003
+      R@100:
+        - 0.003
+      R@1000:
+        - 0.009
diff --git a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.yaml
new file mode 100644
index 000000000..a40eb51e2
--- /dev/null
+++ b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.yaml
@@ -0,0 +1,74 @@
+---
+corpus: msmarco-passage-cohere-embed-english-v3.0.parquet
+corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0.parquet/
+
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar
+download_checksum: 40c5caf33476746e93ceeb75174b8d64
+
+index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/
+index_type: hnsw
+collection_class: ParquetDenseVectorCollection
+generator_class: ParquetDenseVectorDocumentGenerator
+index_threads: 16
+index_options: -M 16 -efC 100 -quantize.int8
+
+metrics:
+  - metric: AP@1000
+    command: bin/trec_eval
+    params: -c -m map
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: RR@10
+    command: bin/trec_eval
+    params: -c -M 10 -m recip_rank
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@100
+    command: bin/trec_eval
+    params: -c -m recall.100
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@1000
+    command: bin/trec_eval
+    params: -c -m recall.1000
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+
+topic_reader: JsonIntVector
+topics:
+  - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)"
+    id: dev
+    path: topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.gz
+    qrel: qrels.msmarco-passage.dev-subset.txt
+
+models:
+  - name: cohere-embed-english-v3.0-hnsw-int8-cached
+    display: cohere-embed-english-v3.0
+    type: hnsw
+    params: -hits 1000 -efSearch 1000 -threads 16
+    results:
+      AP@1000:
+        - 0.3716
+      RR@10:
+        - 0.3658
+      R@100:
+        - 0.8935
+      R@1000:
+        - 0.9786
+    tolerance:
+      AP@1000:
+        - 0.004
+      RR@10:
+        - 0.004
+      R@100:
+        - 0.01
+      R@1000:
+        - 0.01
diff --git a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.yaml
new file mode 100644
index 000000000..95a5c5d93
--- /dev/null
+++ b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.yaml
@@ -0,0 +1,74 @@
+---
+corpus: msmarco-passage-cohere-embed-english-v3.0.parquet
+corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0.parquet/
+
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar
+download_checksum: 40c5caf33476746e93ceeb75174b8d64
+
+index_path: indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/
+index_type: hnsw
+collection_class: ParquetDenseVectorCollection
+generator_class: ParquetDenseVectorDocumentGenerator
+index_threads: 16
+index_options: -M 16 -efC 100
+
+metrics:
+  - metric: AP@1000
+    command: bin/trec_eval
+    params: -c -m map
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: RR@10
+    command: bin/trec_eval
+    params: -c -M 10 -m recip_rank
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@100
+    command: bin/trec_eval
+    params: -c -m recall.100
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@1000
+    command: bin/trec_eval
+    params: -c -m recall.1000
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+
+topic_reader: JsonIntVector
+topics:
+  - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)"
+    id: dev
+    path: topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.gz
+    qrel: qrels.msmarco-passage.dev-subset.txt
+
+models:
+  - name: cohere-embed-english-v3.0-hnsw-cached
+    display: cohere-embed-english-v3.0
+    type: hnsw
+    params: -hits 1000 -efSearch 1000 -threads 16
+    results:
+      AP@1000:
+        - 0.3716
+      RR@10:
+        - 0.3658
+      R@100:
+        - 0.8935
+      R@1000:
+        - 0.9786
+    tolerance:
+      AP@1000:
+        - 0.004
+      RR@10:
+        - 0.004
+      R@100:
+        - 0.015
+      R@1000:
+        - 0.015
diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached.yaml
new file mode 100644
index 000000000..86987a803
--- /dev/null
+++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached.yaml
@@ -0,0 +1,74 @@
+---
+corpus: msmarco-passage-cos-dpr-distil.parquet
+corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/
+
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar
+download_checksum: c8a204fbc3ccda581aa375936af43a97
+
+index_path: indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/
+index_type: flat
+collection_class: ParquetDenseVectorCollection
+generator_class: ParquetDenseVectorDocumentGenerator
+index_threads: 16
+index_options: -quantize.int8
+
+metrics:
+  - metric: AP@1000
+    command: bin/trec_eval
+    params: -c -m map
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: RR@10
+    command: bin/trec_eval
+    params: -c -M 10 -m recip_rank
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@100
+    command: bin/trec_eval
+    params: -c -m recall.100
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@1000
+    command: bin/trec_eval
+    params: -c -m recall.1000
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+
+topic_reader: JsonIntVector
+topics:
+  - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)"
+    id: dev
+    path: topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.gz
+    qrel: qrels.msmarco-passage.dev-subset.txt
+
+models:
+  - name: cos-dpr-distil-flat-int8-cached
+    display: cosDPR-distil
+    type: flat
+    params: -hits 1000 -threads 16
+    results:
+      AP@1000:
+        - 0.3942
+      RR@10:
+        - 0.3896
+      R@100:
+        - 0.9075
+      R@1000:
+        - 0.9796
+    tolerance:
+      AP@1000:
+        - 0.001
+      RR@10:
+        - 0.001
+      R@100:
+        - 0.002
+      R@1000:
+        - 0.001
diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx.yaml
new file mode 100644
index 000000000..9ffb6997f
--- /dev/null
+++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx.yaml
@@ -0,0 +1,74 @@
+---
+corpus: msmarco-passage-cos-dpr-distil.parquet
+corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/
+
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar
+download_checksum: c8a204fbc3ccda581aa375936af43a97
+
+index_path: indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/
+index_type: flat
+collection_class: ParquetDenseVectorCollection
+generator_class: ParquetDenseVectorDocumentGenerator
+index_threads: 16
+index_options: -quantize.int8
+
+metrics:
+  - metric: AP@1000
+    command: bin/trec_eval
+    params: -c -m map
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: RR@10
+    command: bin/trec_eval
+    params: -c -M 10 -m recip_rank
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@100
+    command: bin/trec_eval
+    params: -c -m recall.100
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+  - metric: R@1000
+    command: bin/trec_eval
+    params: -c -m recall.1000
+    separator: "\t"
+    parse_index: 2
+    metric_precision: 4
+    can_combine: false
+
+topic_reader: TsvInt
+topics:
+  - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)"
+    id: dev
+    path: topics.msmarco-passage.dev-subset.txt
+    qrel: qrels.msmarco-passage.dev-subset.txt
+
+models:
+  - name: cos-dpr-distil-flat-int8-onnx
+    display: cosDPR-distil
+    type: flat
+    params: -encoder CosDprDistil -hits 1000 -threads 16
+    results:
+      AP@1000:
+        - 0.3942
+      RR@10:
+        - 0.3896
+      R@100:
+        - 0.9075
+      R@1000:
+        - 0.9796
+    tolerance:
+      AP@1000:
+        - 0.001
+      RR@10:
+        - 0.001
+      R@100:
+        - 0.001
+      R@1000:
+        - 0.001
diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx.yaml
new file mode 100644
index 000000000..631883df3
--- /dev/null
+++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx.yaml
@@ -0,0 +1,74 @@
+---
+corpus: msmarco-passage-cos-dpr-distil.parquet
+corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/
+
+download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar
+download_checksum: c8a204fbc3ccda581aa375936af43a97
+
+index_path: indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/
+index_type: flat
+collection_class: ParquetDenseVectorCollection
+generator_class: ParquetDenseVectorDocumentGenerator
+index_threads: 16
+index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -c -m map + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: RR@10 + command: bin/trec_eval + params: -c -M 10 -m recip_rank + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" + id: dev + path: topics.msmarco-passage.dev-subset.txt + qrel: qrels.msmarco-passage.dev-subset.txt + +models: + - name: cos-dpr-distil-flat-onnx + display: cosDPR-distil + type: flat + params: -encoder CosDprDistil -hits 1000 -threads 16 + results: + AP@1000: + - 0.3942 + RR@10: + - 0.3896 + R@100: + - 0.9075 + R@1000: + - 0.9796 + tolerance: + AP@1000: + - 0.001 + RR@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..7d1e8aa3d --- /dev/null +++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: 
ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -c -m map + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: RR@10 + command: bin/trec_eval + params: -c -M 10 -m recip_rank + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" + id: dev + path: topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.gz + qrel: qrels.msmarco-passage.dev-subset.txt + +models: + - name: cos-dpr-distil-hnsw-int8-cached + display: cosDPR-distil + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.3942 + RR@10: + - 0.3896 + R@100: + - 0.9075 + R@1000: + - 0.9796 + tolerance: + AP@1000: + - 0.003 + RR@10: + - 0.003 + R@100: + - 0.01 + R@1000: + - 0.015 diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..7f0ade3d0 --- /dev/null +++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: 
indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -c -m map + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: RR@10 + command: bin/trec_eval + params: -c -M 10 -m recip_rank + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" + id: dev + path: topics.msmarco-passage.dev-subset.txt + qrel: qrels.msmarco-passage.dev-subset.txt + +models: + - name: cos-dpr-distil-hnsw-int8-onnx + display: cosDPR-distil + type: hnsw + params: -encoder CosDprDistil -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.3942 + RR@10: + - 0.3896 + R@100: + - 0.9075 + R@1000: + - 0.9796 + tolerance: + AP@1000: + - 0.003 + RR@10: + - 0.003 + R@100: + - 0.01 + R@1000: + - 0.015 diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..ab5ec4e57 --- /dev/null +++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: 
https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -c -m map + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: RR@10 + command: bin/trec_eval + params: -c -M 10 -m recip_rank + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" + id: dev + path: topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.gz + qrel: qrels.msmarco-passage.dev-subset.txt + +models: + - name: cos-dpr-distil-hnsw-cached + display: cosDPR-distil + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.3942 + RR@10: + - 0.3896 + R@100: + - 0.9075 + R@1000: + - 0.9796 + tolerance: + AP@1000: + - 0.004 + RR@10: + - 0.004 + R@100: + - 0.015 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..e7e91a8d6 --- /dev/null +++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: 
collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -c -m map + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: RR@10 + command: bin/trec_eval + params: -c -M 10 -m recip_rank + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" + id: dev + path: topics.msmarco-passage.dev-subset.txt + qrel: qrels.msmarco-passage.dev-subset.txt + +models: + - name: cos-dpr-distil-hnsw-onnx + display: cosDPR-distil + type: hnsw + params: -encoder CosDprDistil -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.3942 + RR@10: + - 0.3896 + R@100: + - 0.9075 + R@1000: + - 0.9796 + tolerance: + AP@1000: + - 0.005 + RR@10: + - 0.004 + R@100: + - 0.015 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..38cb8f8cc --- /dev/null +++ b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached.yaml @@ -0,0 +1,74 @@ 
+--- +corpus: msmarco-passage-openai-ada2.parquet +corpus_path: collections/msmarco/msmarco-passage-openai-ada2.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar +download_checksum: fa3637e9c4150b157270e19ef3a4f779 + +index_path: indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -c -m map + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: RR@10 + command: bin/trec_eval + params: -c -M 10 -m recip_rank + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" + id: dev + path: topics.msmarco-passage.dev-subset.openai-ada2.jsonl.gz + qrel: qrels.msmarco-passage.dev-subset.txt + +models: + - name: openai-ada2-flat-int8-cached + display: OpenAI-ada2 + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.3505 + RR@10: + - 0.3434 + R@100: + - 0.8996 + R@1000: + - 0.9858 + tolerance: + AP@1000: + - 0.008 + RR@10: + - 0.009 + R@100: + - 0.006 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..fee6401b8 --- /dev/null +++ 
b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-openai-ada2.parquet +corpus_path: collections/msmarco/msmarco-passage-openai-ada2.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar +download_checksum: fa3637e9c4150b157270e19ef3a4f779 + +index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -c -m map + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: RR@10 + command: bin/trec_eval + params: -c -M 10 -m recip_rank + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" + id: dev + path: topics.msmarco-passage.dev-subset.openai-ada2.jsonl.gz + qrel: qrels.msmarco-passage.dev-subset.txt + +models: + - name: openai-ada2-hnsw-int8-cached + display: OpenAI-ada2 + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.3505 + RR@10: + - 0.3434 + R@100: + - 0.8996 + R@1000: + - 0.9858 + tolerance: + AP@1000: + - 0.015 + RR@10: + - 0.015 + R@100: + - 0.01 + R@1000: + - 0.006 diff --git a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.yaml 
b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..65967d7b3 --- /dev/null +++ b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-openai-ada2.parquet +corpus_path: collections/msmarco/msmarco-passage-openai-ada2.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar +download_checksum: fa3637e9c4150b157270e19ef3a4f779 + +index_path: indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -c -m map + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: RR@10 + command: bin/trec_eval + params: -c -M 10 -m recip_rank + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)" + id: dev + path: topics.msmarco-passage.dev-subset.openai-ada2.jsonl.gz + qrel: qrels.msmarco-passage.dev-subset.txt + +models: + - name: openai-ada2-hnsw-cached + display: OpenAI-ada2 + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.3505 + RR@10: + - 0.3434 + R@100: + - 0.8996 + R@1000: + - 0.9858 + tolerance: + AP@1000: + - 0.002 + RR@10: + - 0.002 + R@100: + - 0.005 + R@1000: + - 0.005 From 4c482da97f84a48db1f9b04c3799e7d9f3c502f2 Mon Sep 17 
00:00:00 2001 From: lintool Date: Thu, 5 Dec 2024 14:10:36 -0500 Subject: [PATCH 06/14] increased mem --- bin/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/run.sh b/bin/run.sh index d0d07d61b..5edfa6c7c 100755 --- a/bin/run.sh +++ b/bin/run.sh @@ -1,3 +1,3 @@ #!/bin/sh -java -cp `ls target/*-fatjar.jar` -Xms512M -Xmx128G --add-modules jdk.incubator.vector $@ +java -cp `ls target/*-fatjar.jar` -Xms512M -Xmx192G --add-modules jdk.incubator.vector $@ From f8342b69cfe952222c1942b367654a33b57aa5a2 Mon Sep 17 00:00:00 2001 From: lintool Date: Thu, 5 Dec 2024 14:19:45 -0500 Subject: [PATCH 07/14] Added dl19 and dl20 yaml. --- ...base-en-v1.5.parquet.flat-int8.cached.yaml | 74 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 74 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 65 ++++++++++++++++ ...ge.bge-base-en-v1.5.parquet.flat.onnx.yaml | 74 +++++++++++++++++++ ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 74 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 74 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 74 +++++++++++++++++++ ...ge.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 74 +++++++++++++++++++ ...english-v3.0.parquet.flat-int8.cached.yaml | 74 +++++++++++++++++++ ...mbed-english-v3.0.parquet.flat.cached.yaml | 65 ++++++++++++++++ ...english-v3.0.parquet.hnsw-int8.cached.yaml | 74 +++++++++++++++++++ ...mbed-english-v3.0.parquet.hnsw.cached.yaml | 74 +++++++++++++++++++ ...s-dpr-distil.parquet.flat-int8.cached.yaml | 74 +++++++++++++++++++ ...cos-dpr-distil.parquet.flat-int8.onnx.yaml | 74 +++++++++++++++++++ ...ge.cos-dpr-distil.parquet.flat.cached.yaml | 65 ++++++++++++++++ ...sage.cos-dpr-distil.parquet.flat.onnx.yaml | 74 +++++++++++++++++++ ...s-dpr-distil.parquet.hnsw-int8.cached.yaml | 74 +++++++++++++++++++ ...cos-dpr-distil.parquet.hnsw-int8.onnx.yaml | 74 +++++++++++++++++++ ...ge.cos-dpr-distil.parquet.hnsw.cached.yaml | 74 +++++++++++++++++++ 
...sage.cos-dpr-distil.parquet.hnsw.onnx.yaml | 74 +++++++++++++++++++ ....openai-ada2.parquet.flat-int8.cached.yaml | 74 +++++++++++++++++++ ...ssage.openai-ada2.parquet.flat.cached.yaml | 65 ++++++++++++++++ ....openai-ada2.parquet.hnsw-int8.cached.yaml | 74 +++++++++++++++++++ ...ssage.openai-ada2.parquet.hnsw.cached.yaml | 74 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 74 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 74 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.flat.cached.yaml | 65 ++++++++++++++++ ...ge.bge-base-en-v1.5.parquet.flat.onnx.yaml | 74 +++++++++++++++++++ ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 74 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 74 +++++++++++++++++++ ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 74 +++++++++++++++++++ ...ge.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 74 +++++++++++++++++++ ...english-v3.0.parquet.flat-int8.cached.yaml | 74 +++++++++++++++++++ ...mbed-english-v3.0.parquet.flat.cached.yaml | 65 ++++++++++++++++ ...english-v3.0.parquet.hnsw-int8.cached.yaml | 74 +++++++++++++++++++ ...mbed-english-v3.0.parquet.hnsw.cached.yaml | 74 +++++++++++++++++++ ...s-dpr-distil.parquet.flat-int8.cached.yaml | 74 +++++++++++++++++++ ...cos-dpr-distil.parquet.flat-int8.onnx.yaml | 74 +++++++++++++++++++ ...ge.cos-dpr-distil.parquet.flat.cached.yaml | 65 ++++++++++++++++ ...sage.cos-dpr-distil.parquet.flat.onnx.yaml | 74 +++++++++++++++++++ ...s-dpr-distil.parquet.hnsw-int8.cached.yaml | 74 +++++++++++++++++++ ...cos-dpr-distil.parquet.hnsw-int8.onnx.yaml | 74 +++++++++++++++++++ ...ge.cos-dpr-distil.parquet.hnsw.cached.yaml | 74 +++++++++++++++++++ ...sage.cos-dpr-distil.parquet.hnsw.onnx.yaml | 74 +++++++++++++++++++ ....openai-ada2.parquet.flat-int8.cached.yaml | 74 +++++++++++++++++++ ...ssage.openai-ada2.parquet.flat.cached.yaml | 65 ++++++++++++++++ ....openai-ada2.parquet.hnsw-int8.cached.yaml | 74 +++++++++++++++++++ 
...ssage.openai-ada2.parquet.hnsw.cached.yaml | 74 +++++++++++++++++++ 48 files changed, 3480 insertions(+) create mode 100644 src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.yaml create mode 100644 
src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.yaml 
create mode 100644 src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat.onnx.yaml create mode 100644 src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw.cached.yaml create mode 100644 src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.yaml create mode 100644 src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat.cached.yaml create mode 100644 src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw.cached.yaml diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..420d078d8 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5.parquet +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar +download_checksum: b235e19ec492c18a18057b30b8b23fd4 + +index_path: indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ +index_type: flat 
+collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.bge-base-en-v1.5.jsonl.gz + qrel: qrels.dl19-passage.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.4435 + nDCG@10: + - 0.7065 + R@100: + - 0.6171 + R@1000: + - 0.8472 + tolerance: + AP@1000: + - 0.001 + nDCG@10: + - 0.004 + R@100: + - 0.007 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..7d7bfe340 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5.parquet +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar +download_checksum: b235e19ec492c18a18057b30b8b23fd4 + +index_path:
indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.txt + qrel: qrels.dl19-passage.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -threads 16 + results: + AP@1000: + - 0.4435 + nDCG@10: + - 0.7065 + R@100: + - 0.6171 + R@1000: + - 0.8472 + tolerance: + AP@1000: + - 0.004 + nDCG@10: + - 0.006 + R@100: + - 0.007 + R@1000: + - 0.005 diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..e8c2a9f72 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,65 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5.parquet +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar +download_checksum: b235e19ec492c18a18057b30b8b23fd4 +
+index_path: indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.bge-base-en-v1.5.jsonl.gz + qrel: qrels.dl19-passage.txt + +models: + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.4435 + nDCG@10: + - 0.7065 + R@100: + - 0.6171 + R@1000: + - 0.8472 diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..598569561 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5.parquet +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar +download_checksum: b235e19ec492c18a18057b30b8b23fd4 + +index_path: indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ +index_type: flat 
+collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.txt + qrel: qrels.dl19-passage.txt + +models: + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -threads 16 + results: + AP@1000: + - 0.4435 + nDCG@10: + - 0.7065 + R@100: + - 0.6171 + R@1000: + - 0.8472 + tolerance: + AP@1000: + - 0.006 + nDCG@10: + - 0.005 + R@100: + - 0.008 + R@1000: + - 0.005 diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..3d2479b91 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5.parquet +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar +download_checksum: b235e19ec492c18a18057b30b8b23fd4 + +index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ 
+index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.bge-base-en-v1.5.jsonl.gz + qrel: qrels.dl19-passage.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4435 + nDCG@10: + - 0.7065 + R@100: + - 0.6171 + R@1000: + - 0.8472 + tolerance: + AP@1000: + - 0.015 + nDCG@10: + - 0.02 + R@100: + - 0.03 + R@1000: + - 0.03 diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..f2612d3e3 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5.parquet +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar +download_checksum: b235e19ec492c18a18057b30b8b23fd4 + +index_path: 
indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.txt + qrel: qrels.dl19-passage.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4435 + nDCG@10: + - 0.7065 + R@100: + - 0.6171 + R@1000: + - 0.8472 + tolerance: + AP@1000: + - 0.01 + nDCG@10: + - 0.02 + R@100: + - 0.025 + R@1000: + - 0.03 diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..da5a012c5 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5.parquet +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar +download_checksum: 
b235e19ec492c18a18057b30b8b23fd4 + +index_path: indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.bge-base-en-v1.5.jsonl.gz + qrel: qrels.dl19-passage.txt + +models: + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4435 + nDCG@10: + - 0.7065 + R@100: + - 0.6171 + R@1000: + - 0.8472 + tolerance: + AP@1000: + - 0.008 + nDCG@10: + - 0.009 + R@100: + - 0.009 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..282ff9141 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5.parquet +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar 
+download_checksum: b235e19ec492c18a18057b30b8b23fd4 + +index_path: indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.txt + qrel: qrels.dl19-passage.txt + +models: + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4435 + nDCG@10: + - 0.7065 + R@100: + - 0.6171 + R@1000: + - 0.8472 + tolerance: + AP@1000: + - 0.002 + nDCG@10: + - 0.015 + R@100: + - 0.02 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.yaml b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..806564d71 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cohere-embed-english-v3.0.parquet +corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0.parquet/ + +download_url: 
https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar +download_checksum: 40c5caf33476746e93ceeb75174b8d64 + +index_path: indexes/lucene-flat-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.cohere-embed-english-v3.0.jsonl.gz + qrel: qrels.dl19-passage.txt + +models: + - name: cohere-embed-english-v3.0-flat-int8-cached + display: cohere-embed-english-v3.0 + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.4884 + nDCG@10: + - 0.6956 + R@100: + - 0.6484 + R@1000: + - 0.8630 + tolerance: + AP@1000: + - 0.001 + nDCG@10: + - 0.002 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml new file mode 100644 index 000000000..ffc5fe3d1 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml @@ -0,0 +1,65 @@ +--- +corpus: 
msmarco-passage-cohere-embed-english-v3.0.parquet +corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar +download_checksum: 40c5caf33476746e93ceeb75174b8d64 + +index_path: indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.cohere-embed-english-v3.0.jsonl.gz + qrel: qrels.dl19-passage.txt + +models: + - name: cohere-embed-english-v3.0-flat-cached + display: cohere-embed-english-v3.0 + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.4884 + nDCG@10: + - 0.6956 + R@100: + - 0.6484 + R@1000: + - 0.8630 \ No newline at end of file diff --git a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..7c5d00330 --- /dev/null +++ 
b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cohere-embed-english-v3.0.parquet +corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar +download_checksum: 40c5caf33476746e93ceeb75174b8d64 + +index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.cohere-embed-english-v3.0.jsonl.gz + qrel: qrels.dl19-passage.txt + +models: + - name: cohere-embed-english-v3.0-hnsw-int8-cached + display: cohere-embed-english-v3.0 + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4884 + nDCG@10: + - 0.6956 + R@100: + - 0.6484 + R@1000: + - 0.8630 + tolerance: + AP@1000: + - 0.015 + nDCG@10: + - 0.02 + R@100: + - 0.02 + R@1000: + - 0.035 diff --git a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.yaml 
b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..925ed6546 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cohere-embed-english-v3.0.parquet +corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar +download_checksum: 40c5caf33476746e93ceeb75174b8d64 + +index_path: indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.cohere-embed-english-v3.0.jsonl.gz + qrel: qrels.dl19-passage.txt + +models: + - name: cohere-embed-english-v3.0-hnsw-cached + display: cohere-embed-english-v3.0 + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4884 + nDCG@10: + - 0.6956 + R@100: + - 0.6484 + R@1000: + - 0.8630 + tolerance: + AP@1000: + - 0.01 + nDCG@10: + - 0.015 + R@100: + - 
0.015 + R@1000: + - 0.03 diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat-int8.cached.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..9f45de147 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.cos-dpr-distil.jsonl.gz + qrel: qrels.dl19-passage.txt + +models: + - name: cos-dpr-distil-flat-int8-cached + display: cosDPR-distil + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.4656 + nDCG@10: + - 0.7250 + R@100: + - 0.6173 + R@1000: + - 0.8201 + tolerance: + AP@1000: + - 0.001 + 
nDCG@10: + - 0.003 + R@100: + - 0.003 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..fe80abf10 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.txt + qrel: qrels.dl19-passage.txt + +models: + - name: cos-dpr-distil-flat-int8-onnx + display: cosDPR-distil + type: flat + params: -encoder CosDprDistil -hits 1000 -threads 16 + results: + AP@1000: + - 0.4656 + nDCG@10: + - 0.7250 + R@100: + - 0.6173 + R@1000: + - 0.8201 + tolerance: + 
AP@1000: + - 0.001 + nDCG@10: + - 0.002 + R@100: + - 0.002 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat.cached.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat.cached.yaml new file mode 100644 index 000000000..dd3d34a74 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat.cached.yaml @@ -0,0 +1,65 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.cos-dpr-distil.jsonl.gz + qrel: qrels.dl19-passage.txt + +models: + - name: cos-dpr-distil-flat-cached + display: cosDPR-distil + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.4656 + nDCG@10: + - 0.7250 + R@100: + - 0.6173 + R@1000: + - 0.8201 diff --git 
a/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat.onnx.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat.onnx.yaml new file mode 100644 index 000000000..82c2c3b78 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.txt + qrel: qrels.dl19-passage.txt + +models: + - name: cos-dpr-distil-flat-onnx + display: cosDPR-distil + type: flat + params: -encoder CosDprDistil -hits 1000 -threads 16 + results: + AP@1000: + - 0.4656 + nDCG@10: + - 0.7250 + R@100: + - 0.6173 + R@1000: + - 0.8201 + tolerance: + AP@1000: + - 0.001 + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git 
a/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..3c725674c --- /dev/null +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.cos-dpr-distil.jsonl.gz + qrel: qrels.dl19-passage.txt + +models: + - name: cos-dpr-distil-hnsw-int8-cached + display: cosDPR-distil + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4656 + nDCG@10: + - 0.7250 + R@100: + - 0.6173 + R@1000: + - 0.8201 + tolerance: + AP@1000: + - 0.02 + nDCG@10: + 
- 0.025 + R@100: + - 0.025 + R@1000: + - 0.03 diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..aaf4e0514 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.txt + qrel: qrels.dl19-passage.txt + +models: + - name: cos-dpr-distil-hnsw-int8-onnx + display: cosDPR-distil + type: hnsw + params: -encoder CosDprDistil -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4656 + nDCG@10: + - 0.7250 + R@100: + - 0.6173 + R@1000: + - 0.8201 + 
tolerance: + AP@1000: + - 0.02 + nDCG@10: + - 0.025 + R@100: + - 0.025 + R@1000: + - 0.03 diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw.cached.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..0a7eec9f2 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.cos-dpr-distil.jsonl.gz + qrel: qrels.dl19-passage.txt + +models: + - name: cos-dpr-distil-hnsw-cached + display: cosDPR-distil + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4656 + nDCG@10: + - 0.7250 + R@100: + - 0.6173 + 
R@1000: + - 0.8201 + tolerance: + AP@1000: + - 0.015 + nDCG@10: + - 0.025 + R@100: + - 0.02 + R@1000: + - 0.025 diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..4fb88509f --- /dev/null +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.txt + qrel: qrels.dl19-passage.txt + +models: + - name: cos-dpr-distil-hnsw-onnx + display: cosDPR-distil + type: hnsw + params: -encoder CosDprDistil -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4656 + nDCG@10: + - 0.7250 + R@100: + - 
0.6173 + R@1000: + - 0.8201 + tolerance: + AP@1000: + - 0.015 + nDCG@10: + - 0.025 + R@100: + - 0.02 + R@1000: + - 0.025 diff --git a/src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat-int8.cached.yaml b/src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..e7b5da4e8 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-openai-ada2 +corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar +download_checksum: a4d843d522ff3a3af7edbee789a63402 + +index_path: indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.openai-ada2.jsonl.gz + qrel: qrels.dl19-passage.txt + +models: + - name: openai-ada2-flat-int8-cached + display: OpenAI-ada2 + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.4788 + nDCG@10: + - 0.7035 + R@100: + - 0.6235 + R@1000: + - 
0.8629 + tolerance: + AP@1000: + - 0.002 + nDCG@10: + - 0.002 + R@100: + - 0.007 + R@1000: + - 0.008 diff --git a/src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat.cached.yaml b/src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat.cached.yaml new file mode 100644 index 000000000..e4c205da1 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat.cached.yaml @@ -0,0 +1,65 @@ +--- +corpus: msmarco-passage-openai-ada2 +corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar +download_checksum: a4d843d522ff3a3af7edbee789a63402 + +index_path: indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.openai-ada2.jsonl.gz + qrel: qrels.dl19-passage.txt + +models: + - name: openai-ada2-flat-cached + display: OpenAI-ada2 + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.4788 + nDCG@10: + - 0.7035 + R@100: + - 0.6235 + R@1000: + - 0.8629 diff --git 
a/src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..0ee9cbbc4 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-openai-ada2 +corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar +download_checksum: a4d843d522ff3a3af7edbee789a63402 + +index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.openai-ada2.jsonl.gz + qrel: qrels.dl19-passage.txt + +models: + - name: openai-ada2-hnsw-int8-cached + display: OpenAI-ada2 + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4788 + nDCG@10: + - 0.7035 + R@100: + - 0.6235 + R@1000: + - 0.8629 + tolerance: + AP@1000: + - 0.015 + nDCG@10: + - 0.015 + R@100: + - 0.015 + R@1000: + - 0.015 diff 
--git a/src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw.cached.yaml b/src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..1eb849e33 --- /dev/null +++ b/src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-openai-ada2 +corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar +download_checksum: a4d843d522ff3a3af7edbee789a63402 + +index_path: indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)" + id: dl19 + path: topics.dl19-passage.openai-ada2.jsonl.gz + qrel: qrels.dl19-passage.txt + +models: + - name: openai-ada2-hnsw-cached + display: OpenAI-ada2 + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4788 + nDCG@10: + - 0.7035 + R@100: + - 0.6235 + R@1000: + - 0.8629 + tolerance: + AP@1000: + - 0.002 + nDCG@10: + - 0.004 + R@100: + - 0.005 + R@1000: + - 0.009 diff --git 
a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..a61f22aa4 --- /dev/null +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5.parquet +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar +download_checksum: b235e19ec492c18a18057b30b8b23fd4 + +index_path: indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.bge-base-en-v1.5.jsonl.gz + qrel: qrels.dl20-passage.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.4650 + nDCG@10: + - 0.6780 + R@100: + - 0.7169 + R@1000: + - 0.8503 + tolerance: + AP@1000: + - 0.003 + nDCG@10: + - 0.006 + R@100: + - 0.005 + 
R@1000: + - 0.002 diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..578c8cb56 --- /dev/null +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5.parquet +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar +download_checksum: b235e19ec492c18a18057b30b8b23fd4 + +index_path: indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.txt + qrel: qrels.dl20-passage.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -threads 16 + results: + AP@1000: + - 0.4650 + nDCG@10: + - 0.6780 + R@100: + - 0.7169 + R@1000: + - 0.8503 + tolerance: + AP@1000: + - 0.004 + nDCG@10: + - 0.003 + 
R@100: + - 0.004 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat.cached.yaml new file mode 100644 index 000000000..d9b1c290f --- /dev/null +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -0,0 +1,65 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5.parquet +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar +download_checksum: b235e19ec492c18a18057b30b8b23fd4 + +index_path: indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.bge-base-en-v1.5.jsonl.gz + qrel: qrels.dl20-passage.txt + +models: + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.4650 + nDCG@10: + - 0.6780 + R@100: + - 0.7169 + R@1000: + - 0.8503 diff --git 
a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat.onnx.yaml new file mode 100644 index 000000000..0c6388fca --- /dev/null +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5.parquet +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar +download_checksum: b235e19ec492c18a18057b30b8b23fd4 + +index_path: indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.txt + qrel: qrels.dl20-passage.txt + +models: + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -threads 16 + results: + AP@1000: + - 0.4650 + nDCG@10: + - 0.6780 + R@100: + - 0.7169 + R@1000: + - 0.8503 + tolerance: + AP@1000: + - 0.003 + nDCG@10: + - 0.002 + R@100: + - 0.003 + R@1000: + - 0.005 diff --git 
a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..581fb9431 --- /dev/null +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5.parquet +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar +download_checksum: b235e19ec492c18a18057b30b8b23fd4 + +index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.bge-base-en-v1.5.jsonl.gz + qrel: qrels.dl20-passage.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4650 + nDCG@10: + - 0.6780 + R@100: + - 0.7169 + R@1000: + - 0.8503 + tolerance: + AP@1000: + - 0.01 + nDCG@10: + 
- 0.01 + R@100: + - 0.02 + R@1000: + - 0.03 diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..0481321c6 --- /dev/null +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5.parquet +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar +download_checksum: b235e19ec492c18a18057b30b8b23fd4 + +index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.txt + qrel: qrels.dl20-passage.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4650 + nDCG@10: + - 0.6780 + R@100: + - 0.7169 + R@1000: + - 0.8503 + 
tolerance: + AP@1000: + - 0.015 + nDCG@10: + - 0.008 + R@100: + - 0.02 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..07d3f015e --- /dev/null +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5.parquet +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar +download_checksum: b235e19ec492c18a18057b30b8b23fd4 + +index_path: indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.bge-base-en-v1.5.jsonl.gz + qrel: qrels.dl20-passage.txt + +models: + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4650 + nDCG@10: + - 0.6780 + R@100: + - 0.7169 + 
R@1000: + - 0.8503 + tolerance: + AP@1000: + - 0.003 + nDCG@10: + - 0.001 + R@100: + - 0.009 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..731d5ce63 --- /dev/null +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-bge-base-en-v1.5.parquet +corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar +download_checksum: b235e19ec492c18a18057b30b8b23fd4 + +index_path: indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.txt + qrel: qrels.dl20-passage.txt + +models: + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4650 + nDCG@10: + - 0.6780 + R@100: + - 0.7169 
+ R@1000: + - 0.8503 + tolerance: + AP@1000: + - 0.005 + nDCG@10: + - 0.002 + R@100: + - 0.01 + R@1000: + - 0.01 diff --git a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.yaml b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..e0483677d --- /dev/null +++ b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cohere-embed-english-v3.0.parquet +corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar +download_checksum: 40c5caf33476746e93ceeb75174b8d64 + +index_path: indexes/lucene-flat-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.cohere-embed-english-v3.0.jsonl.gz + qrel: qrels.dl20-passage.txt + +models: + - name: cohere-embed-english-v3.0-flat-int8-cached + display: 
cohere-embed-english-v3.0 + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.5067 + nDCG@10: + - 0.7245 + R@100: + - 0.7279 + R@1000: + - 0.8682 + tolerance: + AP@1000: + - 0.001 + nDCG@10: + - 0.004 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml new file mode 100644 index 000000000..a54f120a7 --- /dev/null +++ b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml @@ -0,0 +1,65 @@ +--- +corpus: msmarco-passage-cohere-embed-english-v3.0.parquet +corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar +download_checksum: 40c5caf33476746e93ceeb75174b8d64 + +index_path: indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.cohere-embed-english-v3.0.jsonl.gz + 
qrel: qrels.dl20-passage.txt + +models: + - name: cohere-embed-english-v3.0-flat-cached + display: cohere-embed-english-v3.0 + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.5067 + nDCG@10: + - 0.7245 + R@100: + - 0.7279 + R@1000: + - 0.8682 diff --git a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..f3bbfe564 --- /dev/null +++ b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cohere-embed-english-v3.0.parquet +corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar +download_checksum: 40c5caf33476746e93ceeb75174b8d64 + +index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL20 
(Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.cohere-embed-english-v3.0.jsonl.gz + qrel: qrels.dl20-passage.txt + +models: + - name: cohere-embed-english-v3.0-hnsw-int8-cached + display: cohere-embed-english-v3.0 + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.5067 + nDCG@10: + - 0.7245 + R@100: + - 0.7279 + R@1000: + - 0.8682 + tolerance: + AP@1000: + - 0.0057 + nDCG@10: + - 0.007 + R@100: + - 0.02 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.yaml b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..9e5a745f4 --- /dev/null +++ b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cohere-embed-english-v3.0.parquet +corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar +download_checksum: 40c5caf33476746e93ceeb75174b8d64 + +index_path: indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + 
separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.cohere-embed-english-v3.0.jsonl.gz + qrel: qrels.dl20-passage.txt + +models: + - name: cohere-embed-english-v3.0-hnsw-cached + display: cohere-embed-english-v3.0 + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.5067 + nDCG@10: + - 0.7245 + R@100: + - 0.7279 + R@1000: + - 0.8682 + tolerance: + AP@1000: + - 0.005 + nDCG@10: + - 0.001 + R@100: + - 0.015 + R@1000: + - 0.025 diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat-int8.cached.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..b0de920ca --- /dev/null +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: 
R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.cos-dpr-distil.jsonl.gz + qrel: qrels.dl20-passage.txt + +models: + - name: cos-dpr-distil-flat-int8-cached + display: cosDPR-distil + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.4876 + nDCG@10: + - 0.7025 + R@100: + - 0.7204 + R@1000: + - 0.8533 + tolerance: + AP@1000: + - 0.002 + nDCG@10: + - 0.005 + R@100: + - 0.004 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..e60a433f4 --- /dev/null +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: 
false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.txt + qrel: qrels.dl20-passage.txt + +models: + - name: cos-dpr-distil-flat-int8-onnx + display: cosDPR-distil + type: flat + params: -encoder CosDprDistil -hits 1000 -threads 16 + results: + AP@1000: + - 0.4876 + nDCG@10: + - 0.7025 + R@100: + - 0.7204 + R@1000: + - 0.8533 + tolerance: + AP@1000: + - 0.001 + nDCG@10: + - 0.005 + R@100: + - 0.004 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat.cached.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat.cached.yaml new file mode 100644 index 000000000..186ab03d9 --- /dev/null +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat.cached.yaml @@ -0,0 +1,65 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: 
R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.cos-dpr-distil.jsonl.gz + qrel: qrels.dl20-passage.txt + +models: + - name: cos-dpr-distil-flat-cached + display: cosDPR-distil + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.4876 + nDCG@10: + - 0.7025 + R@100: + - 0.7204 + R@1000: + - 0.8533 diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat.onnx.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat.onnx.yaml new file mode 100644 index 000000000..493bc1eea --- /dev/null +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + 
metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.txt + qrel: qrels.dl20-passage.txt + +models: + - name: cos-dpr-distil-flat-onnx + display: cosDPR-distil + type: flat + params: -encoder CosDprDistil -hits 1000 -threads 16 + results: + AP@1000: + - 0.4876 + nDCG@10: + - 0.7025 + R@100: + - 0.7204 + R@1000: + - 0.8533 + tolerance: + AP@1000: + - 0.001 + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..8a2cfafed --- /dev/null +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: 
"\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.cos-dpr-distil.jsonl.gz + qrel: qrels.dl20-passage.txt + +models: + - name: cos-dpr-distil-hnsw-int8-cached + display: cosDPR-distil + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4876 + nDCG@10: + - 0.7025 + R@100: + - 0.7204 + R@1000: + - 0.8533 + tolerance: + AP@1000: + - 0.009 + nDCG@10: + - 0.006 + R@100: + - 0.02 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..2d55327e7 --- /dev/null +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + 
params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.txt + qrel: qrels.dl20-passage.txt + +models: + - name: cos-dpr-distil-hnsw-int8-onnx + display: cosDPR-distil + type: hnsw + params: -encoder CosDprDistil -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4876 + nDCG@10: + - 0.7025 + R@100: + - 0.7204 + R@1000: + - 0.8533 + tolerance: + AP@1000: + - 0.009 + nDCG@10: + - 0.006 + R@100: + - 0.02 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw.cached.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..20bd4677a --- /dev/null +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: 
bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.cos-dpr-distil.jsonl.gz + qrel: qrels.dl20-passage.txt + +models: + - name: cos-dpr-distil-hnsw-cached + display: cosDPR-distil + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4876 + nDCG@10: + - 0.7025 + R@100: + - 0.7204 + R@1000: + - 0.8533 + tolerance: + AP@1000: + - 0.015 + nDCG@10: + - 0.008 + R@100: + - 0.025 + R@1000: + - 0.025 diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.yaml new file mode 100644 index 000000000..c3a86458e --- /dev/null +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-cos-dpr-distil.parquet +corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil.parquet/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar +download_checksum: c8a204fbc3ccda581aa375936af43a97 + +index_path: indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + 
command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvInt +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.txt + qrel: qrels.dl20-passage.txt + +models: + - name: cos-dpr-distil-hnsw-onnx + display: cosDPR-distil + type: hnsw + params: -encoder CosDprDistil -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4876 + nDCG@10: + - 0.7025 + R@100: + - 0.7204 + R@1000: + - 0.8533 + tolerance: + AP@1000: + - 0.015 + nDCG@10: + - 0.008 + R@100: + - 0.025 + R@1000: + - 0.025 diff --git a/src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat-int8.cached.yaml b/src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..1717b7909 --- /dev/null +++ b/src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-openai-ada2 +corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar +download_checksum: a4d843d522ff3a3af7edbee789a63402 + +index_path: indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: 
bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.openai-ada2.jsonl.gz + qrel: qrels.dl20-passage.txt + +models: + - name: openai-ada2-flat-int8-cached + display: OpenAI-ada2 + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.4771 + nDCG@10: + - 0.6759 + R@100: + - 0.7237 + R@1000: + - 0.8705 + tolerance: + AP@1000: + - 0.001 + nDCG@10: + - 0.002 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat.cached.yaml b/src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat.cached.yaml new file mode 100644 index 000000000..f7b35b97d --- /dev/null +++ b/src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat.cached.yaml @@ -0,0 +1,65 @@ +--- +corpus: msmarco-passage-openai-ada2 +corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar +download_checksum: a4d843d522ff3a3af7edbee789a63402 + +index_path: indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: "" + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: 
"\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.openai-ada2.jsonl.gz + qrel: qrels.dl20-passage.txt + +models: + - name: openai-ada2-flat-cached + display: OpenAI-ada2 + type: flat + params: -hits 1000 -threads 16 + results: + AP@1000: + - 0.4771 + nDCG@10: + - 0.6759 + R@100: + - 0.7237 + R@1000: + - 0.8705 diff --git a/src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..7a8a38451 --- /dev/null +++ b/src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-openai-ada2 +corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar +download_checksum: a4d843d522ff3a3af7edbee789a63402 + +index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: 
"[DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.openai-ada2.jsonl.gz + qrel: qrels.dl20-passage.txt + +models: + - name: openai-ada2-hnsw-int8-cached + display: OpenAI-ada2 + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4771 + nDCG@10: + - 0.6759 + R@100: + - 0.7237 + R@1000: + - 0.8705 + tolerance: + AP@1000: + - 0.008 + nDCG@10: + - 0.015 + R@100: + - 0.015 + R@1000: + - 0.015 diff --git a/src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw.cached.yaml b/src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw.cached.yaml new file mode 100644 index 000000000..483994e9d --- /dev/null +++ b/src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw.cached.yaml @@ -0,0 +1,74 @@ +--- +corpus: msmarco-passage-openai-ada2 +corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ + +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar +download_checksum: a4d843d522ff3a3af7edbee789a63402 + +index_path: indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 + +metrics: + - metric: AP@1000 + command: bin/trec_eval + params: -m map -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -m ndcg_cut.10 -c + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -m recall.100 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -m recall.1000 -c -l 2 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonIntVector +topics: + - name: "[DL20 
(Passage)](https://trec.nist.gov/data/deep2020.html)" + id: dl20 + path: topics.dl20.openai-ada2.jsonl.gz + qrel: qrels.dl20-passage.txt + +models: + - name: openai-ada2-hnsw-cached + display: OpenAI-ada2 + type: hnsw + params: -hits 1000 -efSearch 1000 -threads 16 + results: + AP@1000: + - 0.4771 + nDCG@10: + - 0.6759 + R@100: + - 0.7237 + R@1000: + - 0.8705 + tolerance: + AP@1000: + - 0.001 + nDCG@10: + - 0.001 + R@100: + - 0.003 + R@1000: + - 0.009 From 5f39662d58cf948200e0df26d0bc98143ffc956b Mon Sep 17 00:00:00 2001 From: lintool Date: Thu, 5 Dec 2024 14:42:50 -0500 Subject: [PATCH 08/14] Tweaked yaml. --- ...dl19-passage.openai-ada2.parquet.flat-int8.cached.yaml | 8 ++++---- .../dl19-passage.openai-ada2.parquet.flat.cached.yaml | 8 ++++---- ...dl19-passage.openai-ada2.parquet.hnsw-int8.cached.yaml | 8 ++++---- .../dl19-passage.openai-ada2.parquet.hnsw.cached.yaml | 8 ++++---- ...dl20-passage.openai-ada2.parquet.flat-int8.cached.yaml | 8 ++++---- .../dl20-passage.openai-ada2.parquet.flat.cached.yaml | 8 ++++---- ...dl20-passage.openai-ada2.parquet.hnsw-int8.cached.yaml | 8 ++++---- .../dl20-passage.openai-ada2.parquet.hnsw.cached.yaml | 8 ++++---- 8 files changed, 32 insertions(+), 32 deletions(-) diff --git a/src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat-int8.cached.yaml b/src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat-int8.cached.yaml index e7b5da4e8..0539d9a5c 100644 --- a/src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat-int8.cached.yaml @@ -1,9 +1,9 @@ --- -corpus: msmarco-passage-openai-ada2 -corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ +corpus: msmarco-passage-openai-ada2.parquet +corpus_path: collections/msmarco/msmarco-passage-openai-ada2.parquet/ -download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar -download_checksum: 
a4d843d522ff3a3af7edbee789a63402 +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar +download_checksum: fa3637e9c4150b157270e19ef3a4f779 index_path: indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ index_type: flat diff --git a/src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat.cached.yaml b/src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat.cached.yaml index e4c205da1..d1f247ccb 100644 --- a/src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat.cached.yaml +++ b/src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat.cached.yaml @@ -1,9 +1,9 @@ --- -corpus: msmarco-passage-openai-ada2 -corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ +corpus: msmarco-passage-openai-ada2.parquet +corpus_path: collections/msmarco/msmarco-passage-openai-ada2.parquet/ -download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar -download_checksum: a4d843d522ff3a3af7edbee789a63402 +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar +download_checksum: fa3637e9c4150b157270e19ef3a4f779 index_path: indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ index_type: flat diff --git a/src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.yaml index 0ee9cbbc4..32e2d8ba2 100644 --- a/src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.yaml @@ -1,9 +1,9 @@ --- -corpus: msmarco-passage-openai-ada2 -corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ +corpus: msmarco-passage-openai-ada2.parquet +corpus_path: collections/msmarco/msmarco-passage-openai-ada2.parquet/ -download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar 
-download_checksum: a4d843d522ff3a3af7edbee789a63402 +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar +download_checksum: fa3637e9c4150b157270e19ef3a4f779 index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ index_type: hnsw diff --git a/src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw.cached.yaml b/src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw.cached.yaml index 1eb849e33..373416895 100644 --- a/src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw.cached.yaml @@ -1,9 +1,9 @@ --- -corpus: msmarco-passage-openai-ada2 -corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ +corpus: msmarco-passage-openai-ada2.parquet +corpus_path: collections/msmarco/msmarco-passage-openai-ada2.parquet/ -download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar -download_checksum: a4d843d522ff3a3af7edbee789a63402 +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar +download_checksum: fa3637e9c4150b157270e19ef3a4f779 index_path: indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ index_type: hnsw diff --git a/src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat-int8.cached.yaml b/src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat-int8.cached.yaml index 1717b7909..dd3de04a9 100644 --- a/src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat-int8.cached.yaml @@ -1,9 +1,9 @@ --- -corpus: msmarco-passage-openai-ada2 -corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ +corpus: msmarco-passage-openai-ada2.parquet +corpus_path: collections/msmarco/msmarco-passage-openai-ada2.parquet/ -download_url: 
https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar -download_checksum: a4d843d522ff3a3af7edbee789a63402 +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar +download_checksum: fa3637e9c4150b157270e19ef3a4f779 index_path: indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ index_type: flat diff --git a/src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat.cached.yaml b/src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat.cached.yaml index f7b35b97d..fd0fa7926 100644 --- a/src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat.cached.yaml +++ b/src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat.cached.yaml @@ -1,9 +1,9 @@ --- -corpus: msmarco-passage-openai-ada2 -corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ +corpus: msmarco-passage-openai-ada2.parquet +corpus_path: collections/msmarco/msmarco-passage-openai-ada2.parquet/ -download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar -download_checksum: a4d843d522ff3a3af7edbee789a63402 +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar +download_checksum: fa3637e9c4150b157270e19ef3a4f779 index_path: indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ index_type: flat diff --git a/src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.yaml index 7a8a38451..0e529f7ba 100644 --- a/src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.yaml @@ -1,9 +1,9 @@ --- -corpus: msmarco-passage-openai-ada2 -corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ +corpus: msmarco-passage-openai-ada2.parquet +corpus_path: collections/msmarco/msmarco-passage-openai-ada2.parquet/ 
-download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar -download_checksum: a4d843d522ff3a3af7edbee789a63402 +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar +download_checksum: fa3637e9c4150b157270e19ef3a4f779 index_path: indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ index_type: hnsw diff --git a/src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw.cached.yaml b/src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw.cached.yaml index 483994e9d..66f985930 100644 --- a/src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw.cached.yaml @@ -1,9 +1,9 @@ --- -corpus: msmarco-passage-openai-ada2 -corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ +corpus: msmarco-passage-openai-ada2.parquet +corpus_path: collections/msmarco/msmarco-passage-openai-ada2.parquet/ -download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar -download_checksum: a4d843d522ff3a3af7edbee789a63402 +download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar +download_checksum: fa3637e9c4150b157270e19ef3a4f779 index_path: indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ index_type: hnsw From 2b5a733862534e04b1324b727921cc985d5cb80b Mon Sep 17 00:00:00 2001 From: lintool Date: Thu, 5 Dec 2024 20:22:59 -0500 Subject: [PATCH 09/14] Added docs. 
--- ...e-base-en-v1.5.parquet.flat-int8.cached.md | 117 ++++++++++++++++ ...bge-base-en-v1.5.parquet.flat-int8.onnx.md | 117 ++++++++++++++++ ...ge.bge-base-en-v1.5.parquet.flat.cached.md | 115 ++++++++++++++++ ...sage.bge-base-en-v1.5.parquet.flat.onnx.md | 116 ++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.cached.md | 123 +++++++++++++++++ ...bge-base-en-v1.5.parquet.hnsw-int8.onnx.md | 123 +++++++++++++++++ ...ge.bge-base-en-v1.5.parquet.hnsw.cached.md | 121 +++++++++++++++++ ...sage.bge-base-en-v1.5.parquet.hnsw.onnx.md | 121 +++++++++++++++++ ...d-english-v3.0.parquet.flat-int8.cached.md | 112 ++++++++++++++++ ...-embed-english-v3.0.parquet.flat.cached.md | 110 +++++++++++++++ ...d-english-v3.0.parquet.hnsw-int8.cached.md | 116 ++++++++++++++++ ...-embed-english-v3.0.parquet.hnsw.cached.md | 116 ++++++++++++++++ ...cos-dpr-distil.parquet.flat-int8.cached.md | 117 ++++++++++++++++ ...e.cos-dpr-distil.parquet.flat-int8.onnx.md | 119 +++++++++++++++++ ...sage.cos-dpr-distil.parquet.flat.cached.md | 115 ++++++++++++++++ ...assage.cos-dpr-distil.parquet.flat.onnx.md | 118 +++++++++++++++++ ...cos-dpr-distil.parquet.hnsw-int8.cached.md | 123 +++++++++++++++++ ...e.cos-dpr-distil.parquet.hnsw-int8.onnx.md | 125 ++++++++++++++++++ ...sage.cos-dpr-distil.parquet.hnsw.cached.md | 121 +++++++++++++++++ ...assage.cos-dpr-distil.parquet.hnsw.onnx.md | 123 +++++++++++++++++ ...ge.openai-ada2.parquet.flat-int8.cached.md | 117 ++++++++++++++++ ...passage.openai-ada2.parquet.flat.cached.md | 115 ++++++++++++++++ ...ge.openai-ada2.parquet.hnsw-int8.cached.md | 123 +++++++++++++++++ ...passage.openai-ada2.parquet.hnsw.cached.md | 121 +++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.cached.md | 117 ++++++++++++++++ ...bge-base-en-v1.5.parquet.flat-int8.onnx.md | 117 ++++++++++++++++ ...ge.bge-base-en-v1.5.parquet.flat.cached.md | 115 ++++++++++++++++ ...sage.bge-base-en-v1.5.parquet.flat.onnx.md | 116 ++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.cached.md | 123 
+++++++++++++++++ ...bge-base-en-v1.5.parquet.hnsw-int8.onnx.md | 123 +++++++++++++++++ ...ge.bge-base-en-v1.5.parquet.hnsw.cached.md | 121 +++++++++++++++++ ...sage.bge-base-en-v1.5.parquet.hnsw.onnx.md | 121 +++++++++++++++++ ...d-english-v3.0.parquet.flat-int8.cached.md | 112 ++++++++++++++++ ...-embed-english-v3.0.parquet.flat.cached.md | 110 +++++++++++++++ ...d-english-v3.0.parquet.hnsw-int8.cached.md | 116 ++++++++++++++++ ...-embed-english-v3.0.parquet.hnsw.cached.md | 116 ++++++++++++++++ ...cos-dpr-distil.parquet.flat-int8.cached.md | 117 ++++++++++++++++ ...e.cos-dpr-distil.parquet.flat-int8.onnx.md | 119 +++++++++++++++++ ...sage.cos-dpr-distil.parquet.flat.cached.md | 115 ++++++++++++++++ ...assage.cos-dpr-distil.parquet.flat.onnx.md | 118 +++++++++++++++++ ...cos-dpr-distil.parquet.hnsw-int8.cached.md | 123 +++++++++++++++++ ...e.cos-dpr-distil.parquet.hnsw-int8.onnx.md | 125 ++++++++++++++++++ ...sage.cos-dpr-distil.parquet.hnsw.cached.md | 121 +++++++++++++++++ ...assage.cos-dpr-distil.parquet.hnsw.onnx.md | 123 +++++++++++++++++ ...ge.openai-ada2.parquet.flat-int8.cached.md | 117 ++++++++++++++++ ...passage.openai-ada2.parquet.flat.cached.md | 115 ++++++++++++++++ ...ge.openai-ada2.parquet.hnsw-int8.cached.md | 123 +++++++++++++++++ ...passage.openai-ada2.parquet.hnsw.cached.md | 121 +++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.cached.md | 105 +++++++++++++++ ...bge-base-en-v1.5.parquet.flat-int8.onnx.md | 105 +++++++++++++++ ...sage.bge-base-en-v1.5.parquet.flat.onnx.md | 104 +++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.cached.md | 115 ++++++++++++++++ ...bge-base-en-v1.5.parquet.hnsw-int8.onnx.md | 115 ++++++++++++++++ ...ge.bge-base-en-v1.5.parquet.hnsw.cached.md | 113 ++++++++++++++++ ...sage.bge-base-en-v1.5.parquet.hnsw.onnx.md | 113 ++++++++++++++++ ...d-english-v3.0.parquet.flat-int8.cached.md | 103 +++++++++++++++ ...d-english-v3.0.parquet.hnsw-int8.cached.md | 113 ++++++++++++++++ 
...-embed-english-v3.0.parquet.hnsw.cached.md | 111 ++++++++++++++++ ...cos-dpr-distil.parquet.flat-int8.cached.md | 105 +++++++++++++++ ...e.cos-dpr-distil.parquet.flat-int8.onnx.md | 107 +++++++++++++++ ...assage.cos-dpr-distil.parquet.flat.onnx.md | 106 +++++++++++++++ ...cos-dpr-distil.parquet.hnsw-int8.cached.md | 117 ++++++++++++++++ ...e.cos-dpr-distil.parquet.hnsw-int8.onnx.md | 117 ++++++++++++++++ ...sage.cos-dpr-distil.parquet.hnsw.cached.md | 115 ++++++++++++++++ ...assage.cos-dpr-distil.parquet.hnsw.onnx.md | 115 ++++++++++++++++ ...ge.openai-ada2.parquet.flat-int8.cached.md | 105 +++++++++++++++ ...ge.openai-ada2.parquet.hnsw-int8.cached.md | 116 ++++++++++++++++ ...passage.openai-ada2.parquet.hnsw.cached.md | 114 ++++++++++++++++ ...-en-v1.5.parquet.flat-int8.cached.template | 94 +++++++++++++ ...se-en-v1.5.parquet.flat-int8.onnx.template | 94 +++++++++++++ ...-base-en-v1.5.parquet.flat.cached.template | 93 +++++++++++++ ...ge-base-en-v1.5.parquet.flat.onnx.template | 94 +++++++++++++ ...-en-v1.5.parquet.hnsw-int8.cached.template | 100 ++++++++++++++ ...se-en-v1.5.parquet.hnsw-int8.onnx.template | 100 ++++++++++++++ ...-base-en-v1.5.parquet.hnsw.cached.template | 98 ++++++++++++++ ...ge-base-en-v1.5.parquet.hnsw.onnx.template | 98 ++++++++++++++ ...ish-v3.0.parquet.flat-int8.cached.template | 89 +++++++++++++ ...-english-v3.0.parquet.flat.cached.template | 88 ++++++++++++ ...ish-v3.0.parquet.hnsw-int8.cached.template | 93 +++++++++++++ ...-english-v3.0.parquet.hnsw.cached.template | 93 +++++++++++++ ...r-distil.parquet.flat-int8.cached.template | 94 +++++++++++++ ...dpr-distil.parquet.flat-int8.onnx.template | 96 ++++++++++++++ ...os-dpr-distil.parquet.flat.cached.template | 93 +++++++++++++ ....cos-dpr-distil.parquet.flat.onnx.template | 96 ++++++++++++++ ...r-distil.parquet.hnsw-int8.cached.template | 100 ++++++++++++++ ...dpr-distil.parquet.hnsw-int8.onnx.template | 102 ++++++++++++++ ...os-dpr-distil.parquet.hnsw.cached.template | 98 
++++++++++++++ ....cos-dpr-distil.parquet.hnsw.onnx.template | 100 ++++++++++++++ ...nai-ada2.parquet.flat-int8.cached.template | 94 +++++++++++++ ...e.openai-ada2.parquet.flat.cached.template | 93 +++++++++++++ ...nai-ada2.parquet.hnsw-int8.cached.template | 100 ++++++++++++++ ...e.openai-ada2.parquet.hnsw.cached.template | 98 ++++++++++++++ ...-en-v1.5.parquet.flat-int8.cached.template | 94 +++++++++++++ ...se-en-v1.5.parquet.flat-int8.onnx.template | 94 +++++++++++++ ...-base-en-v1.5.parquet.flat.cached.template | 93 +++++++++++++ ...ge-base-en-v1.5.parquet.flat.onnx.template | 94 +++++++++++++ ...-en-v1.5.parquet.hnsw-int8.cached.template | 100 ++++++++++++++ ...se-en-v1.5.parquet.hnsw-int8.onnx.template | 100 ++++++++++++++ ...-base-en-v1.5.parquet.hnsw.cached.template | 98 ++++++++++++++ ...ge-base-en-v1.5.parquet.hnsw.onnx.template | 98 ++++++++++++++ ...ish-v3.0.parquet.flat-int8.cached.template | 89 +++++++++++++ ...-english-v3.0.parquet.flat.cached.template | 88 ++++++++++++ ...ish-v3.0.parquet.hnsw-int8.cached.template | 93 +++++++++++++ ...-english-v3.0.parquet.hnsw.cached.template | 93 +++++++++++++ ...r-distil.parquet.flat-int8.cached.template | 94 +++++++++++++ ...dpr-distil.parquet.flat-int8.onnx.template | 96 ++++++++++++++ ...os-dpr-distil.parquet.flat.cached.template | 93 +++++++++++++ ....cos-dpr-distil.parquet.flat.onnx.template | 96 ++++++++++++++ ...r-distil.parquet.hnsw-int8.cached.template | 100 ++++++++++++++ ...dpr-distil.parquet.hnsw-int8.onnx.template | 102 ++++++++++++++ ...os-dpr-distil.parquet.hnsw.cached.template | 98 ++++++++++++++ ....cos-dpr-distil.parquet.hnsw.onnx.template | 100 ++++++++++++++ ...nai-ada2.parquet.flat-int8.cached.template | 94 +++++++++++++ ...e.openai-ada2.parquet.flat.cached.template | 93 +++++++++++++ ...nai-ada2.parquet.hnsw-int8.cached.template | 100 ++++++++++++++ ...e.openai-ada2.parquet.hnsw.cached.template | 98 ++++++++++++++ ...-en-v1.5.parquet.flat-int8.cached.template | 82 ++++++++++++ 
...se-en-v1.5.parquet.flat-int8.onnx.template | 82 ++++++++++++ ...ge-base-en-v1.5.parquet.flat.onnx.template | 82 ++++++++++++ ...-en-v1.5.parquet.hnsw-int8.cached.template | 92 +++++++++++++ ...se-en-v1.5.parquet.hnsw-int8.onnx.template | 92 +++++++++++++ ...-base-en-v1.5.parquet.hnsw.cached.template | 90 +++++++++++++ ...ge-base-en-v1.5.parquet.hnsw.onnx.template | 90 +++++++++++++ ...ish-v3.0.parquet.flat-int8.cached.template | 80 +++++++++++ ...ish-v3.0.parquet.hnsw-int8.cached.template | 90 +++++++++++++ ...-english-v3.0.parquet.hnsw.cached.template | 88 ++++++++++++ ...r-distil.parquet.flat-int8.cached.template | 82 ++++++++++++ ...dpr-distil.parquet.flat-int8.onnx.template | 84 ++++++++++++ ....cos-dpr-distil.parquet.flat.onnx.template | 84 ++++++++++++ ...r-distil.parquet.hnsw-int8.cached.template | 94 +++++++++++++ ...dpr-distil.parquet.hnsw-int8.onnx.template | 94 +++++++++++++ ...os-dpr-distil.parquet.hnsw.cached.template | 92 +++++++++++++ ....cos-dpr-distil.parquet.hnsw.onnx.template | 92 +++++++++++++ ...nai-ada2.parquet.flat-int8.cached.template | 82 ++++++++++++ ...nai-ada2.parquet.hnsw-int8.cached.template | 93 +++++++++++++ ...e.openai-ada2.parquet.hnsw.cached.template | 91 +++++++++++++ 136 files changed, 14254 insertions(+) create mode 100644 docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached.md create mode 100644 docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.md create mode 100644 docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat.cached.md create mode 100644 docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat.onnx.md create mode 100644 docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md create mode 100644 docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md create mode 100644 docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.md create mode 
100644 docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md create mode 100644 docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.md create mode 100644 docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached.md create mode 100644 docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md create mode 100644 docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md create mode 100644 docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat-int8.cached.md create mode 100644 docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx.md create mode 100644 docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat.cached.md create mode 100644 docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat.onnx.md create mode 100644 docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md create mode 100644 docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md create mode 100644 docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.cached.md create mode 100644 docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.md create mode 100644 docs/regressions/regressions-dl19-passage.openai-ada2.parquet.flat-int8.cached.md create mode 100644 docs/regressions/regressions-dl19-passage.openai-ada2.parquet.flat.cached.md create mode 100644 docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw-int8.cached.md create mode 100644 docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw.cached.md create mode 100644 docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached.md create mode 100644 docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.md create mode 100644 
docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat.cached.md create mode 100644 docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat.onnx.md create mode 100644 docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md create mode 100644 docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md create mode 100644 docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.md create mode 100644 docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md create mode 100644 docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.md create mode 100644 docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached.md create mode 100644 docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md create mode 100644 docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md create mode 100644 docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat-int8.cached.md create mode 100644 docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx.md create mode 100644 docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat.cached.md create mode 100644 docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat.onnx.md create mode 100644 docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md create mode 100644 docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md create mode 100644 docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.cached.md create mode 100644 docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.md create mode 100644 docs/regressions/regressions-dl20-passage.openai-ada2.parquet.flat-int8.cached.md create mode 100644 
docs/regressions/regressions-dl20-passage.openai-ada2.parquet.flat.cached.md create mode 100644 docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw-int8.cached.md create mode 100644 docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw.cached.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md create mode 100644 
docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.md create mode 100644 docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.md create mode 100644 src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat.cached.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat.onnx.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template create mode 100644 
src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat-int8.cached.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat.cached.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat.onnx.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.cached.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.flat-int8.cached.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.flat.cached.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.template create mode 100644 src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw.cached.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat.cached.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat.onnx.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template create mode 100644 
src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat-int8.cached.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat.cached.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat.onnx.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.cached.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.flat-int8.cached.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.flat.cached.template create mode 100644 
src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.template create mode 100644 src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw.cached.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template create mode 100644 
src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.template create mode 100644 src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.template diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached.md new file mode 100644 index 000000000..559e9c733 --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached.md @@ -0,0 +1,117 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). 
+ +Note that the NIST relevance judgments provide far more relevant passages per topic than the "sparse" judgments provided by Microsoft (the NIST judgments are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
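The 43-topic figure can be spot-checked directly against the qrels file. A quick sketch; the `count_topics` helper is ours, assuming the standard whitespace-separated four-column TREC qrels format:

```shell
# Count distinct judged topics in a TREC qrels file
# (whitespace-separated columns: qid iteration docid grade).
count_topics() {
  awk '{print $1}' "$1" | sort -u | wc -l
}
```

Running `count_topics tools/topics-and-qrels/qrels.dl19-passage.txt` (with the `tools/` submodule initialized) should report 43, matching the topic count above.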
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ +  -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ +  -topics tools/topics-and-qrels/topics.dl19-passage.bge-base-en-v1.5.jsonl.gz \ +  -topicReader JsonIntVector \ +  -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt \ +  -hits 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **BGE-base-en-v1.5**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.4435 | +| **nDCG@10** | **BGE-base-en-v1.5**| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.7065 | +| **R@100** | **BGE-base-en-v1.5**| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.6171 | +| **R@1000** | **BGE-base-en-v1.5**| +| [DL19
(Passage)](https://trec.nist.gov/data/deep2019.html) | 0.8472 | + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With cached queries on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.md new file mode 100644 index 000000000..b82e282cc --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.md @@ -0,0 +1,117 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff.
[C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
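If you do run the steps manually, it is worth verifying each download against its published MD5 checksum before unpacking. A minimal sketch (the `verify_md5` helper is illustrative, not part of Anserini; it assumes GNU coreutils `md5sum`, and the path and checksum in the example are the ones listed under Corpus Download below):

```shell
# Illustrative helper: compare a file's MD5 digest against an expected value.
verify_md5() {
  local path="$1" expected="$2" actual
  actual=$(md5sum "$path" | awk '{print $1}')
  if [ "$actual" = "$expected" ]; then
    echo "checksum OK: $path"
  else
    echo "checksum MISMATCH: $path (got $actual)"
    return 1
  fi
}

# Example invocation, using the corpus tarball and checksum from this page:
# verify_md5 collections/msmarco-passage-bge-base-en-v1.5.parquet.tar b235e19ec492c18a18057b30b8b23fd4
```

On macOS, `md5 -q` can be substituted for the `md5sum | awk` pipeline.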
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
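The topic count stated above can be sanity-checked directly against the qrels file; here is a small sketch (`count_topics` is an illustrative helper, not part of Anserini; qrels lines are whitespace-separated `qid iteration docid grade`):

```shell
# Illustrative helper: count distinct topic ids in a TREC qrels file.
count_topics() {
  awk '{print $1}' "$1" | sort -u | wc -l
}

# Example: count_topics tools/topics-and-qrels/qrels.dl19-passage.txt
# For the DL19 passage qrels, this should report 43.
```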
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \
+  -topics tools/topics-and-qrels/topics.dl19-passage.txt \
+  -topicReader TsvInt \
+  -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-onnx.topics.dl19-passage.txt \
+  -encoder BgeBaseEn15 -hits 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-onnx.topics.dl19-passage.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-onnx.topics.dl19-passage.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-onnx.topics.dl19-passage.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-onnx.topics.dl19-passage.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **BGE-base-en-v1.5**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.4435 |
+| **nDCG@10** | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.7065 |
+| **R@100** | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.6171 |
+| **R@1000** | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.8472 |
+
+The above figures are from running brute-force search with cached queries on non-quantized
indexes. +With ONNX query encoding on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat.cached.md new file mode 100644 index 000000000..f18fb9544 --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat.cached.md @@ -0,0 +1,115 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). 
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.flat.cached
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.flat.cached
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.flat.cached \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \
+  -topics tools/topics-and-qrels/topics.dl19-passage.bge-base-en-v1.5.jsonl.gz \
+  -topicReader JsonIntVector \
+  -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt \
+  -hits 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **BGE-base-en-v1.5**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.4435 |
+| **nDCG@10** | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.7065 |
+| **R@100** | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.6171 |
+| **R@1000** | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) |
0.8472 | + +Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat.onnx.md new file mode 100644 index 000000000..1bd14ef0f --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat.onnx.md @@ -0,0 +1,116 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. 
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.flat.onnx.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.flat.onnx
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.flat.onnx
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.flat.onnx \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \
+  -topics tools/topics-and-qrels/topics.dl19-passage.txt \
+  -topicReader TsvInt \
+  -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-onnx.topics.dl19-passage.txt \
+  -encoder BgeBaseEn15 -hits 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-onnx.topics.dl19-passage.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-onnx.topics.dl19-passage.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-onnx.topics.dl19-passage.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-onnx.topics.dl19-passage.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **BGE-base-en-v1.5**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.4435 |
+| **nDCG@10** | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.7065 |
+| **R@100** | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.6171 |
+| **R@1000** | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.8472 |
+
+The above figures are from running brute-force search with cached queries on non-quantized indexes.
+With ONNX query encoding on non-quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat.onnx.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md new file mode 100644 index 000000000..67e3a90f1 --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md @@ -0,0 +1,123 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). 
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/
+tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/
+```
+
+To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached \
+  --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+bin/run.sh io.anserini.index.IndexHnswDenseVectors \
+  -threads 16 \
+  -collection ParquetDenseVectorCollection \
+  -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \
+  -generator ParquetDenseVectorDocumentGenerator \
+  -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \
+  -M 16 -efC 100 -quantize.int8 \
+  >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet &
+```
+
+The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -topics tools/topics-and-qrels/topics.dl19-passage.bge-base-en-v1.5.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following 
results:
+
+| **AP@1000** | **BGE-base-en-v1.5**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.444 |
+| **nDCG@10** | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.706 |
+| **R@100** | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.617 |
+| **R@1000** | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.847 |
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation.
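Since metrics on this page are computed to depth 1000 hits per query, a malformed run file with more than 1000 hits for some topic will silently skew scores. A small sketch for checking this before evaluation (`check_depth` is an illustrative helper, not part of Anserini; the run-file path in the example is the one generated above):

```shell
# Illustrative helper: flag topics in a TREC run file that exceed a hit budget.
# Run files are whitespace-separated: qid Q0 docid rank score tag.
check_depth() {
  awk -v max="${2:-1000}" \
    '{c[$1]++} END {bad = 0; for (t in c) if (c[t] > max) {print t, c[t]; bad = 1}; exit bad}' "$1"
}

# Example:
# check_depth runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt
```

The helper exits non-zero and prints the offending topic ids if any topic has more hits than the budget (default 1000).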
diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md
new file mode 100644
index 000000000..3856bbb00
--- /dev/null
+++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md
@@ -0,0 +1,123 @@
+# Anserini Regressions: TREC 2019 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized HNSW indexes (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. 
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx \
+  --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+bin/run.sh io.anserini.index.IndexHnswDenseVectors \
+  -threads 16 \
+  -collection ParquetDenseVectorCollection \
+  -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \
+  -generator ParquetDenseVectorDocumentGenerator \
+  -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \
+  -M 16 -efC 100 -quantize.int8 \
+  >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet &
+```
+
+The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track.
+The original data can be found [here](https://trec.nist.gov/data/deep2019.html).
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchHnswDenseVectors \
+  -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \
+  -topics tools/topics-and-qrels/topics.dl19-passage.txt \
+  -topicReader TsvInt \
+  -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-onnx.topics.dl19-passage.txt \
+  -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-onnx.topics.dl19-passage.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-onnx.topics.dl19-passage.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-onnx.topics.dl19-passage.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-onnx.topics.dl19-passage.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000**                                                                                                  | **BGE-base-en-v1.5**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.444     |
+| **nDCG@10**                                                                                                  | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.706     |
+| **R@100**                                                                                                    | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.617     |
+| **R@1000**                                                                                                   | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.847     |
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template) and run `bin/build.sh` to rebuild the documentation.
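The four `trec_eval` invocations on this page differ only in their metric flags, so they can be driven from a loop. A sketch, where `eval_all` is a hypothetical convenience wrapper (qrels and run paths as used above):

```bash
# Hypothetical wrapper around the four trec_eval calls above.
eval_all() {  # usage: eval_all <qrels-file> <run-file>
  local m
  for m in "map -c -l 2" "ndcg_cut.10 -c" "recall.100 -c -l 2" "recall.1000 -c -l 2"; do
    # $m is intentionally unquoted so the metric name and its flags word-split.
    bin/trec_eval -m $m "$1" "$2"
  done
}

# eval_all tools/topics-and-qrels/qrels.dl19-passage.txt \
#     runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-onnx.topics.dl19-passage.txt
```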
diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.md new file mode 100644 index 000000000..ad4a813cd --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.md @@ -0,0 +1,121 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
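Since the topics and qrels live in a submodule, a fresh clone may have an empty `tools/topics-and-qrels/` directory. A sketch of a quick pre-flight check; `have_qrels` is a hypothetical helper, and the commented command assumes you are inside an Anserini checkout:

```bash
# Hypothetical check: is a given topics/qrels file present on disk?
have_qrels() {  # usage: have_qrels <path>
  [ -f "$1" ]
}

# Populate the submodule only if the qrels used on this page are missing:
# have_qrels tools/topics-and-qrels/qrels.dl19-passage.txt \
#     || git submodule update --init --recursive
```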
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchHnswDenseVectors \
+  -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \
+  -topics tools/topics-and-qrels/topics.dl19-passage.bge-base-en-v1.5.jsonl.gz \
+  -topicReader JsonIntVector \
+  -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt \
+  -hits 1000 -efSearch 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000**                                                                                                  | **BGE-base-en-v1.5**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.444     |
+| **nDCG@10**                                                                                                  | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.706     |
+| **R@100**                                                                                                    | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.617     |
+| **R@1000**                                                                                                   | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.847     |
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md new file mode 100644 index 000000000..7d8acab91 --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md @@ -0,0 +1,121 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with HNSW indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchHnswDenseVectors \
+  -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \
+  -topics tools/topics-and-qrels/topics.dl19-passage.txt \
+  -topicReader TsvInt \
+  -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-onnx.topics.dl19-passage.txt \
+  -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-onnx.topics.dl19-passage.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-onnx.topics.dl19-passage.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-onnx.topics.dl19-passage.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-onnx.topics.dl19-passage.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000**                                                                                                  | **BGE-base-en-v1.5**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.444     |
+| **nDCG@10**                                                                                                  | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.706     |
+| **R@100**                                                                                                    | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.617     |
+| **R@1000**                                                                                                   | **BGE-base-en-v1.5**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.847     |
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.md new file mode 100644 index 000000000..10efe5827 --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.md @@ -0,0 +1,112 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). 
+ +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cohere-embed-english-v3.0.parquet.tar` is 16 GB and has MD5 checksum `40c5caf33476746e93ceeb75174b8d64`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached \ + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-cohere-embed-english-v3.0.parquet & +``` + +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \
+  -topics tools/topics-and-qrels/topics.dl19-passage.cohere-embed-english-v3.0.jsonl.gz \
+  -topicReader JsonIntVector \
+  -output runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt \
+  -hits 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000**                                                                                                  | **cohere-embed-english-v3.0**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.4884    |
+| **nDCG@10**                                                                                                  | **cohere-embed-english-v3.0**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.6956    |
+| **R@100**                                                                                                    | **cohere-embed-english-v3.0**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.6484    |
+| **R@1000**                                                                                                   | **cohere-embed-english-v3.0**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.8630    |
+
+The above figures are from running brute-force search with cached queries on non-quantized indexes.
+With cached queries on quantized indexes, results may differ slightly.
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached.md new file mode 100644 index 000000000..1848bdd76 --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached.md @@ -0,0 +1,110 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cohere-embed-english-v3.0.parquet.tar` is 16 GB and has MD5 checksum `40c5caf33476746e93ceeb75174b8d64`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached \ + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0.parquet +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + >& logs/log.msmarco-passage-cohere-embed-english-v3.0.parquet & +``` + +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. 
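One way to sanity-check the expected document count after indexing is to look for it in the indexing log. The exact wording of the final summary may vary across Anserini versions, so the sketch below just greps for the expected number; `check_doc_count` is a hypothetical helper:

```bash
# Hypothetical helper: does the indexing log mention the expected document count?
check_doc_count() {  # usage: check_doc_count <logfile> <count-regex>
  grep -Eq "$2" "$1"
}

# e.g., after the indexing command on this page finishes:
# check_doc_count logs/log.msmarco-passage-cohere-embed-english-v3.0.parquet '8,?841,?823' \
#     && echo "document count looks right"
```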
+ +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). + +After indexing has completed, you should be able to perform retrieval as follows using flat indexes: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + -topics tools/topics-and-qrels/topics.dl19-passage.cohere-embed-english-v3.0.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt \ + -hits 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +``` + +## Effectiveness +
+With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cohere-embed-english-v3.0**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.4884 | +| **nDCG@10** | **cohere-embed-english-v3.0**| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.6956 | +| **R@100** | **cohere-embed-english-v3.0**| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.6484 | +| **R@1000** | **cohere-embed-english-v3.0**| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.8630 | + +Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md new file mode 100644 index 000000000..4c2d93583 --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md @@ -0,0 +1,116 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cohere-embed-english-v3.0.parquet.tar` is 16 GB and has MD5 checksum `40c5caf33476746e93ceeb75174b8d64`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached \ + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0.parquet +``` + +## Indexing + +Sample indexing command, building quantized HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + -M 16 -efC 100 -quantize.int8 \ + >& logs/log.msmarco-passage-cohere-embed-english-v3.0.parquet & +``` + +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. 
+ +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). + +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + -topics tools/topics-and-qrels/topics.dl19-passage.cohere-embed-english-v3.0.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt 
runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cohere-embed-english-v3.0**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.488 | +| **nDCG@10** | **cohere-embed-english-v3.0**| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.696 | +| **R@100** | **cohere-embed-english-v3.0**| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.648 | +| **R@1000** | **cohere-embed-english-v3.0**| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.863 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820).
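The up-to-0.01 variance noted above can be checked mechanically when comparing a fresh run against the reference scores. A sketch of an absolute-difference tolerance check in `awk`; the two values below are illustrative, not measured results:

```shell
# Compare an observed score against a reference within an absolute tolerance
# of 0.01. The ref and obs values here are made up for illustration.
awk -v ref=0.696 -v obs=0.6934 'BEGIN {
  d = ref - obs; if (d < 0) d = -d;
  print (d <= 0.01) ? "within tolerance" : "outside tolerance"
}'
```

The same pattern extends to looping over all four metrics when scripting a regression check.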
+ +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md new file mode 100644 index 000000000..b8bf150e4 --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md @@ -0,0 +1,116 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cohere-embed-english-v3.0.parquet.tar` is 16 GB and has MD5 checksum `40c5caf33476746e93ceeb75174b8d64`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached \ + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-cohere-embed-english-v3.0.parquet & +``` + +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
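The topic files used below are gzipped JSONL, one query per line (here paired with its cached embedding), so the number of topics can be confirmed with a line count — 43 judged topics for DL19. A minimal sketch on a tiny stand-in file; the real file is `tools/topics-and-qrels/topics.dl19-passage.cohere-embed-english-v3.0.jsonl.gz`:

```shell
# Build a two-line stand-in gzipped JSONL topics file; the field names are
# illustrative, not the actual topic schema.
printf '{"qid": 1}\n{"qid": 2}\n' | gzip > /tmp/topics.jsonl.gz
# Count topics: one JSON object per line.
gzip -dc /tmp/topics.jsonl.gz | wc -l | tr -d ' '
```

Running the same pipeline on the real DL19 topic file should report 43.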
+ +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + -topics tools/topics-and-qrels/topics.dl19-passage.cohere-embed-english-v3.0.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cohere-embed-english-v3.0**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.488 | +| **nDCG@10** | **cohere-embed-english-v3.0**| +| [DL19
(Passage)](https://trec.nist.gov/data/deep2019.html) | 0.696 | +| **R@100** | **cohere-embed-english-v3.0**| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.648 | +| **R@1000** | **cohere-embed-english-v3.0**| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.863 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat-int8.cached.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat-int8.cached.md new file mode 100644 index 000000000..77c11051c --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat-int8.cached.md @@ -0,0 +1,117 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat-int8.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.flat-int8.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.flat-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. 
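The checksum can be verified mechanically with `md5sum -c` before unpacking. A minimal sketch using a small stand-in file; in practice, substitute `collections/msmarco-passage-cos-dpr-distil.parquet.tar` and the MD5 given above:

```shell
# Stand-in file with a known MD5; pair the real tarball with its published
# checksum (c8a204fbc3ccda581aa375936af43a97) in actual use.
printf 'hello\n' > /tmp/sample.tar
echo "b1946ac92492d2347c6235b4d2611184  /tmp/sample.tar" | md5sum -c -
```

`md5sum -c` exits non-zero on a mismatch, so the check composes cleanly with `&&` in download scripts.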
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.flat-int8.cached \ + --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-cos-dpr-distil.parquet & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ + -topics tools/topics-and-qrels/topics.dl19-passage.cos-dpr-distil.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt \ + -hits 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cosDPR-distil**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.4656 | +| **nDCG@10** | **cosDPR-distil**| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.7250 | +| **R@100** | **cosDPR-distil**| +| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.6173 | +| **R@1000** | **cosDPR-distil**| +| [DL19
(Passage)](https://trec.nist.gov/data/deep2019.html) | 0.8201 | + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With cached queries on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx.md new file mode 100644 index 000000000..2bf755e88 --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx.md @@ -0,0 +1,119 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with quantized flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin.
[Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx \ + --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-cos-dpr-distil.parquet & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. 
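The retrieval step in the next section writes runs in the standard six-column TREC format (`qid Q0 docid rank score tag`). A sketch validating that shape on a synthetic line; the docid, score, and tag shown are made up for illustration:

```shell
# Synthetic run line in TREC format; real values come from the actual run file.
printf '19335 Q0 7067032 1 89.21 flat-int8-onnx\n' > /tmp/run.sample.txt
# A well-formed run file has exactly six whitespace-separated fields per line.
awk 'NF != 6 { bad = 1 } END { print bad ? "malformed" : "ok" }' /tmp/run.sample.txt
```

Such a check is a cheap guard before handing a run file to `trec_eval`.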
+ +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ + -topics tools/topics-and-qrels/topics.dl19-passage.txt \ + -topicReader TsvInt \ + -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-onnx.topics.dl19-passage.txt \ + -encoder CosDprDistil -hits 1000 -threads 16 & +``` + +Note that we are performing query inference "on-the-fly" with ONNX in these experiments. + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-onnx.topics.dl19-passage.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cosDPR-distil**| 
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.4656 |
+| **nDCG@10** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.7250 |
+| **R@100** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.6173 |
+| **R@1000** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.8201 |
+
+The above figures are from running brute-force search with cached queries on non-quantized indexes.
+With ONNX query encoding on quantized indexes, results may differ slightly.
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat.cached.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat.cached.md new file mode 100644 index 000000000..6d96a5925 --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat.cached.md @@ -0,0 +1,115 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.flat.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.flat.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. 
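
The checksum can be verified before unpacking; a minimal sketch assuming GNU coreutils `md5sum` is available (on macOS, `md5 -q` produces the same digest), with the tarball path matching the download command above:

```bash
# Optional integrity check before unpacking the tarball.
tarball=collections/msmarco-passage-cos-dpr-distil.parquet.tar
if [ -f "$tarball" ]; then
  echo "c8a204fbc3ccda581aa375936af43a97  $tarball" | md5sum -c -
fi
```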
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.flat.cached \ + --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ + >& logs/log.msmarco-passage-cos-dpr-distil.parquet & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
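
The cached queries used in the retrieval command below ship as gzipped JSONL, one record per topic containing the query id and its pre-computed cosDPR-distil embedding. A quick way to inspect the first record (the exact field names in the records are an assumption; only the path comes from this page):

```bash
# Peek at the first cached-query record without fully decompressing the file.
topics=tools/topics-and-qrels/topics.dl19-passage.cos-dpr-distil.jsonl.gz
if [ -f "$topics" ]; then
  zcat "$topics" | head -n 1 | cut -c 1-200
fi
```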
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \
+  -topics tools/topics-and-qrels/topics.dl19-passage.cos-dpr-distil.jsonl.gz \
+  -topicReader JsonIntVector \
+  -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt \
+  -hits 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **cosDPR-distil**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.4656 |
+| **nDCG@10** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.7250 |
+| **R@100** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.6173 |
+| **R@1000** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.8201 |
+
+Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_.
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat.cached.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat.onnx.md
new file mode 100644
index 000000000..cff934439
--- /dev/null
+++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat.onnx.md
@@ -0,0 +1,118 @@
+# Anserini Regressions: TREC 2019 Deep Learning Track (Passage)
+
+**Model**: cosDPR-distil with flat indexes (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper:
+
+> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.flat.onnx.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.flat.onnx
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.flat.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.flat.onnx \ + --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ + >& logs/log.msmarco-passage-cos-dpr-distil.parquet & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track.
+The original data can be found [here](https://trec.nist.gov/data/deep2019.html).
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \
+  -topics tools/topics-and-qrels/topics.dl19-passage.txt \
+  -topicReader TsvInt \
+  -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-onnx.topics.dl19-passage.txt \
+  -encoder CosDprDistil -hits 1000 -threads 16 &
+```
+
+Note that we are performing query inference "on-the-fly" with ONNX in these experiments.
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-onnx.topics.dl19-passage.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-onnx.topics.dl19-passage.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-onnx.topics.dl19-passage.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-onnx.topics.dl19-passage.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **cosDPR-distil**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.4656 |
+| **nDCG@10** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.7250 |
+| **R@100** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.6173 |
+| **R@1000** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.8201 |
+
+The above figures are from running brute-force search with cached queries on non-quantized indexes.
+With ONNX query encoding on non-quantized indexes, results may differ slightly.
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat.onnx.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md
new file mode 100644
index 000000000..ddfc2bc72
--- /dev/null
+++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md
@@ -0,0 +1,123 @@
+# Anserini Regressions: TREC 2019 Deep Learning Track (Passage)
+
+**Model**: cosDPR-distil with quantized HNSW indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper:
+
+> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached \ + --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet +``` + +## Indexing + +Sample indexing command, building quantized HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ + -M 16 -efC 100 -quantize.int8 \ + >& logs/log.msmarco-passage-cos-dpr-distil.parquet & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. 
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track.
+The original data can be found [here](https://trec.nist.gov/data/deep2019.html).
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchHnswDenseVectors \
+  -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \
+  -topics tools/topics-and-qrels/topics.dl19-passage.cos-dpr-distil.jsonl.gz \
+  -topicReader JsonIntVector \
+  -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt \
+  -hits 1000 -efSearch 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **cosDPR-distil**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.466 |
+| **nDCG@10** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.725 |
+| **R@100** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.617 |
+| **R@1000** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.820 |
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820).
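
To make the `-l 2` semantics concrete: binary metrics such as AP and recall count a document as relevant only when its qrels grade is at least 2. A toy illustration on a fabricated qrels fragment (the docids are made up for illustration; this is not DL19 data):

```bash
# Columns in a TREC qrels file: topic, iteration, docid, grade.
# With -l 2, only grade >= 2 counts as relevant for binary metrics.
qrels=$(mktemp)
printf '19335 0 docA 3\n19335 0 docB 1\n19335 0 docC 0\n' > "$qrels"
awk '$4 >= 2 {print $3}' "$qrels"   # only docA survives the -l 2 cutoff
rm -f "$qrels"
```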
+ +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md new file mode 100644 index 000000000..ac1355ac2 --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md @@ -0,0 +1,125 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with quantized HNSW indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. 
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx \
+  --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+bin/run.sh io.anserini.index.IndexHnswDenseVectors \
+  -threads 16 \
+  -collection ParquetDenseVectorCollection \
+  -input /path/to/msmarco-passage-cos-dpr-distil.parquet \
+  -generator ParquetDenseVectorDocumentGenerator \
+  -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \
+  -M 16 -efC 100 -quantize.int8 \
+  >& logs/log.msmarco-passage-cos-dpr-distil.parquet &
+```
+
+The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track.
+The original data can be found [here](https://trec.nist.gov/data/deep2019.html).
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchHnswDenseVectors \
+  -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \
+  -topics tools/topics-and-qrels/topics.dl19-passage.txt \
+  -topicReader TsvInt \
+  -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-onnx.topics.dl19-passage.txt \
+  -encoder CosDprDistil -hits 1000 -efSearch 1000 -threads 16 &
+```
+
+Note that we are performing query inference "on-the-fly" with ONNX in these experiments.
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-onnx.topics.dl19-passage.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-onnx.topics.dl19-passage.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-onnx.topics.dl19-passage.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-onnx.topics.dl19-passage.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **cosDPR-distil**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.466 |
+| **nDCG@10** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.725 |
+| **R@100** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.617 |
+| **R@1000** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.820 |
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.cached.md
new file mode 100644
index 000000000..2ce7954cf
--- /dev/null
+++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.cached.md
@@ -0,0 +1,121 @@
+# Anserini Regressions: TREC 2019 Deep Learning Track (Passage)
+
+**Model**: cosDPR-distil with HNSW indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper:
+
+> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.hnsw.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.hnsw.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. 
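The expected size and checksum above provide a quick integrity check before unpacking. A minimal sketch, assuming GNU coreutils `md5sum` and `awk` are available (the `md5_check` helper name is ours, for illustration only, not an Anserini tool):

```shell
# Compare a file's MD5 digest against an expected value; prints OK or MISMATCH.
md5_check() {
  actual=$(md5sum "$1" | awk '{print $1}')
  if [ "$actual" = "$2" ]; then
    echo "OK: $1"
  else
    echo "MISMATCH: $1 (got $actual)"
  fi
}

# Usage against the tarball downloaded above:
# md5_check collections/msmarco-passage-cos-dpr-distil.parquet.tar c8a204fbc3ccda581aa375936af43a97
```

On macOS, `md5 -q` would play the role of `md5sum` here.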
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.hnsw.cached \ + --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-cos-dpr-distil.parquet & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
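If retrieval later fails because topics files cannot be found, the usual cause is an uninitialized `tools/` submodule. A small sketch to check for this (the `ensure_topics` helper is ours, for illustration):

```shell
# Check that the topics-and-qrels data from the anserini-tools submodule is present.
ensure_topics() {
  if [ -e "$1/topics-and-qrels" ]; then
    echo "topics found in $1/topics-and-qrels"
  else
    echo "missing $1/topics-and-qrels; run: git submodule update --init --recursive"
  fi
}

ensure_topics tools
```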
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchHnswDenseVectors \
+  -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \
+  -topics tools/topics-and-qrels/topics.dl19-passage.cos-dpr-distil.jsonl.gz \
+  -topicReader JsonIntVector \
+  -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt \
+  -hits 1000 -efSearch 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **cosDPR-distil**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.466 |
+| **nDCG@10** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.725 |
+| **R@100** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.617 |
+| **R@1000** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.820 |
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.md
new file mode 100644
index 000000000..8e44b1dd9
--- /dev/null
+++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.md
@@ -0,0 +1,123 @@
+# Anserini Regressions: TREC 2019 Deep Learning Track (Passage)
+
+**Model**: cosDPR-distil with HNSW indexes (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper:
+
+> Xueguang Ma, Tommaso Teofili, and Jimmy Lin.
[Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.hnsw.onnx
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.hnsw.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.parquet.hnsw.onnx \ + --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-cos-dpr-distil.parquet & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. 
+This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ + -topics tools/topics-and-qrels/topics.dl19-passage.txt \ + -topicReader TsvInt \ + -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt \ + -encoder CosDprDistil -hits 1000 -efSearch 1000 -threads 16 & +``` + +Note that we are performing query inference "on-the-fly" with ONNX in these experiments. 
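Before evaluation, it can be useful to sanity-check that the run file produced above is well-formed: standard TREC run format has six whitespace-separated fields per line (qid, Q0, docid, rank, score, tag). A minimal sketch (the `check_run` helper is ours, for illustration, not an Anserini or `trec_eval` tool):

```shell
# Count total and malformed lines in a TREC-format run file.
check_run() {
  awk 'NF != 6 { bad++ } END { printf "lines=%d malformed=%d\n", NR, bad + 0 }' "$1"
}

# Usage once the retrieval command above finishes:
# check_run runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt
```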
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **cosDPR-distil**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.466 |
+| **nDCG@10** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.725 |
+| **R@100** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.617 |
+| **R@1000** | **cosDPR-distil**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.820 |
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.flat-int8.cached.md b/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.flat-int8.cached.md new file mode 100644 index 000000000..e469a120e --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.flat-int8.cached.md @@ -0,0 +1,117 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: OpenAI-ada2 embeddings with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). 
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat-int8.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.flat-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.parquet.flat-int8.cached
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.openai-ada2.parquet.flat-int8.cached
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar -P collections/
+tar xvf collections/msmarco-passage-openai-ada2.parquet.tar -C collections/
+```
+
+To confirm, `msmarco-passage-openai-ada2.parquet.tar` is 75 GB and has MD5 checksum `fa3637e9c4150b157270e19ef3a4f779`.
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.parquet.flat-int8.cached \ + --corpus-path collections/msmarco-passage-openai-ada2.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-openai-ada2.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-openai-ada2.parquet & +``` + +The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ \
+  -topics tools/topics-and-qrels/topics.dl19-passage.openai-ada2.jsonl.gz \
+  -topicReader JsonIntVector \
+  -output runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt \
+  -hits 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **OpenAI-ada2**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.4788 |
+| **nDCG@10** | **OpenAI-ada2**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.7035 |
+| **R@100** | **OpenAI-ada2**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.6235 |
+| **R@1000** | **OpenAI-ada2**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.8629 |
+
+The above figures are from running brute-force search with cached queries on non-quantized indexes. +With cached queries on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.flat-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.flat.cached.md b/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.flat.cached.md new file mode 100644 index 000000000..e95fbd71a --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.flat.cached.md @@ -0,0 +1,115 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: OpenAI-ada2 embeddings with flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). 
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.openai-ada2.parquet.flat.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.flat.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.parquet.flat.cached
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.openai-ada2.parquet.flat.cached
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-openai-ada2.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-openai-ada2.parquet.tar` is 75 GB and has MD5 checksum `fa3637e9c4150b157270e19ef3a4f779`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.parquet.flat.cached \ + --corpus-path collections/msmarco-passage-openai-ada2.parquet +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-openai-ada2.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ \ + >& logs/log.msmarco-passage-openai-ada2.parquet & +``` + +The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ \
+  -topics tools/topics-and-qrels/topics.dl19-passage.openai-ada2.jsonl.gz \
+  -topicReader JsonIntVector \
+  -output runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-cached.topics.dl19-passage.openai-ada2.jsonl.txt \
+  -hits 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-cached.topics.dl19-passage.openai-ada2.jsonl.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-cached.topics.dl19-passage.openai-ada2.jsonl.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-cached.topics.dl19-passage.openai-ada2.jsonl.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-cached.topics.dl19-passage.openai-ada2.jsonl.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **OpenAI-ada2**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.4788 |
+| **nDCG@10** | **OpenAI-ada2**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.7035 |
+| **R@100** | **OpenAI-ada2**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.6235 |
+| **R@1000** | **OpenAI-ada2**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.8629 |
+
+Note that since we're running
brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.flat.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw-int8.cached.md new file mode 100644 index 000000000..65f311932 --- /dev/null +++ b/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw-int8.cached.md @@ -0,0 +1,123 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: OpenAI-ada2 embeddings with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). 
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.parquet.hnsw-int8.cached
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.openai-ada2.parquet.hnsw-int8.cached
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar -P collections/
+tar xvf collections/msmarco-passage-openai-ada2.parquet.tar -C collections/
+```
+
+To confirm, `msmarco-passage-openai-ada2.parquet.tar` is 75 GB and has MD5 checksum `fa3637e9c4150b157270e19ef3a4f779`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.parquet.hnsw-int8.cached \
+  --corpus-path collections/msmarco-passage-openai-ada2.parquet
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+bin/run.sh io.anserini.index.IndexHnswDenseVectors \
+  -threads 16 \
+  -collection ParquetDenseVectorCollection \
+  -input /path/to/msmarco-passage-openai-ada2.parquet \
+  -generator ParquetDenseVectorDocumentGenerator \
+  -index indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ \
+  -M 16 -efC 100 -quantize.int8 \
+  >& logs/log.msmarco-passage-openai-ada2.parquet &
+```
+
+The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ \ + -topics tools/topics-and-qrels/topics.dl19-passage.openai-ada2.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| 
**AP@1000** | **OpenAI-ada2**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.479 |
+| **nDCG@10** | **OpenAI-ada2**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.703 |
+| **R@100** | **OpenAI-ada2**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.623 |
+| **R@1000** | **OpenAI-ada2**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.863 |
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw.cached.md
new file mode 100644
index 000000000..4a92ddf02
--- /dev/null
+++ b/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw.cached.md
@@ -0,0 +1,121 @@
+# Anserini Regressions: TREC 2019 Deep Learning Track (Passage)
+
+**Model**: OpenAI-ada2 embeddings with HNSW indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper:
+
+> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.openai-ada2.parquet.hnsw.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.parquet.hnsw.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.openai-ada2.parquet.hnsw.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-openai-ada2.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-openai-ada2.parquet.tar` is 75 GB and has MD5 checksum `fa3637e9c4150b157270e19ef3a4f779`. 
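If you prefer to verify the checksum programmatically rather than with command-line `md5sum`, a minimal sketch follows; the helper name is illustrative, and the demo runs on a small stand-in file rather than the 75 GB tarball.

```python
import hashlib

def md5_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream a (potentially very large) file through MD5 in 1 MB chunks."""
    h = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Demo on a tiny stand-in file; for the real check, point at
# collections/msmarco-passage-openai-ada2.parquet.tar and compare
# against fa3637e9c4150b157270e19ef3a4f779.
with open("demo.bin", "wb") as f:
    f.write(b"hello")

print(md5_of("demo.bin"))  # 5d41402abc4b2a76b9719d911017c592
```

Streaming in chunks keeps memory flat, which matters for a corpus this size.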
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.parquet.hnsw.cached \ + --corpus-path collections/msmarco-passage-openai-ada2.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-openai-ada2.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-openai-ada2.parquet & +``` + +The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
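The `-M` and `-efC` options above bound the number of neighbors per node in the HNSW graph and the candidate-list size during construction. As a rough intuition for why search over such a graph is approximate, here is a toy greedy best-first search over a hand-built neighbor graph; this is not Lucene's implementation, and the vectors and graph are made up for illustration.

```python
import heapq

# Toy vectors and a hand-built neighbor graph (real HNSW builds the graph
# layer by layer, keeping at most M neighbors per node).
vectors = {
    0: (0.0, 0.0), 1: (1.0, 0.0), 2: (0.0, 1.0),
    3: (1.0, 1.0), 4: (2.0, 2.0),
}
graph = {0: [1, 2], 1: [0, 3], 2: [0, 3], 3: [1, 2, 4], 4: [3]}

def dist2(a, b):
    return sum((x - y) ** 2 for x, y in zip(a, b))

def greedy_search(query, entry=0, ef=3):
    """Best-first graph walk with a bounded result list (like efSearch)."""
    visited = {entry}
    candidates = [(dist2(query, vectors[entry]), entry)]  # min-heap by distance
    best = [(-candidates[0][0], entry)]                   # max-heap of results
    while candidates:
        d, node = heapq.heappop(candidates)
        if d > -best[0][0] and len(best) >= ef:
            break  # nearest unexplored candidate is worse than our worst result
        for nbr in graph[node]:
            if nbr not in visited:
                visited.add(nbr)
                nd = dist2(query, vectors[nbr])
                heapq.heappush(candidates, (nd, nbr))
                heapq.heappush(best, (-nd, nbr))
                if len(best) > ef:
                    heapq.heappop(best)  # drop the current worst result
    return sorted((-d, n) for d, n in best)

print(greedy_search((2.1, 1.9))[0][1])  # 4
```

A larger `ef` (analogous to `-efSearch 1000` below) explores more of the graph, trading latency for recall; this is the knob that makes HNSW results approximate rather than exact.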
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchHnswDenseVectors \
+  -index indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ \
+  -topics tools/topics-and-qrels/topics.dl19-passage.openai-ada2.jsonl.gz \
+  -topicReader JsonIntVector \
+  -output runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-cached.topics.dl19-passage.openai-ada2.jsonl.txt \
+  -hits 1000 -efSearch 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-cached.topics.dl19-passage.openai-ada2.jsonl.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-cached.topics.dl19-passage.openai-ada2.jsonl.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-cached.topics.dl19-passage.openai-ada2.jsonl.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-cached.topics.dl19-passage.openai-ada2.jsonl.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **OpenAI-ada2**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.479 |
+| **nDCG@10** | **OpenAI-ada2**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.703 |
+| **R@100** | **OpenAI-ada2**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.623 |
+| **R@1000** | **OpenAI-ada2**|
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html) | 0.863 |
+
+The above figures
are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached.md
new file mode 100644
index 000000000..a594a3833
--- /dev/null
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached.md
@@ -0,0 +1,117 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized flat indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
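The `-quantize.int8` flag above stores a scalar-quantized copy of each vector. As a rough, self-contained illustration of the idea (not Lucene's actual scheme, which uses per-segment statistics and confidence intervals): map each float component onto 256 levels spanning its observed range, then dequantize to see that the error is bounded by half a quantization step.

```python
import random

random.seed(0)
v = [random.gauss(0.0, 1.0) for _ in range(768)]  # stand-in embedding

lo, hi = min(v), max(v)
scale = (hi - lo) / 255.0

# Quantize each component onto 256 levels, shifted to fit a signed byte...
q = [round((x - lo) / scale) - 128 for x in v]
# ...then dequantize to measure how much information survives.
v_hat = [(qi + 128) * scale + lo for qi in q]

max_err = max(abs(x - y) for x, y in zip(v, v_hat))
print(max_err <= 0.5 * scale + 1e-9)  # True: error bounded by half a step
```

This is why the doc below warns that results on quantized indexes "may differ slightly": scoring happens against these approximations, trading a small amount of fidelity for a 4x smaller in-memory footprint.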
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -topics tools/topics-and-qrels/topics.dl20.bge-base-en-v1.5.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt \ + -hits 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **BGE-base-en-v1.5**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.4650 | +| **nDCG@10** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.6780 | +| **R@100** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7169 | +| **R@1000** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.8503 | + +The above 
figures are from running brute-force search with cached queries on non-quantized indexes.
+With cached queries on quantized indexes, results may differ slightly.
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.md
new file mode 100644
index 000000000..5a0c0890f
--- /dev/null
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.md
@@ -0,0 +1,117 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized flat indexes (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff.
[C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
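Unlike the cached-queries variant, here the query encoder runs inside Anserini via ONNX at search time. One property worth keeping in mind: BGE-style embeddings are L2-normalized, so the inner product used for scoring coincides with cosine similarity. A pure-Python sketch of that relationship (the vectors are made up; no model is involved):

```python
import math

def l2_normalize(v):
    """Scale a vector to unit length."""
    norm = math.sqrt(sum(x * x for x in v))
    return [x / norm for x in v]

def dot(a, b):
    return sum(x * y for x, y in zip(a, b))

# Made-up "embeddings"; any nonzero vectors work for the demonstration.
q = l2_normalize([0.3, -1.2, 0.5])
d = l2_normalize([0.1, -0.9, 0.7])

# Since both vectors have unit norm, the inner product is the cosine.
cosine = dot(q, d)
print(round(dot(q, q), 6))  # 1.0
```

The consequence for these regressions: any drift between the ONNX encoder and the offline encoder used to build the cached queries shows up directly as small score differences, which is why the ONNX and cached variants are tracked separately.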
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -topics tools/topics-and-qrels/topics.dl20.txt \ + -topicReader TsvInt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-onnx.topics.dl20.txt \ + -encoder BgeBaseEn15 -hits 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-onnx.topics.dl20.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-onnx.topics.dl20.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-onnx.topics.dl20.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-onnx.topics.dl20.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **BGE-base-en-v1.5**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.4650 | +| **nDCG@10** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.6780 | +| **R@100** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7169 | +| **R@1000** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.8503 | + +The above figures are from running brute-force search with cached queries on non-quantized indexes. 
+With ONNX query encoding on quantized indexes, results may differ slightly.
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat.cached.md
new file mode 100644
index 000000000..bdcd9d550
--- /dev/null
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat.cached.md
@@ -0,0 +1,115 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with flat indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.flat.cached
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.flat.cached
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.flat.cached \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ + -topics tools/topics-and-qrels/topics.dl20.bge-base-en-v1.5.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt \ + -hits 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **BGE-base-en-v1.5**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.4650 | +| **nDCG@10** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.6780 | +| **R@100** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7169 | +| **R@1000** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.8503 | + +Note that since we're running 
brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_.
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat.cached.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat.onnx.md
new file mode 100644
index 000000000..0e909d8c7
--- /dev/null
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat.onnx.md
@@ -0,0 +1,116 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with flat indexes (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.flat.onnx.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.flat.onnx
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.flat.onnx
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.flat.onnx \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
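+
As a quick sanity check that the submodule is checked out, the number of judged topics can be counted directly from the qrels file. This is a hedged sketch: it assumes the standard whitespace-separated TREC qrels layout (`topic iteration docid grade`) and that the `tools/` submodule described above is present.

```shell
# Count distinct judged topics in the DL20 passage qrels;
# the first whitespace-separated field of each qrels line is the topic id.
awk '{print $1}' tools/topics-and-qrels/qrels.dl20-passage.txt | sort -u | wc -l
```

If the submodule is in place, this should report the 54 judged topics mentioned above.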
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ + -topics tools/topics-and-qrels/topics.dl20.txt \ + -topicReader TsvInt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-onnx.topics.dl20.txt \ + -encoder BgeBaseEn15 -hits 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-onnx.topics.dl20.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-onnx.topics.dl20.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-onnx.topics.dl20.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-onnx.topics.dl20.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **BGE-base-en-v1.5**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.4650 | +| **nDCG@10** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.6780 | +| **R@100** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7169 | +| **R@1000** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.8503 | + +The above figures are from running brute-force search with cached queries on non-quantized indexes. 
+With ONNX query encoding on non-quantized indexes, results may differ slightly.
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat.onnx.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md
new file mode 100644
index 000000000..8136df376
--- /dev/null
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md
@@ -0,0 +1,123 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized HNSW indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/
+tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/
+```
+
+To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached \
+  --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+bin/run.sh io.anserini.index.IndexHnswDenseVectors \
+  -threads 16 \
+  -collection ParquetDenseVectorCollection \
+  -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \
+  -generator ParquetDenseVectorDocumentGenerator \
+  -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \
+  -M 16 -efC 100 -quantize.int8 \
+  >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet &
+```
+
+The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -topics tools/topics-and-qrels/topics.dl20.bge-base-en-v1.5.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | 
**BGE-base-en-v1.5**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.465 | +| **nDCG@10** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.678 | +| **R@100** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.717 | +| **R@1000** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.850 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md
new file mode 100644
index 000000000..8843cbd62
--- /dev/null
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md
@@ -0,0 +1,123 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized HNSW indexes (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. 
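+
One way to verify the download before unpacking is to check the tarball against the stated MD5. This is a hedged sketch: it assumes GNU coreutils `md5sum` (on macOS, `md5 -q <file>` prints just the digest) and the `collections/` path used in the download step above.

```shell
# Compare the downloaded tarball against the expected MD5 checksum;
# md5sum -c reads "checksum  filename" pairs from stdin and reports OK/FAILED.
echo "b235e19ec492c18a18057b30b8b23fd4  collections/msmarco-passage-bge-base-en-v1.5.parquet.tar" \
  | md5sum -c -
```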
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx \
+  --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+bin/run.sh io.anserini.index.IndexHnswDenseVectors \
+  -threads 16 \
+  -collection ParquetDenseVectorCollection \
+  -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \
+  -generator ParquetDenseVectorDocumentGenerator \
+  -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \
+  -M 16 -efC 100 -quantize.int8 \
+  >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet &
+```
+
+The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -topics tools/topics-and-qrels/topics.dl20.txt \ + -topicReader TsvInt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-onnx.topics.dl20.txt \ + -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-onnx.topics.dl20.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-onnx.topics.dl20.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-onnx.topics.dl20.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-onnx.topics.dl20.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **BGE-base-en-v1.5**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.465 | +| **nDCG@10** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.678 | +| **R@100** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.717 | +| **R@1000** | 
**BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.850 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.md
new file mode 100644
index 000000000..f42efe36c
--- /dev/null
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.md
@@ -0,0 +1,121 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with HNSW indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
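As noted above, these runs use cached queries: the `.jsonl.gz` topics files hold pre-computed query embeddings rather than query text. A minimal reader sketch follows; the `qid`/`vector` field names mirror what the `JsonIntVector` topic reader expects, and the embeddings below are toy stand-ins, not real BGE vectors:

```python
import gzip
import json
import os
import tempfile

def load_cached_queries(path):
    """Read a gzipped JSON-lines topics file into {qid: embedding}."""
    topics = {}
    with gzip.open(path, "rt", encoding="utf-8") as f:
        for line in f:
            obj = json.loads(line)
            topics[obj["qid"]] = obj["vector"]
    return topics

# Build a tiny synthetic stand-in for topics.dl20.bge-base-en-v1.5.jsonl.gz.
fd, path = tempfile.mkstemp(suffix=".jsonl.gz")
os.close(fd)
with gzip.open(path, "wt", encoding="utf-8") as f:
    for obj in ({"qid": 101, "vector": [0.1, -0.2, 0.3]},
                {"qid": 102, "vector": [0.0, 0.5, -0.1]}):
        f.write(json.dumps(obj) + "\n")

topics = load_cached_queries(path)
print(sorted(topics))  # [101, 102]
os.remove(path)
```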
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ + -topics tools/topics-and-qrels/topics.dl20.bge-base-en-v1.5.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **BGE-base-en-v1.5**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.465 | +| **nDCG@10** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.678 | +| **R@100** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.717 | +| **R@1000** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.850 | + +The above figures are from 
running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md new file mode 100644 index 000000000..e2c00aacc --- /dev/null +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md @@ -0,0 +1,121 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with HNSW indexes (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic than the "sparse" judgments provided by Microsoft; the NIST judgments are sometimes called "dense" judgments to emphasize this contrast.
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
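Retrieval over the HNSW index built above is an approximate best-first graph traversal; the `efSearch` parameter caps the candidate frontier, trading effectiveness for speed. The following is a toy single-layer version of that traversal on a hand-built proximity graph; it illustrates the idea only (real HNSW adds a layer hierarchy, and Lucene's implementation differs in detail):

```python
import heapq

def ef_search(graph, vectors, query, entry, ef):
    """Toy best-first search over a proximity graph, in the spirit of
    HNSW's base layer. `ef` caps the result frontier (cf. -efSearch)."""
    def dist(node):  # negated inner product: smaller is better
        return -sum(q * x for q, x in zip(query, vectors[node]))

    visited = {entry}
    candidates = [(dist(entry), entry)]   # min-heap: closest frontier first
    results = [(-dist(entry), entry)]     # max-heap of the ef best seen so far
    while candidates:
        d, node = heapq.heappop(candidates)
        if d > -results[0][0]:            # frontier worse than worst kept: stop
            break
        for nb in graph[node]:
            if nb in visited:
                continue
            visited.add(nb)
            dn = dist(nb)
            if len(results) < ef or dn < -results[0][0]:
                heapq.heappush(candidates, (dn, nb))
                heapq.heappush(results, (-dn, nb))
                if len(results) > ef:
                    heapq.heappop(results)  # evict current worst
    return sorted((-s, n) for s, n in results)  # (distance, node), best first

# Fully connected toy graph; the query prefers nodes 0 and 2.
vectors = {0: (1, 0), 1: (0, 1), 2: (1, 1), 3: (-1, 0)}
graph = {n: [m for m in vectors if m != n] for n in vectors}
print(ef_search(graph, vectors, query=(1, 0), entry=3, ef=3))
# [(-1, 0), (-1, 2), (0, 1)]
```

A larger `ef` explores more of the graph before stopping, which is why `-efSearch 1000` recovers scores close to brute-force search at depth 1000.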
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ + -topics tools/topics-and-qrels/topics.dl20.txt \ + -topicReader TsvInt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-onnx.topics.dl20.txt \ + -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-onnx.topics.dl20.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-onnx.topics.dl20.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-onnx.topics.dl20.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-onnx.topics.dl20.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **BGE-base-en-v1.5**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.465 | +| **nDCG@10** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.678 | +| **R@100** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.717 | +| **R@1000** | **BGE-base-en-v1.5**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.850 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. 
+With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.md new file mode 100644 index 000000000..82ee8403a --- /dev/null +++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.md @@ -0,0 +1,112 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). 
+ +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cohere-embed-english-v3.0.parquet.tar` is 16 GB and has MD5 checksum `40c5caf33476746e93ceeb75174b8d64`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached \ + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-cohere-embed-english-v3.0.parquet & +``` + +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \
+  -topics tools/topics-and-qrels/topics.dl20.cohere-embed-english-v3.0.jsonl.gz \
+  -topicReader JsonIntVector \
+  -output runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt \
+  -hits 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **cohere-embed-english-v3.0**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.5067 |
+| **nDCG@10** | **cohere-embed-english-v3.0**|
+| [DL20
(Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7245 | +| **R@100** | **cohere-embed-english-v3.0**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7279 | +| **R@1000** | **cohere-embed-english-v3.0**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.8682 | + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With cached queries on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached.md new file mode 100644 index 000000000..86b89a079 --- /dev/null +++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached.md @@ -0,0 +1,110 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cohere-embed-english-v3.0.parquet.tar` is 16 GB and has MD5 checksum `40c5caf33476746e93ceeb75174b8d64`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached \ + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0.parquet +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + >& logs/log.msmarco-passage-cohere-embed-english-v3.0.parquet & +``` + +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. 
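A flat index stores the raw vectors and answers queries by exhaustive comparison against all 8,841,823 of them, which is why these runs are exact (up to floating point) rather than approximate. In miniature, with inner-product scoring over toy vectors (not real Cohere embeddings):

```python
import heapq

def flat_search(index, query, k):
    """Brute-force top-k over a flat index: score every vector, keep the best k."""
    scored = ((sum(q * x for q, x in zip(query, vec)), docid)
              for docid, vec in index.items())
    return heapq.nlargest(k, scored)

index = {
    "doc1": [0.1, 0.9, 0.0],
    "doc2": [0.7, 0.1, 0.2],
    "doc3": [0.6, 0.6, 0.1],
}
query = [1.0, 0.0, 0.0]
print(flat_search(index, query, k=2))  # [(0.7, 'doc2'), (0.6, 'doc3')]
```

The real search scales this scan across `-threads 16`; there is no graph traversal and no recall loss from the index structure itself.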
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track.
+The original data can be found [here](https://trec.nist.gov/data/deep2020.html).
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ \
+  -topics tools/topics-and-qrels/topics.dl20.cohere-embed-english-v3.0.jsonl.gz \
+  -topicReader JsonIntVector \
+  -output runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt \
+  -hits 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt
+bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to
reproduce the following results: + +| **AP@1000** | **cohere-embed-english-v3.0**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.5067 | +| **nDCG@10** | **cohere-embed-english-v3.0**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7245 | +| **R@100** | **cohere-embed-english-v3.0**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7279 | +| **R@1000** | **cohere-embed-english-v3.0**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.8682 | + +Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached.template) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md new file mode 100644 index 000000000..845bea2bd --- /dev/null +++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md @@ -0,0 +1,116 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cohere-embed-english-v3.0.parquet.tar` is 16 GB and has MD5 checksum `40c5caf33476746e93ceeb75174b8d64`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached \ + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0.parquet +``` + +## Indexing + +Sample indexing command, building quantized HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + -M 16 -efC 100 -quantize.int8 \ + >& logs/log.msmarco-passage-cohere-embed-english-v3.0.parquet & +``` + +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. 
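The `-quantize.int8` flag above compresses each stored dimension from a 32-bit float to a signed byte, shrinking the index at a small cost in fidelity. A toy min-max scalar quantizer conveys the idea; note that Lucene's actual scheme (confidence-interval-based scalar quantization) differs in detail:

```python
def quantize_int8(vec):
    """Map a float vector onto signed bytes in [-128, 127] (toy min-max scheme)."""
    lo, hi = min(vec), max(vec)
    scale = (hi - lo) / 255.0 or 1.0      # guard against constant vectors
    return [round((x - lo) / scale) - 128 for x in vec], lo, scale

def dequantize_int8(q, lo, scale):
    """Approximately invert quantize_int8."""
    return [(v + 128) * scale + lo for v in q]

vec = [0.12, -0.48, 0.90, 0.03]
q, lo, scale = quantize_int8(vec)
approx = dequantize_int8(q, lo, scale)

assert all(-128 <= v <= 127 for v in q)
# Reconstruction error is bounded by one quantization step (~0.0054 here).
assert max(abs(a - b) for a, b in zip(vec, approx)) <= scale
```

The per-dimension rounding error is why quantized runs can drift slightly from the non-quantized flat baselines reported below.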
+ +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). + +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + -topics tools/topics-and-qrels/topics.dl20.cohere-embed-english-v3.0.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 
tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cohere-embed-english-v3.0**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.507 | +| **nDCG@10** | **cohere-embed-english-v3.0**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.725 | +| **R@100** | **cohere-embed-english-v3.0**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.728 | +| **R@1000** | **cohere-embed-english-v3.0**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.868 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). 
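To make the `-l 2` behavior concrete, here is a small illustrative sketch (hypothetical grades; this is not `trec_eval` code): raising the threshold excludes grade-1 qrels from the binary relevant set used by metrics such as AP and recall, while nDCG still sees all grades.

```python
# Sketch of the trec_eval -l (minimum relevance) threshold.
# Grades below are hypothetical, for illustration only.
qrels = {"d1": 3, "d2": 2, "d3": 1, "d4": 0}  # docid -> relevance grade

def relevant_docs(qrels, min_grade):
    """Docs counted as binary-relevant when judging at threshold min_grade."""
    return {doc for doc, grade in qrels.items() if grade >= min_grade}

# Default threshold (grade >= 1): d3 counts as relevant.
print(sorted(relevant_docs(qrels, 1)))  # ['d1', 'd2', 'd3']
# With -l 2 (grade >= 2): d3 no longer counts as relevant.
print(sorted(relevant_docs(qrels, 2)))  # ['d1', 'd2']
```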
+ +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md new file mode 100644 index 000000000..26ebd5bd9 --- /dev/null +++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md @@ -0,0 +1,116 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cohere-embed-english-v3.0.parquet.tar` is 16 GB and has MD5 checksum `40c5caf33476746e93ceeb75174b8d64`. 
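To double-check the download, recompute the checksum yourself, e.g., with `md5sum` on the command line; the chunked computation (which avoids loading the multi-GB tarball into memory) can be sketched in Python as:

```python
import hashlib

def file_md5(path, chunk_size=1 << 20):
    """Compute the MD5 digest of a (possibly very large) file in 1 MB chunks."""
    h = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Usage against the tarball downloaded above:
# file_md5("collections/msmarco-passage-cohere-embed-english-v3.0.parquet.tar")
# should match the checksum listed on this page.
```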
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached \ + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-cohere-embed-english-v3.0.parquet & +``` + +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
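The qrels files referenced above follow the standard TREC format, one whitespace-delimited `qid iteration docid grade` record per line. A minimal parser sketch (the example records are hypothetical):

```python
from collections import defaultdict

def load_qrels(lines):
    """Parse TREC-format qrels ('qid iter docid grade') into {qid: {docid: grade}}."""
    qrels = defaultdict(dict)
    for line in lines:
        qid, _iteration, docid, grade = line.split()
        qrels[qid][docid] = int(grade)
    return dict(qrels)

# Hypothetical records in the standard format:
sample = ["23849 0 1020327 2", "23849 0 7617404 0"]
print(load_qrels(sample))  # {'23849': {'1020327': 2, '7617404': 0}}
```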
+ +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + -topics tools/topics-and-qrels/topics.dl20.cohere-embed-english-v3.0.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cohere-embed-english-v3.0**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.507 | +| **nDCG@10** | **cohere-embed-english-v3.0**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 
0.725 | +| **R@100** | **cohere-embed-english-v3.0**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.728 | +| **R@1000** | **cohere-embed-english-v3.0**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.868 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat-int8.cached.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat-int8.cached.md new file mode 100644 index 000000000..06120623e --- /dev/null +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat-int8.cached.md @@ -0,0 +1,117 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat-int8.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.flat-int8.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.flat-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.flat-int8.cached \ + --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-cos-dpr-distil.parquet & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ + -topics tools/topics-and-qrels/topics.dl20.cos-dpr-distil.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt \ + -hits 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cosDPR-distil**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.4876 | +| **nDCG@10** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7025 | +| **R@100** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7204 | +| **R@1000** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 
0.8533 | + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With cached queries on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx.md new file mode 100644 index 000000000..21f486b6a --- /dev/null +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx.md @@ -0,0 +1,119 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with quantized flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. 
[Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx \ + --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-cos-dpr-distil.parquet & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. 
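The `-quantize.int8` flag stores vectors as 8-bit integers rather than 32-bit floats, shrinking the index at a small cost in precision (hence the slightly different scores noted below). Lucene's actual scalar quantization scheme is more sophisticated, but the basic idea can be sketched as:

```python
def quantize_int8(vector):
    """Illustrative symmetric scalar quantization of a float vector to int8.
    (Lucene's real scheme differs; this only shows the general idea.)"""
    scale = (max(abs(v) for v in vector) or 1.0) / 127
    return [round(v / scale) for v in vector], scale

def dequantize(qvec, scale):
    """Approximate reconstruction of the original floats."""
    return [q * scale for q in qvec]

q, s = quantize_int8([0.4, -1.0, 0.25])
print(q)  # [51, -127, 32] -- small rounding error vs. the original floats
```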
+ +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ + -topics tools/topics-and-qrels/topics.dl20.txt \ + -topicReader TsvInt \ + -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-onnx.topics.dl20.txt \ + -encoder CosDprDistil -hits 1000 -threads 16 & +``` + +Note that we are performing query inference "on-the-fly" with ONNX in these experiments. + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-onnx.topics.dl20.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-onnx.topics.dl20.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-onnx.topics.dl20.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-onnx.topics.dl20.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cosDPR-distil**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 
(Passage)](https://trec.nist.gov/data/deep2020.html) | 0.4876 | +| **nDCG@10** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7025 | +| **R@100** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7204 | +| **R@1000** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.8533 | + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With ONNX query encoding on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx.template) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat.cached.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat.cached.md new file mode 100644 index 000000000..4ed64751e --- /dev/null +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat.cached.md @@ -0,0 +1,115 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.flat.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.flat.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.flat.cached \ + --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ + >& logs/log.msmarco-passage-cos-dpr-distil.parquet & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ + -topics tools/topics-and-qrels/topics.dl20.cos-dpr-distil.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-cached.topics.dl20.cos-dpr-distil.jsonl.txt \ + -hits 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-cached.topics.dl20.cos-dpr-distil.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cosDPR-distil**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.4876 | +| **nDCG@10** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7025 | +| **R@100** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7204 | +| **R@1000** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.8533 | + +Note that since 
we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat.onnx.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat.onnx.md new file mode 100644 index 000000000..e2ec83ee2 --- /dev/null +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat.onnx.md @@ -0,0 +1,118 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. 
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.flat.onnx.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.flat.onnx
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.flat.onnx
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
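+
As a rough illustration of how the manual steps map onto the script's flags, the stages can be assembled into a single command line (a sketch only — `regression_cmd` is a hypothetical helper, not part of Anserini, and the script's internals differ):

```python
import subprocess

# Each regression stage corresponds to one flag on run_regression.py;
# add "--download" on first use to also fetch the corpus.
def regression_cmd(name, stages=("--index", "--verify", "--search")):
    return ["python", "src/main/python/run_regression.py",
            *stages, "--regression", name]

cmd = regression_cmd("dl20-passage.cos-dpr-distil.parquet.flat.onnx")
print(" ".join(cmd))
# subprocess.run(cmd, check=True)  # uncomment to launch the full regression
```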
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.flat.onnx \ + --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ + >& logs/log.msmarco-passage-cos-dpr-distil.parquet & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
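+
Both the qrels referenced above and the run files produced by the retrieval step are plain whitespace-delimited text in standard TREC format. A sketch parsing one line of each (the sample values are made up for illustration, not drawn from the actual data):

```python
# TREC qrels line: <qid> <iteration> <docid> <grade>
# TREC run line:   <qid> Q0 <docid> <rank> <score> <run tag>
qrels_line = "23849 0 7067032 2"
run_line = "23849 Q0 7067032 1 86.25 Anserini"

qid, _, docid, grade = qrels_line.split()
r_qid, _, r_docid, rank, score, tag = run_line.split()

# With `-l 2`, trec_eval counts only grades >= 2 as relevant for AP and recall.
is_relevant = int(grade) >= 2
print(qid, docid, is_relevant)  # → 23849 7067032 True
```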
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ + -topics tools/topics-and-qrels/topics.dl20.txt \ + -topicReader TsvInt \ + -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-onnx.topics.dl20.txt \ + -encoder CosDprDistil -hits 1000 -threads 16 & +``` + +Note that we are performing query inference "on-the-fly" with ONNX in these experiments. + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-onnx.topics.dl20.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-onnx.topics.dl20.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-onnx.topics.dl20.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-onnx.topics.dl20.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cosDPR-distil**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.4876 | +| **nDCG@10** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7025 | +| **R@100** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7204 | +| **R@1000** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.8533 | + +The above figures are from running brute-force 
search with cached queries on non-quantized indexes.
+With ONNX query encoding on non-quantized indexes, results may differ slightly.
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat.onnx.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md
new file mode 100644
index 000000000..0e0f1fa47
--- /dev/null
+++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md
@@ -0,0 +1,123 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: cosDPR-distil with quantized HNSW indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/
+tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/
+```
+
+To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached \
+  --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+bin/run.sh io.anserini.index.IndexHnswDenseVectors \
+  -threads 16 \
+  -collection ParquetDenseVectorCollection \
+  -input /path/to/msmarco-passage-cos-dpr-distil.parquet \
+  -generator ParquetDenseVectorDocumentGenerator \
+  -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \
+  -M 16 -efC 100 -quantize.int8 \
+  >& logs/log.msmarco-passage-cos-dpr-distil.parquet &
+```
+
+The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ + -topics tools/topics-and-qrels/topics.dl20.cos-dpr-distil.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| 
**AP@1000** | **cosDPR-distil**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.488 | +| **nDCG@10** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.702 | +| **R@100** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.720 | +| **R@1000** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.853 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. 
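+
As an aside on the tolerance noted above: since HNSW indexing and quantization are non-deterministic, verification compares observed scores to the reference values within a tolerance rather than exactly. A simplified sketch of such a check (the tolerance here is illustrative; the harness's actual thresholds live in the YAML configuration):

```python
def score_ok(observed, expected, tol=0.01):
    """Allow small dips below the flat-index reference; higher scores pass too."""
    return observed >= expected - tol

# nDCG@10 reference from the table above is 0.702:
assert score_ok(0.695, 0.702)      # within tolerance
assert not score_ok(0.680, 0.702)  # drifted too far
```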
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md
new file mode 100644
index 000000000..af3f79b4f
--- /dev/null
+++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md
@@ -0,0 +1,125 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: cosDPR-distil with quantized HNSW indexes (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. 
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx \
+  --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+bin/run.sh io.anserini.index.IndexHnswDenseVectors \
+  -threads 16 \
+  -collection ParquetDenseVectorCollection \
+  -input /path/to/msmarco-passage-cos-dpr-distil.parquet \
+  -generator ParquetDenseVectorDocumentGenerator \
+  -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \
+  -M 16 -efC 100 -quantize.int8 \
+  >& logs/log.msmarco-passage-cos-dpr-distil.parquet &
+```
+
+The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track.
+The original data can be found [here](https://trec.nist.gov/data/deep2020.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ + -topics tools/topics-and-qrels/topics.dl20.txt \ + -topicReader TsvInt \ + -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-onnx.topics.dl20.txt \ + -encoder CosDprDistil -hits 1000 -efSearch 1000 -threads 16 & +``` + +Note that we are performing query inference "on-the-fly" with ONNX in these experiments. + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-onnx.topics.dl20.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-onnx.topics.dl20.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-onnx.topics.dl20.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-onnx.topics.dl20.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cosDPR-distil**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.488 | +| **nDCG@10** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.702 | +| **R@100** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.720 | +| **R@1000** | **cosDPR-distil**| 
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.853 |
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.cached.md
new file mode 100644
index 000000000..1b23bc047
--- /dev/null
+++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.cached.md
@@ -0,0 +1,121 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: cosDPR-distil with HNSW indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.hnsw.cached
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.hnsw.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.hnsw.cached \ + --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-cos-dpr-distil.parquet & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. 
+This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ + -topics tools/topics-and-qrels/topics.dl20.cos-dpr-distil.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cosDPR-distil**| 
+|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.488 | +| **nDCG@10** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.702 | +| **R@100** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.720 | +| **R@1000** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.853 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. 
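+
To see why nDCG needs the full graded qrels while AP uses `-l 2`, recall that nDCG discounts graded gains by rank, so even grade-1 passages contribute. A simplified sketch of nDCG@k with linear gains (illustrative only — use `trec_eval` for actual scoring):

```python
import math

def dcg(grades):
    # Gain is the relevance grade; discount is log2(rank + 1), ranks from 1.
    return sum(g / math.log2(rank + 1) for rank, g in enumerate(grades, start=1))

def ndcg_at_k(ranked_grades, judged_grades, k=10):
    ideal = sorted(judged_grades, reverse=True)[:k]
    return dcg(ranked_grades[:k]) / dcg(ideal)

# A perfect ordering of the judged passages scores exactly 1.0:
print(ndcg_at_k([3, 2, 1, 0], [0, 1, 2, 3]))  # → 1.0
```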
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.md
new file mode 100644
index 000000000..7f60b9814
--- /dev/null
+++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.md
@@ -0,0 +1,123 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: cosDPR-distil with HNSW indexes (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.hnsw.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.hnsw.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.parquet.hnsw.onnx \ + --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-cos-dpr-distil.parquet & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ + -topics tools/topics-and-qrels/topics.dl20.txt \ + -topicReader TsvInt \ + -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-onnx.topics.dl20.txt \ + -encoder CosDprDistil -hits 1000 -efSearch 1000 -threads 16 & +``` + +Note that we are performing query inference "on-the-fly" with ONNX in these experiments. + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-onnx.topics.dl20.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-onnx.topics.dl20.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-onnx.topics.dl20.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-onnx.topics.dl20.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cosDPR-distil**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.488 | +| **nDCG@10** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.702 | +| **R@100** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.720 | +| **R@1000** | **cosDPR-distil**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.853 | + +The above figures are from running 
brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.flat-int8.cached.md b/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.flat-int8.cached.md
new file mode 100644
index 000000000..88613cab9
--- /dev/null
+++ b/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.flat-int8.cached.md
@@ -0,0 +1,117 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: OpenAI-ada2 embeddings with quantized flat indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian.
[Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat-int8.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.flat-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.parquet.flat-int8.cached
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.openai-ada2.parquet.flat-int8.cached
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-openai-ada2.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-openai-ada2.parquet.tar` is 75 GB and has MD5 checksum `fa3637e9c4150b157270e19ef3a4f779`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.parquet.flat-int8.cached \ + --corpus-path collections/msmarco-passage-openai-ada2.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-openai-ada2.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-openai-ada2.parquet & +``` + +The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ \ + -topics tools/topics-and-qrels/topics.dl20.openai-ada2.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-int8-cached.topics.dl20.openai-ada2.jsonl.txt \ + -hits 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-int8-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-int8-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-int8-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-int8-cached.topics.dl20.openai-ada2.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **OpenAI-ada2**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.4771 | +| **nDCG@10** | **OpenAI-ada2**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.6759 | +| **R@100** | **OpenAI-ada2**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7237 | +| **R@1000** | **OpenAI-ada2**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.8705 | + +The above figures are from running brute-force 
search with cached queries on non-quantized indexes.
+With cached queries on quantized indexes, results may differ slightly.
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.flat-int8.cached.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.flat.cached.md b/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.flat.cached.md
new file mode 100644
index 000000000..a4dd6b336
--- /dev/null
+++ b/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.flat.cached.md
@@ -0,0 +1,115 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: OpenAI-ada2 embeddings with flat indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.openai-ada2.parquet.flat.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.flat.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.parquet.flat.cached
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.openai-ada2.parquet.flat.cached
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-openai-ada2.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-openai-ada2.parquet.tar` is 75 GB and has MD5 checksum `fa3637e9c4150b157270e19ef3a4f779`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.parquet.flat.cached \ + --corpus-path collections/msmarco-passage-openai-ada2.parquet +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-openai-ada2.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ \ + >& logs/log.msmarco-passage-openai-ada2.parquet & +``` + +The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ \ + -topics tools/topics-and-qrels/topics.dl20.openai-ada2.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-cached.topics.dl20.openai-ada2.jsonl.txt \ + -hits 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-cached.topics.dl20.openai-ada2.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **OpenAI-ada2**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.4771 | +| **nDCG@10** | **OpenAI-ada2**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.6759 | +| **R@100** | **OpenAI-ada2**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.7237 | +| **R@1000** | **OpenAI-ada2**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.8705 | + +Note that since we're running brute-force search with cached queries on 
non-quantized indexes, the results should be reproducible _exactly_.
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.flat.cached.template) and run `bin/build.sh` to rebuild the documentation.
diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw-int8.cached.md
new file mode 100644
index 000000000..e72b5f82d
--- /dev/null
+++ b/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw-int8.cached.md
@@ -0,0 +1,123 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: OpenAI-ada2 embeddings with quantized HNSW indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.parquet.hnsw-int8.cached
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.openai-ada2.parquet.hnsw-int8.cached
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar -P collections/
+tar xvf collections/msmarco-passage-openai-ada2.parquet.tar -C collections/
+```
+
+To confirm, `msmarco-passage-openai-ada2.parquet.tar` is 75 GB and has MD5 checksum `fa3637e9c4150b157270e19ef3a4f779`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.parquet.hnsw-int8.cached \
+  --corpus-path collections/msmarco-passage-openai-ada2.parquet
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+bin/run.sh io.anserini.index.IndexHnswDenseVectors \
+  -threads 16 \
+  -collection ParquetDenseVectorCollection \
+  -input /path/to/msmarco-passage-openai-ada2.parquet \
+  -generator ParquetDenseVectorDocumentGenerator \
+  -index indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ \
+  -M 16 -efC 100 -quantize.int8 \
+  >& logs/log.msmarco-passage-openai-ada2.parquet &
+```
+
+The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ \ + -topics tools/topics-and-qrels/topics.dl20.openai-ada2.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-int8-cached.topics.dl20.openai-ada2.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-int8-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-int8-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-int8-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-int8-cached.topics.dl20.openai-ada2.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **OpenAI-ada2**| 
+|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.477 | +| **nDCG@10** | **OpenAI-ada2**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.676 | +| **R@100** | **OpenAI-ada2**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.724 | +| **R@1000** | **OpenAI-ada2**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.871 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. 
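As an aside, the `-l 2` convention noted above can be made concrete with a toy sketch (the qrels below are made up for illustration): graded metrics like nDCG use all relevance grades as-is, while binary metrics such as AP and recall count only grades of 2 or higher as relevant:

```python
# Hypothetical qrels for a single topic: docid -> relevance grade (0-3).
qrels = {"d1": 3, "d2": 1, "d3": 0, "d4": 2, "d5": 1}

# For graded metrics like nDCG, all grades are kept as-is.
graded = qrels

# For binary metrics like AP and R@k with `-l 2`, only grades >= 2 count as relevant.
binary_relevant = {doc for doc, grade in qrels.items() if grade >= 2}

print(sorted(binary_relevant))  # ['d1', 'd4'] -- d2 and d5 (grade 1) are dropped
```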
diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw.cached.md
new file mode 100644
index 000000000..4b319dce3
--- /dev/null
+++ b/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw.cached.md
@@ -0,0 +1,121 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: OpenAI-ada2 embeddings with HNSW indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.openai-ada2.parquet.hnsw.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.parquet.hnsw.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.openai-ada2.parquet.hnsw.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-openai-ada2.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-openai-ada2.parquet.tar` is 75 GB and has MD5 checksum `fa3637e9c4150b157270e19ef3a4f779`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.parquet.hnsw.cached \ + --corpus-path collections/msmarco-passage-openai-ada2.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-openai-ada2.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-openai-ada2.parquet & +``` + +The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
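For reference, qrels files follow the standard four-column TREC format (`qid iteration docid grade`), and the `-l 2` option passed to `trec_eval` in the evaluation commands for this task treats grades below 2 as not relevant for binary metrics. A minimal sketch of that convention (the qid and docids below are made up for illustration):

```python
def parse_qrels(lines):
    """Parse TREC qrels lines of the form 'qid iteration docid grade'."""
    qrels = {}
    for line in lines:
        qid, _, docid, grade = line.split()
        qrels.setdefault(qid, {})[docid] = int(grade)
    return qrels

def relevant_docs(judgments, min_grade=2):
    """Binary relevance under trec_eval's -l 2 convention: grade >= 2 is relevant."""
    return {docid for docid, grade in judgments.items() if grade >= min_grade}

sample = ["23849 0 1020327 2", "23849 0 1034183 1", "23849 0 1120730 3"]
qrels = parse_qrels(sample)
# Under -l 2, the grade-1 passage does not count as relevant.
```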
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ \ + -topics tools/topics-and-qrels/topics.dl20.openai-ada2.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-cached.topics.dl20.openai-ada2.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-cached.topics.dl20.openai-ada2.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **OpenAI-ada2**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.477 | +| **nDCG@10** | **OpenAI-ada2**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.676 | +| **R@100** | **OpenAI-ada2**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.724 | +| **R@1000** | **OpenAI-ada2**| +| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.871 | + +The above figures are from running brute-force search with cached 
queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached.md new file mode 100644 index 000000000..f3701a02a --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached.md @@ -0,0 +1,105 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. 
[C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. 
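The `-quantize.int8` flag used in the indexing command above enables scalar quantization of the stored vectors, mapping each float32 component onto 256 discrete levels. A rough illustration of the idea (a toy min-max scheme; Lucene's actual implementation estimates the value range differently and is more sophisticated):

```python
def quantize_int8(vector):
    """Toy min-max scalar quantization: map each float onto 256 signed int8 levels."""
    lo, hi = min(vector), max(vector)
    scale = (hi - lo) / 255 if hi > lo else 1.0
    codes = [round((x - lo) / scale) - 128 for x in vector]
    return codes, lo, scale

def dequantize_int8(codes, lo, scale):
    """Map int8 codes back to approximate float values."""
    return [(c + 128) * scale + lo for c in codes]

codes, lo, scale = quantize_int8([0.12, -0.05, 0.33, 0.0])
approx = dequantize_int8(codes, lo, scale)
# Each reconstructed value is within one quantization step of the original.
```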
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt \ + -hits 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000**                                                                                                  | **BGE-base-en-v1.5**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.3641    | +| **RR@10**                                                                                                    | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.3583    |
+| **R@100** | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9006 | +| **R@1000** | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9811 | + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With cached queries on quantized indexes, results may differ slightly. diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.md new file mode 100644 index 000000000..d27cb3b4e --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.md @@ -0,0 +1,105 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. 
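The RR@10 figures reported for the dev set correspond to `trec_eval`'s `recip_rank` metric with the `-M 10` cutoff: for each query, the reciprocal of the rank of the first relevant hit within the top 10 (zero if none), averaged over queries. A minimal sketch with made-up docids:

```python
def rr_at_k(ranked_docids, relevant, k=10):
    """Reciprocal rank of the first relevant doc within the top k, else 0.0."""
    for rank, docid in enumerate(ranked_docids[:k], start=1):
        if docid in relevant:
            return 1.0 / rank
    return 0.0

# First relevant hit at rank 3, so RR@10 = 1/3.
score = rr_at_k(["d7", "d2", "d5", "d9"], relevant={"d5"})
```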
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ + -topicReader TsvInt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt \ + -encoder BgeBaseEn15 -hits 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000**                                                                                                  | **BGE-base-en-v1.5**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.3641    | +| **RR@10**                                                                                                    | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.3583    | +| **R@100**                                                                                                    | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.9006    | +| 
**R@1000** | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9811 | + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With ONNX query encoding on quantized indexes, results may differ slightly. diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx.md new file mode 100644 index 000000000..8df34e844 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx.md @@ -0,0 +1,104 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. 
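A flat index supports exact brute-force search: every stored vector is scored against the query and the top hits are kept, with no approximation. In miniature (toy two-dimensional vectors scored by dot product; the real index stores 768-dimensional BGE embeddings):

```python
def dot(u, v):
    """Inner product of two equal-length vectors."""
    return sum(a * b for a, b in zip(u, v))

def flat_search(query, corpus, k=2):
    """Exact top-k search: score every (docid, vector) pair, no approximation."""
    scored = [(dot(query, vec), docid) for docid, vec in corpus.items()]
    scored.sort(reverse=True)
    return [docid for _, docid in scored[:k]]

corpus = {"d1": [0.1, 0.9], "d2": [0.8, 0.2], "d3": [0.6, 0.6]}
hits = flat_search([1.0, 0.0], corpus, k=2)
```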
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ + -topicReader TsvInt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-onnx.topics.msmarco-passage.dev-subset.txt \ + -encoder BgeBaseEn15 -hits 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-flat-onnx.topics.msmarco-passage.dev-subset.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000**                                                                                                  | **BGE-base-en-v1.5**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.3641    | +| **RR@10**                                                                                                    | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.3583    | +| **R@100**                                                                                                    | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.9006    | +| **R@1000**                                                                                                   | 
**BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9811 | + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With ONNX query encoding on non-quantized indexes, results may differ slightly. diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md new file mode 100644 index 000000000..ab86e3905 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md @@ -0,0 +1,115 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building quantized HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -M 16 -efC 100 -quantize.int8 \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. +Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. +See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. + +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **BGE-base-en-v1.5**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: 
Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.364 | +| **RR@10** | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.358 | +| **R@100** | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.901 | +| **R@1000** | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.981 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](../../docs/reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. 
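The R@100 and R@1000 figures above are recall at a cutoff: the fraction of each topic's relevant passages that appear within the top k retrieved. A minimal sketch with made-up docids:

```python
def recall_at_k(ranked_docids, relevant, k):
    """Fraction of the relevant set retrieved within the top k."""
    if not relevant:
        return 0.0
    return len(set(ranked_docids[:k]) & relevant) / len(relevant)

# 2 of the 3 relevant docs appear in the top 4, so R@4 = 2/3.
r = recall_at_k(["d1", "d9", "d3", "d4", "d7"], relevant={"d1", "d3", "d7"}, k=4)
```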
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md new file mode 100644 index 000000000..aa94cd91e --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md @@ -0,0 +1,115 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized HNSW indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building quantized HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -M 16 -efC 100 -quantize.int8 \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. +Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. +See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
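If `tools/` is empty (e.g., after a fresh clone), the submodule needs to be initialized first; a guarded sketch, assuming a git checkout of Anserini:

```bash
# Pull in the topics-and-qrels submodule; no-op if this isn't run from
# inside a git checkout.
if [ -d .git ]; then
  git submodule update --init --recursive
fi
```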
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. + +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ + -topicReader TsvInt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt \ + -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **BGE-base-en-v1.5**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.364 | +| **RR@10** | **BGE-base-en-v1.5**| +| [MS MARCO Passage: 
Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.358 | +| **R@100** | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.901 | +| **R@1000** | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.981 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](../../docs/reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.md new file mode 100644 index 000000000..8c9ea1335 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.md @@ -0,0 +1,113 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. 
[C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. 
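As a quick sanity check, the number of dev queries can be counted directly; this assumes the submodule is initialized and that the topics file is tab-separated with one query per line:

```bash
# Count dev-subset queries; should report 6980 if the file is present.
topics=tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt
if [ -f "$topics" ]; then
  wc -l < "$topics"
fi
```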
+ +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **BGE-base-en-v1.5**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.364 | +| **RR@10** | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.358 | +| **R@100** | 
**BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.901 | +| **R@1000** | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.981 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](../../docs/reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md new file mode 100644 index 000000000..48f938391 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md @@ -0,0 +1,113 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with HNSW indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. 
+ +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-bge-base-en-v1.5.parquet.tar` is 39 GB and has MD5 checksum `b235e19ec492c18a18057b30b8b23fd4`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx \ + --corpus-path collections/msmarco-passage-bge-base-en-v1.5.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-bge-base-en-v1.5.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-bge-base-en-v1.5.parquet & +``` + +The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. 
+ +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ + -topicReader TsvInt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-onnx.topics.msmarco-passage.dev-subset.txt \ + -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.parquet.bge-hnsw-onnx.topics.msmarco-passage.dev-subset.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **BGE-base-en-v1.5**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.364 | +| **RR@10** | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.358 | +| **R@100** | **BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.901 | +| **R@1000** | 
**BGE-base-en-v1.5**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.981 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](../../docs/reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.md new file mode 100644 index 000000000..ffa149000 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.md @@ -0,0 +1,103 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cohere-embed-english-v3.0.parquet.tar` is 16 GB and has MD5 checksum `40c5caf33476746e93ceeb75174b8d64`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached \ + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-cohere-embed-english-v3.0.parquet & +``` + +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. 
+ +After indexing has completed, you should be able to perform retrieval as follows using flat indexes: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \ + -hits 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-flat-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cohere-embed-english-v3.0**|
+|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3716 | +| **RR@10** | **cohere-embed-english-v3.0**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3658 | +| **R@100** | **cohere-embed-english-v3.0**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.8935 | +| **R@1000** | **cohere-embed-english-v3.0**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9786 | + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With cached queries on quantized indexes, results may differ slightly. diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md new file mode 100644 index 000000000..a55eec0a1 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md @@ -0,0 +1,113 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cohere-embed-english-v3.0.parquet.tar` is 16 GB and has MD5 checksum `40c5caf33476746e93ceeb75174b8d64`. 
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached \
+  --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0.parquet
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+bin/run.sh io.anserini.index.IndexHnswDenseVectors \
+  -threads 16 \
+  -collection ParquetDenseVectorCollection \
+  -input /path/to/msmarco-passage-cohere-embed-english-v3.0.parquet \
+  -generator ParquetDenseVectorDocumentGenerator \
+  -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \
+  -M 16 -efC 100 -quantize.int8 \
+  >& logs/log.msmarco-passage-cohere-embed-english-v3.0.parquet &
+```
+
+The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. + +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| 
**AP@1000** | **cohere-embed-english-v3.0**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.372 | +| **RR@10** | **cohere-embed-english-v3.0**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.366 | +| **R@100** | **cohere-embed-english-v3.0**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.893 | +| **R@1000** | **cohere-embed-english-v3.0**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.979 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](../../docs/reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md new file mode 100644 index 000000000..7ba065e89 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md @@ -0,0 +1,111 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cohere-embed-english-v3.0.parquet.tar` is 16 GB and has MD5 checksum `40c5caf33476746e93ceeb75174b8d64`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached \ + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-cohere-embed-english-v3.0.parquet & +``` + +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. 
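The `-M 16 -efC 100` settings above control the HNSW graph: `M` bounds each node's neighbor-list size and `efC` (efConstruction) the beam width used while building the graph. To make the search side concrete, here is a toy greedy best-first walk over a fixed neighbor graph — the core move inside HNSW's search layer. This is an illustrative sketch only, not Lucene's implementation: real HNSW adds a layer hierarchy and keeps an `efSearch`-sized beam rather than a single candidate.

```python
# Toy greedy nearest-neighbor walk over a fixed neighbor graph (illustrative;
# real HNSW uses a layer hierarchy and an efSearch-sized candidate beam).
def greedy_search(graph, vectors, query, start):
    def dist(node):
        # Squared Euclidean distance from a node's vector to the query.
        return sum((x - y) ** 2 for x, y in zip(vectors[node], query))

    current = start
    while True:
        # Move to the closest neighbor; stop when no neighbor improves.
        best = min(graph[current], key=dist, default=current)
        if dist(best) >= dist(current):
            return current
        current = best

# Hypothetical 2-d vectors and adjacency lists, just for illustration.
vectors = {"a": [0.0, 0.0], "b": [0.5, 0.5], "c": [1.0, 1.0], "d": [0.9, 1.1]}
graph = {"a": ["b"], "b": ["a", "c"], "c": ["b", "d"], "d": ["c"]}
assert greedy_search(graph, vectors, query=[1.0, 1.0], start="a") == "c"
```

Because the walk is greedy over a sparse graph, it can stop at a local optimum — which is why HNSW retrieval is approximate and its results can differ from brute-force search over a flat index.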
+ +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. + +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt 
runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.parquet.cohere-embed-english-v3.0-hnsw-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cohere-embed-english-v3.0**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.372 | +| **RR@10** | **cohere-embed-english-v3.0**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.366 | +| **R@100** | **cohere-embed-english-v3.0**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.893 | +| **R@1000** | **cohere-embed-english-v3.0**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.979 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](../../docs/reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached.md new file mode 100644 index 000000000..afa7f33a0 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached.md @@ -0,0 +1,105 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: cosDPR-distil with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached \ + --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-cos-dpr-distil.parquet & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. 
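Before moving on to retrieval, it may help to make concrete what the `-quantize.int8` flag used at indexing time does: each float32 vector component is linearly mapped onto the signed-byte range. The sketch below is a simplified illustration of that idea only — Lucene's actual scheme picks the quantization interval from per-segment quantiles rather than fixed bounds.

```python
# Illustrative scalar quantization of a float vector to int8 (simplified;
# Lucene derives the [lo, hi] interval from per-segment quantiles).
def quantize_int8(vec, lo, hi):
    """Map floats in [lo, hi] linearly onto the int8 range [-128, 127]."""
    scale = 255.0 / (hi - lo)
    return [max(-128, min(127, round((x - lo) * scale) - 128)) for x in vec]

def dequantize_int8(qvec, lo, hi):
    """Invert the mapping, recovering each value up to one quantization step."""
    scale = (hi - lo) / 255.0
    return [(q + 128) * scale + lo for q in qvec]

vec = [0.12, -0.05, 0.33, -0.27]          # toy 4-d vector, not a real embedding
q = quantize_int8(vec, lo=-0.5, hi=0.5)
approx = dequantize_int8(q, lo=-0.5, hi=0.5)

# Reconstruction error is bounded by one quantization step of the interval.
step = 1.0 / 255.0
assert all(abs(a - b) <= step for a, b in zip(vec, approx))
```

Scores computed on the quantized bytes are therefore close to, but not identical with, scores on the original float32 vectors — which is why the quantized figures below can differ slightly from the non-quantized reference.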
+
+After indexing has completed, you should be able to perform retrieval as follows using flat indexes:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \
+  -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.gz \
+  -topicReader JsonIntVector \
+  -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt \
+  -hits 1000 -threads 16 &
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt
+bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt
+bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt
+bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+| **AP@1000** | **cosDPR-distil**|
+|:-------------------------------------------------------------------------------------------------------------|-----------|
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3942 |
+| **RR@10** | **cosDPR-distil**|
+| [MS MARCO Passage: 
Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3896 | +| **R@100** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9075 | +| **R@1000** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9796 | + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With cached queries on quantized indexes, results may differ slightly. diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx.md new file mode 100644 index 000000000..d713d0668 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx.md @@ -0,0 +1,107 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: cosDPR-distil with quantized flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. 
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx \
+  --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet
+```
+
+## Indexing
+
+Sample indexing command, building quantized flat indexes:
+
+```bash
+bin/run.sh io.anserini.index.IndexFlatDenseVectors \
+  -threads 16 \
+  -collection ParquetDenseVectorCollection \
+  -input /path/to/msmarco-passage-cos-dpr-distil.parquet \
+  -generator ParquetDenseVectorDocumentGenerator \
+  -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \
+  -quantize.int8 \
+  >& logs/log.msmarco-passage-cos-dpr-distil.parquet &
+```
+
+The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details.
+
+After indexing has completed, you should be able to perform retrieval as follows using flat indexes:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \
+  -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \
+  -topicReader TsvInt \
+  -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt \
+  -encoder CosDprDistil -hits 1000 -threads 16 &
+```
+
+Note that we are performing query inference "on-the-fly" with ONNX in these experiments. 
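As a reminder of what the `recip_rank` figures below measure: RR@10 (what `trec_eval -M 10 -m recip_rank` reports) is the reciprocal of the rank at which the first relevant passage appears within the top 10, averaged over queries. A self-contained sketch on toy data — the docids and judgments here are hypothetical, not taken from the actual run files:

```python
# Reciprocal rank at cutoff 10, the quantity behind `-M 10 -m recip_rank`.
def rr_at_10(ranked_docids, relevant):
    for rank, docid in enumerate(ranked_docids[:10], start=1):
        if docid in relevant:
            return 1.0 / rank
    return 0.0

# Hypothetical per-query rankings and relevance judgments.
run = {"q1": ["d3", "d7", "d1"], "q2": ["d9", "d4", "d2"]}
qrels = {"q1": {"d7"}, "q2": {"d8"}}

mean_rr = sum(rr_at_10(run[q], qrels[q]) for q in run) / len(run)
# q1 finds its relevant passage at rank 2 (RR = 0.5); q2 finds none (RR = 0).
assert mean_rr == 0.25
```

On the real data, `trec_eval` computes the same quantity over all 6980 dev queries from the run and qrels files shown below.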
+ +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cosDPR-distil**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3942 | +| **RR@10** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3896 | +| **R@100** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9075 | +| **R@1000** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9796 | + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With ONNX query encoding on quantized indexes, results may differ slightly. 
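One way to sanity-check a reproduction against the reference figures above is a simple tolerance comparison. The sketch below is purely illustrative: the regression framework's actual checks live in `run_regression.py` and are driven by the YAML configuration, with its own tolerance settings.

```python
# Compare reproduced scores against the reference figures, allowing a small
# tolerance for quantization/encoding differences (tolerance value here is
# illustrative, not the one the regression framework actually uses).
expected = {"AP@1000": 0.3942, "RR@10": 0.3896, "R@100": 0.9075, "R@1000": 0.9796}

def verify(observed, expected, tol=0.001):
    return {metric: abs(observed[metric] - expected[metric]) <= tol
            for metric in expected}

# Hypothetical reproduced scores, within tolerance of the reference.
observed = {"AP@1000": 0.3939, "RR@10": 0.3896, "R@100": 0.9071, "R@1000": 0.9797}
assert all(verify(observed, expected).values())
```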
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx.md new file mode 100644 index 000000000..3febe0dcc --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx.md @@ -0,0 +1,106 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: cosDPR-distil with flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. 
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx \
+  --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet
+```
+
+## Indexing
+
+Sample indexing command, building flat indexes:
+
+```bash
+bin/run.sh io.anserini.index.IndexFlatDenseVectors \
+  -threads 16 \
+  -collection ParquetDenseVectorCollection \
+  -input /path/to/msmarco-passage-cos-dpr-distil.parquet \
+  -generator ParquetDenseVectorDocumentGenerator \
+  -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \
+  >& logs/log.msmarco-passage-cos-dpr-distil.parquet &
+```
+
+The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details.
+
+After indexing has completed, you should be able to perform retrieval as follows using flat indexes:
+
+```bash
+bin/run.sh io.anserini.search.SearchFlatDenseVectors \
+  -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \
+  -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \
+  -topicReader TsvInt \
+  -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-onnx.topics.msmarco-passage.dev-subset.txt \
+  -encoder CosDprDistil -hits 1000 -threads 16 &
+```
+
+Note that we are performing query inference "on-the-fly" with ONNX in these experiments. 
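Conceptually, searching a flat index is exact brute-force retrieval: score every document vector against the query and keep the top k. A minimal pure-Python sketch of that scan, on toy 2-d vectors rather than the actual embeddings:

```python
import heapq

# Exact brute-force top-k over a flat list of (docid, vector) pairs, scoring
# by inner product -- conceptually what searching a flat index does.
def brute_force_topk(query, docs, k):
    scored = ((sum(q * x for q, x in zip(query, vec)), docid)
              for docid, vec in docs)
    return [docid for score, docid in heapq.nlargest(k, scored)]

# Hypothetical toy corpus of 2-d vectors.
docs = [("d1", [0.1, 0.9]), ("d2", [0.8, 0.2]), ("d3", [0.6, 0.6])]
assert brute_force_topk([1.0, 0.0], docs, k=2) == ["d2", "d3"]
```

Because the scan is exhaustive, flat-index results are deterministic and serve as the reference point against which the approximate HNSW figures are compared.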
+ +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-flat-onnx.topics.msmarco-passage.dev-subset.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cosDPR-distil**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3942 | +| **RR@10** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3896 | +| **R@100** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9075 | +| **R@1000** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9796 | + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With ONNX query encoding on non-quantized indexes, results may differ slightly. 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md new file mode 100644 index 000000000..c3939032a --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md @@ -0,0 +1,117 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: cosDPR-distil with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. 
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached \
+  --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+bin/run.sh io.anserini.index.IndexHnswDenseVectors \
+  -threads 16 \
+  -collection ParquetDenseVectorCollection \
+  -input /path/to/msmarco-passage-cos-dpr-distil.parquet \
+  -generator ParquetDenseVectorDocumentGenerator \
+  -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \
+  -M 16 -efC 100 -quantize.int8 \
+  >& logs/log.msmarco-passage-cos-dpr-distil.parquet &
+```
+
+The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. + +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cosDPR-distil**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: 
Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.394 | +| **RR@10** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.390 | +| **R@100** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.908 | +| **R@1000** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.980 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](../../docs/reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ ++ Results reproduced by [@yilinjz](https://github.com/yilinjz) on 2023-09-01 (commit [`4ae518b`](https://github.com/castorini/anserini/commit/4ae518bb284ebcba0b273a473bc8774735cb7d19)) \ No newline at end of file diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md new file mode 100644 index 000000000..755c45d86 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md @@ -0,0 +1,117 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: cosDPR-distil with quantized HNSW indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. 
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx \
+  --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+bin/run.sh io.anserini.index.IndexHnswDenseVectors \
+  -threads 16 \
+  -collection ParquetDenseVectorCollection \
+  -input /path/to/msmarco-passage-cos-dpr-distil.parquet \
+  -generator ParquetDenseVectorDocumentGenerator \
+  -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \
+  -M 16 -efC 100 -quantize.int8 \
+  >& logs/log.msmarco-passage-cos-dpr-distil.parquet &
+```
+
+The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. + +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ + -topicReader TsvInt \ + -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt \ + -encoder CosDprDistil -hits 1000 -efSearch 1000 -threads 16 & +``` + +Note that we are performing query inference "on-the-fly" with ONNX in these experiments. + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cosDPR-distil**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: 
Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.394 | +| **RR@10** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.390 | +| **R@100** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.908 | +| **R@1000** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.980 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](../../docs/reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.md new file mode 100644 index 000000000..ce94082b1 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.md @@ -0,0 +1,115 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: cosDPR-distil with HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. 
[Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached \ + --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-cos-dpr-distil.parquet & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. 
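The cached-query topics consumed by the `JsonIntVector` topic reader are, judging from the file extension, gzipped JSONL with pre-encoded query vectors. A sketch of reading such a file follows; the field names `qid` and `vector`, and the synthetic topic, are assumptions for illustration, not the verified Anserini schema.

```python
import gzip
import json

def read_cached_topics(path):
    """Yield (qid, vector) pairs from a gzipped JSONL topics file.
    Field names 'qid' and 'vector' are assumed for illustration."""
    with gzip.open(path, "rt", encoding="utf-8") as f:
        for line in f:
            topic = json.loads(line)
            yield topic["qid"], topic["vector"]

# Write a tiny synthetic topics file, then read it back.
with gzip.open("topics.sample.jsonl.gz", "wt", encoding="utf-8") as f:
    f.write(json.dumps({"qid": 1048585, "vector": [0.12, -0.34, 0.56]}) + "\n")

topics = list(read_cached_topics("topics.sample.jsonl.gz"))
print(topics[0][0])  # 1048585
```

With cached queries, no encoder model is loaded at search time; the vectors are simply read from disk.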
+ +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cosDPR-distil**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.394 | +| **RR@10** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 
0.390 | +| **R@100** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.908 | +| **R@1000** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.980 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](../../docs/reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. + ++ Results reproduced by [@yilinjz](https://github.com/yilinjz) on 2023-09-01 (commit [`4ae518b`](https://github.com/castorini/anserini/commit/4ae518bb284ebcba0b273a473bc8774735cb7d19)) \ No newline at end of file diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.md new file mode 100644 index 000000000..cfda2597d --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.md @@ -0,0 +1,115 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: cosDPR-distil with HNSW indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. 
[Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-cos-dpr-distil.parquet.tar` is 38 GB and has MD5 checksum `c8a204fbc3ccda581aa375936af43a97`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx \ + --corpus-path collections/msmarco-passage-cos-dpr-distil.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-cos-dpr-distil.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-cos-dpr-distil.parquet & +``` + +The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. 
+ +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ + -topicReader TsvInt \ + -output runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-onnx.topics.msmarco-passage.dev-subset.txt \ + -encoder CosDprDistil -hits 1000 -efSearch 1000 -threads 16 & +``` + +Note that we are performing query inference "on-the-fly" with ONNX in these experiments. + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.parquet.cos-dpr-distil-hnsw-onnx.topics.msmarco-passage.dev-subset.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **cosDPR-distil**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.394 | +| **RR@10** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.390 | +| **R@100** | 
**cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.908 | +| **R@1000** | **cosDPR-distil**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.980 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](../../docs/reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached.md new file mode 100644 index 000000000..29f213d10 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached.md @@ -0,0 +1,105 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: OpenAI-ada2 embeddings with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). 
+ +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-openai-ada2.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-openai-ada2.parquet.tar` is 75 GB and has MD5 checksum `fa3637e9c4150b157270e19ef3a4f779`. 
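As a quick sanity check before unpacking, the checksum can also be verified programmatically; the sketch below is our own helper (not part of Anserini), reading in chunks so a 75 GB tarball never needs to fit in memory:

```python
import hashlib

def md5_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Compute the MD5 hex digest of a file, streaming it in 1 MB chunks."""
    h = hashlib.md5()
    with open(path, "rb") as f:
        while True:
            block = f.read(chunk_size)
            if not block:
                break
            h.update(block)
    return h.hexdigest()

# Example (assumes the tarball has already been downloaded):
# assert md5_of("collections/msmarco-passage-openai-ada2.parquet.tar") == \
#     "fa3637e9c4150b157270e19ef3a4f779"
```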
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached \ + --corpus-path collections/msmarco-passage-openai-ada2.parquet +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +bin/run.sh io.anserini.index.IndexFlatDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-openai-ada2.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ \ + -quantize.int8 \ + >& logs/log.msmarco-passage-openai-ada2.parquet & +``` + +The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. 
+ +After indexing has completed, you should be able to perform retrieval as follows using flat indexes: + +```bash +bin/run.sh io.anserini.search.SearchFlatDenseVectors \ + -index indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.openai-ada2.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \ + -hits 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-flat-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **OpenAI-ada2**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3505 | +| **RR@10** | **OpenAI-ada2**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.3434 | +| **R@100** |
**OpenAI-ada2**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.8996 | +| **R@1000** | **OpenAI-ada2**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.9858 | + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With cached queries on quantized indexes, results may differ slightly. diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.md new file mode 100644 index 000000000..c659687bb --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.md @@ -0,0 +1,116 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: OpenAI-ada2 embeddings with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-openai-ada2.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-openai-ada2.parquet.tar` is 75 GB and has MD5 checksum `fa3637e9c4150b157270e19ef3a4f779`. 
+ +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached \ + --corpus-path collections/msmarco-passage-openai-ada2.parquet +``` + +## Indexing + +Sample indexing command, building quantized HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-openai-ada2.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ \ + -M 16 -efC 100 -quantize.int8 \ + >& logs/log.msmarco-passage-openai-ada2.parquet & +``` + +The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. +Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. +See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details.
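The scalar int8 quantization mentioned above can be illustrated with a toy sketch. This is only a conceptual approximation, not Lucene's actual scheme (which calibrates per segment using confidence intervals), and all names here are ours:

```python
import numpy as np

def quantize_int8(vectors: np.ndarray):
    """Toy global min/max scalar quantization: map float32 values onto
    256 int8 buckets. Illustrative only; not Lucene's implementation."""
    lo = float(vectors.min())
    scale = (float(vectors.max()) - lo) / 255.0
    buckets = np.round((vectors - lo) / scale)  # values in 0..255
    return (buckets - 128).astype(np.int8), lo, scale

def dequantize_int8(q: np.ndarray, lo: float, scale: float) -> np.ndarray:
    """Recover an approximation of the original float32 values."""
    return (q.astype(np.float32) + 128.0) * scale + lo

rng = np.random.default_rng(0)
vecs = rng.normal(size=(4, 8)).astype(np.float32)
q, lo, scale = quantize_int8(vecs)
approx = dequantize_int8(q, lo, scale)
# Each reconstructed value lands within one bucket width of the original.
assert np.max(np.abs(vecs - approx)) <= scale
```

Storing both representations is why the on-disk footprint grows, while only the int8 side needs to be memory-resident at search time.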
+ +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.openai-ada2.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **OpenAI-ada2**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.350 | +| **RR@10** | **OpenAI-ada2**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.343 | +| **R@100** | 
**OpenAI-ada2**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.900 | +| **R@1000** | **OpenAI-ada2**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.986 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](../../docs/reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. + diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.md new file mode 100644 index 000000000..df2cfd8c4 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.md @@ -0,0 +1,114 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: OpenAI-ada2 embeddings with HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). 
+ +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.parquet.hnsw.cached +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.openai-ada2.parquet.hnsw.cached +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar -P collections/ +tar xvf collections/msmarco-passage-openai-ada2.parquet.tar -C collections/ +``` + +To confirm, `msmarco-passage-openai-ada2.parquet.tar` is 75 GB and has MD5 checksum `fa3637e9c4150b157270e19ef3a4f779`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.parquet.hnsw.cached \ + --corpus-path collections/msmarco-passage-openai-ada2.parquet +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +bin/run.sh io.anserini.index.IndexHnswDenseVectors \ + -threads 16 \ + -collection ParquetDenseVectorCollection \ + -input /path/to/msmarco-passage-openai-ada2.parquet \ + -generator ParquetDenseVectorDocumentGenerator \ + -index indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ \ + -M 16 -efC 100 \ + >& logs/log.msmarco-passage-openai-ada2.parquet & +``` + +The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](../../docs/experiments-msmarco-passage.md) for more details. 
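The recall figures reported for these regressions (R@100, R@1000) are macro-averaged over queries; as a minimal sketch, using hypothetical in-memory run/qrels structures rather than trec_eval's file formats:

```python
def recall_at_k(run, qrels, k):
    """Macro-averaged recall@k.
    run:   {qid: [docid, ...]} ranked by decreasing score.
    qrels: {qid: set of relevant docids}."""
    per_query = []
    for qid, relevant in qrels.items():
        if not relevant:
            continue  # skip queries with no relevant documents
        retrieved = set(run.get(qid, [])[:k])
        per_query.append(len(retrieved & relevant) / len(relevant))
    return sum(per_query) / len(per_query)

run = {"q1": ["d3", "d1", "d7", "d2"]}
qrels = {"q1": {"d1", "d2"}}
print(recall_at_k(run, qrels, 2))  # 0.5 -> only d1 appears in the top 2
```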
+ +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +bin/run.sh io.anserini.search.SearchHnswDenseVectors \ + -index indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ \ + -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.openai-ada2.jsonl.gz \ + -topicReader JsonIntVector \ + -output runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \ + -hits 1000 -efSearch 1000 -threads 16 & +``` + +Evaluation can be performed using `trec_eval`: + +```bash +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.parquet.openai-ada2-hnsw-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **AP@1000** | **OpenAI-ada2**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.350 | +| **RR@10** | **OpenAI-ada2**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.343 | +| **R@100** | **OpenAI-ada2**| +| [MS MARCO 
Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.900 | +| **R@1000** | **OpenAI-ada2**| +| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.986 | + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](../../docs/reproducibility.md) + +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. + diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template new file mode 100644 index 000000000..cfc6aab4b --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template @@ -0,0 +1,94 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. 
+ +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With cached queries on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). 
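The grade thresholding behind the `-l 2` option mentioned above can be sketched as follows (toy qrels and a helper name of our own invention):

```python
# Toy graded qrels: qid -> {docid: relevance grade}, grades 0-3 as in the
# Deep Learning Track judgments.
qrels = {"q1": {"d1": 3, "d2": 2, "d3": 1, "d4": 0}}

def binarize(qrels, min_grade=2):
    """Mimic trec_eval's -l option: only documents at or above min_grade
    count as relevant for binary metrics such as AP and recall."""
    return {qid: {doc for doc, grade in docs.items() if grade >= min_grade}
            for qid, docs in qrels.items()}

print(sorted(binarize(qrels)["q1"]))  # ['d1', 'd2']
```

nDCG, by contrast, consumes the graded judgments directly, which is why the qrels retain all relevance grades.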
+ +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template new file mode 100644 index 000000000..d2521082f --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template @@ -0,0 +1,94 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). 
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With ONNX query encoding on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat.cached.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat.cached.template new file mode 100644 index 000000000..3628965a2 --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat.cached.template @@ -0,0 +1,93 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
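The checksum confirmation described above can be scripted; the sketch below uses an empty stand-in file (whose MD5 is the well-known empty-input digest), since the real tarball name and expected checksum come from the `${corpus}` and `${download_checksum}` placeholders in the regression YAML.

```bash
# Stand-in for the downloaded tarball; for a real run, substitute
# collections/${corpus}.tar and the MD5 listed in the regression YAML.
: > /tmp/corpus.tar
expected="d41d8cd98f00b204e9800998ecf8427e"   # MD5 of empty input
actual=$(md5sum /tmp/corpus.tar | awk '{print $1}')
if [ "$actual" = "$expected" ]; then
  echo "checksum OK"
else
  echo "checksum MISMATCH" >&2
fi
```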
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat.onnx.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat.onnx.template
new file mode 100644
index 000000000..d6c690925
--- /dev/null
+++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.flat.onnx.template
@@ -0,0 +1,94 @@
+# Anserini Regressions: TREC 2019 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with flat indexes (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With ONNX query encoding on non-quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
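When ONNX-encoded scores are expected to "differ slightly" from cached-query scores, one quick way to quantify the drift is to join two run files on (qid, docid) and take the largest absolute score difference. The two toy runs below are hypothetical; in practice they would be the files produced by the retrieval commands above.

```bash
# Toy TREC-format runs (qid Q0 docid rank score tag); values are made up.
printf '1 Q0 d1 1 9.80 onnx\n1 Q0 d2 2 9.10 onnx\n'     > /tmp/run.onnx
printf '1 Q0 d1 1 9.81 cached\n1 Q0 d2 2 9.10 cached\n' > /tmp/run.cached

# Key each run by qid:docid, then report the largest absolute score delta
# over shared (qid, docid) pairs.
awk '{print $1":"$3, $5}' /tmp/run.onnx   | sort > /tmp/a
awk '{print $1":"$3, $5}' /tmp/run.cached | sort > /tmp/b
join /tmp/a /tmp/b | awk '{d=$2-$3; if (d<0) d=-d; if (d>m) m=d} END {printf "%.2f\n", m}'
```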
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template
new file mode 100644
index 000000000..96fccddc6
--- /dev/null
+++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template
@@ -0,0 +1,100 @@
+# Anserini Regressions: TREC 2019 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized HNSW indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. 
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track.
+The original data can be found [here](https://trec.nist.gov/data/deep2019.html).
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation.
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template
new file mode 100644
index 000000000..b6b36004d
--- /dev/null
+++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template
@@ -0,0 +1,100 @@
+# Anserini Regressions: TREC 2019 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized HNSW indexes (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. 
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track.
+The original data can be found [here](https://trec.nist.gov/data/deep2019.html).
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.template new file mode 100644 index 000000000..a2360ca28 --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.template @@ -0,0 +1,98 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). 
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget ${download_url} -P collections/
+tar xvf collections/${corpus}.tar -C collections/
+```
+
+To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building HNSW indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). 
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation.
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template
new file mode 100644
index 000000000..88fe261e8
--- /dev/null
+++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template
@@ -0,0 +1,98 @@
+# Anserini Regressions: TREC 2019 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with HNSW indexes (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. 
+This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template new file mode 100644 index 000000000..d6878d6f5 --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template @@ -0,0 +1,89 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 16 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized indexes.
+With cached queries on quantized indexes, results may differ slightly.
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation.
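To see why quantized scores only approximate the full-precision ones, here is a toy int8 scalar-quantization sketch (illustrative only; Lucene's actual quantization scheme differs):

```python
def quantize_int8(vec, scale):
    """Map floats in roughly [-1, 1] to int8 via a simple symmetric scale."""
    return [max(-128, min(127, round(v * scale))) for v in vec]

def dot(a, b):
    return sum(x * y for x, y in zip(a, b))

q = [0.12, -0.45, 0.88, 0.03]   # toy query embedding
d = [0.10, -0.40, 0.90, -0.20]  # toy document embedding
scale = 127.0

exact = dot(q, d)
approx = dot(quantize_int8(q, scale), quantize_int8(d, scale)) / (scale * scale)
print(exact, approx)  # close, but not identical: rounding discards precision
```

The gap is small but real, which is why the regression only expects quantized scores to be close to, not identical to, the full-precision figures.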
diff --git a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached.template b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached.template new file mode 100644 index 000000000..93a902334 --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached.template @@ -0,0 +1,88 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 16 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_.
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation.
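The exact-reproducibility claim follows from what a flat index does: brute-force search scores every document vector against the query, with no approximation and no randomness. A minimal sketch (toy code, not Anserini's implementation):

```python
def flat_search(query, index, k=2):
    """Brute-force search: score every vector in the index. There is no
    approximation and no randomness, so reruns give identical results."""
    scores = [(sum(q * x for q, x in zip(query, vec)), doc_id)
              for doc_id, vec in index.items()]
    scores.sort(key=lambda t: (-t[0], t[1]))  # deterministic tie-break on doc id
    return [doc_id for _, doc_id in scores[:k]]

index = {"d1": [0.1, 0.9], "d2": [0.8, 0.2], "d3": [0.5, 0.5]}
print(flat_search([1.0, 0.0], index))  # ['d2', 'd3'] on every run
```

Contrast this with HNSW search, where graph construction introduces variation between trials.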
diff --git a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template new file mode 100644 index 000000000..231838e84 --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template @@ -0,0 +1,93 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 16 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template new file mode 100644 index 000000000..3a27d94be --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template @@ -0,0 +1,93 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 16 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
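Why HNSW results drift can be illustrated with a toy greedy traversal over a proximity graph (in the spirit of HNSW search, not Lucene's implementation): only neighbors of visited nodes are ever scored, so the answer depends on the graph that indexing happened to build and on the entry point.

```python
def greedy_search(graph, vectors, query, entry):
    """Toy greedy hill-climb over a proximity graph: hop to the best-scoring
    neighbor until no neighbor improves. Unlike brute-force flat search,
    only part of the index is ever scored."""
    def score(n):
        return sum(q * x for q, x in zip(query, vectors[n]))

    current = entry
    while True:
        best = max(graph[current], key=score)
        if score(best) <= score(current):
            return current
        current = best

# A small graph whose shape determines what the search can find.
vectors = {"a": [0.9, 0.1], "b": [0.5, 0.5], "c": [0.2, 0.8], "d": [1.0, 0.0]}
graph = {"a": ["b"], "b": ["a", "c"], "c": ["b", "d"], "d": ["c"]}

print(greedy_search(graph, vectors, [1.0, 0.0], "c"))  # reaches "d", the true best
print(greedy_search(graph, vectors, [1.0, 0.0], "a"))  # stuck at "a", a local optimum
```

Real HNSW mitigates this with hierarchical layers and a search beam, but the dependence on graph structure remains, which is why a differently built index can return slightly different hits.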
diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat-int8.cached.template new file mode 100644 index 000000000..9f67f9423 --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat-int8.cached.template @@ -0,0 +1,94 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With cached queries on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx.template new file mode 100644 index 000000000..26ef4092c --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx.template @@ -0,0 +1,96 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with quantized flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Note that we are performing query inference "on-the-fly" with ONNX in these experiments. + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With ONNX query encoding on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat.cached.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat.cached.template new file mode 100644 index 000000000..cc7d902f6 --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat.cached.template @@ -0,0 +1,93 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat.onnx.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat.onnx.template new file mode 100644 index 000000000..38fd8df4b --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.flat.onnx.template @@ -0,0 +1,96 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. 
[Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Note that we are performing query inference "on-the-fly" with ONNX in these experiments. + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With ONNX query encoding on non-quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). 
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template new file mode 100644 index 000000000..82433cd7b --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template @@ -0,0 +1,100 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). 
+For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. 
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track.
+The original data can be found [here](https://trec.nist.gov/data/deep2019.html).
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
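
Since HNSW construction and quantization are non-deterministic, regression verification for these conditions compares metrics against a tolerance band rather than exact values; the idea reduces to something like this (the tolerance and scores below are illustrative, not the framework's actual settings):

```python
def within_tolerance(expected, observed, tol=0.01):
    """Accept an observed metric if it is within tol of the expected value."""
    return abs(expected - observed) <= tol

# Illustrative values: an expected AP and two observed runs.
assert within_tolerance(0.4656, 0.4649)      # small HNSW jitter: passes
assert not within_tolerance(0.4656, 0.4300)  # a real regression: fails
```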
+ +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template new file mode 100644 index 000000000..04361bc70 --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template @@ -0,0 +1,102 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with quantized HNSW indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. 
+ +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. 
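
If you want to verify the download before unpacking, comparing the file's MD5 digest against `${download_checksum}` can be sketched as follows (the path in the comment is a placeholder):

```python
import hashlib

def md5sum(path, chunk_size=1 << 20):
    """Compute an MD5 hex digest without reading the whole file into memory."""
    h = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# e.g., md5sum("collections/<corpus>.tar") should equal the stated checksum
```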
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track.
+The original data can be found [here](https://trec.nist.gov/data/deep2019.html).
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+${ranking_cmds}
+```
+
+Note that we are performing query inference "on-the-fly" with ONNX in these experiments.
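
As a rough sense of the memory trade-off behind the byte quantization noted in the indexing section: with 8,841,823 passages and 768-dimensional cosDPR-distil vectors (a DistilBERT-sized width), the arithmetic works out as follows:

```python
NUM_PASSAGES = 8_841_823  # corpus size from the indexing step above
DIM = 768                 # cosDPR-distil embedding width (DistilBERT-based)

float32_gb = NUM_PASSAGES * DIM * 4 / 1e9  # 4 bytes per float32 component
int8_gb = NUM_PASSAGES * DIM * 1 / 1e9     # 1 byte per quantized component

print(f"float32: {float32_gb:.1f} GB, int8: {int8_gb:.1f} GB")
# roughly 27.2 GB vs 6.8 GB of vector data that must be resident for search
```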
+ +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.cached.template new file mode 100644 index 000000000..1849b75f2 --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.cached.template @@ -0,0 +1,98 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
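
Conceptually, "cached queries" means each topic ships with a precomputed embedding, so query "encoding" at search time is just a table lookup rather than a forward pass through the model; a toy sketch (qids and vectors below are made up, not Anserini's internals):

```python
# Illustrative only: query encoding replaced by a precomputed lookup table.
encoded_queries = {
    "1037798": [0.12, -0.40, 0.88],  # qid -> cached embedding (toy 3-d vectors)
    "1106007": [0.55, 0.10, -0.31],
}

def encode(qid):
    """With cached queries, 'encoding' is just a dictionary lookup."""
    return encoded_queries[qid]

query_vector = encode("1037798")
```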
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.template new file mode 100644 index 000000000..016f48e05 --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.template @@ -0,0 +1,100 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with HNSW indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Note that we are performing query inference "on-the-fly" with ONNX in these experiments. + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.flat-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.flat-int8.cached.template new file mode 100644 index 000000000..f79851f97 --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.flat-int8.cached.template @@ -0,0 +1,94 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: OpenAI-ada2 embeddings with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
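
To convey what byte quantization does (this is a generic scalar-quantization sketch, not Lucene's exact algorithm): each float32 component is mapped onto the 256-value int8 range, at the cost of a small reconstruction error:

```python
def quantize(vector, lo, hi):
    """Map float components in [lo, hi] onto the int8 range [-128, 127]."""
    scale = 255.0 / (hi - lo)
    return [max(-128, min(127, round((x - lo) * scale) - 128)) for x in vector]

def dequantize(codes, lo, hi):
    """Approximate inverse of quantize()."""
    scale = (hi - lo) / 255.0
    return [(c + 128) * scale + lo for c in codes]

v = [0.25, -0.8, 0.0, 0.99]
codes = quantize(v, lo=-1.0, hi=1.0)
approx = dequantize(codes, lo=-1.0, hi=1.0)
# each reconstructed component is within one quantization step (~0.008) of the original
```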
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 75 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With cached queries on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.flat.cached.template b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.flat.cached.template new file mode 100644 index 000000000..839f78a07 --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.flat.cached.template @@ -0,0 +1,93 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: OpenAI-ada2 embeddings with flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 75 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. diff --git a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.template new file mode 100644 index 000000000..c4993c4b8 --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.template @@ -0,0 +1,100 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: OpenAI-ada2 embeddings with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. 
[Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 75 GB and has MD5 checksum `${download_checksum}`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. +Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. +See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
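
The tolerance mentioned above ("scores may be lower by up to 0.01") lends itself to a scripted check when comparing a fresh HNSW run against the reference figures. A minimal sketch; both score values below are invented for illustration:

```shell
# Check whether an observed metric is within 0.01 of the reference value
# (scores below are made up; substitute figures from the effectiveness table).
REF=0.7035
OBS=0.6991
awk -v ref="$REF" -v obs="$OBS" 'BEGIN {
  d = ref - obs
  if (d < 0) d = -d
  exit !(d <= 0.01)      # exit status 0 iff within tolerance
}' && echo "within tolerance" || echo "outside tolerance"
```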
diff --git a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw.cached.template new file mode 100644 index 000000000..589eae1f0 --- /dev/null +++ b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw.cached.template @@ -0,0 +1,98 @@ +# Anserini Regressions: TREC 2019 Deep Learning Track (Passage) + +**Model**: OpenAI-ada2 embeddings with HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [TREC 2019 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 75 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+The regression experiments here evaluate on the 43 topics for which NIST has provided judgments as part of the TREC 2019 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2019.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2003.07820). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template new file mode 100644 index 000000000..dec3d4c05 --- /dev/null +++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template @@ -0,0 +1,94 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With cached queries on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template new file mode 100644 index 000000000..922a005a1 --- /dev/null +++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template @@ -0,0 +1,94 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With ONNX query encoding on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat.cached.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat.cached.template new file mode 100644 index 000000000..8693cec8d --- /dev/null +++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat.cached.template @@ -0,0 +1,93 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
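
Since the flat-index, cached-query runs above should reproduce exactly, a byte-for-byte comparison of run files across trials is a reasonable sanity check. A toy sketch with hypothetical one-line run files in TREC run format:

```shell
# Two trials of brute-force search with cached queries should yield
# byte-identical run files; `cmp -s` is silent and exits 0 on a match.
# (Toy run files here, standing in for real retrieval output.)
printf '1 Q0 doc1 1 0.9312 run\n' > /tmp/run.trial1
printf '1 Q0 doc1 1 0.9312 run\n' > /tmp/run.trial2
if cmp -s /tmp/run.trial1 /tmp/run.trial2; then
  echo "runs identical"
else
  echo "runs differ"
fi
```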
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat.onnx.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat.onnx.template new file mode 100644 index 000000000..1e823fb31 --- /dev/null +++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.flat.onnx.template @@ -0,0 +1,94 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With ONNX query encoding on non-quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
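+To sanity-check a downloaded tarball against the reported MD5 before indexing, one can hash it directly with `md5sum`. A self-contained sketch using a toy file as a stand-in (the real corpus should hash to the `${download_checksum}` value stated above):
+
+```bash
+# Toy stand-in for the corpus tarball -- hash a small file the same way
+# you would hash collections/${corpus}.tar.
+printf 'hello' > /tmp/sample.tar
+md5sum /tmp/sample.tar | cut -d' ' -f1
+```
+
+Compare the printed digest against the published checksum before proceeding to indexing.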
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template
new file mode 100644
index 000000000..65088fd27
--- /dev/null
+++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template
@@ -0,0 +1,100 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized HNSW indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. 
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track.
+The original data can be found [here](https://trec.nist.gov/data/deep2020.html).
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation.
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template
new file mode 100644
index 000000000..295f0b46d
--- /dev/null
+++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template
@@ -0,0 +1,100 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized HNSW indexes (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. 
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track.
+The original data can be found [here](https://trec.nist.gov/data/deep2020.html).
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation.
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.template
new file mode 100644
index 000000000..6dede3d54
--- /dev/null
+++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.template
@@ -0,0 +1,98 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with HNSW indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. 
+Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). 
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation.
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template
new file mode 100644
index 000000000..4feede0df
--- /dev/null
+++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template
@@ -0,0 +1,98 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with HNSW indexes (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. 
+This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template new file mode 100644 index 000000000..6e3fcf0f2 --- /dev/null +++ b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template @@ -0,0 +1,89 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 16 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized indexes.
+With cached queries on quantized indexes, results may differ slightly.
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation.
diff --git a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached.template b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached.template new file mode 100644 index 000000000..69270c2cc --- /dev/null +++ b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached.template @@ -0,0 +1,88 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 16 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation.
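Retrieval over flat indexes, as in the regression above, is exhaustive brute-force search: the query embedding is scored against every document embedding, which is why the results are exactly reproducible. Below is a minimal conceptual sketch in Python (the toy vectors and function name are hypothetical; this is not Anserini's Lucene-based implementation):

```python
# Conceptual sketch of brute-force ("flat") vector search: score every
# document embedding against the query and take the top-k by dot product.
# This is NOT Anserini's Lucene implementation -- it only illustrates why
# flat-index results are exactly reproducible (no approximation involved).

def flat_search(query, doc_vectors, k=3):
    """Exhaustively score all documents; return top-k (docid, score) pairs."""
    scores = []
    for docid, vec in doc_vectors.items():
        score = sum(q * d for q, d in zip(query, vec))  # dot product
        scores.append((docid, score))
    # Sort by score descending; break ties by docid for determinism.
    scores.sort(key=lambda x: (-x[1], x[0]))
    return scores[:k]

# Hypothetical toy embeddings (real ones are e.g. 768- or 1024-dimensional).
docs = {
    "d1": [0.1, 0.9, 0.0],
    "d2": [0.8, 0.1, 0.1],
    "d3": [0.4, 0.4, 0.2],
}
print(flat_search([0.9, 0.1, 0.0], docs, k=2))
```

Because every document is scored, there is no approximation error, in contrast to the HNSW variants of these regressions.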
diff --git a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template new file mode 100644 index 000000000..36896b36c --- /dev/null +++ b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template @@ -0,0 +1,93 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 16 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
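The small score differences noted above stem from scalar quantization: float32 vector components are mapped to 8-bit integers and scored at reduced precision. A simplified, hypothetical sketch of symmetric int8 quantization follows (Lucene's actual byte-quantization scheme differs in its details):

```python
# Simplified sketch of scalar (int8) quantization: map float components in
# [-max_abs, +max_abs] onto integers in [-127, 127], then reconstruct.
# Lucene's byte quantization differs in detail; this only illustrates why
# quantized scores can drift slightly from the float32 ("flat") baseline.

def quantize(vec):
    max_abs = max(abs(x) for x in vec) or 1.0
    scale = 127.0 / max_abs
    return [round(x * scale) for x in vec], scale

def dequantize(qvec, scale):
    return [q / scale for q in qvec]

v = [0.12, -0.88, 0.5, 0.03]
q, scale = quantize(v)
v_hat = dequantize(q, scale)
err = max(abs(a - b) for a, b in zip(v, v_hat))
print(q, err)  # per-component error is bounded by roughly max_abs / 254
```

Each component loses at most half a quantization step, so individual dot-product scores shift by small amounts, which is consistent with the up-to-0.01 differences mentioned above.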
diff --git a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template new file mode 100644 index 000000000..e87d20d8d --- /dev/null +++ b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template @@ -0,0 +1,93 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 16 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
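Since HNSW indexing is non-deterministic, verifying these regressions means comparing scores within a tolerance rather than exactly. A hypothetical sketch of such a check is shown below (the metric names and values are illustrative placeholders, not this regression's scores, and the actual logic in `run_regression.py` may differ):

```python
# Hypothetical sketch of tolerance-based verification for HNSW regressions:
# instead of exact equality, accept observed scores within a small delta of
# the expected value. The actual checks in run_regression.py may differ.

def verify(expected, observed, tolerance=0.01):
    """Return the (metric, expected, observed) entries that are out of tolerance."""
    failures = []
    for metric, exp in expected.items():
        obs = observed.get(metric)
        if obs is None or abs(obs - exp) > tolerance:
            failures.append((metric, exp, obs))
    return failures

# Illustrative placeholder values only.
expected = {"AP@1000": 0.4884, "nDCG@10": 0.7245, "R@1000": 0.8682}
observed = {"AP@1000": 0.4879, "nDCG@10": 0.7241, "R@1000": 0.8680}
print(verify(expected, observed))  # all within 0.01 -> []
```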
diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat-int8.cached.template new file mode 100644 index 000000000..d3d7f8343 --- /dev/null +++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat-int8.cached.template @@ -0,0 +1,94 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With cached queries on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
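The `-l 2` option mentioned above controls how `trec_eval` binarizes graded qrels for metrics such as AP: only grade 2 and higher counts as relevant. A simplified sketch with hypothetical qrels (not this regression's data) illustrates the effect:

```python
# Sketch of what trec_eval's `-l 2` does for binary metrics like AP:
# graded qrels are binarized, counting only grade >= 2 as relevant, so
# grade 1 judgments are treated as non-relevant. Hypothetical qrels and a
# simplified AP computation, for illustration only.

def average_precision(ranking, qrels, min_rel=2):
    relevant = {doc for doc, grade in qrels.items() if grade >= min_rel}
    if not relevant:
        return 0.0
    hits, precision_sum = 0, 0.0
    for rank, doc in enumerate(ranking, start=1):
        if doc in relevant:
            hits += 1
            precision_sum += hits / rank
    return precision_sum / len(relevant)

qrels = {"d1": 3, "d2": 1, "d3": 2, "d4": 0}   # graded judgments
ranking = ["d2", "d1", "d3", "d4"]

print(average_precision(ranking, qrels, min_rel=2))  # grade 1 dropped
print(average_precision(ranking, qrels, min_rel=1))  # grade 1 counted
```

With the toy qrels above, counting grade 1 as relevant inflates AP from 7/12 to 1.0, which is why the binarization threshold matters when comparing numbers against published results.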
diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx.template new file mode 100644 index 000000000..5862f1c43 --- /dev/null +++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx.template @@ -0,0 +1,96 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with quantized flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Note that we are performing query inference "on-the-fly" with ONNX in these experiments. + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With ONNX query encoding on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
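The corpus downloads in these regressions are verified by size and MD5 checksum. If `md5sum` is unavailable, an equivalent chunked checksum can be computed with Python's standard library (the tarball path shown is hypothetical):

```python
# Compute an MD5 checksum of a large file without loading it all into
# memory, reading in fixed-size chunks -- useful for verifying the multi-GB
# corpus tarballs against the ${download_checksum} value in the YAML config.

import hashlib

def md5_of_file(path, chunk_size=1 << 20):
    """Return the hex MD5 digest of the file at `path`, read in 1 MiB chunks."""
    h = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Example usage (replace with the actual downloaded tarball path):
# print(md5_of_file("collections/${corpus}.tar"))
```

Chunked reading keeps memory usage constant regardless of file size, which matters for the 16 GB and 38 GB tarballs referenced above.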
diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat.cached.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat.cached.template new file mode 100644 index 000000000..4f004115c --- /dev/null +++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat.cached.template @@ -0,0 +1,93 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat.onnx.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat.onnx.template new file mode 100644 index 000000000..1b7469a97 --- /dev/null +++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.flat.onnx.template @@ -0,0 +1,96 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin.
[Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). +For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
+ +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Note that we are performing query inference "on-the-fly" with ONNX in these experiments. + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With ONNX query encoding on non-quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). 
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template new file mode 100644 index 000000000..8755edef0 --- /dev/null +++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template @@ -0,0 +1,100 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: cosDPR-distil with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget ${download_url} -P collections/
+tar xvf collections/${corpus}.tar -C collections/
+```
+
+To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track.
+The original data can be found [here](https://trec.nist.gov/data/deep2020.html).
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
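As a point of reference for the flat-vs-HNSW comparison above: brute-force ("flat") search simply scores every document vector against the query and sorts. A toy Python sketch (illustrative only; the vectors and document IDs are made up, and Anserini's actual implementation lives in Lucene):

```python
def dot(u, v):
    # Inner-product similarity; over unit-length vectors this equals cosine.
    return sum(a * b for a, b in zip(u, v))

def flat_search(query, index, k=2):
    # Score every vector exhaustively and keep the top k.
    # This exact search is the baseline that HNSW approximates.
    scored = sorted(((dot(query, vec), doc_id) for doc_id, vec in index.items()), reverse=True)
    return [doc_id for _, doc_id in scored[:k]]

toy_index = {"d1": [0.1, 0.9], "d2": [0.8, 0.2], "d3": [0.5, 0.5]}
print(flat_search([1.0, 0.0], toy_index))  # ['d2', 'd3']
```

Because every document is scored, repeated runs on the same index return identical rankings, which is why the flat figures serve as the reference point.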
+
+❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
+For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
+The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662).
+
+## Reproduction Log[*](reproducibility.md)
+
+To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation.
diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template
new file mode 100644
index 000000000..06ca830f2
--- /dev/null
+++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template
@@ -0,0 +1,102 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: cosDPR-distil with quantized HNSW indexes (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget ${download_url} -P collections/
+tar xvf collections/${corpus}.tar -C collections/
+```
+
+To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track.
+The original data can be found [here](https://trec.nist.gov/data/deep2020.html).
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+${ranking_cmds}
+```
+
+Note that we are performing query inference "on-the-fly" with ONNX in these experiments.
+ +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
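To make the `-l 2` convention above concrete, the following self-contained sketch (toy relevance grades, not Anserini code) scores one ranked list two ways: nDCG credits all positive grades, while AP under `-l 2` counts only grades of 2 or higher as relevant:

```python
import math

def ndcg(grades):
    # nDCG over a ranked list of relevance grades; every positive grade contributes.
    dcg = sum(g / math.log2(i + 2) for i, g in enumerate(grades))
    idcg = sum(g / math.log2(i + 2) for i, g in enumerate(sorted(grades, reverse=True)))
    return dcg / idcg if idcg > 0 else 0.0

def average_precision(grades, min_rel=2):
    # AP where only grades >= min_rel count as relevant (the effect of `-l 2`),
    # assuming all relevant passages appear somewhere in the ranking.
    hits, precision_sum = 0, 0.0
    for rank, g in enumerate(grades, start=1):
        if g >= min_rel:
            hits += 1
            precision_sum += hits / rank
    return precision_sum / hits if hits else 0.0

ranked = [3, 1, 0, 2]  # toy relevance grades of retrieved passages, by rank
print(round(ndcg(ranked), 4))               # 0.9434
print(round(average_precision(ranked), 4))  # 0.75
```

Note how the grade-1 passage at rank 2 raises nDCG but contributes nothing to AP, which is exactly the asymmetry the `-l 2` option encodes.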
diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.cached.template
new file mode 100644
index 000000000..f405fcbd1
--- /dev/null
+++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.cached.template
@@ -0,0 +1,98 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: cosDPR-distil with HNSW indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
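The tolerance-band comparison implied above (scores that drift by up to 0.01 between trials) can be sketched as follows; `within_tolerance` and the scores are hypothetical, not the actual logic or values in `run_regression.py`:

```python
def within_tolerance(expected, observed, tol=0.01):
    # Accept an observed metric if it lies within tol of the expected value;
    # because HNSW indexing is non-deterministic, exact equality is too strict.
    return abs(expected - observed) <= tol

print(within_tolerance(0.7245, 0.7188))  # True: drift within 0.01
print(within_tolerance(0.7245, 0.7100))  # False: drifted too far
```

The design trade-off is the usual one for approximate indexes: a band wide enough to absorb benign run-to-run variation, but tight enough to catch genuine regressions.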
diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.template
new file mode 100644
index 000000000..3d5f64432
--- /dev/null
+++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.template
@@ -0,0 +1,100 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: cosDPR-distil with HNSW indexes (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom.
+
+In these experiments, we are performing query inference "on-the-fly" with ONNX.
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Note that we are performing query inference "on-the-fly" with ONNX in these experiments. + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.flat-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.flat-int8.cached.template
new file mode 100644
index 000000000..d81636da9
--- /dev/null
+++ b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.flat-int8.cached.template
@@ -0,0 +1,94 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: OpenAI-ada2 embeddings with quantized flat indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 75 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With cached queries on quantized indexes, results may differ slightly. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
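To build intuition for the int8 quantization behind these indexes, here is a toy scalar quantizer (a hedged sketch; Lucene's actual scheme is more sophisticated and differs in detail):

```python
def quantize_int8(vec):
    # Linearly map each float into [-128, 127] using the vector's own range.
    lo, hi = min(vec), max(vec)
    scale = (hi - lo) / 255 if hi > lo else 1.0
    return [round((v - lo) / scale) - 128 for v in vec], lo, scale

def dequantize_int8(codes, lo, scale):
    # Invert the mapping; a small reconstruction error is the price
    # paid for storing 1 byte per dimension instead of 4.
    return [(c + 128) * scale + lo for c in codes]

codes, lo, scale = quantize_int8([0.1, -0.4, 0.25, 0.0])
print(codes)  # [68, -128, 127, 29]
```

The rounding step is why quantized scores can differ slightly from the float32 figures above: each dimension is reconstructed only to within half a quantization step.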
diff --git a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.flat.cached.template b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.flat.cached.template
new file mode 100644
index 000000000..662566dec
--- /dev/null
+++ b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.flat.cached.template
@@ -0,0 +1,93 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: OpenAI-ada2 embeddings with flat indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 75 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +Note that since we're running brute-force search with cached queries on non-quantized indexes, the results should be reproducible _exactly_. + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. diff --git a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.template new file mode 100644 index 000000000..16b6d147d --- /dev/null +++ b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.template @@ -0,0 +1,100 @@ +# Anserini Regressions: TREC 2020 Deep Learning Track (Passage) + +**Model**: OpenAI-ada2 embeddings with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2019.html), as described in the following paper: + +> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. 
[Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget ${download_url} -P collections/
+tar xvf collections/${corpus}.tar -C collections/
+```
+
+To confirm, `${corpus}.tar` is 75 GB and has MD5 checksum `${download_checksum}`.
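To check the download against the published checksum, a small helper along these lines can be used (a sketch; `md5sum` is GNU coreutils — on macOS, substitute `md5 -q`):

```shell
# Compare a file's MD5 against an expected hex digest; prints OK or MISMATCH.
check_md5() {
  actual=$(md5sum "$1" | awk '{print $1}')
  if [ "$actual" = "$2" ]; then echo "OK"; else echo "MISMATCH"; fi
}

# Demo on a throwaway file; for the real corpus, pass
# collections/<corpus>.tar and the checksum shown on this page instead.
printf 'hello' > /tmp/demo.bin
check_md5 /tmp/demo.bin "5d41402abc4b2a76b9719d911017c592"   # md5 of "hello"
```

The demo call prints `OK`; a partial or corrupted download would print `MISMATCH`, in which case re-download before unpacking.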
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track.
+The original data can be found [here](https://trec.nist.gov/data/deep2020.html).
+ +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
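To make the `-l 2` convention above concrete: for binary metrics such as AP, `trec_eval -l 2` treats any judgment below relevance grade 2 as non-relevant. The same cutoff can be expressed directly over TREC-format qrels (columns: qid, iteration, docid, grade) — a toy sketch with made-up judgments:

```shell
# Toy qrels: three judged passages for one topic, with grades 3, 1, and 2.
cat > /tmp/qrels.demo.txt <<'EOF'
1 0 docA 3
1 0 docB 1
1 0 docC 2
EOF

# What `-l 2` keeps as relevant (grade >= 2): docA and docC survive, docB does not.
awk '$4 >= 2' /tmp/qrels.demo.txt
```

Note that nDCG ignores this cutoff and uses all grades, which is why the two kinds of metrics are computed against the same qrels file.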
diff --git a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw.cached.template
new file mode 100644
index 000000000..5b0d8e242
--- /dev/null
+++ b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw.cached.template
@@ -0,0 +1,98 @@
+# Anserini Regressions: TREC 2020 Deep Learning Track (Passage)
+
+**Model**: OpenAI-ada2 embeddings with HNSW indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [TREC 2020 Deep Learning Track passage ranking task](https://trec.nist.gov/data/deep2020.html), as described in the following paper:
+
+> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast).
+For additional instructions on working with the MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 75 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+The regression experiments here evaluate on the 54 topics for which NIST has provided judgments as part of the TREC 2020 Deep Learning Track. +The original data can be found [here](https://trec.nist.gov/data/deep2020.html). + +After indexing has completed, you should be able to perform retrieval as follows: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). +For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). +The experimental results reported here are directly comparable to the results reported in the [track overview paper](https://arxiv.org/abs/2102.07662). + +## Reproduction Log[*](reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template new file mode 100644 index 000000000..0a2bf5305 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached.template @@ -0,0 +1,82 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. 
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget ${download_url} -P collections/
+tar xvf collections/${corpus}.tar -C collections/
+```
+
+To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building quantized flat indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details.
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized indexes.
+With cached queries on quantized indexes, results may differ slightly. diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template new file mode 100644 index 000000000..0ce5e5c89 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.template @@ -0,0 +1,82 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. 
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget ${download_url} -P collections/
+tar xvf collections/${corpus}.tar -C collections/
+```
+
+To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building quantized flat indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details.
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized indexes.
+With ONNX query encoding on quantized indexes, results may differ slightly. diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx.template new file mode 100644 index 000000000..1e31ba1f5 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx.template @@ -0,0 +1,82 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. 
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget ${download_url} -P collections/
+tar xvf collections/${corpus}.tar -C collections/
+```
+
+To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building flat indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details.
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized indexes.
+With ONNX query encoding on non-quantized indexes, results may differ slightly. diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template new file mode 100644 index 000000000..006ac5ecc --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template @@ -0,0 +1,92 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. 
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget ${download_url} -P collections/
+tar xvf collections/${corpus}.tar -C collections/
+```
+
+To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+ +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details. + +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](${root_path}/docs/reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
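Because HNSW construction is non-deterministic, two indexing trials can produce runs whose scores differ in the low-order digits, as noted above. Comparing run files directly makes that drift visible — a toy sketch with fabricated TREC run lines (columns: qid, Q0, docid, rank, score, tag; file names are illustrative):

```shell
# Two fabricated one-line runs that differ only in the score field.
printf '1 Q0 docA 1 7.103499 trial1\n' > /tmp/run.a.txt
printf '1 Q0 docA 1 7.103488 trial2\n' > /tmp/run.b.txt

# Project out qid, docid, and score (the run tags legitimately differ),
# then diff; any output flags trial-to-trial drift.
awk '{print $1, $3, $5}' /tmp/run.a.txt > /tmp/cmp.a
awk '{print $1, $3, $5}' /tmp/run.b.txt > /tmp/cmp.b
diff /tmp/cmp.a /tmp/cmp.b || true
```

Small score differences like this are expected between HNSW trials; only larger gaps (beyond the ~0.01 mentioned above) suggest something is actually wrong.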
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template new file mode 100644 index 000000000..b8d3d7d35 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template @@ -0,0 +1,92 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with quantized HNSW indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. 
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget ${download_url} -P collections/
+tar xvf collections/${corpus}.tar -C collections/
+```
+
+To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+ +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details. + +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](${root_path}/docs/reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.template new file mode 100644 index 000000000..fc21af616 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.template @@ -0,0 +1,90 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details. 
+ +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](${root_path}/docs/reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template new file mode 100644 index 000000000..6db3018d0 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template @@ -0,0 +1,90 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) with HNSW indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [BGE-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighoff. [C-Pack: Packaged Resources To Advance General Chinese Embedding.](https://arxiv.org/abs/2309.07597) _arXiv:2309.07597_, 2023. 
+ +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded by the BGE-base-en-v1.5 model. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 39 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. 
+ +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details. + +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](${root_path}/docs/reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
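The "scores may be lower by up to 0.01" caveat above amounts to a tolerance check rather than an exact match. A sketch of that comparison, with made-up scores standing in for an actual regression target and an observed run:

```shell
# Sketch: treat a run as passing when the observed score is within 0.01 of
# the target. Both values below are illustrative, not real regression targets.
expected=0.3641
observed=0.3633
if awk -v e="$expected" -v o="$observed" \
     'BEGIN { d = e - o; if (d < 0) d = -d; exit (d <= 0.01) ? 0 : 1 }'; then
  echo "within tolerance"
else
  echo "regression failure"
fi
```

This is only to illustrate why HNSW regressions cannot assert exact equality: graph construction is non-deterministic, so repeated trials land near, not on, the recorded figure.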
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template
new file mode 100644
index 000000000..81a42998b
--- /dev/null
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.template
@@ -0,0 +1,80 @@
+# Anserini Regressions: MS MARCO Passage Ranking
+
+**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with quantized flat indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking).
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
+```
+
+We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0.
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget ${download_url} -P collections/
+tar xvf collections/${corpus}.tar -C collections/
+```
+
+To confirm, `${corpus}.tar` is 16 GB and has MD5 checksum `${download_checksum}`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building quantized flat indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details.
+
+After indexing has completed, you should be able to perform retrieval as follows using flat indexes:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized indexes.
+With cached queries on quantized indexes, results may differ slightly. diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template new file mode 100644 index 000000000..81495c988 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template @@ -0,0 +1,90 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget ${download_url} -P collections/
+tar xvf collections/${corpus}.tar -C collections/
+```
+
+To confirm, `${corpus}.tar` is 16 GB and has MD5 checksum `${download_checksum}`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
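For intuition about what int8 scalar quantization does, here is a simplified sketch with made-up values: each float component is snapped to one of a small number of evenly spaced levels, so only the byte codes need to be held in memory. Lucene's actual byte quantization calibrates the range per segment and differs in the details:

```shell
# Simplified scalar quantization: map a float in [min, max] onto 128 levels
# and back. Illustrative only; not Lucene's exact scheme.
awk 'BEGIN {
  min = -1.0; max = 1.0; levels = 127;
  scale = (max - min) / levels;
  v = 0.25;                          # a sample vector component
  q = int((v - min) / scale + 0.5);  # quantize to an integer code
  dq = min + q * scale;              # dequantize (lossy)
  printf "code=%d reconstructed=%.4f\n", q, dq;
}'
```

The reconstruction error (here about 0.006 on a single component) is why quantized regressions tolerate small score differences rather than demanding exact matches.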
+ +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details. + +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](${root_path}/docs/reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template new file mode 100644 index 000000000..7a9578af1 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template @@ -0,0 +1,88 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) with HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the [Cohere embed-english-v3.0](https://docs.cohere.com/reference/embed) model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking). + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 16 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details. 
+ +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](${root_path}/docs/reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached.template new file mode 100644 index 000000000..d9062c826 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached.template @@ -0,0 +1,82 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: cosDPR-distil with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. 
[Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. 
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building quantized flat indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details.
+
+After indexing has completed, you should be able to perform retrieval as follows using flat indexes:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized indexes.
+With cached queries on quantized indexes, results may differ slightly.
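Brute-force search over a flat index, mentioned above, is just an exhaustive scan: score every stored vector against the query embedding and keep the top hits. A toy illustration with fabricated 3-dimensional vectors and docids (real embeddings here are hundreds of dimensions, and Anserini does this inside Lucene, not in shell):

```shell
# Exhaustive inner-product scan: one line per document vector, scored
# against a fixed query vector, then sorted by score.
cat > /tmp/vecs.txt <<'EOF'
doc1 0.1 0.9 0.2
doc2 0.8 0.1 0.1
doc3 0.4 0.4 0.4
EOF
top=$(awk -v q="0.7 0.2 0.1" '{
  split(q, qv, " ");
  s = 0;
  for (i = 2; i <= NF; i++) s += qv[i-1] * $i;   # inner product
  printf "%s %.4f\n", $1, s;
}' /tmp/vecs.txt | sort -k2 -nr | head -1)
echo "$top"
```

Because every document is scored, the ranking is exact; HNSW trades this exactness for speed, which is where the small score differences discussed above come from.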
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx.template new file mode 100644 index 000000000..9eb1eef79 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx.template @@ -0,0 +1,84 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: cosDPR-distil with quantized flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. 
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget ${download_url} -P collections/
+tar xvf collections/${corpus}.tar -C collections/
+```
+
+To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building quantized flat indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details.
+
+After indexing has completed, you should be able to perform retrieval as follows using flat indexes:
+
+```bash
+${ranking_cmds}
+```
+
+Note that we are performing query inference "on-the-fly" with ONNX in these experiments.
+ +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized indexes. +With ONNX query encoding on quantized indexes, results may differ slightly. diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx.template new file mode 100644 index 000000000..26c995351 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx.template @@ -0,0 +1,84 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: cosDPR-distil with flat indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building flat indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details. 
+
+After indexing has completed, you should be able to perform retrieval as follows using flat indexes:
+
+```bash
+${ranking_cmds}
+```
+
+Note that we are performing query inference "on-the-fly" with ONNX in these experiments.
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized indexes.
+With ONNX query encoding on non-quantized indexes, results may differ slightly.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template
new file mode 100644
index 000000000..81f201207
--- /dev/null
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template
@@ -0,0 +1,94 @@
+# Anserini Regressions: MS MARCO Passage Ranking
+
+**Model**: cosDPR-distil with quantized HNSW indexes (using cached queries)
+
+This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper:
+
+> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. 
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details.
+
+After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
+
+## Reproduction Log[*](${root_path}/docs/reproducibility.md)
+
+To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation.
+ ++ Results reproduced by [@yilinjz](https://github.com/yilinjz) on 2023-09-01 (commit [`4ae518b`](https://github.com/castorini/anserini/commit/4ae518bb284ebcba0b273a473bc8774735cb7d19)) \ No newline at end of file diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template new file mode 100644 index 000000000..0bf42dcae --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template @@ -0,0 +1,94 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: cosDPR-distil with quantized HNSW indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building quantized HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. 
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details.
+
+After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes:
+
+```bash
+${ranking_cmds}
+```
+
+Note that we are performing query inference "on-the-fly" with ONNX in these experiments.
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
+
+## Reproduction Log[*](${root_path}/docs/reproducibility.md)
+
+To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.template new file mode 100644 index 000000000..b7b22a8b7 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.template @@ -0,0 +1,92 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: cosDPR-distil with HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. [Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details. 
+ +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](${root_path}/docs/reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. + ++ Results reproduced by [@yilinjz](https://github.com/yilinjz) on 2023-09-01 (commit [`4ae518b`](https://github.com/castorini/anserini/commit/4ae518bb284ebcba0b273a473bc8774735cb7d19)) \ No newline at end of file diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.template new file mode 100644 index 000000000..5e5654929 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.template @@ -0,0 +1,92 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: cosDPR-distil with HNSW indexes (using ONNX for on-the-fly query encoding) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using the cosDPR-distil model on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Xueguang Ma, Tommaso Teofili, and Jimmy Lin. 
[Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes.](https://dl.acm.org/doi/10.1145/3583780.3615112) _Proceedings of the 32nd International Conference on Information and Knowledge Management (CIKM 2023)_, October 2023, pages 5366–5370, Birmingham, the United Kingdom. + +In these experiments, we are performing query inference "on-the-fly" with ONNX. + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. + +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 38 GB and has MD5 checksum `${download_checksum}`. 
+With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details. + +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +${ranking_cmds} +``` + +Note that we are performing query inference "on-the-fly" with ONNX in these experiments. + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). 
+ +## Reproduction Log[*](${root_path}/docs/reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached.template new file mode 100644 index 000000000..cbf8f062f --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached.template @@ -0,0 +1,82 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: OpenAI-ada2 embeddings with quantized flat indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. 
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget ${download_url} -P collections/
+tar xvf collections/${corpus}.tar -C collections/
+```
+
+To confirm, `${corpus}.tar` is 75 GB and has MD5 checksum `${download_checksum}`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building quantized flat indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details.
+
+After indexing has completed, you should be able to perform retrieval as follows using flat indexes:
+
+```bash
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```bash
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
+
+The above figures are from running brute-force search with cached queries on non-quantized indexes.
+With cached queries on quantized indexes, results may differ slightly. diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.template new file mode 100644 index 000000000..51f36c577 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.template @@ -0,0 +1,93 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: OpenAI-ada2 embeddings with quantized HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. 
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download the corpus and unpack into `collections/`:
+
+```bash
+wget ${download_url} -P collections/
+tar xvf collections/${corpus}.tar -C collections/
+```
+
+To confirm, `${corpus}.tar` is 75 GB and has MD5 checksum `${download_checksum}`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
+
+## Indexing
+
+Sample indexing command, building quantized HNSW indexes:
+
+```bash
+${index_cmds}
+```
+
+The path `/path/to/${corpus}/` should point to the corpus downloaded above.
+Upon completion, we should have an index with 8,841,823 documents.
+
+Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
+This is because merging index segments is a costly operation and not worthwhile given our query set.
+Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increases the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
+See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
+ +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details. + +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](${root_path}/docs/reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. 
+ diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.template new file mode 100644 index 000000000..dae5d39c6 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.template @@ -0,0 +1,91 @@ +# Anserini Regressions: MS MARCO Passage Ranking + +**Model**: OpenAI-ada2 embeddings with HNSW indexes (using cached queries) + +This page describes regression experiments, integrated into Anserini's regression testing framework, using OpenAI-ada2 embeddings on the [MS MARCO passage ranking task](https://github.com/microsoft/MSMARCO-Passage-Ranking), as described in the following paper: + +> Jimmy Lin, Ronak Pradeep, Tommaso Teofili, and Jasper Xian. [Vector Search with OpenAI Embeddings: Lucene Is All You Need.](https://arxiv.org/abs/2308.14963) _arXiv:2308.14963_, 2023. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. 
+ +From any machine, the following command will download the corpus and perform the complete regression, end to end: + +```bash +python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name} +``` + +The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. + +## Corpus Download + +Download the corpus and unpack into `collections/`: + +```bash +wget ${download_url} -P collections/ +tar xvf collections/${corpus}.tar -C collections/ +``` + +To confirm, `${corpus}.tar` is 75 GB and has MD5 checksum `${download_checksum}`. +With the corpus downloaded, the following command will perform the remaining steps below: + +```bash +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \ + --corpus-path collections/${corpus} +``` + +## Indexing + +Sample indexing command, building HNSW indexes: + +```bash +${index_cmds} +``` + +The path `/path/to/${corpus}/` should point to the corpus downloaded above. +Upon completion, we should have an index with 8,841,823 documents. + +Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. +This is because merging index segments is a costly operation and not worthwhile given our query set. + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +The regression experiments here evaluate on the 6980 dev set questions; see [this page](${root_path}/docs/experiments-msmarco-passage.md) for more details. 
+ +After indexing has completed, you should be able to perform retrieval as follows using HNSW indexes: + +```bash +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +```bash +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} + +The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes. +With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more. +Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials). + +## Reproduction Log[*](${root_path}/docs/reproducibility.md) + +To add to this reproduction log, modify [this template](${template}) and run `bin/build.sh` to rebuild the documentation. + From f54f416d5d64d3d807c3d2a10e08f509b93959d6 Mon Sep 17 00:00:00 2001 From: lintool Date: Thu, 5 Dec 2024 20:43:34 -0500 Subject: [PATCH 10/14] Tweaked README. --- README.md | 89 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 0b1601d2d..71812e287 100644 --- a/README.md +++ b/README.md @@ -109,26 +109,44 @@ See individual pages for details. 
### MS MARCO V1 Passage Regressions +| | dev | DL19 | DL20 | +|--------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| +| **Unsupervised Sparse** | | | | +| Lucene BoW baselines | [🔑](docs/regressions/regressions-msmarco-v1-passage.md) | [🔑](docs/regressions/regressions-dl19-passage.md) | [🔑](docs/regressions/regressions-dl20-passage.md) | +| Quantized BM25 | [🔑](docs/regressions/regressions-msmarco-v1-passage.bm25-b8.md) | [🔑](docs/regressions/regressions-dl19-passage.bm25-b8.md) | [🔑](docs/regressions/regressions-dl20-passage.bm25-b8.md) | +| WordPiece baselines (pre-tokenized) | [🔑](docs/regressions/regressions-msmarco-v1-passage.wp-tok.md) | [🔑](docs/regressions/regressions-dl19-passage.wp-tok.md) | [🔑](docs/regressions/regressions-dl20-passage.wp-tok.md) | +| WordPiece baselines (Huggingface) | [🔑](docs/regressions/regressions-msmarco-v1-passage.wp-hgf.md) 
| [🔑](docs/regressions/regressions-dl19-passage.wp-hgf.md) | [🔑](docs/regressions/regressions-dl20-passage.wp-hgf.md) | +| WordPiece + Lucene BoW baselines | [🔑](docs/regressions/regressions-msmarco-v1-passage.wp-ca.md) | [🔑](docs/regressions/regressions-dl19-passage.wp-ca.md) | [🔑](docs/regressions/regressions-dl20-passage.wp-ca.md) | +| doc2query | [🔑](docs/regressions/regressions-msmarco-v1-passage.doc2query.md) | | | +| doc2query-T5 | [🔑](docs/regressions/regressions-msmarco-v1-passage.docTTTTTquery.md) | [🔑](docs/regressions/regressions-dl19-passage.docTTTTTquery.md) | [🔑](docs/regressions/regressions-dl20-passage.docTTTTTquery.md) | +| **Learned Sparse (uniCOIL family)** | | | | +| uniCOIL noexp | [🫙](docs/regressions/regressions-msmarco-v1-passage.unicoil-noexp.cached.md) | [🫙](docs/regressions/regressions-dl19-passage.unicoil-noexp.cached.md) | [🫙](docs/regressions/regressions-dl20-passage.unicoil-noexp.cached.md) | +| uniCOIL with doc2query-T5 | [🫙](docs/regressions/regressions-msmarco-v1-passage.unicoil.cached.md) | [🫙](docs/regressions/regressions-dl19-passage.unicoil.cached.md) | [🫙](docs/regressions/regressions-dl20-passage.unicoil.cached.md) | +| uniCOIL with TILDE | [🫙](docs/regressions/regressions-msmarco-v1-passage.unicoil-tilde-expansion.cached.md) | | | +| **Learned Sparse (other)** | | | | +| DeepImpact | [🫙](docs/regressions/regressions-msmarco-v1-passage.deepimpact.cached.md) | | | +| SPLADEv2 | [🫙](docs/regressions/regressions-msmarco-v1-passage.distill-splade-max.cached.md) | | | +| SPLADE++ CoCondenser-EnsembleDistil | [🫙](docs/regressions/regressions-msmarco-v1-passage.splade-pp-ed.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.splade-pp-ed.onnx.md) | [🫙](docs/regressions/regressions-dl19-passage.splade-pp-ed.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.splade-pp-ed.onnx.md) | [🫙](docs/regressions/regressions-dl20-passage.splade-pp-ed.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.splade-pp-ed.onnx.md) 
| +| SPLADE++ CoCondenser-SelfDistil | [🫙](docs/regressions/regressions-msmarco-v1-passage.splade-pp-sd.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.splade-pp-sd.onnx.md) | [🫙](docs/regressions/regressions-dl19-passage.splade-pp-sd.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.splade-pp-sd.onnx.md) | [🫙](docs/regressions/regressions-dl20-passage.splade-pp-sd.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.splade-pp-sd.onnx.md) | +| **Learned Dense** (HNSW indexes) | | | | +| cosDPR-distil | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.parquet.cos-dpr-distil.hnsw-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md) | +| BGE-base-en-v1.5 | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md) 
int8:[🫙](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md) | +| OpenAI Ada2 | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.md) int8:[🫙](docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.md) | full:[🫙](docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw.cached.md) int8:[🫙](docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw-int8.cached.md) | full:[🫙](docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw.cached.md) int8:[🫙](docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw-int8.cached.md) | +| Cohere English v3.0 | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md) int8:[🫙](docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md) | full:[🫙](docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md) int8:[🫙](docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md) | 
full:[🫙](docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md) int8:[🫙](docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md) | +| **Learned Dense** (flat indexes) | | | | +| cosDPR-distil | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat.onnx.md) int8:[🫙](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.flat-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat.onnx.md) int8:[🫙](docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat-int8.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.flat-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat.onnx.md) int8:[🫙](docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat-int8.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.flat-int8.onnx.md) | +| BGE-base-en-v1.5 | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat.onnx.md) int8:[🫙](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat.onnx.md) 
int8:[🫙](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat-int8.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat.onnx.md) int8:[🫙](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat-int8.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.flat-int8.onnx.md) | +| OpenAI Ada2 | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat.cached.md) int8:[🫙️](docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.flat-int8.cached.md) | full:[🫙](docs/regressions/regressions-dl19-passage.openai-ada2.parquet.flat.cached.md) int8:[🫙](docs/regressions/regressions-dl19-passage.openai-ada2.parquet.flat-int8.cached.md) | full:[🫙](docs/regressions/regressions-dl20-passage.openai-ada2.parquet.flat.cached.md) int8:[🫙](docs/regressions/regressions-dl20-passage.openai-ada2.parquet.flat-int8.cached.md) | +| Cohere English v3.0 | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat.cached.md) int8:[🫙](docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.md) | full:[🫙](docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.flat.cached.md) int8:[🫙](docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.md) | full:[🫙](docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.flat.cached.md) int8:[🫙](docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.flat-int8.cached.md) | +| **Learned Dense** (Inverted; experimental) | | | | +| cosDPR-distil w/ "fake words" | [🫙](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.fw.md) | 
[🫙](docs/regressions/regressions-dl19-passage.cos-dpr-distil.fw.md) | [🫙](docs/regressions/regressions-dl20-passage.cos-dpr-distil.fw.md) | +| cosDPR-distil w/ "LexLSH" | [🫙](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.lexlsh.md) | [🫙](docs/regressions/regressions-dl19-passage.cos-dpr-distil.lexlsh.md) | [🫙](docs/regressions/regressions-dl20-passage.cos-dpr-distil.lexlsh.md) | + +
+Deprecated instructions using corpora in jsonl format | | dev | DL19 | DL20 | |--------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| -| **Unsupervised Sparse** | | | | -| Lucene BoW baselines | [🔑](docs/regressions/regressions-msmarco-v1-passage.md) | [🔑](docs/regressions/regressions-dl19-passage.md) | [🔑](docs/regressions/regressions-dl20-passage.md) | -| Quantized BM25 | [🔑](docs/regressions/regressions-msmarco-v1-passage.bm25-b8.md) | [🔑](docs/regressions/regressions-dl19-passage.bm25-b8.md) | [🔑](docs/regressions/regressions-dl20-passage.bm25-b8.md) | -| WordPiece baselines (pre-tokenized) | [🔑](docs/regressions/regressions-msmarco-v1-passage.wp-tok.md) | [🔑](docs/regressions/regressions-dl19-passage.wp-tok.md) | [🔑](docs/regressions/regressions-dl20-passage.wp-tok.md) | -| WordPiece baselines (Huggingface) | [🔑](docs/regressions/regressions-msmarco-v1-passage.wp-hgf.md) | [🔑](docs/regressions/regressions-dl19-passage.wp-hgf.md) | 
[🔑](docs/regressions/regressions-dl20-passage.wp-hgf.md) | -| WordPiece + Lucene BoW baselines | [🔑](docs/regressions/regressions-msmarco-v1-passage.wp-ca.md) | [🔑](docs/regressions/regressions-dl19-passage.wp-ca.md) | [🔑](docs/regressions/regressions-dl20-passage.wp-ca.md) | -| doc2query | [🔑](docs/regressions/regressions-msmarco-v1-passage.doc2query.md) | | | -| doc2query-T5 | [🔑](docs/regressions/regressions-msmarco-v1-passage.docTTTTTquery.md) | [🔑](docs/regressions/regressions-dl19-passage.docTTTTTquery.md) | [🔑](docs/regressions/regressions-dl20-passage.docTTTTTquery.md) | -| **Learned Sparse (uniCOIL family)** | | | | -| uniCOIL noexp | [🫙](docs/regressions/regressions-msmarco-v1-passage.unicoil-noexp.cached.md) | [🫙](docs/regressions/regressions-dl19-passage.unicoil-noexp.cached.md) | [🫙](docs/regressions/regressions-dl20-passage.unicoil-noexp.cached.md) | -| uniCOIL with doc2query-T5 | [🫙](docs/regressions/regressions-msmarco-v1-passage.unicoil.cached.md) | [🫙](docs/regressions/regressions-dl19-passage.unicoil.cached.md) | [🫙](docs/regressions/regressions-dl20-passage.unicoil.cached.md) | -| uniCOIL with TILDE | [🫙](docs/regressions/regressions-msmarco-v1-passage.unicoil-tilde-expansion.cached.md) | | | -| **Learned Sparse (other)** | | | | -| DeepImpact | [🫙](docs/regressions/regressions-msmarco-v1-passage.deepimpact.cached.md) | | | -| SPLADEv2 | [🫙](docs/regressions/regressions-msmarco-v1-passage.distill-splade-max.cached.md) | | | -| SPLADE++ CoCondenser-EnsembleDistil | [🫙](docs/regressions/regressions-msmarco-v1-passage.splade-pp-ed.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.splade-pp-ed.onnx.md) | [🫙](docs/regressions/regressions-dl19-passage.splade-pp-ed.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.splade-pp-ed.onnx.md) | [🫙](docs/regressions/regressions-dl20-passage.splade-pp-ed.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.splade-pp-ed.onnx.md) | -| SPLADE++ CoCondenser-SelfDistil | 
[🫙](docs/regressions/regressions-msmarco-v1-passage.splade-pp-sd.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.splade-pp-sd.onnx.md) | [🫙](docs/regressions/regressions-dl19-passage.splade-pp-sd.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.splade-pp-sd.onnx.md) | [🫙](docs/regressions/regressions-dl20-passage.splade-pp-sd.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.splade-pp-sd.onnx.md) | | **Learned Dense** (HNSW indexes) | | | | | cosDPR-distil | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md) | | BGE-base-en-v1.5 | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md) | 
full:[🫙](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md) | @@ -139,9 +157,8 @@ See individual pages for details. | BGE-base-en-v1.5 | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.md) int8:[🫙](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.onnx.md) int8:[🫙](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.onnx.md) int8:[🫙](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.onnx.md) | | OpenAI Ada2 | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.openai-ada2.flat.cached.md) int8:[🫙️](docs/regressions/regressions-msmarco-v1-passage.openai-ada2.flat-int8.cached.md) | 
full:[🫙](docs/regressions/regressions-dl19-passage.openai-ada2.flat.cached.md) int8:[🫙](docs/regressions/regressions-dl19-passage.openai-ada2.flat-int8.cached.md) | full:[🫙](docs/regressions/regressions-dl20-passage.openai-ada2.flat.cached.md) int8:[🫙](docs/regressions/regressions-dl20-passage.openai-ada2.flat-int8.cached.md) | | Cohere English v3.0 | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.flat.cached.md) int8:[🫙](docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.flat-int8.cached.md) | full:[🫙](docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.flat.cached.md) int8:[🫙](docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.flat-int8.cached.md) | full:[🫙](docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.flat.cached.md) int8:[🫙](docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.flat-int8.cached.md) | -| **Learned Dense** (Inverted; experimental) | | | | -| cosDPR-distil w/ "fake words" | [🫙](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.fw.md) | [🫙](docs/regressions/regressions-dl19-passage.cos-dpr-distil.fw.md) | [🫙](docs/regressions/regressions-dl20-passage.cos-dpr-distil.fw.md) | -| cosDPR-distil w/ "LexLSH" | [🫙](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.lexlsh.md) | [🫙](docs/regressions/regressions-dl19-passage.cos-dpr-distil.lexlsh.md) | [🫙](docs/regressions/regressions-dl20-passage.cos-dpr-distil.lexlsh.md) | + +
Key: + 🔑 = keyword queries @@ -151,20 +168,24 @@ Key: ### Available Corpora for Download -| Corpora | Size | Checksum | -|:---------------------------------------------------------------------------------------------------------------------|-------:|:-----------------------------------| -| [Quantized BM25](https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-bm25-b8.tar) | 1.2 GB | `0a623e2c97ac6b7e814bf1323a97b435` | -| [uniCOIL (noexp)](https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-unicoil-noexp.tar) | 2.7 GB | `f17ddd8c7c00ff121c3c3b147d2e17d8` | -| [uniCOIL (d2q-T5)](https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-unicoil.tar) | 3.4 GB | `78eef752c78c8691f7d61600ceed306f` | -| [uniCOIL (TILDE)](https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-unicoil-tilde-expansion.tar) | 3.9 GB | `12a9c289d94e32fd63a7d39c9677d75c` | -| [DeepImpact](https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-deepimpact.tar) | 3.6 GB | `73843885b503af3c8b3ee62e5f5a9900` | -| [SPLADEv2](https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-distill-splade-max.tar) | 9.9 GB | `b5d126f5d9a8e1b3ef3f5cb0ba651725` | -| [SPLADE++ CoCondenser-EnsembleDistil](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-splade-pp-ed.tar) | 4.2 GB | `e489133bdc54ee1e7c62a32aa582bc77` | -| [SPLADE++ CoCondenser-SelfDistil](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-splade-pp-sd.tar) | 4.8 GB | `cb7e264222f2bf2221dd2c9d28190be1` | -| [cosDPR-distil](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar) | 57 GB | `e20ffbc8b5e7f760af31298aefeaebbd` | -| [BGE-base-en-v1.5](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar) | 59 GB | `353d2c9e72e858897ad479cca4ea0db1` | -| [OpenAI-ada2](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar) | 109 GB | `a4d843d522ff3a3af7edbee789a63402` | -| [Cohere 
embed-english-v3.0](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar) | 38 GB | `06a6e38a0522850c6aa504db7b2617f5` | +| Corpora | Size | Checksum | +|:-----------------------------------------------------------------------------------------------------------------------------------------|-------:|:-----------------------------------| +| [Quantized BM25](https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-bm25-b8.tar) | 1.2 GB | `0a623e2c97ac6b7e814bf1323a97b435` | +| [uniCOIL (noexp)](https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-unicoil-noexp.tar) | 2.7 GB | `f17ddd8c7c00ff121c3c3b147d2e17d8` | +| [uniCOIL (d2q-T5)](https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-unicoil.tar) | 3.4 GB | `78eef752c78c8691f7d61600ceed306f` | +| [uniCOIL (TILDE)](https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-unicoil-tilde-expansion.tar) | 3.9 GB | `12a9c289d94e32fd63a7d39c9677d75c` | +| [DeepImpact](https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-deepimpact.tar) | 3.6 GB | `73843885b503af3c8b3ee62e5f5a9900` | +| [SPLADEv2](https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-distill-splade-max.tar) | 9.9 GB | `b5d126f5d9a8e1b3ef3f5cb0ba651725` | +| [SPLADE++ CoCondenser-EnsembleDistil](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-splade-pp-ed.tar) | 4.2 GB | `e489133bdc54ee1e7c62a32aa582bc77` | +| [SPLADE++ CoCondenser-SelfDistil](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-splade-pp-sd.tar) | 4.8 GB | `cb7e264222f2bf2221dd2c9d28190be1` | +| [cosDPR-distil](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.parquet.tar) (parquet) | 38 GB | `c8a204fbc3ccda581aa375936af43a97` | +| [BGE-base-en-v1.5](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar) (parquet) | 39 GB | `b235e19ec492c18a18057b30b8b23fd4` | +| 
[OpenAI-ada2](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar) (parquet) | 75 GB | `fa3637e9c4150b157270e19ef3a4f779` | +| [Cohere embed-english-v3.0](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar) (parquet) | 16 GB | `40c5caf33476746e93ceeb75174b8d64` | +| [cosDPR-distil](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar) (jsonl, deprecated) | 57 GB | `e20ffbc8b5e7f760af31298aefeaebbd` | +| [BGE-base-en-v1.5](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar) (jsonl, deprecated) | 59 GB | `353d2c9e72e858897ad479cca4ea0db1` | +| [OpenAI-ada2](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar) (jsonl, deprecated) | 109 GB | `a4d843d522ff3a3af7edbee789a63402` | +| [Cohere embed-english-v3.0](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar) (jsonl, deprecated) | 38 GB | `06a6e38a0522850c6aa504db7b2617f5` |
From cd37fea3a6a8d3da1569135d8fca59a57a5e5963 Mon Sep 17 00:00:00 2001 From: lintool Date: Thu, 5 Dec 2024 20:46:36 -0500 Subject: [PATCH 11/14] More README tweaks. --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 71812e287..a24b76007 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,7 @@ See individual pages for details. | cosDPR-distil w/ "LexLSH" | [🫙](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.lexlsh.md) | [🫙](docs/regressions/regressions-dl19-passage.cos-dpr-distil.lexlsh.md) | [🫙](docs/regressions/regressions-dl20-passage.cos-dpr-distil.lexlsh.md) |
-Deprecated instructions using corpora in jsonl format +Deprecated instructions for learned dense models using corpora in jsonl format | | dev | DL19 | DL20 | |--------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| @@ -182,11 +182,19 @@ Key: | [BGE-base-en-v1.5](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.parquet.tar) (parquet) | 39 GB | `b235e19ec492c18a18057b30b8b23fd4` | | [OpenAI-ada2](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.parquet.tar) (parquet) | 75 GB | `fa3637e9c4150b157270e19ef3a4f779` | | [Cohere embed-english-v3.0](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.parquet.tar) (parquet) | 16 GB | `40c5caf33476746e93ceeb75174b8d64` | + +
+Deprecated corpora for learned dense models using corpora in jsonl format + +| Corpora | Size | Checksum | +|:-----------------------------------------------------------------------------------------------------------------------------------------|-------:|:-----------------------------------| | [cosDPR-distil](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar) (jsonl, deprecated) | 57 GB | `e20ffbc8b5e7f760af31298aefeaebbd` | | [BGE-base-en-v1.5](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar) (jsonl, deprecated) | 59 GB | `353d2c9e72e858897ad479cca4ea0db1` | | [OpenAI-ada2](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar) (jsonl, deprecated) | 109 GB | `a4d843d522ff3a3af7edbee789a63402` | | [Cohere embed-english-v3.0](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar) (jsonl, deprecated) | 38 GB | `06a6e38a0522850c6aa504db7b2617f5` | +
+
 MS MARCO V1 Document Regressions

From fcfe4e21be0ee7c570adb2824a2aaa68db86e632 Mon Sep 17 00:00:00 2001
From: lintool
Date: Thu, 5 Dec 2024 20:50:57 -0500
Subject: [PATCH 12/14] typo

---
 README.md | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a24b76007..d89308f7d 100644
--- a/README.md
+++ b/README.md
@@ -129,7 +129,7 @@ See individual pages for details.
 | SPLADE++ CoCondenser-EnsembleDistil | [🫙](docs/regressions/regressions-msmarco-v1-passage.splade-pp-ed.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.splade-pp-ed.onnx.md) | [🫙](docs/regressions/regressions-dl19-passage.splade-pp-ed.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.splade-pp-ed.onnx.md) | [🫙](docs/regressions/regressions-dl20-passage.splade-pp-ed.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.splade-pp-ed.onnx.md) |
 | SPLADE++ CoCondenser-SelfDistil | [🫙](docs/regressions/regressions-msmarco-v1-passage.splade-pp-sd.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.splade-pp-sd.onnx.md) | [🫙](docs/regressions/regressions-dl19-passage.splade-pp-sd.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.splade-pp-sd.onnx.md) | [🫙](docs/regressions/regressions-dl20-passage.splade-pp-sd.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.splade-pp-sd.onnx.md) |
 | **Learned Dense** (HNSW indexes) | | | |
-| cosDPR-distil | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.parquet.cos-dpr-distil.hnsw-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md) |
+| cosDPR-distil | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md) |
 | BGE-base-en-v1.5 | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md) | full:[🫙](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md) int8:[🫙](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md)[🅾️](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md) |
 | OpenAI Ada2 | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.md) int8:[🫙](docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.md) | full:[🫙](docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw.cached.md) int8:[🫙](docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw-int8.cached.md) | full:[🫙](docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw.cached.md) int8:[🫙](docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw-int8.cached.md) |
 | Cohere English v3.0 | full:[🫙](docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md) int8:[🫙](docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md) | full:[🫙](docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md) int8:[🫙](docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md) | full:[🫙](docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md) int8:[🫙](docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md) |
@@ -195,6 +195,8 @@ Key:
+
+
 MS MARCO V1 Document Regressions
@@ -225,6 +227,8 @@ Key:
 | [MS MARCO V1 doc: uniCOIL (noexp)](https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-doc-segmented-unicoil-noexp.tar) | 11 GB | `11b226e1cacd9c8ae0a660fd14cdd710` |
 | [MS MARCO V1 doc: uniCOIL (d2q-T5)](https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-doc-segmented-unicoil.tar) | 19 GB | `6a00e2c0c375cb1e52c83ae5ac377ebb` |
+
+
 MS MARCO V2 Passage Regressions
@@ -256,6 +260,8 @@ Key:
 | [SPLADE++ CoCondenser-EnsembleDistil](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco_v2_passage_splade_pp_ed.tar) | 66 GB | `2cdb2adc259b8fa6caf666b20ebdc0e8` |
 | [SPLADE++ CoCondenser-SelfDistil](https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco_v2_passage_splade_pp_sd.tar) | 76 GB | `061930dd615c7c807323ea7fc7957877` |
+
+
 MS MARCO V2 Document Regressions
@@ -281,6 +287,8 @@ Key:
 | [MS MARCO V2 doc: uniCOIL (noexp)](https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar) | 55 GB | `97ba262c497164de1054f357caea0c63` |
 | [MS MARCO V2 doc: uniCOIL (d2q-T5)](https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_0shot_v2.tar) | 72 GB | `c5639748c2cbad0152e10b0ebde3b804` |
+
+
 MS MARCO V2.1 Document Regressions
@@ -297,6 +305,8 @@ The experiments below capture topics and qrels originally targeted at the V2 cor
 | **Unsupervised Lexical, Segmented Doc** | | | | | |
 | baselines | [+](docs/regressions/regressions-msmarco-v2.1-doc-segmented.md) | [+](docs/regressions/regressions-dl21-doc-segmented-msmarco-v2.1.md) | [+](docs/regressions/regressions-dl22-doc-segmented-msmarco-v2.1.md) | [+](docs/regressions/regressions-dl23-doc-segmented-msmarco-v2.1.md) | [+](docs/regressions/regressions-rag24-doc-segmented-raggy-dev.md) |
+
+
 BEIR (v1.0.0) Regressions
@@ -435,6 +445,8 @@ Substitute the appropriate `$MODEL` from the table below.
 | BGE (HNSW, full; ONNX) | `bge-base-en-v1.5.parquet.hnsw.onnx` |
 | BGE (HNSW, int8; ONNX) | `bge-base-en-v1.5.parquet.hnsw-int8.onnx` |
+
+
 Cross-lingual and Multi-lingual Regressions
@@ -457,6 +469,8 @@ Substitute the appropriate `$MODEL` from the table below.
 + Regressions for CIRAL (v1.0) BM25 (query translation): [Hausa](docs/regressions/regressions-ciral-v1.0-ha.md), [Somali](docs/regressions/regressions-ciral-v1.0-so.md), [Swahili](docs/regressions/regressions-ciral-v1.0-sw.md), [Yoruba](docs/regressions/regressions-ciral-v1.0-yo.md)
 + Regressions for CIRAL (v1.0) BM25 (document translation): [Hausa](docs/regressions/regressions-ciral-v1.0-ha-en.md), [Somali](docs/regressions/regressions-ciral-v1.0-so-en.md), [Swahili](docs/regressions/regressions-ciral-v1.0-sw-en.md), [Yoruba](docs/regressions/regressions-ciral-v1.0-yo-en.md)
+
+
 Other Regressions
@@ -473,6 +487,8 @@ Substitute the appropriate `$MODEL` from the table below.
 + Regressions for [FEVER Fact Verification](docs/regressions/regressions-fever.md)
 + Regressions for DPR Wikipedia QA baselines: [100-word splits](docs/regressions/regressions-wikipedia-dpr-100w-bm25.md), [6/3 sliding window sentences](docs/regressions/regressions-wiki-all-6-3-tamber-bm25.md)
+
+
 ## 📃 Additional Documentation

From 2257a57bb67fa965d1b70223082e7c15c72d8df7 Mon Sep 17 00:00:00 2001
From: lintool
Date: Fri, 6 Dec 2024 07:20:27 -0500
Subject: [PATCH 13/14] Removed untrue statement.

---
 ...essions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md | 5 -----
 ...gressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md | 5 -----
 .../regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md | 3 ---
 .../regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md | 3 ---
 ...dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md | 5 -----
 ...s-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md | 5 -----
 ...ions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.md | 3 ---
 ...ssions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md | 3 ---
 ...l19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md | 3 ---
 ...ons-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md | 3 ---
 ...age.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md | 3 ---
 ...-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md | 3 ---
 ...gressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md | 5 -----
 ...regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md | 5 -----
 .../regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md | 3 ---
 .../regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md | 3 ---
 ...s-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md | 5 -----
 ...ons-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md | 5 -----
 ...ssions-dl19-passage.cos-dpr-distil.parquet.hnsw.cached.md | 3 ---
 ...ressions-dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.md | 3 ---
 .../regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md | 5 -----
 .../regressions-dl19-passage.openai-ada2.hnsw.cached.md | 3 ---
 ...ions-dl19-passage.openai-ada2.parquet.hnsw-int8.cached.md | 5 -----
 ...gressions-dl19-passage.openai-ada2.parquet.hnsw.cached.md | 3 ---
 ...essions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md | 5 -----
 ...gressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md | 5 -----
 .../regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md | 3 ---
 .../regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md | 3 ---
 ...dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md | 5 -----
 ...s-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md | 5 -----
 ...ions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.md | 3 ---
 ...ssions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md | 3 ---
 ...l20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md | 3 ---
 ...ons-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md | 3 ---
 ...age.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md | 3 ---
 ...-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md | 3 ---
 ...gressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md | 5 -----
 ...regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md | 5 -----
 .../regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md | 3 ---
 .../regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md | 3 ---
 ...s-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md | 5 -----
 ...ons-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md | 5 -----
 ...ssions-dl20-passage.cos-dpr-distil.parquet.hnsw.cached.md | 3 ---
 ...ressions-dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.md | 3 ---
 .../regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md | 5 -----
 .../regressions-dl20-passage.openai-ada2.hnsw.cached.md | 3 ---
 ...ions-dl20-passage.openai-ada2.parquet.hnsw-int8.cached.md | 5 -----
 ...gressions-dl20-passage.openai-ada2.parquet.hnsw.cached.md | 3 ---
 ...s-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md | 5 -----
 ...ons-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md | 5 -----
 ...ssions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md | 3 ---
 ...ressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md | 3 ---
 ...o-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md | 5 -----
 ...rco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md | 5 -----
 ...smarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.md | 3 ---
 ...-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md | 3 ---
 ...-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md | 5 -----
 ...marco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md | 3 ---
 ...age.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md | 5 -----
 ...ge.cohere-embed-english-v3.0.parquet.hnsw.cached.md | 3 ---
 ...ons-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md | 5 -----
 ...sions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md | 5 -----
 ...ressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md | 3 ---
 ...egressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md | 3 ---
 ...rco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md | 5 -----
 ...marco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md | 5 -----
 ...-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.md | 3 ---
 ...ns-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.md | 3 ---
 ...ssions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md | 5 -----
 ...regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md | 3 ---
 ...smarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.md | 5 -----
 ...ons-msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.md | 3 ---
 .../dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template | 5 -----
 .../dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 -----
 .../dl19-passage.bge-base-en-v1.5.hnsw.cached.template | 3 ---
 .../dl19-passage.bge-base-en-v1.5.hnsw.onnx.template | 3 ---
 ...assage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template | 5 -----
 ...-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template | 5 -----
 ...l19-passage.bge-base-en-v1.5.parquet.hnsw.cached.template | 3 ---
 .../dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template | 3 ---
 ...ssage.cohere-embed-english-v3.0.hnsw-int8.cached.template | 3 ---
 ...19-passage.cohere-embed-english-v3.0.hnsw.cached.template | 3 ---
 ...here-embed-english-v3.0.parquet.hnsw-int8.cached.template | 3 ---
 ...ge.cohere-embed-english-v3.0.parquet.hnsw.cached.template | 3 ---
 .../dl19-passage.cos-dpr-distil.hnsw-int8.cached.template | 5 -----
 .../dl19-passage.cos-dpr-distil.hnsw-int8.onnx.template | 5 -----
 .../dl19-passage.cos-dpr-distil.hnsw.cached.template | 3 ---
 .../templates/dl19-passage.cos-dpr-distil.hnsw.onnx.template | 3 ---
 ...-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template | 5 -----
 ...19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template | 5 -----
 .../dl19-passage.cos-dpr-distil.parquet.hnsw.cached.template | 3 ---
 .../dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.template | 3 ---
 .../dl19-passage.openai-ada2.hnsw-int8.cached.template | 5 -----
 .../templates/dl19-passage.openai-ada2.hnsw.cached.template | 3 ---
 ...l19-passage.openai-ada2.parquet.hnsw-int8.cached.template | 5 -----
 .../dl19-passage.openai-ada2.parquet.hnsw.cached.template | 3 ---
 .../dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template | 5 -----
 .../dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 -----
 .../dl20-passage.bge-base-en-v1.5.hnsw.cached.template | 3 ---
 .../dl20-passage.bge-base-en-v1.5.hnsw.onnx.template | 3 ---
 ...assage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template | 5 -----
 ...-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template | 5 -----
 ...l20-passage.bge-base-en-v1.5.parquet.hnsw.cached.template | 3 ---
 .../dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template | 3 ---
 ...ssage.cohere-embed-english-v3.0.hnsw-int8.cached.template | 3 ---
 ...20-passage.cohere-embed-english-v3.0.hnsw.cached.template | 3 ---
 ...here-embed-english-v3.0.parquet.hnsw-int8.cached.template | 3 ---
 ...ge.cohere-embed-english-v3.0.parquet.hnsw.cached.template | 3 ---
 .../dl20-passage.cos-dpr-distil.hnsw-int8.cached.template | 5 -----
 .../dl20-passage.cos-dpr-distil.hnsw-int8.onnx.template | 5 -----
 .../dl20-passage.cos-dpr-distil.hnsw.cached.template | 3 ---
 .../templates/dl20-passage.cos-dpr-distil.hnsw.onnx.template | 3 ---
 ...-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template | 5 -----
 ...20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template | 5 -----
 .../dl20-passage.cos-dpr-distil.parquet.hnsw.cached.template | 3 ---
 .../dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.template | 3 ---
 .../dl20-passage.openai-ada2.hnsw-int8.cached.template | 5 -----
 .../templates/dl20-passage.openai-ada2.hnsw.cached.template | 3 ---
 ...l20-passage.openai-ada2.parquet.hnsw-int8.cached.template | 5 -----
 .../dl20-passage.openai-ada2.parquet.hnsw.cached.template | 3 ---
 ...rco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template | 5 -----
 ...marco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.template | 5 -----
 .../msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template | 3 ---
 .../msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.template | 3 ---
 ...assage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template | 5 -----
 ...-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template | 5 -----
 ...-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.template | 3 ---
 ...co-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template | 3 ---
 ...ssage.cohere-embed-english-v3.0.hnsw-int8.cached.template | 5 -----
 ...v1-passage.cohere-embed-english-v3.0.hnsw.cached.template | 3 ---
 ...here-embed-english-v3.0.parquet.hnsw-int8.cached.template | 5 -----
 ...ge.cohere-embed-english-v3.0.parquet.hnsw.cached.template | 3 ---
 ...marco-v1-passage.cos-dpr-distil.hnsw-int8.cached.template | 5 -----
 ...msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.template | 5 -----
 .../msmarco-v1-passage.cos-dpr-distil.hnsw.cached.template | 3 ---
 .../msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.template | 3 ---
 ...-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template | 5 -----
 ...v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template | 5 -----
 ...co-v1-passage.cos-dpr-distil.parquet.hnsw.cached.template | 3 ---
 ...arco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.template | 3 ---
 .../msmarco-v1-passage.openai-ada2.hnsw-int8.cached.template | 5 -----
 .../msmarco-v1-passage.openai-ada2.hnsw.cached.template | 3 ---
 ...-v1-passage.openai-ada2.parquet.hnsw-int8.cached.template | 5 -----
 ...marco-v1-passage.openai-ada2.parquet.hnsw.cached.template | 3 ---
 144 files changed, 560 deletions(-)

diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md
index 0a4448980..69b83383f 100644
--- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md
@@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
index 2525bde01..bf8d671cf 100644
--- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
@@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md
index f9e744445..d7313e5c8 100644
--- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md
+++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md
@@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.

diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md
index ab5e4e72e..69e7512f7 100644
--- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md
+++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md
@@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md
index 67e3a90f1..befd00b70 100644
--- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md
@@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md
index 3856bbb00..e93e37914 100644
--- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md
@@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.md
index ad4a813cd..173bb3f23 100644
--- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.md
+++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.md
@@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.

diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md
index 7d8acab91..547e7fd65 100644
--- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md
+++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md
@@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.

diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
index 150e5aca5..498b55021 100644
--- a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
@@ -60,9 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.

diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md
index c536817ca..aa3447b76 100644
--- a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md
+++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md
@@ -60,9 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.

diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md
index 4c2d93583..e22067cb9 100644
--- a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md
@@ -60,9 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md
index b8bf150e4..dc7e7d12a 100644
--- a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md
+++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md
@@ -60,9 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.

diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md
index 82a67e248..2d24c09ff 100644
--- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md
@@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.

diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md
index ba80901e3..bd29d14b0 100644
--- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md
@@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.

diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md
index 4d80d8ea8..52f157c7f 100644
--- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md
+++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md
@@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.

diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md
index dd0b79bad..336bf65e2 100644
--- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md
+++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md
@@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
- ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md index ddfc2bc72..77a2dcf01 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md @@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md index ac1355ac2..3610cceb0 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md @@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.cached.md index 2ce7954cf..0ddb63a4c 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.cached.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.cached.md @@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.md index 8e44b1dd9..ac7b584b7 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.md @@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. 
- ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md index 2da259d06..3e81ea7a6 100644 --- a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md @@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md index 15f3b32f7..1b8f65489 100644 --- a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md +++ b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md @@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw-int8.cached.md index 65f311932..300fbcfc5 100644 --- a/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw-int8.cached.md @@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. 
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw.cached.md index 4a92ddf02..3eb678a1f 100644 --- a/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw.cached.md +++ b/docs/regressions/regressions-dl19-passage.openai-ada2.parquet.hnsw.cached.md @@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md index bd0481622..516e3c6c2 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md @@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md index 56a7391f0..d86d2175c 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md index 69acf4121..ae3c3a907 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md @@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md index 8dbf86271..8cf3c0ba5 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md @@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md index 8136df376..21d3c5fbf 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md @@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md index 8843cbd62..1497f522b 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md @@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.md index f42efe36c..968167b8c 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.md @@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md index e2c00aacc..e94dc71ee 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md @@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. 
- ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md index 477d55b2d..30e33d7ba 100644 --- a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md @@ -60,9 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md index 0d4bcbec2..b8294b8e5 100644 --- a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md +++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md @@ -60,9 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md index 845bea2bd..786c4976f 100644 --- a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md @@ -60,9 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md index 26ebd5bd9..20b2e2dfc 100644 --- a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md +++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md @@ -60,9 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md index 33aded5f3..990f17546 100644 --- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md @@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. 
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md
index cd06e2401..69244961f 100644
--- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md
@@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md
index 803c73de0..ea5604003 100644
--- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md
+++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md
@@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md
index 9dda2d476..dd93e4d37 100644
--- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md
+++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md
@@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md
index 0e0f1fa47..50873ff71 100644
--- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md
@@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md
index af3f79b4f..f5d869560 100644
--- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md
@@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.cached.md
index 1b23bc047..4bc6d86cb 100644
--- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.cached.md
+++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.cached.md
@@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.md
index 7f60b9814..42ea4f4fa 100644
--- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.md
+++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.md
@@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md
index 17136e5c6..2392d1524 100644
--- a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md
@@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md
index c7aeff6a4..2084ad0f2 100644
--- a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md
+++ b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md
@@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw-int8.cached.md
index e72b5f82d..8cbe9f82a 100644
--- a/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw-int8.cached.md
@@ -65,11 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw.cached.md
index 4b319dce3..6ef8e0980 100644
--- a/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw.cached.md
+++ b/docs/regressions/regressions-dl20-passage.openai-ada2.parquet.hnsw.cached.md
@@ -65,9 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md
index b2ffd220b..6c8fcd20a 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md
@@ -62,11 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
index 1e7f03279..2bdfc8bba 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
@@ -62,11 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md
index 924b05350..6e3ede46d 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md
@@ -62,9 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md
index 4dbd0b5f3..6854478d6 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md
@@ -62,9 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md
index ab86e3905..3d5923d37 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.md
@@ -62,11 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md
index aa94cd91e..2ea343cd2 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.md
@@ -62,11 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.md
index 8c9ea1335..bba5fde61 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.md
@@ -62,9 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md
index 48f938391..75a507362 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.md
@@ -62,9 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
index ec0059a8f..a0b4d2a38 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
@@ -60,11 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md
index 0a2537a0a..a517118b6 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md
@@ -60,9 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md
index a55eec0a1..169d0a6c6 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.md
@@ -60,11 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md
index 7ba065e89..fd7a37f61 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.md
@@ -60,9 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cohere-embed-english-v3.0.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md
index 2c55e93ee..9107ef259 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md
@@ -62,11 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
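The removed note's footprint claim can be made concrete with a back-of-the-envelope calculation. The figures below assume 768-dimensional embeddings (the width of cos-DPR-distil and BGE-base; OpenAI ada2 is 1536-dimensional, Cohere embed-english-v3.0 is 1024) and ignore HNSW graph overhead, so treat them as a rough sketch rather than measured index sizes:

```python
NUM_PASSAGES = 8_841_823  # MS MARCO passage corpus size, per the docs above
DIM = 768                 # assumed embedding width (varies by model)

float32_bytes = NUM_PASSAGES * DIM * 4  # what a non-quantized index stores
int8_bytes = NUM_PASSAGES * DIM * 1     # what must fit in memory with int8 quantization

print(f"float32 vectors: {float32_bytes / 1e9:.1f} GB")
print(f"int8 vectors:    {int8_bytes / 1e9:.1f} GB")
print(f"both on disk:    {(float32_bytes + int8_bytes) / 1e9:.1f} GB")
```

Under these assumptions, the quantized index is about 25% larger on disk (both copies are stored) but needs roughly a quarter of the memory at search time, which is the trade-off the removed paragraphs and [issue #2292](https://github.com/castorini/anserini/issues/2292) discuss.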
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md
index a63ab27a2..93904dec5 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md
@@ -62,11 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md
index 26d84c79e..c3dc078f3 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md
@@ -62,9 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md
index e34438c6e..3486c7769 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md
@@ -62,9 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md
index c3939032a..02ef95c10 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.md
@@ -62,11 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md
index 755c45d86..722d907c8 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.md
@@ -62,11 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.md
index ce94082b1..6bd00c57f 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.md
@@ -62,9 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.md
index cfda2597d..4e156fe10 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.md
@@ -62,9 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil.parquet/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
- ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md index 5535884e4..7ed2ac4d1 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md @@ -62,11 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md index 24a40d1b1..94256c3a5 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md @@ -62,9 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.md index c659687bb..1429988ff 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.md @@ -62,11 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. 
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.md index df2cfd8c4..226996095 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.md @@ -62,9 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-openai-ada2.parquet/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template index 8aaeed299..10c58940e 100644 --- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.template index 9cca24c3d..7d69fb6d1 100644 --- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.cached.template index 8b4f7d44d..9c87aa3e9 100644 --- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.cached.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. 
Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.onnx.template index 40f503992..c373ed11b 100644 --- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.onnx.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template index 96fccddc6..570862e6c 100644 --- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template index b6b36004d..b505ca28f 100644 --- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.template index a2360ca28..8e7222aed 100644 --- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.template +++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.cached.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template index 88fe261e8..a65ab6ee3 100644 --- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. 
- ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template index 565c13fea..4b2fb337f 100644 --- a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template @@ -53,9 +53,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.template index a47174b5f..8e7fc7a34 100644 --- a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.template +++ b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.template @@ -53,9 +53,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. 
-This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template index 231838e84..27bb71349 100644 --- a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template @@ -53,9 +53,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template index 3a27d94be..12a4e07c0 100644 --- a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template +++ b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template @@ -53,9 +53,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. 
Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.cached.template index 9406fe382..ba15af223 100644 --- a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.cached.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.template index ff348cdbc..49cc5ff8b 100644 --- a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.cached.template index e387bf030..01f1de1cd 100644 --- a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.cached.template +++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.cached.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. 
Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.onnx.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.onnx.template index c6d7c0e52..7ad66ac94 100644 --- a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.onnx.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template index 82433cd7b..4daae2e9e 100644 --- a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template index 04361bc70..7cce1427d 100644 --- a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.cached.template index 1849b75f2..8975af68c 100644 --- a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.cached.template +++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.cached.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.template index 016f48e05..c1c2bccac 100644 --- a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.parquet.hnsw.onnx.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. 
- ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw-int8.cached.template index 991983ff4..f6337c815 100644 --- a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw-int8.cached.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw.cached.template index 24ec176d9..cd90b0ec9 100644 --- a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw.cached.template +++ b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw.cached.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.template index c4993c4b8..ad7617626 100644 --- a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw-int8.cached.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. 
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw.cached.template index 589eae1f0..d835f4ff1 100644 --- a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw.cached.template +++ b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.parquet.hnsw.cached.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template index 4a8af6f44..405dc0841 100644 --- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.template index 109a8bae6..cc9e3721e 100644 --- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.cached.template index 773c46400..06c06139e 100644 --- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.cached.template +++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.cached.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. 
Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.onnx.template index 75d744e32..3d422266f 100644 --- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.onnx.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template index 65088fd27..82170e0b8 100644 --- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template index 295f0b46d..fee66c30a 100644 --- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.template index 6dede3d54..96c71735f 100644 --- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.template +++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.cached.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template index 4feede0df..167943b56 100644 --- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. 
- ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template index fcc92a8dc..d3e7c80c3 100644 --- a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template @@ -53,9 +53,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.template index 1bc8727a0..20c9ffd11 100644 --- a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.template +++ b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.template @@ -53,9 +53,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. 
-This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template index 36896b36c..058cd1351 100644 --- a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template @@ -53,9 +53,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template index e87d20d8d..c2834b467 100644 --- a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template +++ b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template @@ -53,9 +53,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. 
Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.cached.template index 4c9517f5c..f0a2c03b5 100644 --- a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.cached.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.template index 28dc4082c..4e1ada541 100644 --- a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.cached.template index fd89cd5bd..f8d68a212 100644 --- a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.cached.template +++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.cached.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. 
Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.onnx.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.onnx.template index 582cc4609..b1bb67a9c 100644 --- a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.onnx.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template index 8755edef0..56812875c 100644 --- a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template index 06ca830f2..326959734 100644 --- a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template +++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.cached.template index f405fcbd1..602d59f61 100644 --- a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.cached.template +++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.cached.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.template index 3d5f64432..f75f085b3 100644 --- a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.template +++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.parquet.hnsw.onnx.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. 
- ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw-int8.cached.template index ad2259f77..8c4a35adf 100644 --- a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw-int8.cached.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw.cached.template index 361c8bb69..10ba76743 100644 --- a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw.cached.template +++ b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw.cached.template @@ -58,9 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. - ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.template index 16b6d147d..e95d374dc 100644 --- a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.template +++ b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw-int8.cached.template @@ -58,11 +58,6 @@ ${index_cmds} The path `/path/to/${corpus}/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments. -This is because merging index segments is a costly operation and not worthwhile given our query set. 
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw.cached.template
index 5b0d8e242..b9a1b9b9d 100644
--- a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.parquet.hnsw.cached.template
@@ -58,9 +58,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template
index cd51e05e3..45f441d89 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template
@@ -55,11 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
index b6d111e0d..5ebb774c4 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
@@ -55,11 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template
index ba3d1d405..88882e264 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template
@@ -55,9 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.template
index a627a8725..458933df5 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.template
@@ -55,9 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template
index 006ac5ecc..4368730ee 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.cached.template
@@ -55,11 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template
index b8d3d7d35..2ab8035a7 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw-int8.onnx.template
@@ -55,11 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.template
index fc21af616..5a625c8f8 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.cached.template
@@ -55,9 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template
index 6db3018d0..76d007749 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.parquet.hnsw.onnx.template
@@ -55,9 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template
index 924906b79..ed92dcf9b 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template
@@ -53,11 +53,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.template
index 5f51caffa..965646da7 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.template
@@ -53,9 +53,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template
index 81495c988..57a3dad08 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw-int8.cached.template
@@ -53,11 +53,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template
index 7a9578af1..cf557a981 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.parquet.hnsw.cached.template
@@ -53,9 +53,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.template
index 89099d71f..47366597a 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.template
@@ -55,11 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.template
index b3737d545..af73110fc 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.template
@@ -55,11 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.template
index 5173b9b09..24ee24b35 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.template
@@ -55,9 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.template
index 3defd04a8..9e7fdf723 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.template
@@ -55,9 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template
index 81f201207..09f107bf0 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.cached.template
@@ -55,11 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template
index 0bf42dcae..1d8e436bd 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw-int8.onnx.template
@@ -55,11 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.template
index b7b22a8b7..05c230b68 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.cached.template
@@ -55,9 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.template
index 5e5654929..087be53e8 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.parquet.hnsw.onnx.template
@@ -55,9 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.template
index b82567bab..60ffe31a1 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.template
@@ -55,11 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw.cached.template
index a1827f765..ad263299d 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw.cached.template
@@ -55,9 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.template
index 51f36c577..b594b90c6 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw-int8.cached.template
@@ -55,11 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.template
index dae5d39c6..89dfb95fd 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.parquet.hnsw.cached.template
@@ -55,9 +55,6 @@ ${index_cmds}
 The path `/path/to/${corpus}/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.

-Note that here we are explicitly using Lucene's `NoMergePolicy` merge policy, which suppresses any merging of index segments.
-This is because merging index segments is a costly operation and not worthwhile given our query set.
-
 ## Retrieval

 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.

From 5223ed4c752ef5c572141ee8f8a1d1fcd2242c6a Mon Sep 17 00:00:00 2001
From: lintool
Date: Fri, 6 Dec 2024 10:24:28 -0500
Subject: [PATCH 14/14] regenerated docs.
--- ...ssions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md | 6 ------ ...ressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md | 6 ------ ...regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md | 4 ---- .../regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md | 4 ---- ...19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md | 4 ---- ...ns-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md | 4 ---- ...ressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md | 6 ------ ...egressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md | 6 ------ .../regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md | 4 ---- .../regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md | 4 ---- ...regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md | 6 ------ .../regressions-dl19-passage.openai-ada2.hnsw.cached.md | 4 ---- ...ssions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md | 6 ------ ...ressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md | 6 ------ ...regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md | 4 ---- .../regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md | 4 ---- ...20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md | 4 ---- ...ns-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md | 4 ---- ...ressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md | 6 ------ ...egressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md | 6 ------ .../regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md | 4 ---- .../regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md | 4 ---- ...regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md | 6 ------ .../regressions-dl20-passage.openai-ada2.hnsw.cached.md | 4 ---- ...-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md | 6 ------ ...ns-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md | 6 ------ ...sions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md | 4 ---- ...essions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md | 4 ---- 
...v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md | 6 ------ ...arco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md | 4 ---- ...ns-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md | 6 ------ ...ions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md | 6 ------ ...essions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md | 4 ---- ...gressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md | 4 ---- ...sions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md | 6 ------ ...egressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md | 4 ---- 36 files changed, 176 deletions(-) diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md index d1c26f51e..69b83383f 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md @@ -65,12 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md index 9cd69472d..bf8d671cf 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -65,12 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md index 74e4f235f..d7313e5c8 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md @@ -65,10 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. 
-<<<<<<< HEAD -======= - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md index ded76c2f0..69e7512f7 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md @@ -65,10 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md index c764ae062..498b55021 100644 --- a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md @@ -60,10 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md index 62f91c660..aa3447b76 100644 --- a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md +++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md @@ -60,10 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md index b41e849ae..2d24c09ff 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md @@ -65,12 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. 
- ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md index 22c834fa0..bd29d14b0 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md @@ -65,12 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md index 597498421..52f157c7f 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md @@ -65,10 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md index 547bbf0ea..336bf65e2 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md @@ -65,10 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md index a0bd8e2c4..3e81ea7a6 100644 --- a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md @@ -65,12 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md index 11ced8708..1b8f65489 100644 --- a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md +++ b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md @@ -65,10 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. 
-<<<<<<< HEAD -======= - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md index 41481f2ab..516e3c6c2 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md @@ -65,12 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md index ffc00c41b..d86d2175c 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -65,12 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md index 8a793835c..ae3c3a907 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md @@ -65,10 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. 
-<<<<<<< HEAD -======= - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md index 18175cd2f..8cf3c0ba5 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md @@ -65,10 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md index 2d81623d6..30e33d7ba 100644 --- a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md @@ -60,10 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md index 9d11130d8..b8294b8e5 100644 --- a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md +++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md @@ -60,10 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md index 3e2a8917e..990f17546 100644 --- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md @@ -65,12 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. 
- ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md index 8f43fbf31..69244961f 100644 --- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md @@ -65,12 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md index 041e464c2..ea5604003 100644 --- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md @@ -65,10 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md index 184603a0d..dd93e4d37 100644 --- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md @@ -65,10 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md index c5e710713..2392d1524 100644 --- a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md @@ -65,12 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md index 666887fbe..2084ad0f2 100644 --- a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md +++ b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md @@ -65,10 +65,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. 
-<<<<<<< HEAD -======= - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md index e1011edd0..6c8fcd20a 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md @@ -62,12 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md index c4242b62a..2bdfc8bba 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -62,12 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. -<<<<<<< HEAD -======= -Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory. -See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact. - ->>>>>>> master ## Retrieval Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md index 7fb936de8..6e3ede46d 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md @@ -62,10 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. 
-<<<<<<< HEAD
-=======
-
->>>>>>> master
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md
index 5becabd08..6854478d6 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md
@@ -62,10 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-<<<<<<< HEAD
-=======
-
->>>>>>> master
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
index a80ca92ce..a0b4d2a38 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
@@ -60,12 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-<<<<<<< HEAD
-=======
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
->>>>>>> master
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md
index 8294bb7b6..a517118b6 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md
@@ -60,10 +60,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-<<<<<<< HEAD
-=======
-
->>>>>>> master
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md
index 8aec8c9f4..d4ff128e7 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md
@@ -62,12 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-<<<<<<< HEAD
-=======
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
->>>>>>> master
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md
index a741b5c14..93904dec5 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md
@@ -62,12 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-<<<<<<< HEAD
-=======
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
->>>>>>> master
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md
index 25a552b18..e8fcd8ada 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md
@@ -62,10 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-<<<<<<< HEAD
-=======
-
->>>>>>> master
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md
index abd287db6..3486c7769 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md
@@ -62,10 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-<<<<<<< HEAD
-=======
-
->>>>>>> master
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md
index 501b83fa4..7ed2ac4d1 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md
@@ -62,12 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-<<<<<<< HEAD
-=======
-Furthermore, we are using Lucene's [Automatic Byte Quantization](https://www.elastic.co/search-labs/blog/articles/scalar-quantization-in-lucene) feature, which increase the on-disk footprint of the indexes since we're storing both the int8 quantized vectors and the float32 vectors, but only the int8 quantized vectors need to be loaded into memory.
-See [issue #2292](https://github.com/castorini/anserini/issues/2292) for some experiments reporting the performance impact.
-
->>>>>>> master
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md
index d8f93dfea..94256c3a5 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md
@@ -62,10 +62,6 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \
 The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above.
 Upon completion, we should have an index with 8,841,823 documents.
 
-<<<<<<< HEAD
-=======
-
->>>>>>> master
 ## Retrieval
 
 Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.