Skip to content

Commit

Permalink
Automate running benchmarks for all engines (#134)
Browse files Browse the repository at this point in the history
* ci: Run *-default benchmarks for all engines

* Update poetry.lock
  • Loading branch information
tellet-q authored Apr 18, 2024
1 parent e6049a4 commit 455b590
Show file tree
Hide file tree
Showing 6 changed files with 290 additions and 45 deletions.
38 changes: 38 additions & 0 deletions .github/workflows/actions/run-engine-benchmark/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Composite action: installs the project with poetry, starts the engine's
# docker compose stack, then runs run.py for one engine/dataset pair.
name: Run Engine Benchmark
description: "Run benchmark with specified params"
inputs:
  engine:
    description: "engine (e.g. qdrant-default)"
    required: true
  dataset:
    description: "dataset (e.g. random-100)"
    required: true
  compose_file:
    description: "path to docker compose"
    required: true

runs:
  using: "composite"
  steps:
    # Poetry is installed before setup-python because `cache: "poetry"`
    # needs the poetry executable to resolve its cache directory.
    - name: Install poetry
      shell: bash
      run: pip install poetry
    - uses: actions/setup-python@v5
      with:
        python-version: "3.10"
        cache: "poetry"
    - name: Install deps
      shell: bash
      run: poetry install
    # NOTE(review): the version tag was unreadable in the reviewed copy and
    # has been reconstructed as v2.0.0 - confirm against the repository.
    - uses: hoverkraft-tech/compose-action@v2.0.0
      with:
        compose-file: "${{ inputs.compose_file }}"
    - name: Execution
      shell: bash
      # Inputs are handed over as environment variables instead of being
      # expanded into the script text, so their content can never be
      # interpreted as shell syntax.
      env:
        ENGINE: ${{ inputs.engine }}
        DATASET: ${{ inputs.dataset }}
      run: |
        # Elasticsearch/OpenSearch need extra time before they accept requests.
        if [[ "$ENGINE" == *"elasticsearch"* || "$ENGINE" == *"opensearch"* ]]; then
          ./tools/wait_for_green_status.sh
        fi
        source "$(poetry env info -p)/bin/activate"
        poetry run python3 run.py --engines "$ENGINE" --datasets "$DATASET"
138 changes: 138 additions & 0 deletions .github/workflows/manual-all-engines-benchmark.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Runs the "<engine>-default" benchmark for every engine whose client code,
# server definition, or the shared base client changed.
#
# The previous per-job condition read github.event.head_commit.modified.
# That field is not included in the push payload GitHub Actions receives and
# does not exist on pull_request / workflow_dispatch events, so every job was
# always skipped. The `changes` job below computes the changed paths with git
# and exposes one 'true'/'false' output per engine instead.
name: Manual All Engines Default Benchmarks

on:
  push:
    branches:
      - "master"
  pull_request:
    types:
      - opened
      - reopened
  workflow_dispatch:

jobs:
  changes:
    name: detect changed engine paths
    runs-on: ubuntu-latest
    timeout-minutes: 5
    outputs:
      elasticsearch: ${{ steps.filter.outputs.elasticsearch }}
      milvus: ${{ steps.filter.outputs.milvus }}
      opensearch: ${{ steps.filter.outputs.opensearch }}
      pgvector: ${{ steps.filter.outputs.pgvector }}
      qdrant: ${{ steps.filter.outputs.qdrant }}
      redis: ${{ steps.filter.outputs.redis }}
      weaviate: ${{ steps.filter.outputs.weaviate }}
    steps:
      - uses: actions/checkout@v3
        with:
          # Full history so the base commit of the diff is available locally.
          fetch-depth: 0
      - name: Select engines to benchmark
        id: filter
        shell: bash
        env:
          EVENT_NAME: ${{ github.event_name }}
          # PR events carry the base branch SHA; push events carry the
          # previous tip of the pushed ref.
          BASE_SHA: ${{ github.event.pull_request.base.sha || github.event.before }}
        run: |
          run_all=false
          changed=""
          if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
            # A manual run benchmarks every engine.
            run_all=true
          elif ! changed="$(git diff --name-only "$BASE_SHA"...HEAD)"; then
            # Base commit missing or unreachable (e.g. forced push):
            # benchmark everything rather than silently skipping.
            run_all=true
          fi

          for engine in elasticsearch milvus opensearch pgvector qdrant redis weaviate; do
            pattern="^engine/(clients/${engine}|servers/${engine}|base_client/)"
            if [[ "$run_all" == true ]] || grep -qE "$pattern" <<<"$changed"; then
              echo "${engine}=true" >> "$GITHUB_OUTPUT"
            else
              echo "${engine}=false" >> "$GITHUB_OUTPUT"
            fi
          done

  elasticsearchBenchmark:
    needs: changes
    if: needs.changes.outputs.elasticsearch == 'true'
    name: benchmark - elasticsearch-default - random-100 - against elasticsearch-single-node-ci
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/workflows/actions/run-engine-benchmark
        with:
          engine: "elasticsearch-default"
          dataset: "random-100"
          compose_file: "engine/servers/elasticsearch-single-node-ci/docker-compose.yaml"

  milvusBenchmark:
    needs: changes
    if: needs.changes.outputs.milvus == 'true'
    name: benchmark - milvus-default - random-100 - against milvus-single-node
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/workflows/actions/run-engine-benchmark
        with:
          engine: "milvus-default"
          dataset: "random-100"
          compose_file: "engine/servers/milvus-single-node/docker-compose.yaml"

  opensearchBenchmark:
    needs: changes
    if: needs.changes.outputs.opensearch == 'true'
    name: benchmark - opensearch-default - glove-25-angular - against opensearch-single-node-ci
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/workflows/actions/run-engine-benchmark
        with:
          engine: "opensearch-default"
          dataset: "glove-25-angular"
          compose_file: "engine/servers/opensearch-single-node-ci/docker-compose.yaml"

  pgvectorBenchmark:
    needs: changes
    if: needs.changes.outputs.pgvector == 'true'
    name: benchmark - pgvector-default - random-100 - against pgvector-single-node
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/workflows/actions/run-engine-benchmark
        with:
          engine: "pgvector-default"
          dataset: "random-100"
          compose_file: "engine/servers/pgvector-single-node/docker-compose.yaml"

  qdrantBenchmark:
    needs: changes
    if: needs.changes.outputs.qdrant == 'true'
    name: benchmark - qdrant-default - random-100 - against qdrant-single-node
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/workflows/actions/run-engine-benchmark
        with:
          engine: "qdrant-default"
          dataset: "random-100"
          compose_file: "engine/servers/qdrant-single-node/docker-compose.yaml"

  redisBenchmark:
    needs: changes
    if: needs.changes.outputs.redis == 'true'
    name: benchmark - redis-default - random-100 - against redis-single-node
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/workflows/actions/run-engine-benchmark
        with:
          engine: "redis-default"
          dataset: "random-100"
          compose_file: "engine/servers/redis-single-node/docker-compose.yaml"

  weaviateBenchmark:
    needs: changes
    if: needs.changes.outputs.weaviate == 'true'
    name: benchmark - weaviate-default - random-100 - against weaviate-single-node
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/workflows/actions/run-engine-benchmark
        with:
          engine: "weaviate-default"
          dataset: "random-100"
          compose_file: "engine/servers/weaviate-single-node/docker-compose.yaml"
23 changes: 23 additions & 0 deletions engine/servers/elasticsearch-single-node-ci/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
version: "3.5"

# Single-node Elasticsearch with xpack security switched off.
services:
  es:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.10.2
    ports:
      - '9200:9200'
      - '9300:9300'
    environment:
      discovery.type: 'single-node'
      xpack.security.enabled: 'false'
      SERVER_SSL_ENABLED: 'false'
      ELASTIC_PASSWORD: 'passwd'
      KIBANA_PASSWORD: 'passwd'
    # Keep container logs small: a single json-file log of at most 10m.
    logging:
      driver: 'json-file'
      options:
        max-size: 10m
        max-file: 1
    # Cap the container's memory at 4Gb.
    deploy:
      resources:
        limits:
          memory: 4Gb
21 changes: 21 additions & 0 deletions engine/servers/opensearch-single-node-ci/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
version: '3.5'

# Single-node OpenSearch with the security plugin disabled and a fixed
# 2g JVM heap.
services:
  opensearch:
    image: opensearchproject/opensearch:2.10.0
    environment:
      discovery.type: "single-node"
      # Quoted so YAML passes the literal string "true" rather than a boolean;
      # environment values are strings.
      plugins.security.disabled: "true"
      OPENSEARCH_JAVA_OPTS: "-Xms2g -Xmx2g"
    ports:
      - "9200:9200"
      - "9300:9300"
    # Keep container logs small: a single json-file log of at most 10m.
    logging:
      driver: "json-file"
      options:
        max-file: "1"
        max-size: "10m"
    # Cap the container's memory at 4Gb.
    deploy:
      resources:
        limits:
          memory: 4Gb
81 changes: 36 additions & 45 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions tools/wait_for_green_status.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
# Waits for an OpenSearch/Elasticsearch cluster to report "green" health.
#
# Usage: wait_for_green_status.sh [HOST:PORT] [MAX_WAIT_SECONDS]
#   HOST:PORT         address handed to curl (default: localhost:9200)
#   MAX_WAIT_SECONDS  give up with exit code 1 after this many seconds;
#                     0 (the default) waits indefinitely
#
# Progress messages go to stderr. Failed requests are retried instead of
# aborting the script.

set -e

SEARCH_CLUSTER_HOST=${1:-"localhost:9200"}
MAX_WAIT_SECONDS=${2:-0}

# Exit with status 1 once the optional deadline has elapsed. SECONDS is the
# bash builtin that counts seconds since the script started.
fail_if_timed_out() {
  if [ "$MAX_WAIT_SECONDS" -gt 0 ] && [ "$SECONDS" -ge "$MAX_WAIT_SECONDS" ]; then
    >&2 echo "Timed out after ${MAX_WAIT_SECONDS}s waiting for $SEARCH_CLUSTER_HOST"
    exit 1
  fi
}

# Print the HTTP status code returned by the cluster's root endpoint.
# A failed connection must not abort the script, hence `|| true`.
http_status() {
  curl --write-out '%{http_code}' --silent --output /dev/null "$SEARCH_CLUSTER_HOST" || true
}

# Print the cluster health status with surrounding whitespace removed.
# Prints an empty string when the request fails; `|| true` keeps a failed
# request from aborting the script.
cluster_health() {
  local raw
  raw="$(curl -fsSL "$SEARCH_CLUSTER_HOST/_cat/health?h=status")" || true
  echo "$raw" | sed -r 's/^[[:space:]]+|[[:space:]]+$//g'
}

# Wait until the search cluster host is available
until curl --output /dev/null --silent --head --fail "$SEARCH_CLUSTER_HOST"; do
  fail_if_timed_out
  printf '.'
  sleep 1
done

# Wait for ES/OS to start
until [ "$(http_status)" = "200" ]; do
  fail_if_timed_out
  >&2 echo "Search cluster is unavailable - sleep 1s"
  sleep 1
done

# Wait for ES/OS status to turn Green
until [ "$(cluster_health)" = 'green' ]; do
  fail_if_timed_out
  >&2 echo "Search cluster status is not green yet - sleep 1s"
  sleep 1
done

>&2 echo "Search cluster is up"

0 comments on commit 455b590

Please sign in to comment.