From 43708e94ee066ba503a451f15146c00fb37fe6c3 Mon Sep 17 00:00:00 2001
From: shaharuk-yb <sshaikh@yugabyte.com>
Date: Wed, 8 Jan 2025 12:15:59 +0530
Subject: [PATCH 1/5] ALTER TABLE ... SET STORAGE PLAIN is not needed for yb

---
 vectordb_bench/backend/clients/pgvector/pgvector.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/vectordb_bench/backend/clients/pgvector/pgvector.py b/vectordb_bench/backend/clients/pgvector/pgvector.py
index 069b89381..587291b66 100644
--- a/vectordb_bench/backend/clients/pgvector/pgvector.py
+++ b/vectordb_bench/backend/clients/pgvector/pgvector.py
@@ -387,11 +387,11 @@ def _create_table(self, dim: int):
                     "CREATE TABLE IF NOT EXISTS public.{table_name} (id BIGINT PRIMARY KEY, embedding vector({dim}));"
                 ).format(table_name=sql.Identifier(self.table_name), dim=dim)
             )
-            self.cursor.execute(
-                sql.SQL(
-                    "ALTER TABLE public.{table_name} ALTER COLUMN embedding SET STORAGE PLAIN;"
-                ).format(table_name=sql.Identifier(self.table_name))
-            )
+            # self.cursor.execute(
+            #     sql.SQL(
+            #         "ALTER TABLE public.{table_name} ALTER COLUMN embedding SET STORAGE PLAIN;"
+            #     ).format(table_name=sql.Identifier(self.table_name))
+            # )
             self.conn.commit()
         except Exception as e:
             log.warning(

From 6f8718f0fbf5a74590d87953c7cdb69f72cbdfb5 Mon Sep 17 00:00:00 2001
From: shaharuk-yb <sshaikh@yugabyte.com>
Date: Fri, 10 Jan 2025 14:09:03 +0530
Subject: [PATCH 2/5] change hnsw to ybhnsw, default function to l2 for
 yugabyte

---
 vectordb_bench/backend/clients/api.py             | 2 +-
 vectordb_bench/backend/clients/pgvector/config.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/vectordb_bench/backend/clients/api.py b/vectordb_bench/backend/clients/api.py
index fe2e554f3..d34a8ea40 100644
--- a/vectordb_bench/backend/clients/api.py
+++ b/vectordb_bench/backend/clients/api.py
@@ -23,7 +23,7 @@ class IndexType(str, Enum):
     IVFSQ8 = "IVF_SQ8"
     Flat = "FLAT"
     AUTOINDEX = "AUTOINDEX"
-    ES_HNSW = "hnsw"
+    ES_HNSW = "ybhnsw"
     ES_IVFFlat = "ivfflat"
     GPU_IVF_FLAT = "GPU_IVF_FLAT"
     GPU_IVF_PQ = "GPU_IVF_PQ"
diff --git a/vectordb_bench/backend/clients/pgvector/config.py b/vectordb_bench/backend/clients/pgvector/config.py
index 16d547445..c9aae03cb 100644
--- a/vectordb_bench/backend/clients/pgvector/config.py
+++ b/vectordb_bench/backend/clients/pgvector/config.py
@@ -74,7 +74,7 @@ def parse_metric(self) -> str:
                 return "vector_l2_ops"
             elif self.metric_type == MetricType.IP:
                 return "vector_ip_ops"
-            return "vector_cosine_ops"
+            return "vector_l2_ops"
 
     def parse_metric_fun_op(self) -> LiteralString:
         if self.quantization_type == "bit":

From 89594c6be936da685e903013fffb051d35ad8811 Mon Sep 17 00:00:00 2001
From: shaharuk-yb <sshaikh@yugabyte.com>
Date: Fri, 10 Jan 2025 17:26:00 +0530
Subject: [PATCH 3/5] add support to provide parameters for before/after index
 creation

---
 .../backend/clients/pgvector/cli.py           | 27 ++++++++++++++++++-
 .../backend/clients/pgvector/config.py        |  4 +++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/vectordb_bench/backend/clients/pgvector/cli.py b/vectordb_bench/backend/clients/pgvector/cli.py
index ef8914be0..5c2920f9c 100644
--- a/vectordb_bench/backend/clients/pgvector/cli.py
+++ b/vectordb_bench/backend/clients/pgvector/cli.py
@@ -118,7 +118,28 @@ class PgVectorTypedDict(CommonTypedDict):
             callback=set_default_quantized_fetch_limit,
         )
     ]
-
+    create_index_before_load: Annotated[
+        Optional[bool],
+        click.option(
+            "--create_index_before_load",
+            type=bool,
+            help="Create index before load",
+            default=True,
+            required=False,
+            show_default=True,
+        ),
+    ]
+    create_index_after_load: Annotated[
+        Optional[bool],
+        click.option(
+            "--create_index_after_load",
+            type=bool,
+            help="Create index after load",
+            default=False,
+            required=False,
+            show_default=True,
+        ),
+    ]
     
 
 class PgVectorIVFFlatTypedDict(PgVectorTypedDict, IVFFlatTypedDict):
@@ -151,6 +172,8 @@ def PgVectorIVFFlat(
             reranking=parameters["reranking"],
             reranking_metric=parameters["reranking_metric"],
             quantized_fetch_limit=parameters["quantized_fetch_limit"],
+            create_index_before_load=parameters["create_index_before_load"],
+            create_index_after_load=parameters["create_index_after_load"],
         ),
         **parameters,
     )
@@ -188,6 +211,8 @@ def PgVectorHNSW(
             reranking=parameters["reranking"],
             reranking_metric=parameters["reranking_metric"],
             quantized_fetch_limit=parameters["quantized_fetch_limit"],
+            create_index_before_load=parameters["create_index_before_load"],
+            create_index_after_load=parameters["create_index_after_load"],
         ),
         **parameters,
     )
diff --git a/vectordb_bench/backend/clients/pgvector/config.py b/vectordb_bench/backend/clients/pgvector/config.py
index c9aae03cb..a21dc222d 100644
--- a/vectordb_bench/backend/clients/pgvector/config.py
+++ b/vectordb_bench/backend/clients/pgvector/config.py
@@ -171,6 +171,8 @@ class PgVectorIVFFlatConfig(PgVectorIndexConfig):
     reranking: Optional[bool] = None
     quantized_fetch_limit: Optional[int] = None
     reranking_metric: Optional[str] = None
+    create_index_before_load: Optional[bool] = True
+    create_index_after_load: Optional[bool] = True
 
     def index_param(self) -> PgVectorIndexParam:
         index_parameters = {"lists": self.lists}
@@ -221,6 +223,8 @@ class PgVectorHNSWConfig(PgVectorIndexConfig):
     reranking: Optional[bool] = None
     quantized_fetch_limit: Optional[int] = None
     reranking_metric: Optional[str] = None
+    create_index_before_load: Optional[bool] = True
+    create_index_after_load: Optional[bool] = False
 
     def index_param(self) -> PgVectorIndexParam:
         index_parameters = {"m": self.m, "ef_construction": self.ef_construction}

From 0f58f8d8c7e1c5309078c889e0d26d6fa2b5d5a3 Mon Sep 17 00:00:00 2001
From: Shaharuk Shaikh <56402576+shaharuk-yb@users.noreply.github.com>
Date: Thu, 30 Jan 2025 13:28:09 +0530
Subject: [PATCH 4/5] reverted to vector_cosine_ops

---
 vectordb_bench/backend/clients/pgvector/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vectordb_bench/backend/clients/pgvector/config.py b/vectordb_bench/backend/clients/pgvector/config.py
index a21dc222d..16c2c257a 100644
--- a/vectordb_bench/backend/clients/pgvector/config.py
+++ b/vectordb_bench/backend/clients/pgvector/config.py
@@ -74,7 +74,7 @@ def parse_metric(self) -> str:
                 return "vector_l2_ops"
             elif self.metric_type == MetricType.IP:
                 return "vector_ip_ops"
-            return "vector_l2_ops"
+            return "vector_cosine_ops"
 
     def parse_metric_fun_op(self) -> LiteralString:
         if self.quantization_type == "bit":

From f005e9b35889d4b0eb860e771a0b94db9ad18180 Mon Sep 17 00:00:00 2001
From: shaharuk-yb <sshaikh@yugabyte.com>
Date: Mon, 17 Feb 2025 12:35:44 +0530
Subject: [PATCH 5/5] fix pip version issue

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index aafe70750..8bc83f29b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,7 +35,7 @@ dependencies = [
     "psutil",
     "polars",
     "plotly",
-    "environs",
+    "environs<14.1.0",
     "pydantic<v2",
     "scikit-learn",
     "pymilvus", # with pandas, numpy, ujson