Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

120 changes: 96 additions & 24 deletions core/src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -665,13 +665,11 @@ pub fn join_table_cosine_similarity(
.collect::<Vec<_>>()
.join(",");

let mut bind_value_counter: i16 = 2; // Start at $2 since $1 is the vector
let mut where_filter = "WHERE 1=1".to_string();
for (column, filter_value) in filters.iter() {
for (bind_value_counter, (column, filter_value)) in (2_i16..).zip(filters.iter()) {
let operator = filter_value.operator.to_sql();
let filt = format!(" AND t0.\"{column}\" {operator} ${bind_value_counter}");
where_filter.push_str(&filt);
bind_value_counter += 1;
}

let inner_query = format!(
Expand Down Expand Up @@ -701,39 +699,34 @@ pub fn join_table_cosine_similarity(
)
}

/// Builds the `WHERE` clause shared by the hybrid-search queries: a
/// `WHERE 1=1` base with one `AND t0."col" <op> $N` comparison appended per
/// filter, in the map's key order. Bind placeholders start at `$3` because
/// `$1` (embedding vector) and `$2` (query text) are reserved by the
/// surrounding hybrid-search SQL.
fn build_where_filter(filters: &BTreeMap<String, FilterValue>) -> String {
    (3_i16..)
        .zip(filters.iter())
        .fold("WHERE 1=1".to_string(), |mut clause, (placeholder, (column, filter))| {
            // Operator text comes from the filter itself (e.g. `=`, `>`).
            let op = filter.operator.to_sql();
            clause.push_str(&format!(" AND t0.\"{column}\" {op} ${placeholder}"));
            clause
        })
}

/// Generates the core hybrid search SELECT that returns raw table rows.
/// `$1::vector` and `$2` are sqlx bind parameter placeholders for the embedding and query text.
#[allow(clippy::too_many_arguments)]
pub fn hybrid_search_query(
fn hybrid_search_rows_sql(
job_name: &str,
src_schema: &str,
src_table: &str,
join_key: &str,
return_columns: &[String],
cols: &str,
window_size: i32,
limit: i32,
rrf_k: f32,
semantic_weight: f32,
fts_weight: f32,
filters: &BTreeMap<String, FilterValue>,
where_filter: &str,
) -> String {
let cols = &return_columns
.iter()
.map(|s| format!("t0.{s}"))
.collect::<Vec<_>>()
.join(",");

let mut bind_value_counter: i16 = 3;
let mut where_filter = "WHERE 1=1".to_string();
for (column, filter_value) in filters.iter() {
let operator = filter_value.operator.to_sql();
let filt = format!(" AND t0.\"{column}\" {operator} ${bind_value_counter}");
where_filter.push_str(&filt);
bind_value_counter += 1;
}

format!(
"
SELECT to_jsonb(t) as results
FROM (
SELECT {cols}, t.rrf_score, t.semantic_rank, t.fts_rank, t.similarity_score
FROM (
SELECT
Expand Down Expand Up @@ -779,10 +772,89 @@ pub fn hybrid_search_query(
INNER JOIN {src_schema}.{src_table} t0 ON t0.{join_key} = t.{join_key}
{where_filter}
ORDER BY t.rrf_score DESC
LIMIT {limit}
LIMIT {limit}"
)
}

/// Hybrid search returning each result row wrapped in a `results` JSONB column.
/// Used by the HTTP server.
#[allow(clippy::too_many_arguments)]
pub fn hybrid_search_query(
    job_name: &str,
    src_schema: &str,
    src_table: &str,
    join_key: &str,
    return_columns: &[String],
    window_size: i32,
    limit: i32,
    rrf_k: f32,
    semantic_weight: f32,
    fts_weight: f32,
    filters: &BTreeMap<String, FilterValue>,
) -> String {
    // Qualify every requested column with the source-table alias `t0`.
    let mut qualified = Vec::with_capacity(return_columns.len());
    for column in return_columns {
        qualified.push(format!("t0.{column}"));
    }
    // Delegate the heavy lifting to the shared row-level SQL builder.
    let inner = hybrid_search_rows_sql(
        job_name,
        src_schema,
        src_table,
        join_key,
        &qualified.join(","),
        window_size,
        limit,
        rrf_k,
        semantic_weight,
        fts_weight,
        &build_where_filter(filters),
    );
    // Wrap each row of the inner query in a single JSONB `results` column.
    format!(
        "
SELECT to_jsonb(t) as results
FROM ({inner}
) t"
    )
}

/// Hybrid search returning raw table columns (`t0.*` plus ranking scores).
/// Used by the SQL proxy so results arrive as a normal table, not JSON.
#[allow(clippy::too_many_arguments)]
pub fn hybrid_search_query_rows(
    job_name: &str,
    src_schema: &str,
    src_table: &str,
    join_key: &str,
    return_columns: &[String],
    window_size: i32,
    limit: i32,
    rrf_k: f32,
    semantic_weight: f32,
    fts_weight: f32,
    filters: &BTreeMap<String, FilterValue>,
) -> String {
    // Build the comma-separated, `t0.`-qualified column list in one pass.
    let cols = return_columns.iter().fold(String::new(), |mut acc, column| {
        if !acc.is_empty() {
            acc.push(',');
        }
        acc.push_str("t0.");
        acc.push_str(column);
        acc
    });
    let where_filter = build_where_filter(filters);
    // Return the row-level SQL directly — no JSONB wrapping for the proxy.
    hybrid_search_rows_sql(
        job_name,
        src_schema,
        src_table,
        join_key,
        cols.as_str(),
        window_size,
        limit,
        rrf_k,
        semantic_weight,
        fts_weight,
        where_filter.as_str(),
    )
}
#[cfg(test)]
mod tests {
use super::*;
Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ services:
logging: *default-logging
environment:
POSTGRES_PASSWORD: postgres
image: pgvector/pgvector:0.8.1-pg18
image: pgvector/pgvector:0.8.2-pg18
ports:
- 5432:5432
healthcheck:
Expand Down
16 changes: 15 additions & 1 deletion proxy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,16 @@ name = "vectorize-proxy"
version = "0.1.0"
edition = "2024"

[[bin]]
name = "vectorize-proxy"
path = "src/main.rs"

[lib]
name = "vectorize_proxy"
path = "src/lib.rs"

[dependencies]
clap = { version = "4.0", features = ["derive", "env"] }
vectorize_core = { package = "vectorize-core", path = "../core" }

anyhow = { workspace = true }
Expand All @@ -17,4 +26,9 @@ tracing = { workspace = true }
tracing-subscriber = { workspace = true }
url = { workspace = true }

pgwire = { version = "0.30", features = ["server-api-aws-lc-rs"] }
pgwire = { version = "0.30", features = ["server-api-aws-lc-rs"] }

[dev-dependencies]
rand = "0.8"
reqwest = { version = "0.12", features = ["json"] }
serde = { version = "1", features = ["derive"] }
Comment on lines +29 to +34
61 changes: 61 additions & 0 deletions proxy/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
## SQL proxy

The proxy gives you a SQL interface to `vectorize.search()` without installing the Postgres extension. It sits in front of Postgres, intercepts `vectorize.search()` calls, generates embeddings, rewrites the query as a hybrid (semantic + full-text) search, and returns results — all transparently over the Postgres wire protocol. Any SQL client that works with Postgres works with the proxy.

Start Postgres and the embeddings server:

```bash
docker compose up postgres vector-serve -d
```

Load the example dataset:

```bash
psql postgres://postgres:postgres@localhost:5432/postgres -f server/sql/example.sql
```

In a second terminal, start the HTTP server. This is used to manage embedding jobs and generate the initial embeddings for existing rows:

```bash
DATABASE_URL=postgres://postgres:postgres@localhost:5432/postgres \
EMBEDDING_SVC_URL=http://localhost:3000/v1 \
cargo run --bin vectorize-server
```

Initialize the table and create the embedding job:

```bash
curl -X POST http://localhost:8080/api/v1/table -d '{
"job_name": "my_job",
"src_table": "my_products",
"src_schema": "public",
"src_columns": ["product_name", "description"],
"primary_key": "product_id",
"update_time_col": "updated_at",
"model": "sentence-transformers/all-MiniLM-L6-v2"
}' -H "Content-Type: application/json"
```

In a third terminal, start the proxy. It listens on port 5433 by default:

```bash
DATABASE_URL=postgres://postgres:postgres@localhost:5432/postgres \
EMBEDDING_SVC_URL=http://localhost:3000/v1 \
cargo run --bin vectorize-proxy
```

Search using SQL by connecting `psql` to the proxy port (5433):

```bash
psql postgres://postgres:postgres@localhost:5433/postgres -c \
"SELECT * FROM vectorize.search(job=>'my_job', query=>'camping backpack', num_results=>3);"
```

```text
results
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
{"price": 45.00, "fts_rank": 1, "rrf_score": 0.03278688524590164, "product_id": 6, "updated_at": "2026-05-12T14:37:26.610753+00:00", "description": "Storage solution for carrying personal items on ones back", "product_name": "Backpack", "semantic_rank": 1, "product_category": "accessories", "similarity_score": 0.6296013593673885}
{"price": 40.00, "fts_rank": null, "rrf_score": 0.016129032258064516, "product_id": 39, "updated_at": "2026-05-12T14:37:26.610753+00:00", "description": "Sling made of fabric or netting, suspended between two points for relaxation", "product_name": "Hammock", "semantic_rank": 2, "product_category": "outdoor", "similarity_score": 0.3789524291697087}
{"price": 10.99, "fts_rank": null, "rrf_score": 0.015873015873015872, "product_id": 12, "updated_at": "2026-05-12T14:37:26.610753+00:00", "description": "Insulated container for beverages on-the-go", "product_name": "Travel Mug", "semantic_rank": 3, "product_category": "kitchenware", "similarity_score": 0.35918538314991255}
Comment on lines +55 to +59
(3 rows)
```
Loading
Loading