Skip to content

Commit

Permalink
feat: add tidb client quickstart
Browse files Browse the repository at this point in the history
  • Loading branch information
Mini256 committed Mar 7, 2025
1 parent 9233fd3 commit 760b74e
Show file tree
Hide file tree
Showing 8 changed files with 459 additions and 30 deletions.
3 changes: 2 additions & 1 deletion core/autoflow/storage/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from autoflow.storage.doc_store import DocumentStore, TiDBDocumentStore
from autoflow.storage.graph_store import KnowledgeGraphStore, TiDBKnowledgeGraphStore

from autoflow.storage.tidb.client import TiDBClient

__all__ = [
"DocumentStore",
"TiDBDocumentStore",
"KnowledgeGraphStore",
"TiDBKnowledgeGraphStore",
"TiDBClient",
]
177 changes: 159 additions & 18 deletions core/autoflow/storage/tidb/README.md
Original file line number Diff line number Diff line change
@@ -1,47 +1,188 @@
# TiDB Client (TiDB Vector Client 2.0)
# TiDB Vector SDK V2

Not just a vector client.
A powerful Python SDK for vector storage and retrieval operations with TiDB.

TODO: move to an independent package (https://github.com/pingcap/tidb-vector-python)
- 🔄 Automatic embedding generation
- 🔍 Vector similarity search
- 🎯 Advanced filtering capabilities
- 📦 Bulk operations support

## Connect to DB
## Installation

```azure
```bash
pip install autoflow-ai
# TODO: move to `pip install tidb-vector`
```

## Configuration

Go to [tidbcloud.com](http://tidbcloud.com/) to create a free TiDB database cluster.

Configuration can be provided through environment variables or a `.env` file:

```dotenv
DATABASE_URL=mysql+pymysql://<username>:<password>@<host>:4000/<database>
OPENAI_API_KEY=sk-proj-****
```

## Create a table
## Quick Start

```python
from typing import Any, Optional
from autoflow.storage.tidb import TiDBClient, TiDBModel
from sqlmodel import Field
from autoflow.llms.embeddings import EmbeddingFunction

# Connect to TiDB
# Format: mysql+pymysql://<username>:<password>@<host>:4000/<database>
db = TiDBClient.connect("your_database_url")

# Define your model with auto-embedding
text_embed = EmbeddingFunction("openai/text-embedding-3-small")
class Chunk(TiDBModel, table=True):
    __tablename__ = "chunks"
    id: int = Field(primary_key=True)
    text: str = Field()
    text_vec: Optional[Any] = text_embed.VectorField(source_field="text")
    user_id: int = Field()

# Create table and insert data
table = db.create_table(schema=Chunk)
table.bulk_insert([
Chunk(id=1, text="The quick brown fox jumps over the lazy dog", user_id=1),
Chunk(id=2, text="A quick brown dog runs in the park", user_id=2),
Chunk(id=3, text="The lazy fox sleeps under the tree", user_id=2),
Chunk(id=4, text="A dog and a fox play in the park", user_id=3)
])

# Search for similar texts
results = table.search("A quick fox in the park").limit(3).to_pydantic()
```

## Create a table from SQLModel
## Detailed Usage

## Create a table from Panda Dataframe
### Connect to TiDB

## Add Data
```python
from autoflow.storage.tidb import TiDBClient

db = TiDBClient.connect("your_database_url")
```

## Search Data
### Create table

### Vector Search
```python
from typing import Any, Optional
from sqlmodel import Field
from autoflow.llms.embeddings import EmbeddingFunction
from autoflow.storage.tidb import TiDBModel
from autoflow.storage.tidb.constants import DistanceMetric

text_embed = EmbeddingFunction("openai/text-embedding-3-small")

### Fulltext Search
class Chunk(TiDBModel, table=True):
    __tablename__ = "chunks"
    id: int = Field(primary_key=True)
    text: str = Field()
    text_vec: Optional[Any] = text_embed.VectorField(source_field="text")
    user_id: int = Field()

table = db.create_table(schema=Chunk)
```

### Hybrid Search
### Insert data

```python
# Insert single record
table.insert(Chunk(id=1, text="foo", user_id=1))

### Filters
# Bulk insert multiple records
table.bulk_insert([
Chunk(id=2, text="bar", user_id=2),
Chunk(id=3, text="biz", user_id=2),
Chunk(id=4, text="qux", user_id=3)
])
```

### Query Data

### Rerank Search Result
**Get data by ID**

```python
result = table.get(1)
```

## Query Data
**Query data with filters**

## Update Data
```python
results = table.query({
"user_id": 1
})
```

## Delete Data
### Update Data

```python
table.update(
values={
"text": "world"
},
filters={
"id": 1
}
)
```

### Delete Data

```python
# Delete by id
table.delete(filters={"id": 1})

# Delete multiple records
table.delete(filters={"user_id": 2})
```

### Vector Search

```python
# Search with vector
results = (
table.search([1, 2, 3])
.distance_metric(metric=DistanceMetric.COSINE) # Set distance metric
.num_candidate(20)
.filter({"user_id": 1})
.limit(5)
.to_pydantic()
)

# Search with text
results = table.search("your search text").limit(5).to_pydantic()
```

## Advanced Filtering

TiDB Client supports various filter operators for flexible querying:

| Operator | Description | Example |
|----------|-------------|---------|
| `$eq` | Equal to | `{"field": {"$eq": "hello"}}` |
| `$gt` | Greater than | `{"field": {"$gt": 1}}` |
| `$gte` | Greater than or equal | `{"field": {"$gte": 1}}` |
| `$lt` | Less than | `{"field": {"$lt": 1}}` |
| `$lte` | Less than or equal | `{"field": {"$lte": 1}}` |
| `$in` | In array | `{"field": {"$in": [1, 2, 3]}}` |
| `$nin` | Not in array | `{"field": {"$nin": [1, 2, 3]}}` |
| `$and` | Logical AND | `{"$and": [{"field1": 1}, {"field2": 2}]}` |
| `$or` | Logical OR | `{"$or": [{"field1": 1}, {"field2": 2}]}` |

```python
# Example queries using different operators
table.query({"user_id": 1}) # Implicit $eq
table.query({"id": {"$gt": 1}}) # Greater than
table.query({"id": {"$in": [1, 2, 3]}}) # In array
table.query({
"$and": [
{"user_id": 1},
{"id": {"$gt": 1}}
]
}) # Logical AND
```

4 changes: 4 additions & 0 deletions core/autoflow/storage/tidb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@
from .base import default_registry, Base
from .table import Table
from .errors import EmbeddingColumnMismatchError
from .constants import DistanceMetric
from .base import TiDBModel

__all__ = [
"TiDBClient",
"Table",
"default_registry",
"Base",
"EmbeddingColumnMismatchError",
"DistanceMetric",
"TiDBModel",
]
2 changes: 0 additions & 2 deletions core/autoflow/storage/tidb/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
Union,
Tuple,
Sequence,
override,
)

import numpy as np
Expand Down Expand Up @@ -131,7 +130,6 @@ def limit(self, k: int) -> "TiDBVectorQuery":
self._limit = k
return self

@override
def _execute(self) -> Sequence[Row]:
num_candidate = self._num_candidate if self._num_candidate else self._limit * 10

Expand Down
2 changes: 1 addition & 1 deletion core/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "autoflow-ai"
version = "0.0.1.dev7"
version = "0.0.1.dev10"
description = "Framework to index and search your vector data, make your data ready for AI apps, developed by TiDB."
authors = [
{ name = "Mini256", email = "[email protected]" },
Expand Down
10 changes: 5 additions & 5 deletions core/tests/quickstart.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
{
"cell_type": "code",
"execution_count": null,
"id": "592aef6b523d1535",
"id": "ade42f18ba66e956",
"metadata": {},
"outputs": [],
"source": [
"%pip install autoflow-ai==0.0.1.dev7\n",
"%pip install autoflow-ai==0.0.1.dev10\n",
"%pip install dotenv sqlalchemy ipywidgets pymysql"
]
},
Expand Down Expand Up @@ -137,7 +137,7 @@
"\n",
"# Create Knowledge base\n",
"kb = af.create_knowledge_base(\n",
" id=UUID('655b6cf3-8b30-4839-ba8b-5ed3c502f30e'),\n",
" id=UUID(\"655b6cf3-8b30-4839-ba8b-5ed3c502f30e\"),\n",
" name=\"New KB\",\n",
" description=\"This is a knowledge base for testing\",\n",
" index_methods=[IndexMethod.VECTOR_SEARCH, IndexMethod.KNOWLEDGE_GRAPH],\n",
Expand Down Expand Up @@ -267,7 +267,7 @@
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
Expand All @@ -281,7 +281,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.12.3"
}
},
"nbformat": 4,
Expand Down
4 changes: 1 addition & 3 deletions core/tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@
from sqlalchemy import JSON, Integer, Column, Text, VARCHAR
from sqlmodel import Field
from tidb_vector.sqlalchemy import VectorType
from autoflow.storage.tidb import TiDBClient, Base
from autoflow.storage.tidb.base import TiDBModel
from autoflow.storage.tidb.constants import DistanceMetric
from autoflow.storage.tidb import TiDBClient, Base, TiDBModel, DistanceMetric
from autoflow.llms.embeddings import EmbeddingFunction

logger = logging.getLogger(__name__)
Expand Down
Loading

0 comments on commit 760b74e

Please sign in to comment.