Skip to content

Commit

Permalink
refactor: refine graph store
Browse files Browse the repository at this point in the history
  • Loading branch information
Mini256 committed Feb 21, 2025
1 parent 33c26dc commit 610d76b
Show file tree
Hide file tree
Showing 24 changed files with 1,517 additions and 1,205 deletions.
3 changes: 2 additions & 1 deletion backend/app/api/admin_routes/knowledge_base/chunk/models.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from pydantic import BaseModel
from pydantic import BaseModel, Field

from app.rag.retrievers.chunk.schema import VectorSearchRetrieverConfig


class KBChunkRetrievalConfig(BaseModel):
    """Retrieval configuration for knowledge-base chunk search."""

    # Configuration for the vector-search retriever.
    vector_search: VectorSearchRetrieverConfig
    # Must lie strictly between 0 and 1; defaults to 0.3.
    score_threshold: float = Field(default=0.3, gt=0, lt=1)
    # TODO: add fulltext and knowledge graph search config


Expand Down
6 changes: 3 additions & 3 deletions backend/app/api/admin_routes/knowledge_base/chunk/routes.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import logging

from fastapi import APIRouter
from fastapi import APIRouter, HTTPException
from app.api.deps import SessionDep, CurrentSuperuserDep
from app.rag.retrievers.chunk.simple_retriever import (
ChunkSimpleRetriever,
)
from app.rag.retrievers.chunk.schema import ChunksRetrievalResult

from app.exceptions import InternalServerError, KBNotFound
from app.exceptions import InternalServerError
from .models import KBRetrieveChunksRequest

router = APIRouter()
Expand All @@ -31,7 +31,7 @@ def retrieve_chunks(
return retriever.retrieve_chunks(
request.query,
)
except KBNotFound as e:
except HTTPException as e:
raise e
except Exception as e:
logger.exception(e)
Expand Down
164 changes: 164 additions & 0 deletions backend/app/api/admin_routes/knowledge_base/graph/entity/routes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import logging

from typing import List, Annotated
from fastapi import APIRouter, HTTPException, Depends, Query
from fastapi_pagination import Params, Page

from app.api.deps import SessionDep
from app.exceptions import InternalServerError
from app.models import EntityPublic, EntityType
from app.rag.indices.knowledge_graph.schema import (
EntityCreate,
EntityFilters,
SynopsisEntityCreate,
EntityUpdate,
)
from app.rag.knowledge_base.index_store import (
get_kb_graph_editor,
get_kb_tidb_graph_store,
)
from app.rag.retrievers.knowledge_graph.schema import (
RetrievedEntity,
RetrievedKnowledgeGraph,
)
from app.repositories import knowledge_base_repo

router = APIRouter(
prefix="/admin/knowledge_bases/{kb_id}/graph/entities",
tags=["knowledge_base/graph/entity"],
)
logger = logging.getLogger(__name__)


@router.get("/", response_model=Page[EntityPublic])
def list_entities(
    db_session: SessionDep,
    kb_id: int,
    filters: Annotated[EntityFilters, Query()] = EntityFilters(),
    params: Params = Depends(),
):
    """Return a page of graph entities for the knowledge base ``kb_id``.

    The knowledge base is looked up first, then its graph editor is queried
    with ``filters`` and the pagination ``params``.

    Raises:
        HTTPException: re-raised unchanged when raised during the lookup or
            the query.
        InternalServerError: for any other exception, after it is logged.
    """
    try:
        knowledge_base = knowledge_base_repo.must_get(db_session, kb_id)
        editor = get_kb_graph_editor(db_session, knowledge_base)
        page = editor.query_entities(filters, params)
    except HTTPException:
        # Already an HTTP-level error; propagate it as-is.
        raise
    except Exception as err:
        logger.exception(err)
        raise InternalServerError()
    return page


@router.post("/", response_model=EntityPublic)
def create_entity(session: SessionDep, kb_id: int, create: EntityCreate):
    """Create an entity in the graph of the knowledge base ``kb_id``.

    Raises:
        HTTPException: re-raised unchanged when raised during the lookup or
            the creation.
        InternalServerError: for any other exception, after it is logged.
    """
    try:
        knowledge_base = knowledge_base_repo.must_get(session, kb_id)
        editor = get_kb_graph_editor(session, knowledge_base)
        created = editor.create_entity(create)
    except HTTPException:
        # Already an HTTP-level error; propagate it as-is.
        raise
    except Exception as err:
        logger.exception(err)
        raise InternalServerError()
    return created


@router.post("/synopsis", response_model=EntityPublic)
def create_synopsis_entity(
    session: SessionDep,
    kb_id: int,
    create: SynopsisEntityCreate,
):
    """Create a synopsis entity in the graph of the knowledge base ``kb_id``.

    Raises:
        HTTPException: re-raised unchanged when raised during the lookup or
            the creation.
        InternalServerError: for any other exception, after it is logged.
    """
    try:
        knowledge_base = knowledge_base_repo.must_get(session, kb_id)
        editor = get_kb_graph_editor(session, knowledge_base)
        created = editor.create_synopsis_entity(create)
    except HTTPException:
        # Already an HTTP-level error; propagate it as-is.
        raise
    except Exception as err:
        logger.exception(err)
        raise InternalServerError()
    return created


@router.get("/search")
def search_similar_entities(
    session: SessionDep,
    kb_id: int,
    query: str,
    top_k: int = 10,
    nprobe: int = 10,
    entity_type: EntityType = EntityType.original,
    similarity_threshold: float = 0.4,
) -> List[RetrievedEntity]:
    """Retrieve entities for ``query`` from the graph store of ``kb_id``.

    All search parameters (``top_k``, ``nprobe``, ``entity_type``,
    ``similarity_threshold``) are forwarded unchanged to the graph store's
    ``retrieve_entities``.

    Raises:
        HTTPException: re-raised unchanged when raised during the lookup or
            the retrieval.
        InternalServerError: for any other exception, after it is logged.
    """
    try:
        knowledge_base = knowledge_base_repo.must_get(session, kb_id)
        store = get_kb_tidb_graph_store(session, knowledge_base)
        matches = store.retrieve_entities(
            query=query,
            top_k=top_k,
            nprobe=nprobe,
            entity_type=entity_type,
            similarity_threshold=similarity_threshold,
        )
    except HTTPException:
        # Already an HTTP-level error; propagate it as-is.
        raise
    except Exception as err:
        logger.exception(err)
        raise InternalServerError()
    return matches


@router.get("/{entity_id}", response_model=EntityPublic)
def get_entity(session: SessionDep, kb_id: int, entity_id: int):
    """Fetch the entity ``entity_id`` from the graph of knowledge base ``kb_id``.

    Raises:
        HTTPException: re-raised unchanged when raised during the lookup of
            the knowledge base or of the entity.
        InternalServerError: for any other exception, after it is logged.
    """
    try:
        knowledge_base = knowledge_base_repo.must_get(session, kb_id)
        editor = get_kb_graph_editor(session, knowledge_base)
        entity = editor.must_get_entity(entity_id)
    except HTTPException:
        # Already an HTTP-level error; propagate it as-is.
        raise
    except Exception as err:
        logger.exception(err)
        raise InternalServerError()
    return entity


@router.put("/{entity_id}", response_model=EntityPublic)
def update_entity(
    session: SessionDep,
    kb_id: int,
    entity_id: int,
    update: EntityUpdate,
):
    """Apply ``update`` to the entity ``entity_id`` of knowledge base ``kb_id``.

    Raises:
        HTTPException: re-raised unchanged when raised during the lookup or
            the update.
        InternalServerError: for any other exception, after it is logged.
    """
    try:
        knowledge_base = knowledge_base_repo.must_get(session, kb_id)
        editor = get_kb_graph_editor(session, knowledge_base)
        updated = editor.update_entity(entity_id, update)
    except HTTPException:
        # Already an HTTP-level error; propagate it as-is.
        raise
    except Exception as err:
        logger.exception(err)
        raise InternalServerError()
    return updated


@router.delete("/{entity_id}")
def delete_entity(session: SessionDep, kb_id: int, entity_id: int):
    """Delete the entity ``entity_id`` from the graph of knowledge base ``kb_id``.

    Returns:
        ``{"detail": "success"}`` once the graph editor's delete call returns.

    Raises:
        HTTPException: re-raised unchanged when raised during the lookup or
            the deletion.
        InternalServerError: for any other exception, after it is logged.
    """
    try:
        knowledge_base = knowledge_base_repo.must_get(session, kb_id)
        get_kb_graph_editor(session, knowledge_base).delete_entity(entity_id)
    except HTTPException:
        # Already an HTTP-level error; propagate it as-is.
        raise
    except Exception as err:
        logger.exception(err)
        raise InternalServerError()
    return {"detail": "success"}


@router.get("/{entity_id}/subgraph")
def get_entity_subgraph(
    session: SessionDep,
    kb_id: int,
    entity_id: int,
) -> RetrievedKnowledgeGraph:
    """Return the subgraph the graph editor reports for entity ``entity_id``.

    Raises:
        HTTPException: re-raised unchanged when raised during the lookup or
            the subgraph retrieval.
        InternalServerError: for any other exception, after it is logged.
    """
    try:
        knowledge_base = knowledge_base_repo.must_get(session, kb_id)
        editor = get_kb_graph_editor(session, knowledge_base)
        subgraph = editor.get_entity_subgraph(entity_id)
    except HTTPException:
        # Already an HTTP-level error; propagate it as-is.
        raise
    except Exception as err:
        logger.exception(err)
        raise InternalServerError()
    return subgraph
Original file line number Diff line number Diff line change
@@ -1,27 +1,32 @@
from fastapi import HTTPException
from pydantic import BaseModel
from starlette import status

from app.api.admin_routes.knowledge_base.graph.models import (
KnowledgeRequest,
KnowledgeNeighborRequest,
KnowledgeChunkRequest,
)
from app.api.admin_routes.knowledge_base.graph.routes import router, logger
from app.api.deps import SessionDep
from app.exceptions import KBNotFound, InternalServerError
from app.rag.knowledge_base.index_store import get_kb_tidb_graph_store
from app.rag.knowledge_base.index_store import (
get_kb_tidb_graph_store,
get_kb_graph_editor,
)
from app.repositories import knowledge_base_repo


# Experimental interface


@router.post("/admin/knowledge_bases/{kb_id}/graph/knowledge")
def retrieve_knowledge(session: SessionDep, kb_id: int, request: KnowledgeRequest):
@router.post("/knowledge", deprecated=True)
def legacy_retrieve_knowledge(
session: SessionDep, kb_id: int, request: KnowledgeRequest
):
try:
kb = knowledge_base_repo.must_get(session, kb_id)
graph_store = get_kb_tidb_graph_store(session, kb)
data = graph_store.retrieve_graph_data(
data = graph_store.retrieve_subgraph_by_similar(
request.query,
request.top_k,
request.similarity_threshold,
Expand All @@ -37,8 +42,8 @@ def retrieve_knowledge(session: SessionDep, kb_id: int, request: KnowledgeReques
raise InternalServerError()


@router.post("/admin/knowledge_bases/{kb_id}/graph/knowledge/neighbors")
def retrieve_knowledge_neighbors(
@router.post("/knowledge/neighbors", deprecated=True)
def legacy_retrieve_knowledge_neighbors(
session: SessionDep, kb_id: int, request: KnowledgeNeighborRequest
):
try:
Expand All @@ -59,22 +64,24 @@ def retrieve_knowledge_neighbors(
raise InternalServerError()


@router.post("/admin/knowledge_bases/{kb_id}/graph/knowledge/chunks")
def retrieve_knowledge_chunks(
class KnowledgeChunkRequest(BaseModel):
    """Request body for the deprecated ``/knowledge/chunks`` endpoint."""

    # The handler reads ``request.relationships_ids``; without this field the
    # model is empty and that attribute access raises AttributeError, which
    # surfaces to clients as a 500. The name keeps the legacy spelling
    # ("relationships_ids") so existing request payloads remain valid.
    relationships_ids: list[int]


@router.post("/knowledge/chunks", deprecated=True)
def legacy_retrieve_knowledge_chunks(
session: SessionDep, kb_id: int, request: KnowledgeChunkRequest
):
try:
kb = knowledge_base_repo.must_get(session, kb_id)
graph_store = get_kb_tidb_graph_store(session, kb)
data = graph_store.get_chunks_by_relationships(request.relationships_ids)
graph_editor = get_kb_graph_editor(session, kb)
data = graph_editor.batch_get_chunks_by_relationships(request.relationships_ids)
if not data:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No chunks found for the given relationships",
)
return data
except KBNotFound as e:
raise e
except HTTPException as e:
raise e
except Exception as e:
Expand Down
26 changes: 3 additions & 23 deletions backend/app/api/admin_routes/knowledge_base/graph/models.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,11 @@
from typing import List, Optional
from pydantic import BaseModel, model_validator
from pydantic import BaseModel

from app.rag.retrievers.knowledge_graph.schema import (
KnowledgeGraphRetrieverConfig,
)


class SynopsisEntityCreate(BaseModel):
    """Payload describing a synopsis entity to create."""

    name: str
    description: str
    topic: str
    meta: dict
    # Integer IDs; must contain at least one element (checked below).
    entities: List[int]

    @model_validator(mode="after")
    def validate_entities(self):
        """Reject payloads whose ``entities`` list is empty."""
        if not self.entities:
            raise ValueError("Entities list should not be empty")
        return self


class EntityUpdate(BaseModel):
    """Entity update payload; every field is optional and defaults to ``None``."""

    name: Optional[str] = None
    description: Optional[str] = None
    meta: Optional[dict] = None


class RelationshipUpdate(BaseModel):
description: Optional[str] = None
meta: Optional[dict] = None
Expand Down Expand Up @@ -70,5 +50,5 @@ class KnowledgeNeighborRequest(BaseModel):
similarity_threshold: float = 0.55


class KnowledgeChunkRequest(BaseModel):
    """Request body holding a list of integer relationship IDs."""

    relationships_ids: List[int]
class RelationshipBatchRequest(BaseModel):
    """Request body holding a list of integer relationship IDs."""

    relationship_ids: List[int]
Loading

0 comments on commit 610d76b

Please sign in to comment.