Skip to content

Commit

Permalink
bugfix: properly update chunk full-stop instead of only in Qdrant for
Browse files Browse the repository at this point in the history
grupdate operation
  • Loading branch information
skeptrunedev committed Nov 1, 2024
1 parent 1682f3b commit 6b05342
Show file tree
Hide file tree
Showing 7 changed files with 176 additions and 153 deletions.
66 changes: 1 addition & 65 deletions clients/ts-sdk/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -2150,7 +2150,7 @@
"Chunk Group"
],
"summary": "Search Over Groups",
"description": "This route allows you to get groups as results instead of chunks. Each group returned will have the matching chunks sorted by similarity within the group. This is useful for when you want to get groups of chunks which are similar to the search query. If choosing hybrid search, the results will be re-ranked using scores from a cross encoder model. Compatible with semantic, fulltext, or hybrid search modes.",
"description": "This route allows you to get groups as results instead of chunks. Each group returned will have the matching chunks sorted by similarity within the group. This is useful for when you want to get groups of chunks which are similar to the search query. If choosing hybrid search, the top chunk of each group will be re-ranked using scores from a cross encoder model. Compatible with semantic, fulltext, or hybrid search modes.",
"operationId": "search_over_groups",
"parameters": [
{
Expand Down Expand Up @@ -2598,70 +2598,6 @@
}
]
},
"put": {
"tags": [
"Chunk Group"
],
"summary": "Update Group by Tracking ID",
"description": "Update a chunk_group with the given tracking id. Auth'ed user or api key must have an admin or owner role for the specified dataset's organization.",
"operationId": "update_group_by_tracking_id",
"parameters": [
{
"name": "TR-Dataset",
"in": "header",
"description": "The dataset id or tracking_id to use for the request. We assume you intend to use an id if the value is a valid uuid.",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
}
},
{
"name": "tracking_id",
"in": "path",
"description": "Tracking id of the chunk_group to update",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
}
}
],
"requestBody": {
"description": "JSON request payload to update a chunkGroup",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/UpdateGroupByTrackingIDReqPayload"
}
}
},
"required": true
},
"responses": {
"204": {
"description": "Confirmation that the chunkGroup was updated"
},
"400": {
"description": "Service error relating to updating the chunkGroup",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponseBody"
}
}
}
}
},
"deprecated": true,
"security": [
{
"ApiKey": [
"admin"
]
}
]
},
"delete": {
"tags": [
"Chunk Group"
Expand Down
29 changes: 0 additions & 29 deletions clients/ts-sdk/src/functions/groups/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import {
SearchWithinGroupReqPayload,
SearchWithinGroupResponseBody,
UpdateChunkGroupReqPayload,
UpdateGroupByTrackingIDReqPayload,
} from "../../fetch-client";
import { TrieveSDK } from "../../sdk";

Expand Down Expand Up @@ -164,34 +163,6 @@ export async function updateGroup(
);
}

/**
* Update a chunk_group with the given tracking id. Auth’ed user or api key must have an admin or owner role for the specified dataset’s organization.
*
* Sends a `put` request to `/api/chunk_group/tracking_id/{tracking_id}`, using
* `data.tracking_id` as the path parameter and the SDK instance's `datasetId`
* as the dataset for the request.
*
* Example:
* ```js
* const data = await trieve.updateGroupByTrackingId({
*   tracking_id: "3c90c3cc-0d44-4b50-8888-8dd25736052a",
* });
* ```
*
* @param data Request payload; its `tracking_id` identifies the group to update.
* @param signal Optional `AbortSignal` forwarded to the underlying fetch call.
*/
export async function updateGroupByTrackingId(
/** @hidden */
this: TrieveSDK,
data: UpdateGroupByTrackingIDReqPayload,
signal?: AbortSignal
) {
return this.trieve.fetch(
"/api/chunk_group/tracking_id/{tracking_id}",
"put",
{
data,
// The path parameter is taken from the payload rather than passed separately.
trackingId: data.tracking_id,
datasetId: this.datasetId,
},
signal
);
}

/**
* Add a chunk to a group. One of chunk_id or chunk_tracking_id must be provided. Auth’ed user or api key must have an admin or owner role for the specified dataset’s organization.
*
Expand Down
30 changes: 0 additions & 30 deletions clients/ts-sdk/src/types.gen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3505,23 +3505,6 @@ export type AddChunkToGroupByTrackingIdData = {

export type AddChunkToGroupByTrackingIdResponse = (void);

/**
 * Request shape for updating a chunk_group addressed by its tracking id:
 * the JSON body, the tracking id of the group, and the dataset selector.
 */
export type UpdateGroupByTrackingIdData = {
/**
* JSON request payload to update a chunkGroup
*/
requestBody: UpdateGroupByTrackingIDReqPayload;
/**
* Tracking id of the chunk_group to update
*/
trackingId: string;
/**
* The dataset id or tracking_id to use for the request. We assume you intend to use an id if the value is a valid uuid.
*/
trDataset: string;
};

export type UpdateGroupByTrackingIdResponse = (void);

export type DeleteGroupByTrackingIdData = {
/**
* Delete the chunks within the group
Expand Down Expand Up @@ -4851,19 +4834,6 @@ export type $OpenApiTs = {
400: ErrorResponseBody;
};
};
put: {
req: UpdateGroupByTrackingIdData;
res: {
/**
* Confirmation that the chunkGroup was updated
*/
204: void;
/**
* Service error relating to updating the chunkGroup
*/
400: ErrorResponseBody;
};
};
delete: {
req: DeleteGroupByTrackingIdData;
res: {
Expand Down
20 changes: 20 additions & 0 deletions server/src/data/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3139,6 +3139,16 @@ pub struct DatasetTags {
pub tag: String,
}

impl DatasetTags {
    /// Creates a `DatasetTags` value associating `tag` with `dataset_id`.
    ///
    /// The `id` field is filled with a freshly generated v4 UUID; the two
    /// arguments are stored unchanged.
    pub fn from_details(dataset_id: uuid::Uuid, tag: String) -> Self {
        let id = uuid::Uuid::new_v4();
        Self { id, dataset_id, tag }
    }
}

#[derive(Debug, Serialize, Deserialize, Queryable, Insertable, Selectable, Clone)]
#[diesel(table_name = chunk_metadata_tags)]
pub struct ChunkMetadataTags {
Expand All @@ -3147,6 +3157,16 @@ pub struct ChunkMetadataTags {
pub tag_id: uuid::Uuid,
}

impl ChunkMetadataTags {
    /// Creates a `ChunkMetadataTags` value linking `chunk_metadata_id` to `tag_id`.
    ///
    /// The `id` field is filled with a freshly generated v4 UUID; the two
    /// arguments are stored unchanged.
    pub fn from_details(chunk_metadata_id: uuid::Uuid, tag_id: uuid::Uuid) -> Self {
        let id = uuid::Uuid::new_v4();
        Self { id, chunk_metadata_id, tag_id }
    }
}

#[derive(Debug)]
pub enum ApiKeyRole {
Read = 0,
Expand Down
28 changes: 14 additions & 14 deletions server/src/handlers/chunk_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -603,33 +603,33 @@ pub async fn delete_chunk_by_tracking_id(
}))]
pub struct UpdateChunkReqPayload {
/// Id of the chunk you want to update. You can provide either the chunk_id or the tracking_id. If both are provided, the chunk_id will be used.
chunk_id: Option<uuid::Uuid>,
pub chunk_id: Option<uuid::Uuid>,
/// Tracking_id of the chunk you want to update. This is required to match an existing chunk.
tracking_id: Option<String>,
pub tracking_id: Option<String>,
/// Tag set is a list of tags. This can be used to filter chunks by tag. Unlike with metadata filtering, HNSW indices will exist for each tag such that there is not a performance hit for filtering on them. If no tag_set is provided, the existing tag_set will be used.
tag_set: Option<Vec<String>>,
pub tag_set: Option<Vec<String>>,
/// Link of the chunk you want to update. This can also be any string. Frequently, this is a link to the source of the chunk. The link value will not affect the embedding creation. If no link is provided, the existing link will be used.
link: Option<String>,
pub link: Option<String>,
///Num value is an arbitrary numerical value that can be used to filter chunks. This is useful for when you want to filter chunks by numerical value. If no num_value is provided, the existing num_value will be used.
num_value: Option<f64>,
pub num_value: Option<f64>,
/// HTML content of the chunk you want to update. This can also be plaintext. The innerText of the HTML will be used to create the embedding vector. The point of using HTML is for convenience, as some users have applications where users submit HTML content. If no chunk_html is provided, the existing chunk_html will be used.
chunk_html: Option<String>,
pub chunk_html: Option<String>,
/// The metadata is a JSON object which can be used to filter chunks. This is useful for when you want to filter chunks by arbitrary metadata. Unlike with tag filtering, there is a performance hit for filtering on metadata. If no metadata is provided, the existing metadata will be used.
metadata: Option<serde_json::Value>,
pub metadata: Option<serde_json::Value>,
/// Time_stamp should be an ISO 8601 combined date and time without timezone. It is used for time window filtering and recency-biasing search results. If no time_stamp is provided, the existing time_stamp will be used.
time_stamp: Option<String>,
pub time_stamp: Option<String>,
/// Weight is a float which can be used to bias search results. This is useful for when you want to bias search results for a chunk. The magnitude only matters relative to other chunks in the chunk's dataset. If no weight is provided, the existing weight will be used.
weight: Option<f64>,
pub weight: Option<f64>,
/// Group ids are the ids of the groups that the chunk should be placed into. This is useful for when you want to update a chunk and add it to a group or multiple groups in one request.
group_ids: Option<Vec<uuid::Uuid>>,
pub group_ids: Option<Vec<uuid::Uuid>>,
/// Group tracking_ids are the tracking_ids of the groups that the chunk should be placed into. This is useful for when you want to update a chunk and add it to a group or multiple groups in one request.
group_tracking_ids: Option<Vec<String>>,
pub group_tracking_ids: Option<Vec<String>>,
/// Location is a GeoInfo object which lets you specify a latitude and longitude which can be used later to filter results.
location: Option<GeoInfo>,
pub location: Option<GeoInfo>,
/// Image urls are a list of urls to images that are associated with the chunk. This is useful for when you want to associate images with a chunk. If no image_urls are provided, the existing image_urls will be used.
image_urls: Option<Vec<String>>,
pub image_urls: Option<Vec<String>>,
/// Convert HTML to raw text before processing to avoid adding noise to the vector embeddings. By default this is true. If you are using HTML content that you want to be included in the vector embeddings, set this to false.
convert_html_to_text: Option<bool>,
pub convert_html_to_text: Option<bool>,
/// Full text boost is useful for when you want to boost certain phrases in the fulltext (SPLADE) and BM25 search results. I.e. making sure that the listing for AirBNB itself ranks higher than companies who make software for AirBNB hosts by boosting the in-document-frequency of the AirBNB token (AKA word) for its official listing. Conceptually it multiplies the in-document-importance second value in the tuples of the SPLADE or BM25 sparse vector of the chunk_html innerText for all tokens present in the boost phrase by the boost factor like so: (token, in-document-importance) -> (token, in-document-importance*boost_factor).
#[serde(alias = "boost_phrase")]
pub fulltext_boost: Option<FullTextBoost>,
Expand Down
1 change: 0 additions & 1 deletion server/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,6 @@ impl Modify for SecurityAddon {
handlers::group_handler::get_groups_for_chunks,
handlers::group_handler::get_group_by_tracking_id,
handlers::group_handler::delete_group_by_tracking_id,
handlers::group_handler::update_group_by_tracking_id,
handlers::group_handler::add_chunk_to_group_by_tracking_id,
handlers::group_handler::get_chunks_in_group_by_tracking_id,
handlers::group_handler::search_within_group,
Expand Down
Loading

0 comments on commit 6b05342

Please sign in to comment.