documentation: update to remove shipped soons
skeptrunedev committed Apr 8, 2024
1 parent e636c0d commit 1d1ba86
Showing 7 changed files with 45 additions and 28 deletions.
2 changes: 1 addition & 1 deletion server/src/handlers/chunk_handler.rs
@@ -1129,7 +1129,7 @@ pub struct SearchChunkData {
pub search_type: String,
/// Query is the search query. This can be any string. The query will be used to create an embedding vector and/or SPLADE vector which will be used to find the result set.
pub query: String,
- /// Page of chunks to fetch. Each page is 10 chunks. Support for custom page size is coming soon.
+ /// Page of chunks to fetch. Page is 1-indexed.
pub page: Option<u64>,
/// Page size is the number of chunks to fetch. This can be used to fetch more than 10 chunks at a time.
pub page_size: Option<u64>,
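For reference, a minimal sketch of how a caller might use the now 1-indexed page together with page_size. The route, auth header name, and body values below are assumptions inferred from the handlers in this diff, not verbatim from the repo (requires reqwest with the json feature, tokio, and serde_json):

use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Assumed route and Authorization header; the TR-Dataset header is taken
    // from the utoipa annotations elsewhere in this diff.
    let body = json!({
        "search_type": "hybrid",
        "query": "release notes",
        "page": 1,        // pages are 1-indexed
        "page_size": 25   // custom page sizes are supported via page_size
    });

    let resp = reqwest::Client::new()
        .post("https://api.trieve.ai/api/chunk/search")
        .header("Authorization", "<api-key>")
        .header("TR-Dataset", "<dataset-id>")
        .json(&body)
        .send()
        .await?;

    println!("{}", resp.text().await?);
    Ok(())
}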
2 changes: 1 addition & 1 deletion server/src/handlers/event_handler.rs
@@ -25,7 +25,7 @@ pub struct GetEventsData {

/// Get events for the dataset
///
- /// Get events for the auth'ed user. Currently, this is only for events belonging to the auth'ed user. Soon, we plan to associate events to datasets instead of users.
+ /// Get events for the dataset specified by the TR-Dataset header.
#[utoipa::path(
post,
path = "/events",
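A hedged sketch of fetching events under the new dataset-scoped behavior. Only the TR-Dataset header is confirmed by the diff; the route and the body fields of GetEventsData are assumptions:

use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // The dataset is now selected by the TR-Dataset header rather than being
    // tied to the authenticated user. The `page` field is an assumed example.
    let resp = reqwest::Client::new()
        .post("https://api.trieve.ai/api/events")
        .header("Authorization", "<api-key>")
        .header("TR-Dataset", "<dataset-id>")
        .json(&json!({ "page": 1 }))
        .send()
        .await?;

    println!("{}", resp.text().await?);
    Ok(())
}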
4 changes: 2 additions & 2 deletions server/src/handlers/file_handler.rs
@@ -189,7 +189,7 @@ pub async fn upload_file_handler(

/// Get File
///
- /// Download a file from S3 attached to the server based on its id. We plan to add support for getting signed S3 URLs to download from S3 directly in a release soon.
+ /// Download a file based on its id.
#[utoipa::path(
get,
path = "/file/{file_id}",
@@ -293,7 +293,7 @@ pub struct DeleteFileQueryParams {

/// Delete File
///
- /// Delete a file from S3 attached to the server based on its id. This will disassociate chunks from the file, but will not delete the chunks. We plan to add support for deleting chunks in a release soon. Auth'ed user must be an admin or owner of the dataset's organization to upload a file.
+ /// Delete a file from S3 attached to the server based on its id. This will disassociate chunks from the file, and will also delete them if delete_chunks is set to true. Auth'ed user must be an admin or owner of the dataset's organization to delete a file.
#[utoipa::path(
delete,
path = "/file/{file_id}",
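A sketch of the new delete semantics, assuming delete_chunks is passed as a query parameter per DeleteFileQueryParams above (the exact route and auth headers are assumptions):

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let file_id = "<file-uuid>";

    // delete_chunks=true asks the server to delete the file's chunks as well,
    // instead of merely disassociating them from the file.
    let resp = reqwest::Client::new()
        .delete(format!(
            "https://api.trieve.ai/api/file/{file_id}?delete_chunks=true"
        ))
        .header("Authorization", "<api-key>")
        .header("TR-Dataset", "<dataset-id>")
        .send()
        .await?;

    assert!(resp.status().is_success());
    Ok(())
}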
26 changes: 13 additions & 13 deletions server/src/handlers/group_handler.rs
@@ -131,7 +131,7 @@ pub struct DatasetGroupQuery {
params(
("TR-Dataset" = String, Header, description = "The dataset id to use for the request"),
("dataset_id" = uuid::Uuid, description = "The id of the dataset to fetch groups for."),
("page" = i64, description = "The page of groups to fetch. Each page contains 10 groups. Support for custom page size is coming soon."),
("page" = i64, description = "The page of groups to fetch. Page is 1-indexed."),
),
security(
("ApiKey" = ["readonly"]),
@@ -379,7 +379,7 @@ pub struct DeleteGroupData {

/// Delete Group
///
- /// This will delete a chunk_group. This will not delete the chunks that are in the group. We will soon support deleting a chunk_group along with its member chunks.
+ /// This will delete a chunk_group. If you set delete_chunks to true, it will also delete the chunks within the group.
#[utoipa::path(
delete,
path = "/chunk_group/{group_id}",
@@ -639,7 +639,7 @@ pub struct GetAllBookmarksData {

/// Get Chunks in Group
///
- /// Route to get all chunks for a group. The response is paginated, with each page containing 10 chunks. Support for custom page size is coming soon.
+ /// Route to get all chunks for a group. The response is paginated, with each page containing 10 chunks. Page is 1-indexed.
#[utoipa::path(
get,
path = "/chunk_group/{group_id}/{page}",
@@ -693,7 +693,7 @@ pub struct GetAllBookmarksByTrackingIdData {

/// Get Chunks in Group by Tracking ID
///
- /// Route to get all chunks for a group. The response is paginated, with each page containing 10 chunks. Support for custom page size is coming soon.
+ /// Route to get all chunks for a group. The response is paginated, with each page containing 10 chunks. Page is 1-indexed.
#[utoipa::path(
get,
path = "/chunk_group/tracking_id/{group_tracking_id}/{page}",
@@ -845,19 +845,19 @@ pub struct GenerateOffGroupData {

#[derive(Debug, Serialize, Deserialize, Clone, ToSchema)]
pub struct ReccomendGroupChunksRequest {
/// The ids of the groups to be used as positive examples for the recommendation. The groups in this array will be used to find similar groups.
pub positive_group_ids: Option<Vec<uuid::Uuid>>,
/// The ids of the groups to be used as negative examples for the recommendation. The groups in this array will be used to filter out similar groups.
pub negative_group_ids: Option<Vec<uuid::Uuid>>,
/// The ids of the groups to be used as positive examples for the recommendation. The groups in this array will be used to find similar groups.
pub positive_group_tracking_ids: Option<Vec<String>>,
/// The ids of the groups to be used as negative examples for the recommendation. The groups in this array will be used to filter out similar groups.
pub negative_group_tracking_ids: Option<Vec<String>>,
/// Filters to apply to the chunks to be recommended. This is a JSON object which contains the filters to apply to the chunks to be recommended. The default is None.
pub filters: Option<ChunkFilter>,
/// The number of groups to return. This is the number of groups which will be returned in the response. The default is 10.
pub limit: Option<u64>,
- /// The number of chunks to fetch for each group. This is the number of chunks which will be returned in the response for each group. The default is 10.
+ /// The number of chunks to fetch for each group. This is the number of chunks which will be returned in the response for each group. The default is 3. If this is set to a large number, we recommend setting slim_chunks to true to avoid returning the content and chunk_html of the chunks so as to reduce latency due to content download and serialization.
pub group_size: Option<u32>,
/// Set slim_chunks to true to avoid returning the content and chunk_html of the chunks. This is useful for when you want to reduce amount of data over the wire for latency improvement. Default is false.
pub slim_chunks: Option<bool>,
@@ -1081,7 +1081,7 @@ pub async fn get_recommended_groups(
pub struct SearchWithinGroupData {
/// The query is the search query. This can be any string. The query will be used to create an embedding vector and/or SPLADE vector which will be used to find the result set.
pub query: String,
- /// The page of chunks to fetch. Each page is 10 chunks. Support for custom page size is coming soon.
+ /// The page of chunks to fetch. Page is 1-indexed.
pub page: Option<u64>,
/// The page size is the number of chunks to fetch. This can be used to fetch more than 10 chunks at a time.
pub page_size: Option<u64>,
@@ -1264,9 +1264,9 @@ pub struct SearchOverGroupsData {
pub search_type: String,
/// Query is the search query. This can be any string. The query will be used to create an embedding vector and/or SPLADE vector which will be used to find the result set.
pub query: String,
- /// Page of chunks to fetch. Each page is 10 chunks. Support for custom page size is coming soon.
+ /// Page of group results to fetch. Page is 1-indexed.
pub page: Option<u64>,
- /// Page size is the number of chunks to fetch. This can be used to fetch more than 10 chunks at a time.
+ /// Page size is the number of group results to fetch. The default is 10.
pub page_size: Option<u32>,
/// Filters is a JSON object which can be used to filter chunks. The values on each key in the object will be used to check for an exact substring match on the metadata values for each existing chunk. This is useful for when you want to filter chunks by arbitrary metadata. Unlike with tag filtering, there is a performance hit for filtering on metadata.
pub filters: Option<ChunkFilter>,
@@ -1278,7 +1278,7 @@ pub struct SearchOverGroupsData {
pub highlight_delimiters: Option<Vec<String>>,
/// Set score_threshold to a float to filter out chunks with a score below the threshold.
pub score_threshold: Option<f32>,
- // Group_size is the number of chunks to fetch for each group.
+ /// Group_size is the number of chunks to fetch for each group. The default is 3. If a group has fewer than group_size chunks, all of its chunks will be returned. If this is set to a large number, we recommend setting slim_chunks to true to avoid returning the content and chunk_html of the chunks so as to lower the amount of time required for content download and serialization.
pub group_size: Option<u32>,
/// Set slim_chunks to true to avoid returning the content and chunk_html of the chunks. This is useful for when you want to reduce amount of data over the wire for latency improvement. Default is false.
pub slim_chunks: Option<bool>,
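To tie the group_size and slim_chunks guidance above together, a sketch of a recommendation request body; the route is an assumption and the field values are illustrative:

use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let body = json!({
        "positive_group_ids": ["<group-uuid>"],
        "limit": 10,
        "group_size": 3,     // the new documented default
        "slim_chunks": true  // recommended when group_size is large
    });

    let resp = reqwest::Client::new()
        .post("https://api.trieve.ai/api/chunk_group/recommend")
        .header("Authorization", "<api-key>")
        .header("TR-Dataset", "<dataset-id>")
        .json(&body)
        .send()
        .await?;

    println!("{}", resp.text().await?);
    Ok(())
}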
6 changes: 3 additions & 3 deletions server/src/lib.rs
@@ -130,7 +130,7 @@ impl Modify for SecurityAddon {
name = "BSL",
url = "https://github.com/devflowinc/trieve/blob/main/LICENSE.txt",
),
version = "0.5.7",
version = "0.5.8",
),
servers(
(url = "https://api.trieve.ai",
@@ -311,7 +311,7 @@ impl Modify for SecurityAddon {
(name = "chunk", description = "Chunk endpoint. Think of chunks as individual searchable units of information. The majority of your integration will likely be with the Chunk endpoint."),
(name = "chunk_group", description = "Chunk groups endpoint. Think of a chunk_group as a bookmark folder within the dataset."),
(name = "file", description = "File endpoint. When files are uploaded, they are stored in S3 and broken up into chunks with text extraction from Apache Tika. You can upload files of pretty much any type up to 1GB in size. See chunking algorithm details at `docs.trieve.ai` for more information on how chunking works. Improved default chunking is on our roadmap."),
(name = "events", description = "Notifications endpoint. Files are uploaded asynchronously and events are sent to the user when the upload is complete. Soon, chunk creation will work in the same way."),
(name = "events", description = "Notifications endpoint. Files are uploaded asynchronously and events are sent to the user when the upload is complete."),
(name = "topic", description = "Topic chat endpoint. Think of topics as the storage system for gen-ai chat memory. Gen AI messages belong to topics."),
(name = "message", description = "Message chat endpoint. Messages are units belonging to a topic in the context of a chat with a LLM. There are system, user, and assistant messages."),
(name = "stripe", description = "Stripe endpoint. Used for the managed SaaS version of this app. Eventually this will become a micro-service. Reach out to the team using contact info found at `docs.trieve.ai` for more information."),
@@ -408,7 +408,7 @@ pub async fn main() -> std::io::Result<()> {
let _ = create_new_qdrant_collection_query(None, None, None, quantize_vectors)
.await
.map_err(|err| {
log::error!("Failed to create qdrant group: {:?}", err);
log::error!("Failed to create new qdrant collection: {:?}", err);
});

if std::env::var("ADMIN_API_KEY").is_ok() {
10 changes: 5 additions & 5 deletions server/src/operators/qdrant_operator.rs
@@ -20,7 +20,7 @@ use qdrant_client::{
use serde::{Deserialize, Serialize};
use std::{collections::HashMap, str::FromStr};

- #[tracing::instrument]
+ #[tracing::instrument(skip(qdrant_url, qdrant_api_key))]
pub async fn get_qdrant_connection(
qdrant_url: Option<&str>,
qdrant_api_key: Option<&str>,
@@ -40,7 +40,7 @@ pub async fn get_qdrant_connection(
}

/// Create Qdrant collection and indexes needed
- #[tracing::instrument]
+ #[tracing::instrument(skip(qdrant_url, qdrant_api_key))]
pub async fn create_new_qdrant_collection_query(
qdrant_url: Option<&str>,
qdrant_api_key: Option<&str>,
@@ -62,9 +62,9 @@ pub async fn create_new_qdrant_collection_query(
.await;
if let Ok(collection) = collection {
if collection.result.is_some() {
-             return Err(ServiceError::BadRequest(
-                 "Collection already exists".to_string(),
-             ));
+             log::info!("Avoided creating collection as it already exists");
+
+             return Ok(());
}
}

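The skip(...) additions above keep connection URLs and API keys out of emitted trace spans. A standalone sketch of the pattern (not code from this repo; requires tracing, tracing-subscriber, and tokio):

use tracing::info;

// Arguments listed in skip(...) are omitted from the generated span, so the
// secret never reaches the tracing subscriber or any downstream collector.
#[tracing::instrument(skip(api_key))]
async fn connect(url: &str, api_key: &str) {
    info!("connecting"); // the span records `url` but not `api_key`
}

#[tokio::main]
async fn main() {
    tracing_subscriber::fmt::init();
    connect("https://example.com", "super-secret").await;
}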
23 changes: 20 additions & 3 deletions server/src/operators/user_operator.rs
@@ -436,7 +436,7 @@ pub async fn add_user_to_organization(
Ok(())
}

- #[tracing::instrument(skip(pool))]
+ #[tracing::instrument(skip(pool, api_key))]
pub async fn create_default_user(api_key: &str, pool: web::Data<Pool>) -> Result<(), ServiceError> {
use crate::data::schema::organizations::dsl as organization_columns;
use crate::data::schema::user_organizations::dsl as user_organizations_columns;
@@ -452,11 +452,28 @@ pub async fn create_default_user(api_key: &str, pool: web::Data<Pool>) -> Result
None,
);

- let user = diesel::insert_into(users_columns::users)
+ let option_user = match diesel::insert_into(users_columns::users)
      .values(&user)
      .get_result::<User>(&mut conn)
      .await
- .map_err(|_| ServiceError::BadRequest("Failed to create default user".to_string()))?;
+ {
+     Ok(user) => Some(user),
+     Err(e) => {
+         if e.to_string()
+             .contains("duplicate key value violates unique constraint")
+         {
+             log::info!("Skipped creating default user as it already exists");
+         }
+
+         None
+     }
+ };
+
+ if option_user.is_none() {
+     return Ok(());
+ }
+
+ let user = option_user.expect("User must be present");

let org = Organization::from_details("default".to_string());

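The new error handling above detects an existing default user by substring-matching the error message. A more robust, hypothetical variant would match Diesel's typed error instead; this sketch names a helper that does not exist in the repo:

use diesel::result::{DatabaseErrorKind, Error as DieselError};

// Hypothetical helper: classify an insert failure without string matching.
fn is_unique_violation(err: &DieselError) -> bool {
    matches!(
        err,
        DieselError::DatabaseError(DatabaseErrorKind::UniqueViolation, _)
    )
}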
