Skip to content

Commit

Permalink
feature: extend autocomplete filtered results with normal results
Browse files Browse the repository at this point in the history
  • Loading branch information
skeptrunedev authored and cdxker committed May 31, 2024
1 parent ea68583 commit 02f5e01
Show file tree
Hide file tree
Showing 11 changed files with 155 additions and 61 deletions.
20 changes: 1 addition & 19 deletions convenience.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,9 @@ reset_s3_service() {
docker compose up -d s3
}

# Function to reset the script database
reset_script_redis() {
echo "Resetting the script Redis database..."
docker compose stop script-redis
docker compose rm -f script-redis
docker volume rm vault_script-redis-data
docker compose up -d script-redis
}

start_local_services() {
echo "Starting local services..."
docker compose up -d db
docker compose up -d redis
docker compose up -d qdrant-database
docker compose up -d s3
docker compose up -d s3-client
docker compose up -d keycloak
docker compose up -d keycloak-db
docker compose up -d db redis qdrant-database s3 s3-client keycloak keycloak-db tika
}

# Main script logic
Expand All @@ -47,9 +32,6 @@ while getopts ":qps3l" opt; do
3)
reset_s3_service
;;
s)
reset_script_redis
;;
l)
start_local_services
;;
Expand Down
2 changes: 2 additions & 0 deletions search/src/HomeSearch.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export const HomeSearch = () => {
const requestParams = url.split("?")[1];

const params = new URLSearchParams(requestParams);
const extendResults = params.get("extendResults") === "true" || false;
const searchType: string = params.get("searchType") ?? "search";
const groupUnique = params.get("groupUnique") === "true" || false;
const slimChunks = params.get("slimChunks") === "true" || false;
Expand Down Expand Up @@ -55,6 +56,7 @@ export const HomeSearch = () => {
<div class="mt-8 w-full max-w-7xl px-4 sm:px-8 md:px-20">
<SearchForm
searchType={searchType}
extendResults={extendResults}
groupUniqueSearch={groupUnique}
slimChunks={slimChunks}
pageSize={pageSize}
Expand Down
2 changes: 2 additions & 0 deletions search/src/components/ResultsPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export interface ResultsPageProps {
query: string;
page: number;
searchType: string;
extendResults?: boolean;
groupUnique?: boolean;
slimChunks?: boolean;
pageSize?: number;
Expand Down Expand Up @@ -160,6 +161,7 @@ const ResultsPage = (props: ResultsPageProps) => {

if (props.searchType.includes("autocomplete")) {
searchRoute = "chunk/autocomplete";
requestBody["extend_results"] = props.extendResults ?? false;
}

props.setLoading(true);
Expand Down
41 changes: 39 additions & 2 deletions search/src/components/SearchForm.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import { FiChevronDown, FiChevronUp } from "solid-icons/fi";
const SearchForm = (props: {
query?: string;
searchType: string;
extendResults?: boolean;
groupUniqueSearch?: boolean;
slimChunks?: boolean;
pageSize?: number;
Expand Down Expand Up @@ -85,6 +86,10 @@ const SearchForm = (props: {
// eslint-disable-next-line solid/reactivity
props.recencyBias ?? 0.0,
);
const [extendResults, setExtendResults] = createSignal(
// eslint-disable-next-line solid/reactivity
props.extendResults ?? false,
);

const resizeTextarea = (textarea: HTMLTextAreaElement | null) => {
if (!textarea) return;
Expand All @@ -108,7 +113,11 @@ const SearchForm = (props: {
const searchTypeUrlParam = searchTypeRoute
? `&searchType=${searchTypeRoute}`
: "";
let extendResultsUrlParam = "";

if (searchTypeRoute.includes("autocomplete")) {
extendResultsUrlParam = extendResults() ? "&extendResults=true" : "";
}
const groupUniqueUrlParam = groupUniqueSearch() ? "&groupUnique=true" : "";
const slimChunksUrlParam = slimChunks() ? "&slimChunks=true" : "";
const pageSizeUrlParam = pageSize() ? `&pageSize=${pageSize()}` : "";
Expand All @@ -131,7 +140,8 @@ const SearchForm = (props: {
getTotalPagesUrlParam +
highlightDelimitersUrlParam +
highlightResultsUrlParam +
recencyBiasUrlParam
recencyBiasUrlParam +
extendResultsUrlParam
: `/search?q=${searchQuery}` +
searchTypeUrlParam +
groupUniqueUrlParam +
Expand All @@ -140,7 +150,8 @@ const SearchForm = (props: {
getTotalPagesUrlParam +
highlightDelimitersUrlParam +
highlightResultsUrlParam +
recencyBiasUrlParam;
recencyBiasUrlParam +
extendResultsUrlParam;

navigate(urlToNavigateTo);
};
Expand Down Expand Up @@ -455,6 +466,32 @@ const SearchForm = (props: {
Reset
</button>
</div>
<Show
when={
searchTypes().find((type) => type.isSelected)
?.route === "autocomplete-semantic" ||
searchTypes().find((type) => type.isSelected)
?.route === "autocomplete-fulltext"
}
>
<div class="flex items-center justify-between space-x-2 p-1">
<label>Extend Results (autocomplete only):</label>
<input
class="h-4 w-4"
type="checkbox"
checked={props.extendResults}
onChange={(e) => {
if (e.target.checked) {
setExtendResults(true);
} else {
setExtendResults(false);
}

onSubmit(e);
}}
/>
</div>
</Show>
<div class="flex items-center justify-between space-x-2 p-1">
<label>Slim Chunks (Latency Improvement):</label>
<input
Expand Down
4 changes: 4 additions & 0 deletions search/src/pages/Search.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ export const Search = () => {
"!",
]);
const [recencyBias, setRecencyBias] = createSignal<number>(0.0);
const [extendResults, setExtendResults] = createSignal<boolean>(false);

createEffect(() => {
setLoading(true);

setQuery(location.query.q ?? "");
setExtendResults(location.query.extendResults === "true" || false);
setPage(Number(location.query.page) || 1);
setSearchType(location.query.searchType ?? "hybrid");
setGroupUnique(location.query.groupUnique === "true" || false);
Expand All @@ -50,6 +52,7 @@ export const Search = () => {
<div class="mx-auto mt-8 w-full max-w-[calc(100%-32px)] px-4 min-[360px]:max-w-[calc(100%-64px)] sm:px-8 md:px-20">
<SearchForm
query={query()}
extendResults={extendResults()}
groupUniqueSearch={groupUnique()}
slimChunks={slimChunks()}
searchType={searchType()}
Expand All @@ -65,6 +68,7 @@ export const Search = () => {
<ResultsPage
page={page()}
query={query()}
extendResults={extendResults()}
groupUnique={groupUnique()}
slimChunks={slimChunks()}
pageSize={pageSize()}
Expand Down
30 changes: 16 additions & 14 deletions server/src/handlers/chunk_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -887,7 +887,7 @@ pub struct ChunkFilter {
"highlight_delimiters": ["?", ",", ".", "!"],
"score_threshold": 0.5
}))]
pub struct SearchChunkData {
pub struct SearchChunksReqPayload {
/// Can be either "semantic", "fulltext", or "hybrid". If specified as "hybrid", it will pull in one page (10 chunks) of both semantic and full-text results then re-rank them using BAAI/bge-reranker-large. "semantic" will pull in one page (10 chunks) of the nearest cosine distant vectors. "fulltext" will pull in one page (10 chunks) of full-text results based on SPLADE.
pub search_type: String,
/// Query is the search query. This can be any string. The query will be used to create an embedding vector and/or SPLADE vector which will be used to find the result set.
Expand Down Expand Up @@ -922,9 +922,9 @@ pub struct SearchChunkData {
pub content_only: Option<bool>,
}

impl Default for SearchChunkData {
impl Default for SearchChunksReqPayload {
fn default() -> Self {
SearchChunkData {
SearchChunksReqPayload {
search_type: "hybrid".to_string(),
query: "".to_string(),
page: Some(1),
Expand Down Expand Up @@ -998,7 +998,7 @@ pub fn parse_query(query: String) -> ParsedQuery {
path = "/chunk/search",
context_path = "/api",
tag = "chunk",
request_body(content = SearchChunkData, description = "JSON request payload to semantically search for chunks (chunks)", content_type = "application/json"),
request_body(content = SearchChunksReqPayload, description = "JSON request payload to semantically search for chunks (chunks)", content_type = "application/json"),
responses(
(status = 200, description = "Chunks with embedding vectors which are similar to those in the request body", body = SearchChunkQueryResponseBody),
Expand All @@ -1013,7 +1013,7 @@ pub fn parse_query(query: String) -> ParsedQuery {
)]
#[tracing::instrument(skip(pool))]
pub async fn search_chunks(
data: web::Json<SearchChunkData>,
data: web::Json<SearchChunksReqPayload>,
_user: LoggedUser,
pool: web::Data<Pool>,
dataset_org_plan_sub: DatasetAndOrgWithSubAndPlan,
Expand Down Expand Up @@ -1136,9 +1136,11 @@ pub async fn search_chunks(
"highlight_delimiters": ["?", ",", ".", "!"],
"score_threshold": 0.5
}))]
pub struct AutocompleteData {
/// Can be either "semantic", or "fulltext". "semantic" will pull in one page (10 chunks) of the nearest cosine distant vectors. "fulltext" will pull in one page (10 chunks) of full-text results based on SPLADE.
pub struct AutocompleteReqPayload {
/// Can be either "semantic" or "fulltext". "semantic" will pull in one page (page_size chunks) of the nearest vectors by cosine distance. "fulltext" will pull in one page (page_size chunks) of full-text results based on SPLADE.
pub search_type: String,
/// If set to true, the search results are extended with non-exact prefix matches of the same search_type so that a full page_size of results is returned. Defaults to false.
pub extend_results: Option<bool>,
/// Query is the search query. This can be any string. The query will be used to create an embedding vector and/or SPLADE vector which will be used to find the result set.
pub query: String,
/// Page size is the number of chunks to fetch. This can be used to fetch more than 10 chunks at a time.
Expand All @@ -1165,9 +1167,9 @@ pub struct AutocompleteData {
pub content_only: Option<bool>,
}

impl From<AutocompleteData> for SearchChunkData {
fn from(autocomplete_data: AutocompleteData) -> Self {
SearchChunkData {
impl From<AutocompleteReqPayload> for SearchChunksReqPayload {
fn from(autocomplete_data: AutocompleteReqPayload) -> Self {
SearchChunksReqPayload {
search_type: autocomplete_data.search_type,
query: autocomplete_data.query,
page: Some(1),
Expand Down Expand Up @@ -1200,7 +1202,7 @@ impl From<AutocompleteData> for SearchChunkData {
path = "/chunk/autocomplete",
context_path = "/api",
tag = "chunk",
request_body(content = AutocompleteData, description = "JSON request payload to semantically search for chunks (chunks)", content_type = "application/json"),
request_body(content = AutocompleteReqPayload, description = "JSON request payload to semantically search for chunks (chunks)", content_type = "application/json"),
responses(
(status = 200, description = "Chunks with embedding vectors which are similar to those in the request body", body = SearchChunkQueryResponseBody),
Expand All @@ -1215,7 +1217,7 @@ impl From<AutocompleteData> for SearchChunkData {
)]
#[tracing::instrument(skip(pool))]
pub async fn autocomplete(
data: web::Json<AutocompleteData>,
data: web::Json<AutocompleteReqPayload>,
_user: LoggedUser,
pool: web::Data<Pool>,
dataset_org_plan_sub: DatasetAndOrgWithSubAndPlan,
Expand All @@ -1241,7 +1243,7 @@ pub async fn autocomplete(
}

autocomplete_fulltext_chunks(
data.clone().into(),
data.clone(),
parsed_query,
pool,
dataset_org_plan_sub.dataset,
Expand All @@ -1252,7 +1254,7 @@ pub async fn autocomplete(
}
"semantic" => {
autocomplete_semantic_chunks(
data.clone().into(),
data.clone(),
parsed_query,
pool,
dataset_org_plan_sub.dataset,
Expand Down
4 changes: 2 additions & 2 deletions server/src/handlers/group_handler.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::{
auth_handler::{AdminOnly, LoggedUser},
chunk_handler::{parse_query, ChunkFilter, SearchChunkData},
chunk_handler::{parse_query, ChunkFilter, SearchChunksReqPayload},
};
use crate::{
data::models::{
Expand Down Expand Up @@ -1115,7 +1115,7 @@ pub struct SearchWithinGroupData {
pub slim_chunks: Option<bool>,
}

impl From<SearchWithinGroupData> for SearchChunkData {
impl From<SearchWithinGroupData> for SearchChunksReqPayload {
fn from(data: SearchWithinGroupData) -> Self {
Self {
query: data.query,
Expand Down
6 changes: 3 additions & 3 deletions server/src/handlers/message_handler.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::{
auth_handler::{AdminOnly, LoggedUser},
chunk_handler::{ChunkFilter, ParsedQuery, SearchChunkData},
chunk_handler::{ChunkFilter, ParsedQuery, SearchChunksReqPayload},
};
use crate::{
data::models::{
Expand Down Expand Up @@ -633,7 +633,7 @@ pub async fn stream_response(
}

let n_retrievals_to_include = dataset_config.N_RETRIEVALS_TO_INCLUDE;
let search_chunk_data = SearchChunkData {
let search_chunk_data = SearchChunksReqPayload {
search_type: "hybrid".to_string(),
query: query.clone(),
page_size: Some(n_retrievals_to_include.try_into().unwrap_or(8)),
Expand Down Expand Up @@ -892,7 +892,7 @@ pub async fn get_suggested_queries(
};

let chunk_metadatas = search_hybrid_chunks(
SearchChunkData {
SearchChunksReqPayload {
search_type: "hybrid".to_string(),
query: data.query.clone(),
page_size: Some(10),
Expand Down
4 changes: 2 additions & 2 deletions server/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,8 +237,8 @@ impl Modify for SecurityAddon {
handlers::chunk_handler::UpdateChunkByTrackingIdData,
handlers::chunk_handler::SearchChunkQueryResponseBody,
handlers::chunk_handler::GenerateChunksRequest,
handlers::chunk_handler::SearchChunkData,
handlers::chunk_handler::AutocompleteData,
handlers::chunk_handler::SearchChunksReqPayload,
handlers::chunk_handler::AutocompleteReqPayload,
handlers::group_handler::SearchWithinGroupData,
handlers::group_handler::SearchOverGroupsData,
handlers::group_handler::SearchWithinGroupResults,
Expand Down
4 changes: 2 additions & 2 deletions server/src/operators/qdrant_operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -871,7 +871,7 @@ pub async fn search_qdrant_query(
futures::future::join_all(point_count_futures)
);

let point_ids: Vec<SearchResult> = search_response
let search_results: Vec<SearchResult> = search_response
.map_err(|e| {
log::error!("Failed to search points on Qdrant {:?}", e);
ServiceError::BadRequest("Failed to search points on Qdrant".to_string())
Expand Down Expand Up @@ -901,7 +901,7 @@ pub async fn search_qdrant_query(
.min()
.unwrap_or(0);

Ok((point_ids, point_count))
Ok((search_results, point_count))
}

#[derive(Debug, Serialize, Deserialize, Clone)]
Expand Down
Loading

0 comments on commit 02f5e01

Please sign in to comment.