Skip to content

Commit

Permalink
feature: typo tolerance (#2144)
Browse files Browse the repository at this point in the history
Co-authored-by: Dens Sumesh <[email protected]>
Co-authored-by: cdxker <[email protected]>
  • Loading branch information
3 people authored Aug 28, 2024
1 parent 3f44c6d commit ed22f21
Show file tree
Hide file tree
Showing 45 changed files with 3,217 additions and 43 deletions.
144 changes: 144 additions & 0 deletions .github/workflows/push-server.yml
Original file line number Diff line number Diff line change
Expand Up @@ -450,3 +450,147 @@ jobs:
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

# Builds the image defined by server/Dockerfile.word-id-cronjob and pushes it
# to Docker Hub as trieve/word-id-cronjob.
word-id-cronjob:
name: Push Word ID cronjob
runs-on: ${{ matrix.runner }}
strategy:
matrix:
# One runner and one platform, so the matrix expands to a single amd64 build.
runner: [blacksmith-8vcpu-ubuntu-2204]
platform: [linux/amd64]
# Neither entry below matches the single combination above, so both are
# currently no-ops.
# NOTE(review): presumably kept so an arm runner can be added later — confirm.
exclude:
- runner: blacksmith-8vcpu-ubuntu-2204
platform: linux/arm64
- runner: blacksmith-8vcpu-ubuntu-2204-arm
platform: linux/amd64
steps:
- name: Checkout the repo
uses: actions/checkout@v4

- name: Setup buildx
uses: docker/setup-buildx-action@v3

# Credentials come from repository secrets; required because the build step pushes.
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

# Computes the tags and labels consumed by the build step via steps.meta.outputs.
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
# list of Docker images to use as base name for tags
images: |
trieve/word-id-cronjob
# Two tag rules: a fixed "latest" tag and one derived from the commit SHA.
tags: |
type=raw,latest
type=sha
- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
platforms: ${{ matrix.platform }}
# Build cache is read from and written to the trieve/buildcache registry
# repository, under a ref specific to this job and runner.
cache-from: type=registry,ref=trieve/buildcache:word-id-cronjob-${{matrix.runner}}
cache-to: type=registry,ref=trieve/buildcache:word-id-cronjob-${{matrix.runner}},mode=max
context: server/
file: ./server/Dockerfile.word-id-cronjob
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

# Builds the image defined by server/Dockerfile.word-worker and pushes it to
# Docker Hub as trieve/word-worker.
word-worker:
name: Push Word Worker
runs-on: ${{ matrix.runner }}
strategy:
matrix:
# One runner and one platform, so the matrix expands to a single amd64 build.
runner: [blacksmith-8vcpu-ubuntu-2204]
platform: [linux/amd64]
# Neither entry below matches the single combination above, so both are
# currently no-ops.
# NOTE(review): presumably kept so an arm runner can be added later — confirm.
exclude:
- runner: blacksmith-8vcpu-ubuntu-2204
platform: linux/arm64
- runner: blacksmith-8vcpu-ubuntu-2204-arm
platform: linux/amd64
steps:
- name: Checkout the repo
uses: actions/checkout@v4

- name: Setup buildx
uses: docker/setup-buildx-action@v3

# Credentials come from repository secrets; required because the build step pushes.
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

# Computes the tags and labels consumed by the build step via steps.meta.outputs.
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
# list of Docker images to use as base name for tags
images: |
trieve/word-worker
# Two tag rules: a fixed "latest" tag and one derived from the commit SHA.
tags: |
type=raw,latest
type=sha
- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
platforms: ${{ matrix.platform }}
# Build cache is read from and written to the trieve/buildcache registry
# repository, under a ref specific to this job and runner.
cache-from: type=registry,ref=trieve/buildcache:word-worker-${{matrix.runner}}
cache-to: type=registry,ref=trieve/buildcache:word-worker-${{matrix.runner}},mode=max
context: server/
file: ./server/Dockerfile.word-worker
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

# Builds the image defined by server/Dockerfile.bktree-worker and pushes it to
# Docker Hub as trieve/bktree-worker.
bktree-worker:
name: Push BK-Tree Worker
runs-on: ${{ matrix.runner }}
strategy:
matrix:
# One runner and one platform, so the matrix expands to a single amd64 build.
runner: [blacksmith-8vcpu-ubuntu-2204]
platform: [linux/amd64]
# Neither entry below matches the single combination above, so both are
# currently no-ops.
# NOTE(review): presumably kept so an arm runner can be added later — confirm.
exclude:
- runner: blacksmith-8vcpu-ubuntu-2204
platform: linux/arm64
- runner: blacksmith-8vcpu-ubuntu-2204-arm
platform: linux/amd64
steps:
- name: Checkout the repo
uses: actions/checkout@v4

- name: Setup buildx
uses: docker/setup-buildx-action@v3

# Credentials come from repository secrets; required because the build step pushes.
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

# Computes the tags and labels consumed by the build step via steps.meta.outputs.
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
# list of Docker images to use as base name for tags
images: |
trieve/bktree-worker
# Two tag rules: a fixed "latest" tag and one derived from the commit SHA.
tags: |
type=raw,latest
type=sha
- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
platforms: ${{ matrix.platform }}
# Build cache is read from and written to the trieve/buildcache registry
# repository, under a ref specific to this job and runner.
cache-from: type=registry,ref=trieve/buildcache:bktree-worker-${{matrix.runner}}
cache-to: type=registry,ref=trieve/buildcache:bktree-worker-${{matrix.runner}},mode=max
context: server/
file: ./server/Dockerfile.bktree-worker
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
12 changes: 12 additions & 0 deletions frontends/search/src/components/GroupPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,18 @@ export const GroupPage = (props: GroupPageProps) => {
slim_chunks: search.debounced.slimChunks,
page_size: search.debounced.pageSize,
get_total_pages: search.debounced.getTotalPages,
// Typo-tolerance settings sent with the group search request, taken from the
// debounced search state.
typo_options: {
  correct_typos: search.debounced.correctTypos,
  one_typo_word_range: {
    min: search.debounced.oneTypoWordRangeMin,
    max: search.debounced.oneTypoWordRangeMax,
  },
  two_typo_word_range: {
    min: search.debounced.twoTypoWordRangeMin,
    max: search.debounced.twoTypoWordRangeMax,
  },
  // Singular key, matching the spelling the results-page search request uses
  // for the same option.
  // NOTE(review): confirm against the server's TypoOptions schema.
  disable_on_word: search.debounced.disableOnWords,
},
highlight_options: {
highlight_results: search.debounced.highlightResults,
highlight_strategy: search.debounced.highlightStrategy,
Expand Down
12 changes: 12 additions & 0 deletions frontends/search/src/components/ResultsPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,18 @@ const ResultsPage = (props: ResultsPageProps) => {
slim_chunks: props.search.debounced.slimChunks ?? false,
page_size: props.search.debounced.pageSize ?? 10,
get_total_pages: props.search.debounced.getTotalPages ?? false,
typo_options: {
correct_typos: props.search.debounced.correctTypos,
one_typo_word_range: {
min: props.search.debounced.oneTypoWordRangeMin,
max: props.search.debounced.oneTypoWordRangeMax,
},
two_typo_word_range: {
min: props.search.debounced.twoTypoWordRangeMin,
max: props.search.debounced.twoTypoWordRangeMax,
},
disable_on_word: props.search.debounced.disableOnWords,
},
highlight_options: {
highlight_results: props.search.debounced.highlightResults ?? true,
highlight_strategy:
Expand Down
135 changes: 134 additions & 1 deletion frontends/search/src/components/SearchForm.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -1051,6 +1051,13 @@ const SearchForm = (props: {
pageSize: 10,
getTotalPages: false,
highlightStrategy: "exactmatch",
correctTypos: false,
oneTypoWordRangeMin: 5,
oneTypoWordRangeMax: 8,
twoTypoWordRangeMin: 8,
twoTypoWordRangeMax: null,
disableOnWords: [],
typoTolerance: false,
highlightResults: true,
highlightDelimiters: ["?", ".", "!"],
highlightMaxLength: 8,
Expand Down Expand Up @@ -1195,7 +1202,7 @@ const SearchForm = (props: {
/>
</div>
<div class="flex items-center justify-between space-x-2 p-1">
<label>Remove Stop Words</label>
<label>Remove Stop Words:</label>
<input
class="h-4 w-4"
type="checkbox"
Expand All @@ -1210,6 +1217,132 @@ const SearchForm = (props: {
}}
/>
</div>
<div class="flex items-center justify-between space-x-2 p-1">
<label>Typo Tolerance (Latency Penalty):</label>
<input
class="h-4 w-4"
type="checkbox"
checked={tempSearchValues().correctTypos}
onChange={(e) => {
setTempSearchValues((prev) => {
return {
...prev,
correctTypos: e.target.checked,
};
});
}}
/>
</div>
<div class="items flex justify-between space-x-2 p-1">
<label>One typo min word length:</label>
<input
class="w-16 rounded border border-neutral-400 p-0.5 text-black"
type="number"
step="any"
value={tempSearchValues().oneTypoWordRangeMin}
onChange={(e) => {
setTempSearchValues((prev) => {
return {
...prev,
oneTypoWordRangeMin: parseInt(
e.currentTarget.value,
),
};
});
}}
/>
</div>
<div class="items flex justify-between space-x-2 p-1">
<label>One typo max word length:</label>
<input
class="w-16 rounded border border-neutral-400 p-0.5 text-black"
type="number"
step="any"
value={
tempSearchValues().oneTypoWordRangeMax?.toString() ??
""
}
onChange={(e) => {
setTempSearchValues((prev) => {
return {
...prev,
oneTypoWordRangeMax:
e.currentTarget.value === ""
? null
: parseInt(e.currentTarget.value),
};
});
}}
/>
</div>
<div class="items flex justify-between space-x-2 p-1">
<label>Two typo min word length:</label>
<input
class="w-16 rounded border border-neutral-400 p-0.5 text-black"
type="number"
step="any"
value={tempSearchValues().twoTypoWordRangeMin}
onChange={(e) => {
setTempSearchValues((prev) => {
return {
...prev,
twoTypoWordRangeMin: parseInt(
e.currentTarget.value,
),
};
});
}}
/>
</div>
<div class="items flex justify-between space-x-2 p-1">
  {/* Upper word-length bound for two-typo correction; an empty field is stored as null. */}
  <label>Two typo max word length:</label>
  <input
    class="w-16 rounded border border-neutral-400 p-0.5 text-black"
    type="number"
    step="any"
    value={tempSearchValues().twoTypoWordRangeMax?.toString() ?? ""}
    onChange={(e) => {
      const raw = e.currentTarget.value;
      setTempSearchValues((prev) => {
        return {
          ...prev,
          // Write back to the field this input displays. Previously this
          // updated oneTypoWordRangeMax, clobbering the one-typo bound and
          // leaving the two-typo bound unchanged.
          twoTypoWordRangeMax: raw === "" ? null : parseInt(raw, 10),
        };
      });
    }}
  />
</div>
<div class="items flex justify-between space-x-2 p-1">
  {/* Comma-separated list of words that typo tolerance should skip. */}
  <label>Disable typo tolerance for words:</label>
  <input
    class="w-16 rounded border border-neutral-400 p-0.5 text-black"
    type="text"
    value={tempSearchValues().disableOnWords.join(",")}
    onInput={(e) => {
      const raw = e.currentTarget.value;
      setTempSearchValues((prev) => {
        return {
          ...prev,
          // "".split(",") yields [""], which would put a blank word in the
          // list, so a cleared field maps back to an empty list. Entries are
          // not trimmed or filtered here because the field re-renders from
          // the joined list; dropping empty segments would erase a trailing
          // comma while the user is still typing.
          disableOnWords: raw === "" ? [] : raw.split(","),
        };
      });
    }}
  />
</div>
<div class="flex items-center justify-between space-x-2 p-1">
<label>Highlight Results (Latency Penalty):</label>
<input
Expand Down
Loading

0 comments on commit ed22f21

Please sign in to comment.