From 6e2e2e881e89fabfb1c5f6ac16fae11ed80774a2 Mon Sep 17 00:00:00 2001 From: trean Date: Thu, 8 Aug 2024 23:13:11 +0200 Subject: [PATCH] Index accuracy check (#199) * added vectors view to the collection info tab * format * getting accuracy * fixes and tests * separate tab for accuracy checks with logging * fix test * advanced mod switch * refactor named vectors, improve logging, rename to precision * add files * fmt * fixes * fix table and progress bar * make advanced mode work * allow to set fixed heigh to editor * add SearchQualityPannel * format * rename files --------- Co-authored-by: generall --- .../SearchQuality/SearchQuality.jsx | 97 ++++++ .../SearchQuality/SearchQualityPannel.jsx | 322 ++++++++++++++++++ .../SearchQuality/check-index-precision.js | 79 +++++ src/components/EditorCommon/index.jsx | 8 +- src/components/FilterEditorWindow/index.jsx | 4 +- src/components/Points/DataGridList.jsx | 2 +- src/pages/Collection.jsx | 3 + 7 files changed, 511 insertions(+), 4 deletions(-) create mode 100644 src/components/Collections/SearchQuality/SearchQuality.jsx create mode 100644 src/components/Collections/SearchQuality/SearchQualityPannel.jsx create mode 100644 src/components/Collections/SearchQuality/check-index-precision.js diff --git a/src/components/Collections/SearchQuality/SearchQuality.jsx b/src/components/Collections/SearchQuality/SearchQuality.jsx new file mode 100644 index 00000000..454c67b0 --- /dev/null +++ b/src/components/Collections/SearchQuality/SearchQuality.jsx @@ -0,0 +1,97 @@ +import React, { useEffect } from 'react'; +import PropTypes from 'prop-types'; +import { getSnackbarOptions } from '../../Common/utils/snackbarOptions'; +import { useClient } from '../../../context/client-context'; +import SearchQualityPannel from './SearchQualityPannel'; +import { useSnackbar } from 'notistack'; +import { Box, Card, CardHeader } from '@mui/material'; +import { CopyButton } from '../../Common/CopyButton'; +import { bigIntJSON } from '../../../common/bigIntJSON'; +import EditorCommon from '../../EditorCommon'; +import _ from 'lodash'; + +const SearchQuality = ({ collectionName }) => { + const { enqueueSnackbar, closeSnackbar } = useSnackbar(); + const { client } = useClient(); + const [collection, setCollection] = React.useState(null); + const [log, setLog] = React.useState(''); + + const handleLogUpdate = (newLog) => { + const date = new Date().toLocaleString(); + newLog = `[${date}] ${newLog}`; + setLog((prevLog) => { + return newLog + '\n' + prevLog; + }); + }; + + const clearLogs = () => { + setLog(''); + }; + + useEffect(() => { + client + .getCollection(collectionName) + .then((res) => { + setCollection(() => { + return { ...res }; + }); + }) + .catch((err) => { + enqueueSnackbar(err.message, getSnackbarOptions('error', closeSnackbar)); + }); + }, []); + + // Check that collection.config.params.vectors?.size exists and integer + const isNamedVectors = collection?.config?.params.vectors?.size && !_.isObject(collection?.config?.params?.vectors); + let vectors = {}; + if (collection) { + vectors = isNamedVectors ? collection?.config?.params?.vectors : { '': collection?.config?.params?.vectors }; + } + + return ( + <> + {collection?.config?.params?.vectors && ( + + )} + + + } + /> + + + + + + ); +}; + +SearchQuality.propTypes = { + collectionName: PropTypes.string, +}; + +export default SearchQuality; diff --git a/src/components/Collections/SearchQuality/SearchQualityPannel.jsx b/src/components/Collections/SearchQuality/SearchQualityPannel.jsx new file mode 100644 index 00000000..32698e95 --- /dev/null +++ b/src/components/Collections/SearchQuality/SearchQualityPannel.jsx @@ -0,0 +1,322 @@ +import React, { useState } from 'react'; +import PropTypes from 'prop-types'; +import { + Card, + CardHeader, + Table, + TableBody, + TableCell, + TableHead, + TableRow, + Tooltip, + IconButton, + FormControlLabel, + Switch, + CardContent, + LinearProgress, +} from '@mui/material'; +import { CopyButton } from '../../Common/CopyButton'; +import { bigIntJSON } from '../../../common/bigIntJSON'; +import Typography from '@mui/material/Typography'; +import { PublishedWithChanges } from '@mui/icons-material'; +import { checkIndexPrecision } from './check-index-precision'; +import { useClient } from '../../../context/client-context'; +import CodeEditorWindow from '../../FilterEditorWindow'; + +const VectorTableRow = ({ vectorObj, name, onCheckIndexQuality, precision, isInProgress }) => { + return ( + + + + {name == '' ? '—' : name} + + + + + {vectorObj.size} + + + + + {vectorObj.distance} + + + + {isInProgress && } + {!isInProgress && ( + <> + + {precision ? `${precision * 100}%` : '—'} + + + + + + + + )} + + + ); +}; + +VectorTableRow.propTypes = { + vectorObj: PropTypes.object, + name: PropTypes.string, + onCheckIndexQuality: PropTypes.func, + precision: PropTypes.number, + isInProgress: PropTypes.bool, +}; + +const SearchQualityPannel = ({ collectionName, vectors, loggingFoo, clearLogsFoo, ...other }) => { + const { client } = useClient(); + const vectorsNames = Object.keys(vectors); + const [precision, setPrecision] = useState(() => { + if (vectorsNames) { + return vectorsNames.reduce((precision, name) => { + precision[name] = null; + return precision; + }, {}); + } + return null; + }); + + const [advancedMod, setAdvancedMod] = useState(false); + const [inProgress, setInProgress] = useState(false); + + const [code, setCode] = useState(` +// Run this code to estimate search quality versus exact search +{ + "limit": 10, + + "params": { + "hnsw_ef": 128 + } +} + +// You can specify filters and different vector fields +// { +// "limit": 100, +// "using": "vector_name", +// "filter": { +// "must": { +// "key": "field_name", +// "match": { +// "value": "field_value" +// } +// } +// } +// } + + `); + + const queryRequestSchema = (vectorNames) => ({ + description: 'Filter request', + type: 'object', + properties: { + limit: { + description: 'Page size. Default: 10', + type: 'integer', + format: 'uint', + minimum: 1, + nullable: true, + }, + filter: { + description: 'Look only for points which satisfies this conditions. If not provided - all points.', + anyOf: [ + { + $ref: '#/components/schemas/Filter', + }, + { + nullable: true, + }, + ], + }, + using: { + description: 'Vector field name', + type: 'string', + enum: vectorNames, + }, + params: { + description: 'Additional search params', + anyOf: [ + { + $ref: '#/components/schemas/SearchParams', + }, + { + nullable: true, + }, + ], + }, + }, + }); + + if (!vectors) { + return <>No vectors; + } + + const onCheckIndexQuality = async ({ using = '', limit = 10, params = null, filter = null }) => { + setInProgress(true); + + clearLogsFoo && clearLogsFoo(); + const precisions = []; + try { + const scrollResult = await client.scroll(collectionName, { + with_payload: false, + with_vector: false, + limit: 100, + }); + + // todo: if exceeded timeout + + const pointIds = scrollResult.points.map((point) => point.id); + const total = pointIds.length; + + loggingFoo && loggingFoo('Starting measuring quality on ' + total + ' requests for ' + using || '---'); + + for (let idx = 0; idx < total; idx++) { + const pointId = pointIds[idx]; + const precision = await checkIndexPrecision( + client, + collectionName, + pointId, + loggingFoo, + idx, + total, + filter, + params, + using, + limit + ); + if (precision) { + precisions.push(precision); + } + } + + // Round to 2 decimal places + const round = (num) => Math.round((num + Number.EPSILON) * 10000) / 10000; + + const avgPrecision = round(precisions.reduce((x, val) => x + val, 0) / precisions.length); + const stdDev = round( + Math.sqrt(precisions.reduce((x, val) => x + (val - avgPrecision) ** 2, 0) / precisions.length) + ); + + loggingFoo('Mean precision@' + limit + ' for collection: ' + avgPrecision + ' ± ' + stdDev); + + setPrecision((prev) => { + return { + ...prev, + [using]: avgPrecision, + }; + }); + + setInProgress(false); + } catch (e) { + setInProgress(false); + console.error(e); + loggingFoo && loggingFoo(JSON.stringify(e)); + } + }; + + const handleRunCode = async (qulityCheckParams) => { + onCheckIndexQuality(qulityCheckParams); + }; + + return ( + + + Search Quality + setAdvancedMod(!advancedMod)} size="small" />} + label={ + + Advanced Mod + + } + /> + + } + variant="heading" + sx={{ + flexGrow: 1, + }} + action={ + <> + + + } + /> + {!advancedMod && ( + + + + + + Vector Name + + + + + Size + + + + + Distance + + + + + Precision + + + + + + + {Object.keys(vectors).map((vectorName) => ( + onCheckIndexQuality({ using: vectorName })} + precision={precision ? precision[vectorName] : null} + key={vectorName} + isInProgress={inProgress} + /> + ))} + +
+ )} + + {advancedMod && ( + + + + )} +
+ ); +}; + +SearchQualityPannel.propTypes = { + collectionName: PropTypes.string, + vectors: PropTypes.object.isRequired, + loggingFoo: PropTypes.func, + clearLogsFoo: PropTypes.func, + other: PropTypes.object, +}; + +export default SearchQualityPannel; diff --git a/src/components/Collections/SearchQuality/check-index-precision.js b/src/components/Collections/SearchQuality/check-index-precision.js new file mode 100644 index 00000000..85a6ad5d --- /dev/null +++ b/src/components/Collections/SearchQuality/check-index-precision.js @@ -0,0 +1,79 @@ +export const checkIndexPrecision = async ( + client, + collectionName, + pointId, + logFoo, + idx, + total, + filter = null, + params = null, + vectorName = null, + limit = 10 +) => { + const TIMEOUT = 20; + + try { + const exactSearchtartTime = new Date().getTime(); + + const exact = await client.query(collectionName, { + limit: limit, + with_payload: false, + with_vectors: false, + query: pointId, + params: { + exact: true, + }, + filter: filter, + using: vectorName, + timeout: TIMEOUT, + }); + + const exactSearchElapsed = new Date().getTime() - exactSearchtartTime; + + const searchStartTime = new Date().getTime(); + + const hnsw = await client.query(collectionName, { + timeout: TIMEOUT, + limit: limit, + with_payload: false, + with_vectors: false, + query: pointId, + params: params, + filter: filter, + using: vectorName, + }); + + const searchElapsed = new Date().getTime() - searchStartTime; + + const exactIds = exact.points.map((item) => item.id); + const hnswIds = hnsw.points.map((item) => item.id); + + const precision = exactIds.filter((id) => hnswIds.includes(id)).length / exactIds.length; + + logFoo && + logFoo( + 'Point ID ' + + idx + + '(' + + idx + + '/' + + total + + ') precision@' + + limit + + ': ' + + precision + + ' (search time exact: ' + + exactSearchElapsed + + 'ms, regular: ' + + searchElapsed + + 'ms)' + ); + + return precision; + } catch (e) { + console.error('Error: ', e); + console.error('Skipping point: ', idx); + // todo: throw error + return null; + } +}; diff --git a/src/components/EditorCommon/index.jsx b/src/components/EditorCommon/index.jsx index 096fdb5c..1721cf61 100644 --- a/src/components/EditorCommon/index.jsx +++ b/src/components/EditorCommon/index.jsx @@ -22,12 +22,12 @@ window.MonacoEnvironment = { loader.config({ monaco }); -const EditorCommon = ({ beforeMount, ...props }) => { +const EditorCommon = ({ beforeMount, customHeight, ...props }) => { const monacoRef = useRef(null); const editorWrapper = useRef(null); const theme = useTheme(); const { height } = useWindowResize(); - const [editorHeight, setEditorHeight] = useState(0); + const [editorHeight, setEditorHeight] = useState(customHeight || 0); function handleEditorWillMount(monaco) { monacoRef.current = monaco; @@ -52,6 +52,9 @@ const EditorCommon = ({ beforeMount, ...props }) => { }, [theme]); useEffect(() => { + if (customHeight) { + return; + } setEditorHeight(height - editorWrapper.current?.offsetTop); }, [height, editorWrapper]); @@ -70,6 +73,7 @@ const EditorCommon = ({ beforeMount, ...props }) => { EditorCommon.propTypes = { height: PropTypes.string, beforeMount: PropTypes.func, + customHeight: PropTypes.number, ...Editor.propTypes, }; diff --git a/src/components/FilterEditorWindow/index.jsx b/src/components/FilterEditorWindow/index.jsx index 0250fe4f..062a8afc 100644 --- a/src/components/FilterEditorWindow/index.jsx +++ b/src/components/FilterEditorWindow/index.jsx @@ -10,7 +10,7 @@ import { codeParse } from './config/RequestFromCode'; import './editor.css'; import EditorCommon from '../EditorCommon'; -const CodeEditorWindow = ({ onChange, code, onChangeResult, customRequestSchema }) => { +const CodeEditorWindow = ({ onChange, code, onChangeResult, customRequestSchema, customHeight = null }) => { const { enqueueSnackbar } = useSnackbar(); const editorRef = useRef(null); const lensesRef = useRef(null); @@ -97,6 +97,7 @@ const CodeEditorWindow = ({ onChange, code, onChangeResult, customRequestSchema return ( )} diff --git a/src/pages/Collection.jsx b/src/pages/Collection.jsx index c379b394..70edaa44 100644 --- a/src/pages/Collection.jsx +++ b/src/pages/Collection.jsx @@ -6,6 +6,7 @@ import Box from '@mui/material/Box'; import { SnapshotsTab } from '../components/Snapshots/SnapshotsTab'; import CollectionInfo from '../components/Collections/CollectionInfo'; import PointsTabs from '../components/Points/PointsTabs'; +import SearchQuality from '../components/Collections/SearchQuality/SearchQuality'; function Collection() { const { collectionName } = useParams(); @@ -33,6 +34,7 @@ function Collection() { + @@ -42,6 +44,7 @@ function Collection() { {currentTab === 'info' && } + {currentTab === 'quality' && } {currentTab === 'points' && } {currentTab === 'snapshots' && }