diff --git a/external/@worldbrain/memex-common b/external/@worldbrain/memex-common index 13c5ed4423..141f5e8ed9 160000 --- a/external/@worldbrain/memex-common +++ b/external/@worldbrain/memex-common @@ -1 +1 @@ -Subproject commit 13c5ed4423d014f1ade4c816f3e17c4fc4d55616 +Subproject commit 141f5e8ed940e9f0f55999b47fbc2b25561d9429 diff --git a/package.json b/package.json index 8e485e6eb1..5a5ad8257a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "worldbrain-extension", - "version": "3.19.22", + "version": "3.19.23", "homepage": "https://memex.garden", "repository": "https://github.com/WorldBrain/Memex", "scripts": { diff --git a/src/background-script/quick-and-dirty-migrations/tags-migration.test.data.ts b/src/background-script/quick-and-dirty-migrations/tags-migration.test.data.ts deleted file mode 100644 index fedb55efa6..0000000000 --- a/src/background-script/quick-and-dirty-migrations/tags-migration.test.data.ts +++ /dev/null @@ -1,40 +0,0 @@ -export const TAG_NAME_PREFIX = 'test tag ' -export const PAGE_URL_PREFIX = 'test.com/' -export const PDF_PAGE_URL_PREFIX = 'memex.cloud/ct/' - -export const createTestTagRecords = (args: { - numOfTags: number - pagesPerTag: number - annotsPerPage: number -}): Array<{ name: string; url: string }> => { - const tagNames = [...Array(args.numOfTags).keys()].map( - (i) => TAG_NAME_PREFIX + i, - ) - - return [...Array(args.pagesPerTag).keys()].flatMap((pageNumber) => { - const normalizedPageUrl = - pageNumber % 2 === 0 - ? PAGE_URL_PREFIX + pageNumber - : PDF_PAGE_URL_PREFIX + pageNumber + '.pdf' - return [ - ...tagNames.map((tagName) => ({ - name: tagName, - url: normalizedPageUrl, - })), - ...createAnnotsForPage( - normalizedPageUrl, - args.annotsPerPage, - ).flatMap((annotUrl) => - tagNames.map((tagName) => ({ - name: tagName, - url: annotUrl, - })), - ), - ] - }) -} - -const createAnnotsForPage = (normalizedPageUrl: string, numOfAnnots: number) => - [...Array(numOfAnnots).keys()].map( - (i) => `${normalizedPageUrl}/#${Date.now() + i}`, - ) diff --git a/src/background-script/quick-and-dirty-migrations/tags-migration.test.ts b/src/background-script/quick-and-dirty-migrations/tags-migration.test.ts deleted file mode 100644 index b88924ac33..0000000000 --- a/src/background-script/quick-and-dirty-migrations/tags-migration.test.ts +++ /dev/null @@ -1,277 +0,0 @@ -import type Dexie from 'dexie' -import type { DexieStorageBackend } from '@worldbrain/storex-backend-dexie' -import { isUrlForAnnotation } from '@worldbrain/memex-common/lib/annotations/utils' -import { setupBackgroundIntegrationTest } from 'src/tests/background-integration-tests' -import { migrateTagsToSpaces } from './tags-migration' -import * as DATA from './tags-migration.test.data' - -async function setupTest() { - const setup = await setupBackgroundIntegrationTest() - - return { - dexie: (setup.storageManager.backend as DexieStorageBackend) - .dexieInstance, - ...setup, - } -} - -async function testMigration( - dexie: Dexie, - tagsDataSet: Array<{ url: string; name: string }>, - chunkSize = 50, -) { - await dexie.table('tags').bulkAdd(tagsDataSet) - const queuedData = new Map() - - expect(await dexie.table('customLists').toArray()).toEqual([]) - expect(await dexie.table('pageListEntries').toArray()).toEqual([]) - expect(await dexie.table('annotListEntries').toArray()).toEqual([]) - expect(queuedData.get('customLists')).toBeUndefined() - expect(queuedData.get('pageListEntries')).toBeUndefined() - expect(queuedData.get('annotListEntries')).toBeUndefined() - - await 
migrateTagsToSpaces({ - dexie, - chunkSize, - queueChangesForCloudSync: async (data) => { - const prev = queuedData.get(data.collection) ?? [] - queuedData.set(data.collection, [...prev, ...data.objs]) - }, - }) - const tagNames = new Set(tagsDataSet.map((tag) => tag.name)) - - const listsData = await dexie.table('customLists').toArray() - const expectedListsData = [...tagNames].map((tagName) => ({ - id: expect.any(Number), - name: tagName, - isNestable: true, - isDeletable: true, - searchableName: tagName, - createdAt: expect.any(Date), - nameTerms: expect.any(Array), - })) - expect(listsData).toEqual(expect.arrayContaining(expectedListsData)) - expect(queuedData.get('customLists')).toEqual( - expect.arrayContaining(expectedListsData), - ) - - const tagNamesToListId = new Map() - listsData.forEach((list) => tagNamesToListId.set(list.name, list.id)) - - const expectedPageListEntryData = tagsDataSet - .filter((entry) => !isUrlForAnnotation(entry.url)) - .map((entry) => ({ - pageUrl: entry.url, - fullUrl: 'https://' + entry.url, - listId: tagNamesToListId.get(entry.name), - createdAt: expect.any(Date), - })) - const expectedAnnotListEntryData = tagsDataSet - .filter((entry) => isUrlForAnnotation(entry.url)) - .map((entry) => ({ - url: entry.url, - listId: tagNamesToListId.get(entry.name), - createdAt: expect.any(Date), - })) - - expect(await dexie.table('pageListEntries').toArray()).toEqual( - expect.arrayContaining(expectedPageListEntryData), - ) - expect(await dexie.table('annotListEntries').toArray()).toEqual( - expect.arrayContaining(expectedAnnotListEntryData), - ) - expect(queuedData.get('pageListEntries')).toEqual( - expect.arrayContaining(expectedPageListEntryData), - ) - expect(queuedData.get('annotListEntries')).toEqual( - expect.arrayContaining(expectedAnnotListEntryData), - ) -} - -describe('tags to spaces data migration', () => { - it('should create "customLists" records for all distinct tag names + "pageListEntries" and "annotListEntries" records for all tag records', async () => { - const { dexie: dexieA } = await setupTest() - await testMigration( - dexieA, - DATA.createTestTagRecords({ - numOfTags: 5, - pagesPerTag: 5, - annotsPerPage: 5, - }), - ) - - const { dexie: dexieB } = await setupTest() - await testMigration( - dexieB, - DATA.createTestTagRecords({ - numOfTags: 50, - pagesPerTag: 1, - annotsPerPage: 5, - }), - ) - }) - - it('should still work with a large number of chunks', async () => { - const { dexie: dexieA } = await setupTest() - await testMigration( - dexieA, - DATA.createTestTagRecords({ - numOfTags: 5, - pagesPerTag: 5, - annotsPerPage: 5, - }), - 5, - ) - }) - - it('should still work with duplicate space entries (not throw Dexie.BulkError on Dexie.bulkAdd)', async () => { - const { dexie: dexieA } = await setupTest() - const tagsDataSet = DATA.createTestTagRecords({ - numOfTags: 5, - pagesPerTag: 5, - annotsPerPage: 5, - }) - await dexieA.table('tags').bulkAdd(tagsDataSet) - - await dexieA.table('customLists').add({ - id: 111, - name: tagsDataSet[0].name, - createdAt: new Date(), - searchableName: tagsDataSet[0].name, - nameTerms: [], - isDeletable: true, - isNestable: true, - }) - - // This entry will be duplicated via the migration logic - await dexieA.table('pageListEntries').add({ - listId: 111, - pageUrl: tagsDataSet[0].url, - fullUrl: 'https://' + tagsDataSet[0].url, - createdAt: new Date(), - }) - - await migrateTagsToSpaces({ - dexie: dexieA, - chunkSize: 50, - queueChangesForCloudSync: async (data) => {}, - }) - }) - - it('should still work without 
pages and without annotations', async () => { - const { dexie: dexieA } = await setupTest() - await testMigration( - dexieA, - DATA.createTestTagRecords({ - numOfTags: 10, - pagesPerTag: 5, - annotsPerPage: 0, - }), - ) - const { dexie: dexieB } = await setupTest() - await testMigration( - dexieB, - DATA.createTestTagRecords({ - numOfTags: 100, - pagesPerTag: 0, - annotsPerPage: 0, - }), - ) - }) - - it('should add entries to existing lists, if tag names overlap', async () => { - const { dexie } = await setupTest() - - const tagsDataSet = DATA.createTestTagRecords({ - numOfTags: 5, - pagesPerTag: 5, - annotsPerPage: 5, - }) - - const existingListNames = [tagsDataSet[0].name, tagsDataSet[4].name] - - await dexie.table('tags').bulkAdd(tagsDataSet) - const queuedData = new Map() - - await dexie.table('customLists').bulkAdd( - existingListNames.map((name, i) => ({ - id: i, - createdAt: new Date(i), - name, - searchableName: name, - nameTerms: [], - isDeletable: true, - isNestable: true, - })), - ) - - await migrateTagsToSpaces({ - dexie, - chunkSize: 50, - queueChangesForCloudSync: async (data) => { - const prev = queuedData.get(data.collection) ?? [] - queuedData.set(data.collection, [...prev, ...data.objs]) - }, - }) - const tagNames = new Set(tagsDataSet.map((tag) => tag.name)) - - expect( - await dexie - .table('customLists') - .where('name') - .anyOf(existingListNames) - .primaryKeys(), - ).toEqual([0, 1]) - - const listsData = await dexie.table('customLists').toArray() - const expectedListsData = [...tagNames].map((tagName) => ({ - id: expect.any(Number), - name: tagName, - isNestable: true, - isDeletable: true, - searchableName: tagName, - createdAt: expect.any(Date), - nameTerms: expect.any(Array), - })) - expect(listsData).toEqual(expect.arrayContaining(expectedListsData)) - expect(queuedData.get('customLists')).toEqual( - expect.arrayContaining( - expectedListsData.filter( - (list) => !existingListNames.includes(list.name), - ), - ), - ) - - const tagNamesToListId = new Map() - listsData.forEach((list) => tagNamesToListId.set(list.name, list.id)) - - const expectedPageListEntryData = tagsDataSet - .filter((entry) => !isUrlForAnnotation(entry.url)) - .map((entry) => ({ - pageUrl: entry.url, - fullUrl: 'https://' + entry.url, - listId: tagNamesToListId.get(entry.name), - createdAt: expect.any(Date), - })) - const expectedAnnotListEntryData = tagsDataSet - .filter((entry) => isUrlForAnnotation(entry.url)) - .map((entry) => ({ - url: entry.url, - listId: tagNamesToListId.get(entry.name), - createdAt: expect.any(Date), - })) - - expect(await dexie.table('pageListEntries').toArray()).toEqual( - expect.arrayContaining(expectedPageListEntryData), - ) - expect(await dexie.table('annotListEntries').toArray()).toEqual( - expect.arrayContaining(expectedAnnotListEntryData), - ) - expect(queuedData.get('pageListEntries')).toEqual( - expect.arrayContaining(expectedPageListEntryData), - ) - expect(queuedData.get('annotListEntries')).toEqual( - expect.arrayContaining(expectedAnnotListEntryData), - ) - }) -}) diff --git a/src/dashboard-refactor/util.ts b/src/dashboard-refactor/util.ts index a7ba775364..9c44fdc3b1 100644 --- a/src/dashboard-refactor/util.ts +++ b/src/dashboard-refactor/util.ts @@ -9,7 +9,7 @@ import { initNormalizedState, NormalizedState, } from '@worldbrain/memex-common/lib/common-ui/utils/normalized-state' -import { eventProviderDomains } from '@worldbrain/memex-common/lib/constants' +import { EVENT_PROVIDER_DOMAINS } from '@worldbrain/memex-common/lib/constants' import type { 
PageAnnotationsCacheInterface, UnifiedList, @@ -110,12 +110,17 @@ export const stateToSearchParams = ( filterByListIds.push(listData.localId!) } + const isBlankSearch = !searchFilters.searchQuery.trim().length + // Blank search doesn't use standard skip+limit pagination. Instead it requires the caller to keep // track of the oldest timestamp and supply that as the new upper-bound on each reinvocation - const blankSearchUpperBound = !searchFilters.searchQuery.trim().length + const blankSearchUpperBound = isBlankSearch ? searchResults.blankSearchOldestResultTimestamp : null + // Use a larger page size for blank searches to reduce the chance of duped results, due to annots/visits/bookmarks appearing in different result pages + const limit = isBlankSearch ? searchFilters.limit * 3 : searchFilters.limit + // TODO: Dynamically set these flags based on state const termsSearchOpts: TermsSearchOpts = { matchNotes: true, @@ -126,8 +131,8 @@ export const stateToSearchParams = ( } return { + limit, skip: searchFilters.skip, - limit: searchFilters.limit, query: searchFilters.searchQuery, filterByDomains: searchFilters.domainsIncluded, filterByListIds, diff --git a/src/search/background/index.ts b/src/search/background/index.ts index 06e2fd89aa..dc7bc4c1d9 100644 --- a/src/search/background/index.ts +++ b/src/search/background/index.ts @@ -8,9 +8,10 @@ import type { RemoteSearchInterface, AnnotPage, UnifiedSearchParams, - UnifiedBlankSearchResult, UnifiedBlankSearchParams, UnifiedTermsSearchParams, + ResultDataByPage, + IntermediarySearchResult, UnifiedSearchPaginationParams, } from './types' import { SearchError, BadTermError } from './errors' @@ -36,22 +37,30 @@ import type { FavIcon, } from '@worldbrain/memex-common/lib/types/core-data-types/client' import { - sortUnifiedBlankSearchResult, + sortSearchResult, queryAnnotationsByTerms, queryPagesByTerms, splitQueryIntoTerms, + sliceSearchResult, + needToFilterSearchByUrl, } from './utils' import { AnnotationPrivacyLevels } from '@worldbrain/memex-common/lib/annotations/types' import { SPECIAL_LIST_IDS } from '@worldbrain/memex-common/lib/storage/modules/lists/constants' -import { isUrlMemexSupportedVideo } from '@worldbrain/memex-common/lib/utils/youtube-url' +import { + VIDEO_PROVIDER_URLS, + YOUTUBE_URLS, + isUrlMemexSupportedVideo, +} from '@worldbrain/memex-common/lib/utils/youtube-url' import { isUrlATweet } from '@worldbrain/memex-common/lib/twitter-integration/utils' import { isUrlAnEventPage } from '@worldbrain/memex-common/lib/unified-search/utils' import type Dexie from 'dexie' import { blobToDataURL } from 'src/util/blob-utils' +import { intersectSets } from 'src/util/map-set-helpers' +import { PDF_PAGE_URL_PREFIX } from '@worldbrain/memex-common/lib/page-indexing/constants' +import { EVENT_PROVIDER_URLS } from '@worldbrain/memex-common/lib/constants' +import { TWITTER_URLS } from '@worldbrain/memex-common/lib/twitter-integration/constants' import { normalizeUrl } from '@worldbrain/memex-common/lib/url-utils/normalize' -const dayMs = 1000 * 60 * 60 * 24 - type UnifiedSearchLookupData = { pages: Map> annotations: Map @@ -118,29 +127,29 @@ export default class SearchBackground { ): Promise { const defaultTimestamp = edge === 'bottom' ? 
Date.now() : 0 - const storex = this.options.storageManager + const dexie = this.options.storageManager.backend['dexie'] as Dexie + const orderedVisits = dexie.table('visits').orderBy('time') + const orderedBookmarks = dexie + .table('bookmarks') + .orderBy('time') + const orderedAnnotations = dexie + .table('annotations') + .orderBy('lastEdited') // Real lower/upper bound for blank search would be the time of the oldest/latest bookmark or visit - const edgeTimestampDocs = await Promise.all([ - storex.collection('visits').findObject( - {}, - { - order: [['time', edge === 'bottom' ? 'asc' : 'desc']], - }, - ), - storex.collection('bookmarks').findObject( - {}, - { - order: [['time', edge === 'bottom' ? 'asc' : 'desc']], - }, - ), - storex.collection('annotations').findObject( - {}, - { - order: [['lastEdited', edge === 'bottom' ? 'asc' : 'desc']], - }, - ), - ]) + const edgeTimestampDocs = await Promise.all( + edge === 'bottom' + ? [ + orderedVisits.first(), + orderedBookmarks.first(), + orderedAnnotations.first(), + ] + : [ + orderedVisits.last(), + orderedBookmarks.last(), + orderedAnnotations.last(), + ], + ) const timestamps = edgeTimestampDocs .filter(Boolean) @@ -154,26 +163,153 @@ export default class SearchBackground { : defaultTimestamp } - private sliceUnifiedSearchResults( - resultDataByPage: any[], - { limit, skip }: UnifiedSearchPaginationParams, - ): UnifiedBlankSearchResult['resultDataByPage'] { - // NOTE: Current implementation ignores annotation count, and simply paginates by the number of pages in the results - return new Map(resultDataByPage.slice(skip, skip + limit)) + private async calcBlankSearchTimeBoundEdges( + params: UnifiedSearchParams, + ): Promise<[number, number]> { + let latestTimes: number[] = [] + let oldestTimes: number[] = [] + + if ( + !needToFilterSearchByUrl(params) && + !params.filterByListIds.length + ) { + return [ + await this.calcSearchTimeBoundEdge('bottom'), + await this.calcSearchTimeBoundEdge('top'), + ] + } + + const dexie = this.options.storageManager.backend['dexie'] as Dexie + + if (params.filterByListIds) { + let [pageListEntries, annotListEntries] = await Promise.all([ + dexie + .table('pageListEntries') + .where('listId') + .anyOf(params.filterByListIds) + .toArray(), + dexie + .table('annotListEntries') + .where('listId') + .anyOf(params.filterByListIds) + .toArray(), + ]) + + // Get intersection of entries on lists (enforces "AND" nature of search) + let pageIdsToKeep = new Set() + let annotIdsToKeep = new Set() + let pageListEntriesByPage = groupBy( + pageListEntries, + (e) => e.pageUrl, + ) + let annotListEntriesByAnnot = groupBy( + annotListEntries, + (e) => e.url, + ) + for (const [pageId, entries] of Object.entries( + pageListEntriesByPage, + )) { + if (entries.length === params.filterByListIds.length) { + pageIdsToKeep.add(pageId) + } + } + for (const [annotId, entries] of Object.entries( + annotListEntriesByAnnot, + )) { + if (entries.length === params.filterByListIds.length) { + annotIdsToKeep.add(annotId) + } + } + + pageListEntries = pageListEntries.filter((e) => + pageIdsToKeep.has(e.pageUrl), + ) + annotListEntries = annotListEntries.filter((e) => + annotIdsToKeep.has(e.url), + ) + + // Now keep track of oldest/latest entries + const ascendingTimes = pageListEntries + .map((e) => e.createdAt.valueOf()) + .concat(annotListEntries.map((e) => e.createdAt.valueOf())) + .sort((a, b) => a - b) + if (ascendingTimes.length === 1) { + latestTimes.push(ascendingTimes[0]) + } else if (ascendingTimes.length > 1) { + 
oldestTimes.push(ascendingTimes[0]) + latestTimes.push(ascendingTimes[ascendingTimes.length - 1]) + } + } + + if (params.omitPagesWithoutAnnotations) { + const annots = dexie.table('annotations') + const latestAnnot = await annots.orderBy('lastEdited').last() + const oldestAnnot = await annots.orderBy('lastEdited').first() + if (latestAnnot) { + latestTimes.push(latestAnnot.lastEdited.valueOf()) + } + if (oldestAnnot) { + oldestTimes.push(oldestAnnot.lastEdited.valueOf()) + } + } + + // Lots of the filters are essentially just domains filters, so collect the relevant domains and do it all in one go + let domainsToQuery = [] + if (params.filterByDomains.length) { + domainsToQuery.push(...params.filterByDomains) + } + if (params.filterByPDFs) { + domainsToQuery.push(PDF_PAGE_URL_PREFIX) + } + if (params.filterByEvents) { + domainsToQuery.push(...EVENT_PROVIDER_URLS) + } + if (params.filterByVideos) { + domainsToQuery.push(...VIDEO_PROVIDER_URLS.concat(YOUTUBE_URLS)) + } + if (params.filterByTweets) { + domainsToQuery.push(...TWITTER_URLS) + } + if (domainsToQuery.length) { + const getDocsMatchingDomains = (table: Dexie.Table) => + table.where('url').startsWithAnyOf(domainsToQuery).toArray() + + const [visits, bookmarks, annotations] = await Promise.all([ + getDocsMatchingDomains(dexie.table('visits')), + getDocsMatchingDomains(dexie.table('bookmarks')), + getDocsMatchingDomains(dexie.table('annotations')), + ]) + const ascendingTimes = visits + .map((v) => v.time) + .concat(bookmarks.map((b) => b.time)) + .concat(annotations.map((a) => a.lastEdited.valueOf())) + .sort((a, b) => a - b) + + if (ascendingTimes.length === 1) { + latestTimes.push(ascendingTimes[0]) + } else if (ascendingTimes.length > 1) { + oldestTimes.push(ascendingTimes[0]) + latestTimes.push(ascendingTimes[ascendingTimes.length - 1]) + } + } + + let maxOfOldestTimes = Math.max(...oldestTimes) + let minOfLatestTimes = Math.min(...latestTimes) + // Handle edge cases where time arrays may be empty by using default extremes + maxOfOldestTimes = + Math.abs(maxOfOldestTimes) === Infinity ? 0 : maxOfOldestTimes + minOfLatestTimes = + Math.abs(minOfLatestTimes) === Infinity + ? 
Date.now() + : minOfLatestTimes + return [maxOfOldestTimes, minOfLatestTimes] } private async filterUnifiedSearchResultsByFilters( - resultDataByPage: UnifiedBlankSearchResult['resultDataByPage'], + resultDataByPage: ResultDataByPage, params: UnifiedSearchParams, ): Promise { - const needToFilterByUrl = - params.filterByDomains.length || - params.filterByPDFs || - params.filterByVideos || - params.filterByTweets || - params.filterByEvents || - params.omitPagesWithoutAnnotations - + const needToFilterByUrl = needToFilterSearchByUrl(params) if ( !needToFilterByUrl && !resultDataByPage.size && @@ -302,74 +438,218 @@ export default class SearchBackground { pageIdsToFilterOut.forEach((id) => resultDataByPage.delete(id)) } + private async unifiedBlankListsSearch( + params: UnifiedSearchParams, + ): Promise { + if (!params.filterByListIds.length) { + throw new Error( + 'Lists search was called but no lists were filtered', + ) + } + const resultDataByPage: ResultDataByPage = new Map() + + const dexie = this.options.storageManager.backend['dexie'] as Dexie + const [pageListEntries, annotListEntries] = await Promise.all([ + dexie + .table('pageListEntries') + .where('listId') + .anyOf(params.filterByListIds) + .toArray(), + dexie + .table('annotListEntries') + .where('listId') + .anyOf(params.filterByListIds) + .toArray(), + ]) + + const latestListEntryTimeByPage = fromPairs( + pageListEntries + .sort((a, b) => a.createdAt.valueOf() - b.createdAt.valueOf()) + .map((e) => [e.pageUrl, e.createdAt.valueOf() as number]), + ) + + // Get intersections of page + annot IDs for all lists - effectively "AND"s the lists + const validPageIds = params.filterByListIds + .map( + (listId) => + new Set( + pageListEntries + .filter((e) => e.listId === listId) + .map((e) => e.pageUrl), + ), + ) + .reduce((a, b) => intersectSets(a)(b)) + const validAnnotIds = params.filterByListIds + .map( + (listId) => + new Set( + annotListEntries + .filter((e) => e.listId === listId) + .map((e) => e.url), + ), + ) + .reduce((a, b) => intersectSets(a)(b)) + + // Get auto-shared annotations + let autoSharedAnnots = await dexie + .table('annotations') + .where('pageUrl') + .anyOf([...validPageIds]) + .toArray() + let annotPrivacyLevels = await dexie + .table('annotationPrivacyLevels') + .where('annotation') + .anyOf(autoSharedAnnots.map((a) => a.url)) + .toArray() + let autoSharedAnnotIds = new Set( + annotPrivacyLevels + .filter((l) => + [ + AnnotationPrivacyLevels.SHARED, + AnnotationPrivacyLevels.SHARED_PROTECTED, + ].includes(l.privacyLevel), + ) + .map((l) => l.annotation), + ) + autoSharedAnnots = autoSharedAnnots.filter((a) => + autoSharedAnnotIds.has(a.url), + ) + + // Get selectively-shared annotations + const selectivelySharedAnnots = await dexie + .table('annotations') + .bulkGet([...validAnnotIds]) + + // Add in all the annotations to the results + const annotsByPage = groupBy( + [...autoSharedAnnots, ...selectivelySharedAnnots], + (a) => a.pageUrl, + ) + for (const [pageId, annots] of Object.entries(annotsByPage)) { + const descOrderedAnnots = annots.sort( + (a, b) => b.lastEdited.valueOf() - a.lastEdited.valueOf(), + ) + resultDataByPage.set(pageId, { + annotations: descOrderedAnnots, + // These may get overwritten in the next loop by the latest page list entry time + latestPageTimestamp: descOrderedAnnots[0].lastEdited.valueOf(), + oldestTimestamp: 0, + }) + } + + for (const pageId of validPageIds) { + const latestPageTimestamp = latestListEntryTimeByPage[pageId] ?? 0 + const existing = resultDataByPage.get(pageId) ?? 
{
+                annotations: [],
+                latestPageTimestamp,
+                oldestTimestamp: 0,
+            }
+            resultDataByPage.set(pageId, {
+                ...existing,
+                latestPageTimestamp,
+            })
+        }
+
+        return {
+            resultDataByPage,
+            oldestResultTimestamp: 0,
+            resultsExhausted: true,
+        }
+    }
+
     private async unifiedBlankSearch(
         params: UnifiedBlankSearchParams,
-    ): Promise {
-        const upperBound = params.untilWhen
-        const lowerBound =
-            params.fromWhen ??
-            Math.max(upperBound - params.daysToSearch * dayMs, 0)
-        // const timeBoundsQuery = { $gt: lowerBound, $lt: upperBound }
-
-        const resultDataByPage: UnifiedBlankSearchResult['resultDataByPage'] = new Map()
+    ): Promise {
+        const resultDataByPage: ResultDataByPage = new Map()
         // TODO: these Dexie queries are here because the storex query didn't result in an indexed query happening
         // Need to fix the bug in storex-backend-dexie when it comes time to port this and revert them to storex queries
         const dexie = this.options.storageManager.backend['dexie'] as Dexie
-        const [annotations, visits, bookmarks] = await Promise.all([
+        let [annotations, visits, bookmarks] = await Promise.all([
             dexie
                 .table('annotations')
                 .where('lastEdited')
-                .between(new Date(lowerBound), new Date(upperBound), true, true)
+                .below(new Date(params.untilWhen))
+                .reverse()
+                .limit(params.limit)
                 .toArray(),
             dexie
                 .table('visits')
-                .where('time')
-                .between(lowerBound, upperBound, true, true)
+                .where('[time+url]')
+                .below([params.untilWhen])
+                .reverse()
+                .limit(params.limit)
                 .toArray(),
             dexie
                 .table('bookmarks')
                 .where('time')
-                .between(lowerBound, upperBound, true, true)
+                .below(params.untilWhen)
+                .reverse()
+                .limit(params.limit)
                 .toArray(),
         ])
-        // const [annotations, visits, bookmarks] = await Promise.all([
-        //     this.options.storageManager
-        //         .collection('annotations')
-        //         .findObjects({ lastEdited: timeBoundsQuery }),
-        //     this.options.storageManager
-        //         .collection('visits')
-        //         .findObjects({ time: timeBoundsQuery }),
-        //     this.options.storageManager
-        //         .collection('bookmarks')
-        //         .findObjects({ time: timeBoundsQuery }),
-        // ])
+
+        // Work with only the latest N results for this results page, discarding rest
+        const inScopeResults = [...annotations, ...visits, ...bookmarks]
+            .filter((a) => {
+                const time = 'lastEdited' in a ? a.lastEdited.valueOf() : a.time
+                return time >= params.lowestTimeBound
+            })
+            // TODO: pick one of the latest visit or bookmark, else each bookmark's also going to show up in results via the visit
+            .sort((a, b) => {
+                const timeA =
+                    'lastEdited' in a ? a.lastEdited.valueOf() : a.time
+                const timeB =
+                    'lastEdited' in b ?
b.lastEdited.valueOf() : b.time + return timeB - timeA + }) + .slice(0, params.limit) + + const onlyInScope = (doc: any) => inScopeResults.includes(doc) + annotations = annotations.filter(onlyInScope) + bookmarks = bookmarks.filter(onlyInScope) + visits = visits.filter(onlyInScope) // Add in all the annotations to the results const annotsByPage = groupBy(annotations, (a) => a.pageUrl) for (const [pageId, annots] of Object.entries(annotsByPage)) { - const sortedAnnots = annots.sort( + const descOrderedAnnots = annots.sort( (a, b) => b.lastEdited.valueOf() - a.lastEdited.valueOf(), ) resultDataByPage.set(pageId, { - annotations: sortedAnnots, - // This gets overwritten in the next loop by the latest visit/bookmark time (if exist in this results "page") - latestPageTimestamp: sortedAnnots[0].lastEdited.valueOf(), + annotations: descOrderedAnnots, + // These get overwritten in the next loop by the latest/oldest visit/bookmark time (if exist in this results "page") + latestPageTimestamp: descOrderedAnnots[0].lastEdited.valueOf(), + oldestTimestamp: descOrderedAnnots[ + descOrderedAnnots.length - 1 + ].lastEdited.valueOf(), }) } // Add in all the pages to the results - const descOrdered = [...bookmarks, ...visits].sort( + const ascOrdered = [...bookmarks, ...visits].sort( (a, b) => a.time - b.time, ) - for (const { url, time } of descOrdered) { + for (const { url, time } of ascOrdered) { const annotations = resultDataByPage.get(url)?.annotations ?? [] resultDataByPage.set(url, { annotations, latestPageTimestamp: time, // Should end up being the latest one, given ordering + oldestTimestamp: ascOrdered[0].time, // First visit/bm should always be older than annot }) } await this.filterUnifiedSearchResultsByFilters(resultDataByPage, params) + let lowerBound: number + if (!inScopeResults.length) { + lowerBound = 0 + } else { + const oldestResult = inScopeResults[inScopeResults.length - 1] + lowerBound = + 'lastEdited' in oldestResult + ? oldestResult.lastEdited.valueOf() + : oldestResult.time + } + return { resultDataByPage, resultsExhausted: lowerBound <= params.lowestTimeBound, @@ -379,8 +659,8 @@ export default class SearchBackground { private async unifiedTermsSearch( params: UnifiedTermsSearchParams, - ): Promise { - const resultDataByPage: UnifiedBlankSearchResult['resultDataByPage'] = new Map() + ): Promise { + const resultDataByPage: ResultDataByPage = new Map() const [pages, annotations] = await Promise.all([ params.queryPages(params.terms, params.phrases), @@ -397,6 +677,7 @@ export default class SearchBackground { annotations: sortedAnnots, // This gets overwritten in the next loop by the latest visit/bookmark time (if exist in this results "page") latestPageTimestamp: sortedAnnots[0].lastEdited.valueOf(), + oldestTimestamp: 0, }) } @@ -406,17 +687,15 @@ export default class SearchBackground { resultDataByPage.set(id, { annotations, latestPageTimestamp: latestTimestamp, + oldestTimestamp: 0, }) } await this.filterUnifiedSearchResultsByFilters(resultDataByPage, params) // Paginate! 
- const sortedResultPages = sortUnifiedBlankSearchResult(resultDataByPage) - const paginatedResults = this.sliceUnifiedSearchResults( - sortedResultPages, - params, - ) + const sortedResultPages = sortSearchResult(resultDataByPage) + const paginatedResults = sliceSearchResult(sortedResultPages, params) return { oldestResultTimestamp: null, @@ -425,59 +704,78 @@ export default class SearchBackground { } } - unifiedSearch: RemoteSearchInterface['unifiedSearch'] = async (params) => { - let result: UnifiedBlankSearchResult - // There's 2 separate search implementations depending on whether doing a terms search or not - if (!params.query.trim().length) { - const lowestTimeBound = await this.calcSearchTimeBoundEdge('bottom') - const highestTimeBound = await this.calcSearchTimeBoundEdge('top') - // Skip over days where there's no results, until we get results - do { - result = await this.unifiedBlankSearch({ - ...params, - untilWhen: - result?.oldestResultTimestamp ?? // allows to paginate back from prev result - params.untilWhen ?? - highestTimeBound, // default to latest timestamp in DB, to start off search - daysToSearch: 2, + private async unifiedIntermediarySearch( + params: UnifiedSearchParams & UnifiedSearchPaginationParams, + ): Promise { + const isTermsSearch = params.query.trim().length + + // There's 3 separate search implementations depending on whether doing a terms search or not and if we're filtering by list + if (!isTermsSearch) { + if (params.filterByListIds.length) { + return this.unifiedBlankListsSearch(params) + } else { + let result: IntermediarySearchResult + let [ lowestTimeBound, - }) - } while (!result.resultsExhausted && !result.resultDataByPage.size) - } else { - const { - phrases, - terms, - inTitle, - inContent, - inHighlight, - inComment, - matchTermsFuzzyStartsWith, - } = splitQueryIntoTerms(params.query) - - params.matchPageTitleUrl = inTitle - params.matchPageText = inContent - params.matchNotes = inComment - params.matchHighlights = inHighlight - params.phrases = phrases - params.terms = terms - params.matchTermsFuzzyStartsWith = matchTermsFuzzyStartsWith - result = await this.unifiedTermsSearch({ - ...params, - queryPages: queryPagesByTerms( - this.options.storageManager, - params, - ), - queryAnnotations: queryAnnotationsByTerms( - this.options.storageManager, - params, - ), - }) + highestTimeBound, + ] = await this.calcBlankSearchTimeBoundEdges(params) + // Increase the lowest time bound if there's a specified lower time bound which beats it + if (lowestTimeBound < params.fromWhen) { + lowestTimeBound = params.fromWhen + } + highestTimeBound += 1 + + // Skip over days where there's no results, until we get results + do { + result = await this.unifiedBlankSearch({ + ...params, + lowestTimeBound, + untilWhen: + result?.oldestResultTimestamp ?? + params.untilWhen ?? 
// affords pagination back from prev result + highestTimeBound, + }) + } while ( + !result.resultsExhausted && + !result.resultDataByPage.size + ) + return result + } } - const dataLookups = await this.lookupDataForUnifiedResults(result) - const sortedResultPages = sortUnifiedBlankSearchResult( + const { + phrases, + terms, + inTitle, + inContent, + inHighlight, + inComment, + matchTermsFuzzyStartsWith, + } = splitQueryIntoTerms(params.query) + + params.matchPageTitleUrl = inTitle + params.matchPageText = inContent + params.matchNotes = inComment + params.matchHighlights = inHighlight + params.phrases = phrases + params.terms = terms + params.matchTermsFuzzyStartsWith = matchTermsFuzzyStartsWith + return this.unifiedTermsSearch({ + ...params, + queryPages: queryPagesByTerms(this.options.storageManager, params), + queryAnnotations: queryAnnotationsByTerms( + this.options.storageManager, + params, + ), + }) + } + + unifiedSearch: RemoteSearchInterface['unifiedSearch'] = async (params) => { + const result = await this.unifiedIntermediarySearch(params) + const dataLookups = await this.lookupDataForUnifiedResults( result.resultDataByPage, ) + const sortedResultPages = sortSearchResult(result.resultDataByPage) const dataEnrichedAnnotPages = sortedResultPages .map(([pageId, { annotations }]) => { const page = dataLookups.pages.get(pageId) @@ -502,9 +800,9 @@ export default class SearchBackground { } } - private async lookupDataForUnifiedResults({ - resultDataByPage, - }: UnifiedBlankSearchResult): Promise { + private async lookupDataForUnifiedResults( + resultDataByPage: ResultDataByPage, + ): Promise { const pageIds = [...resultDataByPage.keys()] const annotIds = [ ...resultDataByPage.values(), diff --git a/src/search/background/types.ts b/src/search/background/types.ts index 3b0e1bdefa..5d83b0c675 100644 --- a/src/search/background/types.ts +++ b/src/search/background/types.ts @@ -144,12 +144,13 @@ export type UnifiedTermsSearchParams = UnifiedSearchParams & * (which gets returned from blank searches) and supply that as the new upper time bound * for subsequent blank search pages. */ -export type UnifiedBlankSearchParams = UnifiedSearchParams & { - untilWhen: number - daysToSearch: number - /** The time of the oldest visit/bookmark/annotation to determine results exhausted or not. */ - lowestTimeBound: number -} +export type UnifiedBlankSearchParams = UnifiedSearchParams & + Pick & { + /** This is how pagination is afforded for blank search. Set to page N's `oldestResultTimestamp` to get page N+1. */ + untilWhen: number + /** The time of the oldest visit/bookmark/annotation to determine results exhausted or not. */ + lowestTimeBound: number + } export type UnifiedSearchResult = { docs: AnnotPage[] @@ -157,13 +158,25 @@ export type UnifiedSearchResult = { oldestResultTimestamp: number } -export type UnifiedBlankSearchResult = { - oldestResultTimestamp: number | null +export type IntermediarySearchResult = { resultsExhausted: boolean - resultDataByPage: Map + resultDataByPage: ResultDataByPage + /** Always null for terms search. */ + oldestResultTimestamp: number | null } -export type UnifiedBlankSearchPageResultData = { - latestPageTimestamp: number +export type ResultDataByPage = Map + +export type UnifiedSearchPageResultData = { annotations: Annotation[] + /** + * Contains the latest timestamp associated with the page bookmark or visits. Not annotation. + * Used primarily for sorting and display in UI. 
+ */ + latestPageTimestamp: number + /** + * Contains the oldest timestamp associated with the page, including bookmarks, visits, or annotations. + * Used for paging back through blank search results (oldest timestamp of batch N is the upper bound for batch N+1) + */ + oldestTimestamp: number } diff --git a/src/search/background/unified-search.test.ts b/src/search/background/unified-search.test.ts index 56ae759fea..ce9c85d2b8 100644 --- a/src/search/background/unified-search.test.ts +++ b/src/search/background/unified-search.test.ts @@ -2,18 +2,11 @@ import omit from 'lodash/omit' import type Storex from '@worldbrain/storex' import * as DATA from './unified-search.test.data' import { setupBackgroundIntegrationTest } from 'src/tests/background-integration-tests' -import { - queryAnnotationsByTerms, - queryPagesByTerms, - sortUnifiedBlankSearchResult, - splitQueryIntoTerms, -} from './utils' +import { sortSearchResult, splitQueryIntoTerms } from './utils' import type { UnifiedSearchPaginationParams, - UnifiedBlankSearchParams, - UnifiedBlankSearchResult, - UnifiedTermsSearchParams, - TermsSearchOpts, + IntermediarySearchResult, + UnifiedSearchParams, } from './types' import type { BackgroundModules } from 'src/background-script/setup' import { AnnotationPrivacyLevels } from '@worldbrain/memex-common/lib/annotations/types' @@ -26,6 +19,15 @@ async function insertTestData(storageManager: Storex) { await storageManager .collection('pages') .createObject(omit(doc, 'listIds')) + + const [latestTimestamp] = [ + ...(DATA.VISITS[doc.url] ?? []), + ...(DATA.BOOKMARKS[doc.url] ?? []), + ] + .filter(Boolean) + .map((a) => a.time as number) + .sort((a, b) => b - a) + if ('listIds' in doc) { for (const listId of doc.listIds) { await storageManager @@ -34,7 +36,7 @@ async function insertTestData(storageManager: Storex) { listId, pageUrl: doc.url, fullUrl: doc.fullUrl, - createdAt: new Date(), + createdAt: new Date(latestTimestamp), }) } } @@ -71,7 +73,7 @@ async function insertTestData(storageManager: Storex) { .createObject({ listId, url: doc.url, - createdAt: new Date(), + createdAt: new Date(doc.createdWhen), }) } } @@ -86,76 +88,27 @@ async function setupTest(opts?: { skipDataInsertion?: boolean }) { return setup } -const blankSearch = async ( +const search = async ( { search }: BackgroundModules, - params: Partial, + params: Partial, ) => { - const lowestTimeBound = await search['calcSearchTimeBoundEdge']('bottom') - const highestTimeBound = await search['calcSearchTimeBoundEdge']('top') - return search['unifiedBlankSearch']({ + return search['unifiedIntermediarySearch']({ filterByDomains: [], filterByListIds: [], - lowestTimeBound: lowestTimeBound ?? 0, - daysToSearch: 1, query: '', - untilWhen: params.untilWhen ?? 
highestTimeBound, - ...params, - }) -} - -const termsSearch = ( - { search }: BackgroundModules, - params: Partial & - UnifiedSearchPaginationParams & { query: string }, -) => { - const defaultTermsOpts: TermsSearchOpts = { - matchNotes: true, - matchPageText: true, - matchHighlights: true, - matchPageTitleUrl: true, - } - - const { - phrases, - terms, - inTitle, - inContent, - inHighlight, - inComment, - matchTermsFuzzyStartsWith, - } = splitQueryIntoTerms(params.query) - - params.matchPageTitleUrl = inTitle - params.matchPageText = inContent - params.matchNotes = inComment - params.matchHighlights = inHighlight - params.phrases = phrases - params.terms = terms - params.matchTermsFuzzyStartsWith = matchTermsFuzzyStartsWith - - return search['unifiedTermsSearch']({ - filterByDomains: [], - filterByListIds: [], - queryPages: queryPagesByTerms(search['options'].storageManager, { - ...defaultTermsOpts, - ...params, - }), - queryAnnotations: queryAnnotationsByTerms( - search['options'].storageManager, - { ...defaultTermsOpts, ...params }, - ), - ...defaultTermsOpts, + limit: 10, + skip: 0, ...params, }) } const formatResults = ( - result: UnifiedBlankSearchResult, + result: IntermediarySearchResult, opts?: { skipSorting?: boolean }, ) => { const sortedResults = opts?.skipSorting ? [...result.resultDataByPage] - : sortUnifiedBlankSearchResult(result.resultDataByPage) + : sortSearchResult(result.resultDataByPage) return sortedResults.map(([pageId, data]) => [ pageId, { @@ -173,9 +126,10 @@ describe('Unified search tests', () => { skipDataInsertion: true, }) const now = Date.now() - const resultA = await blankSearch(backgroundModules, { + const resultA = await search(backgroundModules, { fromWhen: 0, untilWhen: now, + limit: 99999, }) expect(resultA.resultsExhausted).toBe(true) expect([...resultA.resultDataByPage]).toEqual([]) @@ -185,14 +139,10 @@ describe('Unified search tests', () => { const { backgroundModules } = await setupTest() const now = Date.now() - const lowestTimeBound = await backgroundModules.search[ - 'calcSearchTimeBoundEdge' - ]('bottom') - const resultA = await blankSearch(backgroundModules, { + const resultA = await search(backgroundModules, { fromWhen: 0, untilWhen: now, - daysToSearch: 1, - lowestTimeBound, + limit: 99999, }) expect(resultA.resultsExhausted).toBe(true) expect(formatResults(resultA)).toEqual([ @@ -339,41 +289,42 @@ describe('Unified search tests', () => { // Check comments scattered throughout this test for more details, as things are quite intentionally structured around the test data timestamps it('should return most-recent highlights and their pages on unfiltered, paginated blank search', async () => { const { backgroundModules } = await setupTest() - const resultA = await blankSearch(backgroundModules, { + const resultA = await search(backgroundModules, { + fromWhen: 0, untilWhen: new Date('2024-03-25T20:00').valueOf(), // This is calculated based on the test data times - daysToSearch: 1, - }) - const resultB = await blankSearch(backgroundModules, { - untilWhen: new Date('2024-03-24T20:00').valueOf(), - daysToSearch: 1, + limit: 10, }) - const resultC = await blankSearch(backgroundModules, { - untilWhen: new Date('2024-03-23T20:00').valueOf(), - daysToSearch: 1, + const resultB = await search(backgroundModules, { + fromWhen: 0, + untilWhen: resultA.oldestResultTimestamp, + limit: 10, }) - const resultD = await blankSearch(backgroundModules, { - untilWhen: new Date('2024-03-22T20:00').valueOf(), - daysToSearch: 1, + const resultC = await 
search(backgroundModules, { + fromWhen: 0, + untilWhen: resultB.oldestResultTimestamp, + limit: 10, }) - const resultE = await blankSearch(backgroundModules, { - untilWhen: new Date('2024-03-21T20:00').valueOf(), - daysToSearch: 1, + const resultD = await search(backgroundModules, { + fromWhen: 0, + untilWhen: resultC.oldestResultTimestamp, + limit: 10, }) - const resultF = await blankSearch(backgroundModules, { - untilWhen: new Date('2024-03-20T20:00').valueOf(), - daysToSearch: 1, + const resultE = await search(backgroundModules, { + fromWhen: 0, + untilWhen: resultD.oldestResultTimestamp, + limit: 10, }) - const resultG = await blankSearch(backgroundModules, { - untilWhen: new Date('2024-03-19T20:00').valueOf(), - daysToSearch: 30, // We're really skipping ahead here as we know there's no data until about a month back + const resultF = await search(backgroundModules, { + fromWhen: 0, + untilWhen: resultE.oldestResultTimestamp, + limit: 10, }) expect(resultA.resultsExhausted).toBe(false) expect(resultB.resultsExhausted).toBe(false) expect(resultC.resultsExhausted).toBe(false) expect(resultD.resultsExhausted).toBe(false) - expect(resultE.resultsExhausted).toBe(false) - expect(resultF.resultsExhausted).toBe(false) - expect(resultG.resultsExhausted).toBe(true) + expect(resultE.resultsExhausted).toBe(true) + expect(resultF.resultsExhausted).toBe(true) expect(formatResults(resultA)).toEqual([ [ DATA.PAGE_ID_11, @@ -436,10 +387,7 @@ describe('Unified search tests', () => { ], ]) - // Should be an empty result "page" - nothing on this day - expect(formatResults(resultB)).toEqual([]) - - expect(formatResults(resultC)).toEqual([ + expect(formatResults(resultB)).toEqual([ [ DATA.PAGE_ID_12, { @@ -496,15 +444,29 @@ describe('Unified search tests', () => { DATA.ANNOTATIONS[DATA.PAGE_ID_7][3].url, DATA.ANNOTATIONS[DATA.PAGE_ID_7][2].url, DATA.ANNOTATIONS[DATA.PAGE_ID_7][1].url, + // DATA.ANNOTATIONS[DATA.PAGE_ID_7][0].url, // This will be in the next results page + ], + latestPageTimestamp: DATA.ANNOTATIONS[ + DATA.PAGE_ID_7 + ][3].lastEdited.valueOf(), + }, + ], + ]) + + expect(formatResults(resultC)).toEqual([ + [ + DATA.PAGE_ID_7, + { + annotIds: [ + // DATA.ANNOTATIONS[DATA.PAGE_ID_7][3].url, // These came in the prev page + // DATA.ANNOTATIONS[DATA.PAGE_ID_7][2].url, + // DATA.ANNOTATIONS[DATA.PAGE_ID_7][1].url, DATA.ANNOTATIONS[DATA.PAGE_ID_7][0].url, ], latestPageTimestamp: DATA.VISITS[DATA.PAGE_ID_7][0].time, }, ], - ]) - - expect(formatResults(resultD)).toEqual([ [ DATA.PAGE_ID_4, // Shows up a second time to get in some older annotations { @@ -533,6 +495,8 @@ describe('Unified search tests', () => { DATA.VISITS[DATA.PAGE_ID_6][0].time, }, ], + ]) + expect(formatResults(resultD)).toEqual([ [ DATA.PAGE_ID_5, { @@ -545,8 +509,6 @@ describe('Unified search tests', () => { DATA.VISITS[DATA.PAGE_ID_5][0].time, }, ], - ]) - expect(formatResults(resultE)).toEqual([ [ DATA.PAGE_ID_4, // Shows up a third time to get in the oldest annotations { @@ -575,6 +537,19 @@ describe('Unified search tests', () => { DATA.ANNOTATIONS[DATA.PAGE_ID_3][1].url, DATA.ANNOTATIONS[DATA.PAGE_ID_3][0].url, ], + latestPageTimestamp: DATA.ANNOTATIONS[ + DATA.PAGE_ID_3 + ][3].lastEdited.valueOf(), + }, + ], + ]) + expect(formatResults(resultE)).toEqual([ + // TODO: Ideally this result gets ommitted as it's already shown in the previous results page with all the annots. + // It shows again because there's a visit before the annots that's just on the cut-off point between results pages. 
+ [ + DATA.PAGE_ID_3, + { + annotIds: [], latestPageTimestamp: DATA.VISITS[DATA.PAGE_ID_3][0].time, }, @@ -587,9 +562,6 @@ describe('Unified search tests', () => { DATA.BOOKMARKS[DATA.PAGE_ID_2][0].time, }, ], - ]) - - expect(formatResults(resultF)).toEqual([ [ DATA.PAGE_ID_1, { @@ -598,9 +570,6 @@ describe('Unified search tests', () => { DATA.VISITS[DATA.PAGE_ID_1][0].time, }, ], - ]) - // This is the final 30 day period search to pick up this last result, which is ~1 month before all the other data - expect(formatResults(resultG)).toEqual([ [ DATA.PAGE_ID_12, { @@ -610,29 +579,224 @@ describe('Unified search tests', () => { }, ], ]) + expect(formatResults(resultF)).toEqual([]) + }) + + it('should return most-recent highlights and their pages on unfiltered, paginated blank search with lower time bound set', async () => { + const { backgroundModules } = await setupTest() + const lowerBound = new Date('2024-03-22T06:10').valueOf() + const resultA = await search(backgroundModules, { + fromWhen: lowerBound, + untilWhen: new Date('2024-03-25T20:00').valueOf(), // This is calculated based on the test data times + limit: 10, + }) + const resultB = await search(backgroundModules, { + fromWhen: lowerBound, + untilWhen: resultA.oldestResultTimestamp, + limit: 10, + }) + const resultC = await search(backgroundModules, { + fromWhen: lowerBound, + untilWhen: resultB.oldestResultTimestamp, + limit: 10, + }) + // This should be beyond the scope of the lower time bound, thus should return no results + const resultD = await search(backgroundModules, { + fromWhen: lowerBound, + untilWhen: resultC.oldestResultTimestamp, + limit: 10, + }) + expect(resultA.resultsExhausted).toBe(false) + expect(resultB.resultsExhausted).toBe(false) + expect(resultC.resultsExhausted).toBe(true) + expect(resultD.resultsExhausted).toBe(true) + expect(formatResults(resultA)).toEqual([ + [ + DATA.PAGE_ID_11, + { + annotIds: [], + latestPageTimestamp: + DATA.VISITS[DATA.PAGE_ID_11][0].time, + }, + ], + [ + DATA.PAGE_ID_13, + { + annotIds: [], + latestPageTimestamp: + DATA.VISITS[DATA.PAGE_ID_13][0].time, + }, + ], + [ + DATA.PAGE_ID_8, + { + annotIds: [], + latestPageTimestamp: + DATA.VISITS[DATA.PAGE_ID_8][1].time, + }, + ], + [ + DATA.PAGE_ID_10, + { + annotIds: [DATA.ANNOTATIONS[DATA.PAGE_ID_10][0].url], + latestPageTimestamp: + DATA.VISITS[DATA.PAGE_ID_10][0].time, + }, + ], + [ + DATA.PAGE_ID_2, + { + annotIds: [ + DATA.ANNOTATIONS[DATA.PAGE_ID_2][2].url, + DATA.ANNOTATIONS[DATA.PAGE_ID_2][1].url, + DATA.ANNOTATIONS[DATA.PAGE_ID_2][0].url, + ], + latestPageTimestamp: DATA.ANNOTATIONS[ + DATA.PAGE_ID_2 + ][2].lastEdited.valueOf(), + }, + ], + [ + DATA.PAGE_ID_5, + { + annotIds: [ + DATA.ANNOTATIONS[DATA.PAGE_ID_5][0].url, + // These two should come in the next results "page", being a few days older + // DATA.ANNOTATIONS[DATA.PAGE_ID_5][2].url, + // DATA.ANNOTATIONS[DATA.PAGE_ID_5][1].url, + ], + latestPageTimestamp: DATA.ANNOTATIONS[ + DATA.PAGE_ID_5 + ][0].lastEdited.valueOf(), + }, + ], + ]) + + expect(formatResults(resultB)).toEqual([ + [ + DATA.PAGE_ID_12, + { + annotIds: [DATA.ANNOTATIONS[DATA.PAGE_ID_12][0].url], + latestPageTimestamp: DATA.ANNOTATIONS[ + DATA.PAGE_ID_12 + ][0].lastEdited.valueOf(), + }, + ], + [ + DATA.PAGE_ID_9, + { + annotIds: [ + DATA.ANNOTATIONS[DATA.PAGE_ID_9][2].url, + DATA.ANNOTATIONS[DATA.PAGE_ID_9][1].url, + DATA.ANNOTATIONS[DATA.PAGE_ID_9][0].url, + ], + latestPageTimestamp: + DATA.VISITS[DATA.PAGE_ID_9][0].time, + }, + ], + [ + DATA.PAGE_ID_4, + { + annotIds: [ + 
DATA.ANNOTATIONS[DATA.PAGE_ID_4][0].url, + // DATA.ANNOTATIONS[DATA.PAGE_ID_4][9].url, + // DATA.ANNOTATIONS[DATA.PAGE_ID_4][8].url, + // DATA.ANNOTATIONS[DATA.PAGE_ID_4][7].url, + // DATA.ANNOTATIONS[DATA.PAGE_ID_4][6].url, + // DATA.ANNOTATIONS[DATA.PAGE_ID_4][5].url, + // DATA.ANNOTATIONS[DATA.PAGE_ID_4][4].url, + // DATA.ANNOTATIONS[DATA.PAGE_ID_4][3].url, + // DATA.ANNOTATIONS[DATA.PAGE_ID_4][2].url, + // DATA.ANNOTATIONS[DATA.PAGE_ID_4][1].url, + ], + latestPageTimestamp: DATA.ANNOTATIONS[ + DATA.PAGE_ID_4 + ][0].lastEdited.valueOf(), + }, + ], + [ + DATA.PAGE_ID_8, // Shows up a second time due to a second visit + { + annotIds: [], + latestPageTimestamp: + DATA.VISITS[DATA.PAGE_ID_8][0].time, + }, + ], + [ + DATA.PAGE_ID_7, + { + annotIds: [ + DATA.ANNOTATIONS[DATA.PAGE_ID_7][3].url, + DATA.ANNOTATIONS[DATA.PAGE_ID_7][2].url, + DATA.ANNOTATIONS[DATA.PAGE_ID_7][1].url, + // DATA.ANNOTATIONS[DATA.PAGE_ID_7][0].url, // This will be in the next results page + ], + latestPageTimestamp: DATA.ANNOTATIONS[ + DATA.PAGE_ID_7 + ][3].lastEdited.valueOf(), + }, + ], + ]) + + expect(formatResults(resultC)).toEqual([ + [ + DATA.PAGE_ID_7, + { + annotIds: [ + // DATA.ANNOTATIONS[DATA.PAGE_ID_7][3].url, // These came in the prev page + // DATA.ANNOTATIONS[DATA.PAGE_ID_7][2].url, + // DATA.ANNOTATIONS[DATA.PAGE_ID_7][1].url, + DATA.ANNOTATIONS[DATA.PAGE_ID_7][0].url, + ], + latestPageTimestamp: + DATA.VISITS[DATA.PAGE_ID_7][0].time, + }, + ], + [ + DATA.PAGE_ID_4, // Shows up a second time to get in some older annotations + { + annotIds: [ + // DATA.ANNOTATIONS[DATA.PAGE_ID_4][0].url, + DATA.ANNOTATIONS[DATA.PAGE_ID_4][9].url, + DATA.ANNOTATIONS[DATA.PAGE_ID_4][8].url, + DATA.ANNOTATIONS[DATA.PAGE_ID_4][7].url, + DATA.ANNOTATIONS[DATA.PAGE_ID_4][6].url, + DATA.ANNOTATIONS[DATA.PAGE_ID_4][5].url, + DATA.ANNOTATIONS[DATA.PAGE_ID_4][4].url, + DATA.ANNOTATIONS[DATA.PAGE_ID_4][3].url, + // DATA.ANNOTATIONS[DATA.PAGE_ID_4][2].url, + // DATA.ANNOTATIONS[DATA.PAGE_ID_4][1].url, + ], + latestPageTimestamp: DATA.ANNOTATIONS[ + DATA.PAGE_ID_4 + ][9].lastEdited.valueOf(), + }, + ], + [ + DATA.PAGE_ID_6, + { + annotIds: [], + latestPageTimestamp: + DATA.VISITS[DATA.PAGE_ID_6][0].time, + }, + ], + ]) + expect(formatResults(resultD)).toEqual([]) }) it('should return recent highlights and their pages on list filtered blank search', async () => { const { backgroundModules } = await setupTest() - const now = Date.now() - const resultA = await blankSearch(backgroundModules, { - fromWhen: 0, - untilWhen: now, + + const resultA = await search(backgroundModules, { filterByListIds: [DATA.LIST_ID_1], }) - const resultB = await blankSearch(backgroundModules, { - fromWhen: 0, - untilWhen: now, + const resultB = await search(backgroundModules, { filterByListIds: [DATA.LIST_ID_1, DATA.LIST_ID_3], // Multiple values do an AND }) - const resultC = await blankSearch(backgroundModules, { - fromWhen: 0, - untilWhen: now, + const resultC = await search(backgroundModules, { filterByListIds: [DATA.LIST_ID_2], }) - const resultD = await blankSearch(backgroundModules, { - fromWhen: 0, - untilWhen: now, + const resultD = await search(backgroundModules, { filterByListIds: [DATA.LIST_ID_2, DATA.LIST_ID_3], // Should be no overlaps }) @@ -645,8 +809,9 @@ describe('Unified search tests', () => { DATA.PAGE_ID_4, { annotIds: [DATA.ANNOTATIONS[DATA.PAGE_ID_4][1].url], - latestPageTimestamp: - DATA.VISITS[DATA.PAGE_ID_4][0].time, + latestPageTimestamp: DATA.ANNOTATIONS[ + DATA.PAGE_ID_4 + ][1].lastEdited.valueOf(), }, ], [ @@ -681,7 +846,7 @@ 
describe('Unified search tests', () => { DATA.ANNOTATIONS[DATA.PAGE_ID_4][5].url, DATA.ANNOTATIONS[DATA.PAGE_ID_4][4].url, DATA.ANNOTATIONS[DATA.PAGE_ID_4][3].url, - // Thess ones are selectively shared to a different list to the parent page, thus get omitted + // These ones are selectively shared to a different list to the parent page, thus get omitted // DATA.ANNOTATIONS[DATA.PAGE_ID_4][2].url, // DATA.ANNOTATIONS[DATA.PAGE_ID_4][1].url, ], @@ -695,20 +860,24 @@ describe('Unified search tests', () => { it('should return recent highlights and their pages on domain filtered blank search', async () => { const { backgroundModules } = await setupTest() - const now = Date.now() - const resultA = await blankSearch(backgroundModules, { - fromWhen: 0, - untilWhen: now, + const resultA = await search(backgroundModules, { + limit: 9999, filterByDomains: ['test.com'], }) - const resultB = await blankSearch(backgroundModules, { - fromWhen: 0, - untilWhen: now, - filterByDomains: ['test-2.com', 'test.com'], // Multiple values do an OR + const resultB = await search(backgroundModules, { + limit: 9999, + filterByDomains: ['en.test-2.com', 'test.com'], // Multiple values do an OR }) - const resultC = await blankSearch(backgroundModules, { - fromWhen: 0, - untilWhen: now, + const resultC = await search(backgroundModules, { + filterByDomains: [ + 'wikipedia.org', + 'en.wikipedia.org', + 'test-2.com', + 'en.test-2.com', + 'test.com', + ], + }) + const resultD = await search(backgroundModules, { filterByDomains: [ 'wikipedia.org', 'en.wikipedia.org', @@ -716,10 +885,24 @@ describe('Unified search tests', () => { 'en.test-2.com', 'test.com', ], + untilWhen: resultC.oldestResultTimestamp, + }) + const resultE = await search(backgroundModules, { + filterByDomains: [ + 'wikipedia.org', + 'en.wikipedia.org', + 'test-2.com', + 'en.test-2.com', + 'test.com', + ], + untilWhen: resultD.oldestResultTimestamp, }) expect(resultA.resultsExhausted).toBe(true) expect(resultB.resultsExhausted).toBe(true) + expect(resultC.resultsExhausted).toBe(false) + expect(resultD.resultsExhausted).toBe(false) + expect(resultE.resultsExhausted).toBe(true) expect(formatResults(resultA).map(([pageId]) => pageId)).toEqual([ DATA.PAGE_ID_10, ]) @@ -731,6 +914,12 @@ describe('Unified search tests', () => { DATA.PAGE_ID_10, DATA.PAGE_ID_2, DATA.PAGE_ID_9, + ]) + expect(formatResults(resultD).map(([pageId]) => pageId)).toEqual([ + DATA.PAGE_ID_9, + ]) + expect(formatResults(resultE).map(([pageId]) => pageId)).toEqual([ + DATA.PAGE_ID_2, DATA.PAGE_ID_1, ]) }) @@ -742,7 +931,7 @@ describe('Unified search tests', () => { skipDataInsertion: true, }) const now = Date.now() - const resultA = await termsSearch(backgroundModules, { + const resultA = await search(backgroundModules, { query: 'test', limit: 1000, skip: 0, @@ -755,7 +944,7 @@ describe('Unified search tests', () => { const { backgroundModules } = await setupTest() const now = Date.now() - const resultA = await termsSearch(backgroundModules, { + const resultA = await search(backgroundModules, { query: 'test', limit: 1000, skip: 0, @@ -879,7 +1068,7 @@ describe('Unified search tests', () => { const { backgroundModules } = await setupTest() const now = Date.now() - const resultA = await termsSearch(backgroundModules, { + const resultA = await search(backgroundModules, { query: '微软第三财季营收', limit: 1000, skip: 0, @@ -900,17 +1089,17 @@ describe('Unified search tests', () => { it('should return highlights and their pages on unfiltered, paginated terms search', async () => { const { 
backgroundModules } = await setupTest() - const resultA = await termsSearch(backgroundModules, { + const resultA = await search(backgroundModules, { query: 'test', limit: 5, skip: 0, }) - const resultB = await termsSearch(backgroundModules, { + const resultB = await search(backgroundModules, { query: 'test', limit: 5, skip: 5, }) - const resultC = await termsSearch(backgroundModules, { + const resultC = await search(backgroundModules, { query: 'test', limit: 5, skip: 10, @@ -1036,12 +1225,12 @@ describe('Unified search tests', () => { ]) // Now test multi-terms search (should "AND" all terms) - const resultD = await termsSearch(backgroundModules, { + const resultD = await search(backgroundModules, { query: 'honshu', limit: 10, skip: 0, }) - const resultE = await termsSearch(backgroundModules, { + const resultE = await search(backgroundModules, { query: 'honshu test', limit: 10, skip: 0, @@ -1085,19 +1274,19 @@ describe('Unified search tests', () => { it('should return highlights and their pages on list filtered, paginated terms search', async () => { const { backgroundModules } = await setupTest() - const resultA = await termsSearch(backgroundModules, { + const resultA = await search(backgroundModules, { query: 'test', limit: 1, skip: 0, filterByListIds: [DATA.LIST_ID_1], }) - const resultB = await termsSearch(backgroundModules, { + const resultB = await search(backgroundModules, { query: 'test', limit: 1, skip: 1, filterByListIds: [DATA.LIST_ID_1], }) - const resultC = await termsSearch(backgroundModules, { + const resultC = await search(backgroundModules, { query: 'test', limit: 1, skip: 2, @@ -1130,7 +1319,7 @@ describe('Unified search tests', () => { expect(formatResults(resultC, { skipSorting: true })).toEqual([]) // NOTE: Same query as above, but with a bigger pag limit - const resultD = await termsSearch(backgroundModules, { + const resultD = await search(backgroundModules, { query: 'test', limit: 10, skip: 0, @@ -1158,7 +1347,7 @@ describe('Unified search tests', () => { ]) // NOTE: Same query as above, but with multiple terms (ANDed) - const resultE = await termsSearch(backgroundModules, { + const resultE = await search(backgroundModules, { query: 'test honshu', limit: 10, skip: 0, @@ -1177,7 +1366,7 @@ describe('Unified search tests', () => { ], ]) - const resultF = await termsSearch(backgroundModules, { + const resultF = await search(backgroundModules, { query: 'test', limit: 10, skip: 0, @@ -1195,7 +1384,7 @@ describe('Unified search tests', () => { ], ]) - const resultG = await termsSearch(backgroundModules, { + const resultG = await search(backgroundModules, { query: 'test', limit: 10, skip: 0, @@ -1247,26 +1436,26 @@ describe('Unified search tests', () => { // ], // }) - const resultA = await termsSearch(backgroundModules, { + const resultA = await search(backgroundModules, { query: 'term', limit: 100, skip: 0, filterByDomains: ['test.com'], }) - const resultB = await termsSearch(backgroundModules, { + const resultB = await search(backgroundModules, { query: 'term', limit: 100, skip: 0, filterByDomains: ['test-2.com', 'test.com'], // Multiple values do an OR }) // Same as prev but paginated - const resultC = await termsSearch(backgroundModules, { + const resultC = await search(backgroundModules, { query: 'term', limit: 1, skip: 0, filterByDomains: ['test-2.com', 'test.com'], // Multiple values do an OR }) - const resultD = await termsSearch(backgroundModules, { + const resultD = await search(backgroundModules, { query: 'term', limit: 2, skip: 1, @@ -1274,7 +1463,7 @@ 
describe('Unified search tests', () => { }) // Same as prev but with multiple terms, which are only in one of the results - const resultE = await termsSearch(backgroundModules, { + const resultE = await search(backgroundModules, { query: 'term phyla', limit: 20, skip: 0, @@ -1317,22 +1506,22 @@ describe('Unified search tests', () => { const { backgroundModules } = await setupTest() const now = Date.now() - const resultA = await termsSearch(backgroundModules, { + const resultA = await search(backgroundModules, { query: '"some nonsense test text"', limit: 1000, skip: 0, }) - const resultB = await termsSearch(backgroundModules, { + const resultB = await search(backgroundModules, { query: '"apples, oranges"', limit: 1000, skip: 0, }) - const resultC = await termsSearch(backgroundModules, { + const resultC = await search(backgroundModules, { query: '"some nonsense test text" oranges', limit: 1000, skip: 0, }) - const resultD = await termsSearch(backgroundModules, { + const resultD = await search(backgroundModules, { query: '"monophyly and validity"', limit: 1000, skip: 0, @@ -1379,19 +1568,17 @@ describe('Unified search tests', () => { describe('search filter tests', () => { it('should be able to filter PDF pages in blank and terms search', async () => { const { backgroundModules } = await setupTest() - const now = Date.now() - const blankResultA = await blankSearch(backgroundModules, { - fromWhen: 0, - untilWhen: now, + const blankResultA = await search(backgroundModules, { + limit: 1000, filterByPDFs: true, }) - const termsResultB = await termsSearch(backgroundModules, { + const termsResultB = await search(backgroundModules, { query: 'text', limit: 1000, skip: 0, filterByPDFs: true, }) - const termsResultC = await termsSearch(backgroundModules, { + const termsResultC = await search(backgroundModules, { query: 'test', // NOTE: Different term limit: 1000, skip: 0, @@ -1450,19 +1637,17 @@ describe('Unified search tests', () => { it('should be able to filter video pages in blank and terms search', async () => { const { backgroundModules } = await setupTest() - const now = Date.now() - const blankResultA = await blankSearch(backgroundModules, { - fromWhen: 0, - untilWhen: now, + const blankResultA = await search(backgroundModules, { + limit: 1000, filterByVideos: true, }) - const termsResultB = await termsSearch(backgroundModules, { + const termsResultB = await search(backgroundModules, { query: 'test', limit: 1000, skip: 0, filterByVideos: true, }) - const termsResultC = await termsSearch(backgroundModules, { + const termsResultC = await search(backgroundModules, { query: 'today', limit: 1000, skip: 0, @@ -1537,19 +1722,17 @@ describe('Unified search tests', () => { it('should be able to filter tweet pages in blank and terms search', async () => { const { backgroundModules } = await setupTest() - const now = Date.now() - const blankResultA = await blankSearch(backgroundModules, { - fromWhen: 0, - untilWhen: now, + const blankResultA = await search(backgroundModules, { + limit: 1000, filterByTweets: true, }) - const termsResultB = await termsSearch(backgroundModules, { + const termsResultB = await search(backgroundModules, { query: 'test', limit: 1000, skip: 0, filterByTweets: true, }) - const termsResultC = await termsSearch(backgroundModules, { + const termsResultC = await search(backgroundModules, { query: 'insectum', limit: 1000, skip: 0, @@ -1639,19 +1822,17 @@ describe('Unified search tests', () => { it('should be able to filter event pages in blank and terms search', async () => { const { 
backgroundModules } = await setupTest() - const now = Date.now() - const blankResultA = await blankSearch(backgroundModules, { - fromWhen: 0, - untilWhen: now, + const blankResultA = await search(backgroundModules, { + limit: 1000, filterByEvents: true, }) - const termsResultB = await termsSearch(backgroundModules, { + const termsResultB = await search(backgroundModules, { query: 'test', limit: 1000, skip: 0, filterByEvents: true, }) - const termsResultC = await termsSearch(backgroundModules, { + const termsResultC = await search(backgroundModules, { query: 'encyclopedia', limit: 1000, skip: 0, @@ -1717,19 +1898,30 @@ describe('Unified search tests', () => { it("should be able to filter out all pages which don't contain annotations in blank and terms search", async () => { const { backgroundModules } = await setupTest() const now = Date.now() - const blankResultA = await blankSearch(backgroundModules, { - fromWhen: 0, - untilWhen: now, + const blankResultA = await search(backgroundModules, { + limit: 1000, + omitPagesWithoutAnnotations: true, + }) + // Same thing as A, but with pagination + const blankResultB = await search(backgroundModules, { + limit: 20, omitPagesWithoutAnnotations: true, }) - const termsResultB = await termsSearch(backgroundModules, { + const blankResultC = await search(backgroundModules, { + limit: 20, + untilWhen: blankResultB.oldestResultTimestamp, + omitPagesWithoutAnnotations: true, + }) + const termsResultA = await search(backgroundModules, { query: 'test', limit: 1000, skip: 0, omitPagesWithoutAnnotations: true, }) expect(blankResultA.resultsExhausted).toBe(true) - expect(termsResultB.resultsExhausted).toBe(true) + expect(blankResultB.resultsExhausted).toBe(false) + expect(blankResultC.resultsExhausted).toBe(true) + expect(termsResultA.resultsExhausted).toBe(true) expect( formatResults(blankResultA).map(([pageId]) => pageId), ).toEqual([ @@ -1747,7 +1939,31 @@ describe('Unified search tests', () => { // DATA.PAGE_ID_1, ]) expect( - formatResults(termsResultB, { skipSorting: true }).map( + formatResults(blankResultB).map(([pageId]) => pageId), + ).toEqual([ + // DATA.PAGE_ID_11, + // DATA.PAGE_ID_8, + DATA.PAGE_ID_10, + DATA.PAGE_ID_2, + DATA.PAGE_ID_5, + DATA.PAGE_ID_12, + DATA.PAGE_ID_9, + DATA.PAGE_ID_4, + DATA.PAGE_ID_7, + ]) + expect( + formatResults(blankResultC).map(([pageId]) => pageId), + ).toEqual([ + DATA.PAGE_ID_4, + DATA.PAGE_ID_5, + // The above two get repeated due to having annotations split across the result pages + + // DATA.PAGE_ID_6, + DATA.PAGE_ID_3, + // DATA.PAGE_ID_1, + ]) + expect( + formatResults(termsResultA, { skipSorting: true }).map( ([pageId]) => pageId, ), ).toEqual([ @@ -1845,17 +2061,17 @@ describe('Unified search tests', () => { it('terms search should still work the same with affixed spaces in query', async () => { const { backgroundModules } = await setupTest() - const termsResultA = await termsSearch(backgroundModules, { + const termsResultA = await search(backgroundModules, { query: 'test', limit: 1000, skip: 0, }) - const termsResultB = await termsSearch(backgroundModules, { + const termsResultB = await search(backgroundModules, { query: ' test ', limit: 1000, skip: 0, }) - const termsResultC = await termsSearch(backgroundModules, { + const termsResultC = await search(backgroundModules, { query: ' test test ', limit: 1000, skip: 0, diff --git a/src/search/background/utils.ts b/src/search/background/utils.ts index 586cf37c9e..8cd917bc8f 100644 --- a/src/search/background/utils.ts +++ b/src/search/background/utils.ts @@ -1,7 
+1,10 @@
 import type Storex from '@worldbrain/storex'
 import type {
+    ResultDataByPage,
     TermsSearchOpts,
-    UnifiedBlankSearchResult,
+    UnifiedSearchPageResultData,
+    UnifiedSearchPaginationParams,
+    UnifiedSearchParams,
     UnifiedTermsSearchParams,
 } from './types'
 import type { SearchParams as OldSearchParams } from '../types'
@@ -63,9 +66,7 @@ export const reshapePageForDisplay = (page) => ({
     annotsCount: page.annotsCount,
 })
 
-export const sortUnifiedBlankSearchResult = (
-    resultDataByPage: UnifiedBlankSearchResult['resultDataByPage'],
-) =>
+export const sortSearchResult = (resultDataByPage: ResultDataByPage) =>
     [...resultDataByPage].sort(
         ([, a], [, b]) =>
             Math.max(
@@ -78,6 +79,14 @@
             ),
     )
 
+export const sliceSearchResult = (
+    resultDataByPage: Array<[string, UnifiedSearchPageResultData]>,
+    { limit, skip }: UnifiedSearchPaginationParams,
+): ResultDataByPage => {
+    // NOTE: Current implementation ignores annotation count, and simply paginates by the number of pages in the results
+    return new Map(resultDataByPage.slice(skip, skip + limit))
+}
+
 /** Given separate result sets of the same type, gets the intersection of them / ANDs them together by ID */
 const intersectResults = (results: string[][]): string[] =>
     !results.length
@@ -295,3 +304,11 @@ export const splitQueryIntoTerms = (
         matchTermsFuzzyStartsWith,
     }
 }
+
+export const needToFilterSearchByUrl = (params: UnifiedSearchParams): boolean =>
+    params.filterByDomains.length > 0 ||
+    params.filterByPDFs ||
+    params.filterByVideos ||
+    params.filterByTweets ||
+    params.filterByEvents ||
+    params.omitPagesWithoutAnnotations
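
The test changes above collapse the separate blankSearch/termsSearch helpers into a single search helper. For blank (query-less) searches the old fromWhen/untilWhen window is gone: callers page backwards through time by feeding each batch's oldestResultTimestamp back in as the next untilWhen bound, stopping once resultsExhausted comes back true (the resultC -> resultD -> resultE chain in the domain-filter test). A minimal sketch of that loop, assuming the search and formatResults helpers defined in this test file:

// Hedged sketch: drain a domain-filtered blank search batch by batch, the way
// the paginated assertions above do, until the backend reports exhaustion.
async function drainBlankSearch(backgroundModules: any): Promise<string[]> {
    const pageIds: string[] = []
    let untilWhen: number | undefined
    while (true) {
        const result = await search(backgroundModules, {
            filterByDomains: ['test.com', 'test-2.com'],
            untilWhen,
        })
        pageIds.push(...formatResults(result).map(([pageId]) => pageId))
        if (result.resultsExhausted) {
            return pageIds
        }
        // Next batch: everything older than the oldest result seen so far.
        untilWhen = result.oldestResultTimestamp
    }
}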
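
Terms (query) searches, by contrast, still paginate with explicit limit/skip parameters, and a multi-word query ANDs its terms: 'honshu test' only matches pages containing both. A rough paging helper under the same assumptions as the sketch above:

// Hedged sketch: walk a terms query in fixed-size pages until exhausted.
async function drainTermsSearch(
    backgroundModules: any,
    query: string,
    pageSize = 5,
): Promise<string[]> {
    const pageIds: string[] = []
    for (let skip = 0; ; skip += pageSize) {
        const result = await search(backgroundModules, {
            query,
            limit: pageSize,
            skip,
        })
        pageIds.push(...formatResults(result).map(([pageId]) => pageId))
        if (result.resultsExhausted) {
            return pageIds
        }
    }
}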
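
Two other query behaviours exercised above: a double-quoted span is matched as an exact phrase (and can be combined with loose terms outside the quotes, which are still ANDed in), and content-type flags such as filterByPDFs, filterByVideos, filterByTweets, or filterByEvents narrow either search mode to matching pages. Illustrative calls only, using the same assumed helper:

// Exact phrase plus one extra ANDed term.
const phraseAndTerm = await search(backgroundModules, {
    query: '"some nonsense test text" oranges',
    limit: 1000,
    skip: 0,
})

// Blank search restricted to PDF pages; with no query, ordering is purely by recency.
const pdfPagesOnly = await search(backgroundModules, {
    limit: 1000,
    filterByPDFs: true,
})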
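
The omitPagesWithoutAnnotations pagination assertions also show that a page can reappear in a later batch when its annotations straddle the timestamp cut-off (PAGE_ID_4 and PAGE_ID_5 above), so a consumer accumulating batches should merge by page ID rather than concatenate. A hedged sketch of such a merge; the per-page annotation shape here is assumed for illustration, not taken from the real result types:

// Combine paginated result batches keyed by page ID, unioning annotation URLs
// so a page repeated across batches yields a single entry downstream.
function mergeBatches(
    batches: Array<Array<[string, { annotationUrls?: string[] }]>>,
): Map<string, Set<string>> {
    const merged = new Map<string, Set<string>>()
    for (const batch of batches) {
        for (const [pageId, data] of batch) {
            const urls = merged.get(pageId) ?? new Set<string>()
            for (const url of data.annotationUrls ?? []) {
                urls.add(url)
            }
            merged.set(pageId, urls)
        }
    }
    return merged
}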
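
On the implementation side, the utils.ts hunk renames the blank-search sorter to sortSearchResult, adds sliceSearchResult to take one skip/limit window over the sorted per-page entries (pagination counts pages, not annotations, as the NOTE says), and adds needToFilterSearchByUrl to decide whether a URL-level filtering pass is needed at all. A rough sketch of how those pieces compose; the import paths follow the diff, while applyUrlFilters and its predicate are illustrative only:

import {
    needToFilterSearchByUrl,
    sliceSearchResult,
    sortSearchResult,
} from 'src/search/background/utils'
import type {
    ResultDataByPage,
    UnifiedSearchParams,
    UnifiedSearchPaginationParams,
} from 'src/search/background/types'

// Sort the accumulated per-page results by recency, then keep one page-sized window.
function paginate(
    resultDataByPage: ResultDataByPage,
    pagination: UnifiedSearchPaginationParams,
): ResultDataByPage {
    return sliceSearchResult(sortSearchResult(resultDataByPage), pagination)
}

// Only run page-level URL filtering when one of the URL-affecting params is set.
function applyUrlFilters(
    params: UnifiedSearchParams,
    pageIds: string[],
    matchesUrlFilters: (pageId: string) => boolean, // hypothetical predicate
): string[] {
    return needToFilterSearchByUrl(params)
        ? pageIds.filter(matchesUrlFilters)
        : pageIds
}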