Skip to content

Commit

Permalink
feat: fuzzy search implementation (#351)
Browse files Browse the repository at this point in the history
* feat: fuzzy search implementation

* feat: add trigram matching for the search pre-query

* feat: add missing util fn

* chore: add comment missing

* chore: refactor if condition

* feat: early return of empty array if search doesn't match anything
  • Loading branch information
juanmahidalgo committed Sep 29, 2023
1 parent e6cccdd commit 0df60a2
Show file tree
Hide file tree
Showing 7 changed files with 243 additions and 72 deletions.
11 changes: 7 additions & 4 deletions src/ports/catalog/component.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,11 @@ export function createCatalogComponent(options: {
if (filters.search) {
for (const schema of Object.values(reducedSchemas)) {
const filteredItemsById = await client.query<CollectionsItemDBResult>(
getItemIdsBySearchTextQuery(schema, filters.search)
getItemIdsBySearchTextQuery(
schema,
filters.search,
filters.category
)
)
filters.ids = [
...(filters.ids ?? []),
Expand All @@ -61,9 +65,8 @@ export function createCatalogComponent(options: {
return { data: [], total: 0 }
}
}
const results = await client.query<CollectionsItemDBResult>(
getCatalogQuery(reducedSchemas, filters)
)
const query = getCatalogQuery(reducedSchemas, filters)
const results = await client.query<CollectionsItemDBResult>(query)
catalogItems = results.rows.map((res) =>
fromCollectionsItemDbResultToCatalogItem(res, network)
)
Expand Down
128 changes: 82 additions & 46 deletions src/ports/catalog/queries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ const WEARABLE_ITEM_TYPES = [
const MAX_ORDER_TIMESTAMP = 253378408747000 // some orders have a timestamp that can't be cast by Postgres, this is the max possible value

export function getOrderBy(filters: CatalogFilters) {
const { sortBy, sortDirection, isOnSale } = filters
const { sortBy, sortDirection, isOnSale, search } = filters
const sortByParam = sortBy ?? CatalogSortBy.NEWEST
const sortDirectionParam = sortDirection ?? CatalogSortDirection.DESC

Expand All @@ -30,6 +30,11 @@ export function getOrderBy(filters: CatalogFilters) {
return ''
}

if (search) {
// If the filters have a search term, we need to order by the position of the item in the search results that is pre-computed and passed in the ids filter.
return SQL`ORDER BY array_position(${filters.ids}::text[], id) `
}

let sortByQuery:
| SQLStatement
| string = `ORDER BY first_listed_at ${sortDirectionParam}\n`
Expand Down Expand Up @@ -121,7 +126,13 @@ export const getEmotePlayModeWhere = (filters: CatalogFilters) => {
}

/**
 * Builds the WHERE fragment that matches the search term against the
 * individual words of an item's name using the `%` (trigram similarity)
 * operator.
 *
 * The column aliases referenced here (`word`, `word_wearable`, `word_emote`)
 * are the ones exposed by the lateral `unnest(...)` joins of
 * `getWhereWordsJoin`, so this fragment is only valid in a query that also
 * includes those joins for the same `filters.category`.
 *
 * @param filters - Catalog filters; only `search` and `category` are read.
 * @returns A parameterized SQL fragment (without the `WHERE` keyword).
 */
export const getSearchWhere = (filters: CatalogFilters) => {
  const isSingleCategory =
    filters.category === NFTCategory.EMOTE ||
    filters.category === NFTCategory.WEARABLE

  if (isSingleCategory) {
    // Only one metadata table is unnested for a concrete category, under the alias `word`.
    return SQL`word % ${filters.search}`
  }

  // No (or another) category: both name columns are unnested, match either one.
  // Parenthesized so the OR keeps its meaning if the fragment is AND-ed with other conditions.
  return SQL`(word_wearable % ${filters.search} OR word_emote % ${filters.search})`
}

export const getIsSoldOutWhere = () => {
Expand Down Expand Up @@ -291,6 +302,61 @@ const getMaxPriceCase = (filters: CatalogQueryFilters) => {
`)
}

/**
 * Returns the lateral join(s) that split an item's name into one row per
 * space-separated word, so each word can be compared against the search term.
 *
 * - EMOTE: inner join over `metadata_emote.name`, exposed as `word`.
 * - WEARABLE: inner join over `metadata_wearable.name`, exposed as `word`.
 * - anything else: left joins over both names, exposed as `word_wearable`
 *   and `word_emote`.
 *
 * The `metadata_emote` / `metadata_wearable` relations must already be joined
 * earlier in the query this fragment is appended to.
 *
 * @param category - Category filter of the search, if any.
 * @returns SQL fragment with the word join(s).
 */
const getWhereWordsJoin = (category: CatalogQueryFilters['category']) => {
  switch (category) {
    case NFTCategory.EMOTE:
      return SQL`JOIN LATERAL unnest(string_to_array(metadata_emote.name, ' ')) AS word ON TRUE `
    case NFTCategory.WEARABLE:
      return SQL`JOIN LATERAL unnest(string_to_array(metadata_wearable.name, ' ')) AS word ON TRUE `
    default:
      // Continuation lines of the literal are kept flush-left so the emitted SQL text is unchanged.
      return SQL` LEFT JOIN LATERAL unnest(string_to_array(metadata_wearable.name, ' ')) AS word_wearable ON TRUE
LEFT JOIN LATERAL unnest(string_to_array(metadata_emote.name, ' ')) AS word_emote ON TRUE
`
  }
}

/**
 * Builds the two LEFT JOINs that attach wearable and emote metadata to the
 * `items` relation of the enclosing query:
 *
 * - `metadata_wearable`: `<schema>.wearable_active` joined with
 *   `<schema>.metadata_active`, matched only for items whose `item_type` is
 *   `wearable_v1`, `wearable_v2` or `smart_wearable_v1`.
 * - `metadata_emote`: `<schema>.emote_active` joined with
 *   `<schema>.metadata_active`, matched only for items whose `item_type` is
 *   `emote_v1`.
 *
 * The fragment expects a relation aliased `items` (with `metadata` and
 * `item_type` columns) to already be present in the query it is appended to.
 *
 * @param schemaVersion - Name of the schema that holds the `*_active` tables.
 * @returns SQL fragment containing both joins.
 */
const getMetadataJoins = (schemaVersion: string) => {
// NOTE(review): schemaVersion is appended as a plain string, i.e. as raw SQL text
// rather than a bound parameter — it presumably always comes from a trusted
// internal lookup; confirm against callers.
return SQL` LEFT JOIN (
SELECT
metadata.id,
wearable.description,
wearable.category,
wearable.body_shapes,
wearable.rarity,
wearable.name
FROM `
// schema-qualify the wearable table
.append(schemaVersion)
.append(
SQL`.wearable_active AS wearable
JOIN `
)
// schema-qualify the metadata table used for wearables
.append(schemaVersion)
.append(
`.metadata_active AS metadata ON metadata.wearable = wearable.id
) AS metadata_wearable ON metadata_wearable.id = items.metadata AND (items.item_type = 'wearable_v1' OR items.item_type = 'wearable_v2' OR items.item_type = 'smart_wearable_v1')
LEFT JOIN (
SELECT
metadata.id,
emote.description,
emote.category,
emote.body_shapes,
emote.rarity,
emote.name,
emote.loop,
emote.has_sound,
emote.has_geometry
FROM `
)
// schema-qualify the emote table
.append(schemaVersion)
.append(
`.emote_active AS emote
JOIN `
)
// schema-qualify the metadata table used for emotes
.append(schemaVersion)
.append(
`.metadata_active AS metadata ON metadata.emote = emote.id
) AS metadata_emote ON metadata_emote.id = items.metadata AND items.item_type = 'emote_v1' `
)
}

export const getCollectionsItemsCatalogQuery = (
schemaVersion: string,
filters: CatalogQueryFilters
Expand Down Expand Up @@ -377,48 +443,9 @@ export const getCollectionsItemsCatalogQuery = (
`
GROUP BY orders.item
) AS nfts_with_orders ON nfts_with_orders.item = items.id
LEFT JOIN (
SELECT
metadata.id,
wearable.description,
wearable.category,
wearable.body_shapes,
wearable.rarity,
wearable.name
FROM `
)
.append(schemaVersion)
.append(
`.wearable_active AS wearable
JOIN `
)
.append(schemaVersion)
.append(
`.metadata_active AS metadata ON metadata.wearable = wearable.id
) AS metadata_wearable ON metadata_wearable.id = items.metadata AND (items.item_type = 'wearable_v1' OR items.item_type = 'wearable_v2' OR items.item_type = 'smart_wearable_v1')
LEFT JOIN (
SELECT
metadata.id,
emote.description,
emote.category,
emote.body_shapes,
emote.rarity,
emote.name,
emote.loop,
emote.has_sound,
emote.has_geometry
FROM `
)
.append(schemaVersion)
.append(
`.emote_active AS emote
JOIN `
)
.append(schemaVersion)
.append(
`.metadata_active AS metadata ON metadata.emote = emote.id
) AS metadata_emote ON metadata_emote.id = items.metadata AND items.item_type = 'emote_v1' `
`
)
.append(getMetadataJoins(schemaVersion))
.append(getCollectionsQueryWhere(filters))

addQuerySort(query, filters)
Expand All @@ -428,13 +455,22 @@ export const getCollectionsItemsCatalogQuery = (

/**
 * Builds the pre-query that returns the ids of the items whose name contains a
 * word similar (trigram match) to the search term, most similar first.
 *
 * Query shape:
 *   SELECT items.id FROM <schema>.item_active AS items
 *   <metadata joins> <per-word lateral joins>
 *   WHERE <trigram condition> ORDER BY <similarity> DESC;
 *
 * Because names are unnested into one row per word, an item can appear more
 * than once when several of its words match the search term.
 *
 * @param schemaVersion - Schema that holds the `*_active` tables; appended as raw SQL text.
 * @param search - Text to search for; always passed as a bound parameter.
 * @param category - Optional category; decides which word alias(es) the query uses.
 * @returns The SQL statement to execute.
 */
export const getItemIdsBySearchTextQuery = (
  schemaVersion: string,
  search: CatalogQueryFilters['search'],
  category: CatalogQueryFilters['category']
) => {
  // The single `word` alias only exists for EMOTE / WEARABLE (see getWhereWordsJoin and
  // getSearchWhere). Any other category falls back to `word_wearable` / `word_emote`,
  // so the ORDER BY must follow the exact same rule to avoid referencing a missing alias.
  const usesSingleWordAlias =
    category === NFTCategory.EMOTE || category === NFTCategory.WEARABLE

  const orderBy = usesSingleWordAlias
    ? SQL` ORDER BY similarity(word, ${search}) DESC;`
    : SQL` ORDER BY GREATEST(similarity(word_wearable, ${search}), similarity(word_emote, ${search})) DESC;`

  const query = SQL`SELECT items.id`
    .append(` FROM `)
    .append(schemaVersion)
    .append(`.item_active AS items `)
    .append(getMetadataJoins(schemaVersion))
    .append(getWhereWordsJoin(category))
    .append(`WHERE `)
    .append(getSearchWhere({ search, category }))
    .append(orderBy)

  return query
}
35 changes: 35 additions & 0 deletions src/ports/nfts/component.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,24 @@
import nodeFetch from 'node-fetch'
import { NFTCategory, NFTFilters, NFTSortBy } from '@dcl/schemas'
import { IPgComponent } from '@well-known-components/pg-component'
import { ISubgraphComponent } from '@well-known-components/thegraph-component'
import { INFTsComponent, NFTResult } from './types'
import {
getByTokenIdQuery,
getFetchOneQuery,
getFetchQuery,
getFuzzySearchQueryForENS,
getQueryVariables,
} from './utils'
import { getMarketplaceChainId } from '../../logic/chainIds'
import {
getLatestSubgraphSchema,
getMarketplaceSubgraphNameChain,
} from '../../subgraphUtils'

export function createNFTComponent<T extends { id: string }>(options: {
subgraph: ISubgraphComponent
db?: IPgComponent
listsServer?: string
fragmentName: string
getFragment: () => string
Expand All @@ -22,6 +30,7 @@ export function createNFTComponent<T extends { id: string }>(options: {
}): INFTsComponent {
const {
subgraph,
db,
fragmentName,
getFragment,
getSortByProp,
Expand Down Expand Up @@ -89,6 +98,32 @@ export function createNFTComponent<T extends { id: string }>(options: {
)
}

// In order to support fuzzy search for ENS names, we're going to first fetch the ids matching the search text in the db using trigram matching and then pass those ids down to the graphql query
if (options.category === NFTCategory.ENS && options.search && db) {
try {
const client = await db.getPool().connect()
const schemaName = await client.query<{
entity_schema: string
}>(
getLatestSubgraphSchema(
getMarketplaceSubgraphNameChain(getMarketplaceChainId())
)
)
const ids = await client.query<{ id: string }>(
getFuzzySearchQueryForENS(
schemaName.rows[0].entity_schema,
options.search
)
)
// if there are no ids matching the search text, return empty result
if (!ids.rows.length) {
return []
}
options.ids = ids.rows.map(({ id }) => id) // adds the ids to the main `ids` filter
options.search = undefined // cleans the search text since it's already filtered
} catch (error) {}
}

const fetchFragments = getFragmentFetcher(options)
const fragments = await fetchFragments()
const nfts = fragments.map((fragment) =>
Expand Down
8 changes: 7 additions & 1 deletion src/ports/nfts/utils.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import SQL from 'sql-template-strings'
import { EmotePlayMode, NFTCategory, NFTFilters, NFTSortBy } from '@dcl/schemas'
import { getGenderFilterQuery } from '../utils'
import { QueryVariables } from './types'
Expand Down Expand Up @@ -201,7 +202,6 @@ export function getFetchQuery(
let wrapWhere = false

if (bannedNames.length) {
console.log('bannedNames inside getFetchQuery: ', bannedNames);
where.push(
`name_not_in: [${bannedNames.map((name) => `"${name}"`).join(', ')}]`
)
Expand Down Expand Up @@ -355,3 +355,9 @@ export function getByTokenIdQuery(
export function getId(contractAddress: string, tokenId: string) {
return `${contractAddress}-${tokenId}`
}

/**
 * Builds the query that returns the ids of the rows in `<schema>.ens_active`
 * whose `subdomain` is a trigram match (`%` operator) for the search term.
 *
 * @param schema - Schema that holds `ens_active`; appended as raw SQL text.
 * @param searchTerm - Text to match; passed as a bound parameter.
 * @returns The SQL statement to execute.
 */
export function getFuzzySearchQueryForENS(schema: string, searchTerm: string) {
  const statement = SQL`SELECT id from `
  const tableAndCondition = SQL`.ens_active WHERE subdomain % ${searchTerm}`

  statement.append(schema)
  return statement.append(tableAndCondition)
}
17 changes: 15 additions & 2 deletions src/subgraphUtils.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import SQL from 'sql-template-strings'
import { ChainId, Network } from '@dcl/schemas'


export const getLatestSubgraphSchema = (subgraphName: string) =>
SQL`
SELECT
Expand All @@ -23,4 +22,18 @@ export const getSubgraphNameForNetwork = (
: `collections-matic-${
chainId === ChainId.MATIC_MAINNET ? 'mainnet' : 'mumbai'
}`
}
}

/**
 * Maps an Ethereum chain id to the name of its marketplace subgraph.
 *
 * @param chainId - Chain to resolve.
 * @returns `'marketplace-goerli'` for Goerli, `'marketplace-sepolia'` for
 * Sepolia, and `'marketplace'` for mainnet or any other chain.
 */
export const getMarketplaceSubgraphNameChain = (chainId: ChainId) => {
  if (chainId === ChainId.ETHEREUM_GOERLI) {
    return 'marketplace-goerli'
  }

  if (chainId === ChainId.ETHEREUM_SEPOLIA) {
    return 'marketplace-sepolia'
  }

  // Mainnet and every unrecognized chain share the default subgraph name.
  return 'marketplace'
}
Loading

0 comments on commit 0df60a2

Please sign in to comment.