Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cycle 2022.2 search improvements: explorer views & entity matching #3497

Merged
merged 5 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .env.example-full
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ IMAGE_HOSTING_R2_SECRET_ACCESS_KEY='' # optional

OPENAI_API_KEY=''

GRAPHER_DYNAMIC_THUMBNAIL_URL='' # optional; can set this to https://ourworldindata.org/grapher/thumbnail to use the live thumbnail worker

# enable search (readonly)
ALGOLIA_ID='' # optional
ALGOLIA_SEARCH_KEY='' # optional
Expand Down
1 change: 0 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,6 @@ reindex: itsJustJavascript
node --enable-source-maps itsJustJavascript/baker/algolia/configureAlgolia.js
node --enable-source-maps itsJustJavascript/baker/algolia/indexToAlgolia.js
node --enable-source-maps itsJustJavascript/baker/algolia/indexChartsToAlgolia.js
node --enable-source-maps itsJustJavascript/baker/algolia/indexExplorersToAlgolia.js
node --enable-source-maps itsJustJavascript/baker/algolia/indexExplorerViewsToAlgolia.js

clean:
Expand Down
50 changes: 22 additions & 28 deletions baker/algolia/configureAlgolia.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import {
ALGOLIA_INDEXING,
ALGOLIA_SECRET_KEY,
} from "../../settings/serverSettings.js"
import { countries } from "@ourworldindata/utils"
import { countries, regions, excludeUndefined } from "@ourworldindata/utils"
import { SearchIndexName } from "../../site/search/searchTypes.js"
import { getIndexName } from "../../site/search/searchClient.js"

Expand All @@ -25,6 +25,11 @@ export const getAlgoliaClient = (): SearchClient | undefined => {
return client
}

// Flat list containing, for every entry in `regions`, its name followed by
// its variant names. The `in` check guards against region entries that do
// not carry a `variantNames` property; those contribute only their name.
const allCountryNamesAndVariants = regions.flatMap((region) => {
    const variantNames =
        ("variantNames" in region && region.variantNames) || []
    return [region.name, ...variantNames]
})

// This function initializes and applies settings to the Algolia search indices.
// Algolia settings should be configured here rather than in the Algolia dashboard UI, so that
// they are recorded and transferable across dev/prod instances.
Expand Down Expand Up @@ -125,24 +130,6 @@ export const configureAlgolia = async () => {
disablePrefixOnAttributes: ["content"],
})

const explorersIndex = client.initIndex(
getIndexName(SearchIndexName.Explorers)
)

await explorersIndex.setSettings({
...baseSettings,
searchableAttributes: [
"unordered(slug)",
"unordered(title)",
"unordered(subtitle)",
"unordered(text)",
],
customRanking: ["desc(views_7d)"],
attributeForDistinct: "slug",
attributesForFaceting: [],
disableTypoToleranceOnAttributes: ["text"],
})

const explorerViewsIndex = client.initIndex(
getIndexName(SearchIndexName.ExplorerViews)
)
Expand All @@ -164,6 +151,7 @@ export const configureAlgolia = async () => {
attributeForDistinct: "explorerSlug",
distinct: 4,
minWordSizefor1Typo: 6,
optionalWords: allCountryNamesAndVariants,
})

const synonyms = [
Expand Down Expand Up @@ -308,12 +296,6 @@ export const configureAlgolia = async () => {
["solar", "photovoltaic", "photovoltaics", "pv"],
]

// Send all our country variant names to algolia as synonyms
for (const country of countries) {
if (country.variantNames)
synonyms.push([country.name].concat(country.variantNames))
}

const algoliaSynonyms = synonyms.map((s) => {
return {
objectID: s.join("-"),
Expand All @@ -322,15 +304,27 @@ export const configureAlgolia = async () => {
} as Synonym
})

// Send all our country variant names to algolia as one-way synonyms
for (const country of countries) {
const alternatives = excludeUndefined([
country.shortName,
...(country.variantNames ?? []),
])
for (const alternative of alternatives)
algoliaSynonyms.push({
objectID: `${alternative}->${country.name}`,
type: "oneWaySynonym",
input: alternative,
synonyms: [country.name],
})
}

await pagesIndex.saveSynonyms(algoliaSynonyms, {
replaceExistingSynonyms: true,
})
await chartsIndex.saveSynonyms(algoliaSynonyms, {
replaceExistingSynonyms: true,
})
await explorersIndex.saveSynonyms(algoliaSynonyms, {
replaceExistingSynonyms: true,
})
await explorerViewsIndex.saveSynonyms(algoliaSynonyms, {
replaceExistingSynonyms: true,
})
Expand Down
35 changes: 34 additions & 1 deletion baker/algolia/indexChartsToAlgolia.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ import {
OwidGdocLinkType,
excludeNullish,
isNil,
countries,
orderBy,
removeTrailingParenthetical,
} from "@ourworldindata/utils"
import { MarkdownTextWrap } from "@ourworldindata/components"
import { getAnalyticsPageviewsByUrlObj } from "../../db/model/Pageview.js"
Expand All @@ -20,6 +23,35 @@ const computeScore = (record: Omit<ChartRecord, "score">): number => {
return numRelatedArticles * 500 + views_7d
}

// Names of all countries that have at least one alternative spelling,
// i.e. a non-empty `variantNames` list or a `shortName`.
const countriesWithVariantNames = new Set(
    countries.flatMap((country) => {
        const hasAlternativeName =
            Boolean(country.variantNames?.length) ||
            Boolean(country.shortName)
        return hasAlternativeName ? [country.name] : []
    })
)

/**
 * Orders a chart's entity names for indexing.
 *
 * Entities whose name (with any trailing parenthetical removed) is a country
 * with variant names come first; within each group, names are sorted
 * ascending. A missing list yields an empty array.
 *
 * Why: per the original note by @marcelgerber (2024-03-25), Algolia appears
 * to apply a synonym such as "USA" -> "United States" to `availableEntities`
 * only when the matching entry is within roughly the first 100 array items.
 * Putting countries with variant names at the top keeps them inside that
 * window.
 */
const processAvailableEntities = (availableEntities: string[] | null) => {
    if (!availableEntities) {
        return []
    }

    // Primary sort key: does this entity correspond to a country that has variant names?
    const hasVariantNames = (entityName: string) =>
        countriesWithVariantNames.has(removeTrailingParenthetical(entityName))

    // Secondary sort key: the entity name itself.
    const byName = (entityName: string) => entityName

    return orderBy(
        availableEntities,
        [hasVariantNames, byName],
        ["desc", "asc"]
    )
}

const getChartsRecords = async (
knex: db.KnexReadonlyTransaction
): Promise<ChartRecord[]> => {
Expand Down Expand Up @@ -81,14 +113,15 @@ const getChartsRecords = async (
if (c.entityNames.length < 12000)
c.entityNames = excludeNullish(
JSON.parse(c.entityNames as string) as (string | null)[]
)
) as string[]
else {
console.info(
`Chart ${c.id} has too many entities, skipping its entities`
)
c.entityNames = []
}
}
c.entityNames = processAvailableEntities(c.entityNames)

c.tags = JSON.parse(c.tags)
c.keyChartForTags = JSON.parse(c.keyChartForTags as string).filter(
Expand Down
10 changes: 9 additions & 1 deletion baker/algolia/indexExplorerViewsToAlgolia.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import * as db from "../../db/db.js"
import { ExplorerBlockGraphers } from "./indexExplorersToAlgolia.js"
import { DecisionMatrix } from "../../explorer/ExplorerDecisionMatrix.js"
import { tsvFormat } from "d3-dsv"
import {
Expand All @@ -15,6 +14,15 @@ import { SearchIndexName } from "../../site/search/searchTypes.js"
import { groupBy, keyBy, orderBy } from "lodash"
import { MarkdownTextWrap } from "@ourworldindata/components"

/**
 * A block tagged with the literal type "graphers".
 *
 * `block` is a list of entries; every field on an entry is optional.
 * NOTE(review): presumably each entry is one row of an explorer's graphers
 * table and `grapherId` refers to a chart id — confirm against the parser.
 */
export type ExplorerBlockGraphers = {
    type: "graphers"
    block: Array<{
        title?: string
        subtitle?: string
        grapherId?: number
    }>
}

interface ExplorerViewEntry {
viewTitle: string
viewSubtitle: string
Expand Down
Loading
Loading