Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(search): index explorer views to Algolia #3369

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ reindex: itsJustJavascript
node --enable-source-maps itsJustJavascript/baker/algolia/indexToAlgolia.js
node --enable-source-maps itsJustJavascript/baker/algolia/indexChartsToAlgolia.js
node --enable-source-maps itsJustJavascript/baker/algolia/indexExplorersToAlgolia.js
node --enable-source-maps itsJustJavascript/baker/algolia/indexExplorerViewsToAlgolia.js

clean:
rm -rf node_modules itsJustJavascript
16 changes: 16 additions & 0 deletions baker/algolia/configureAlgolia.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,22 @@ export const configureAlgolia = async () => {
disableTypoToleranceOnAttributes: ["text"],
})

const explorerViewsIndex = client.initIndex(
getIndexName(SearchIndexName.ExplorerViews)
)

await explorerViewsIndex.setSettings({
...baseSettings,
searchableAttributes: [
"unordered(viewTitle)",
"unordered(viewSettings)",
],
customRanking: ["desc(score)", "asc(viewIndexWithinExplorer)"],
attributeForDistinct: "viewTitleAndExplorerSlug",
distinct: true,
minWordSizefor1Typo: 6,
})

const synonyms = [
["kids", "children"],
["pork", "pigmeat"],
Expand Down
238 changes: 238 additions & 0 deletions baker/algolia/indexExplorerViewsToAlgolia.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
import * as db from "../../db/db.js"
import { ExplorerBlockGraphers } from "./indexExplorersToAlgolia.js"
import { DecisionMatrix } from "../../explorer/ExplorerDecisionMatrix.js"
import { tsvFormat } from "d3-dsv"
import {
ExplorerChoiceParams,
ExplorerControlType,
} from "../../explorer/ExplorerConstants.js"
import { GridBoolean } from "../../gridLang/GridLangConstants.js"
import { getAnalyticsPageviewsByUrlObj } from "../../db/model/Pageview.js"
import { ALGOLIA_INDEXING } from "../../settings/serverSettings.js"
import { getAlgoliaClient } from "./configureAlgolia.js"
import { getIndexName } from "../../site/search/searchClient.js"
import { SearchIndexName } from "../../site/search/searchTypes.js"
import { keyBy } from "lodash"

/**
 * Search record describing a single view of an explorer, i.e. one entry of the
 * decision matrix's `allDecisionsAsQueryParams()`.
 */
interface ExplorerViewEntry {
// Title and subtitle of the selected decision matrix row; for views that have a
// `viewGrapherId`, these are overwritten with the title/subtitle of the matching chart.
viewTitle: string
viewSubtitle: string
// One search string per choice, as produced by `explorerChoiceToViewSettings`
viewSettings: string[]
// Result of the decision matrix's `toString()` for this view
viewQueryParams: string

// `grapherId` of the selected decision matrix row, if it has one
viewGrapherId?: number

// Potential ranking criteria
// Position of this view within `allDecisionsAsQueryParams()`
viewIndexWithinExplorer: number
// Length of the view's title
titleLength: number
// Number of choices (with more than one available option) whose value is not the default
numNonDefaultSettings: number
// viewViews_7d: number
}

/**
 * An `ExplorerViewEntry` enriched with information about the explorer it belongs to.
 * This is the shape of the records that are sent to the Algolia index.
 */
interface ExplorerViewEntryWithExplorerInfo extends ExplorerViewEntry {
// Slug and title of the published explorer this view belongs to
explorerSlug: string
explorerTitle: string
// 7-day pageviews recorded for `/explorers/<explorerSlug>`; 0 when there is no pageview entry
explorerViews_7d: number
viewTitleAndExplorerSlug: string // used for deduplication: `viewTitle | explorerSlug`

// Ranking score: explorer pageviews * 10 - numNonDefaultSettings * 50 - titleLength
score: number

// Set to `<explorerSlug>-<index of the view within its explorer>`
objectID?: string
}

/**
 * Converts the choices of one explorer view into strings suitable for search.
 *
 * Checkboxes are the only control type that needs special treatment: a ticked
 * checkbox contributes its choice *name*, an unticked one contributes an empty
 * string. Think of a "Per capita" checkbox: when it is not ticked, a search for
 * "per capita" must not match the view. All other control types contribute the
 * chosen value as-is.
 *
 * @param choices - choice name to chosen value, for a single view
 * @param decisionMatrix - used to look up the control type of each choice
 * @returns one string per entry of `choices`, in the same order
 */
const explorerChoiceToViewSettings = (
    choices: ExplorerChoiceParams,
    decisionMatrix: DecisionMatrix
): string[] => {
    const settings: string[] = []
    for (const [name, value] of Object.entries(choices)) {
        const isCheckbox =
            decisionMatrix.choiceNameToControlTypeMap.get(name) ===
            ExplorerControlType.Checkbox

        if (!isCheckbox) {
            settings.push(value)
            continue
        }

        const isTicked = value === GridBoolean.true
        settings.push(isTicked ? name : "")
    }
    return settings
}

/**
 * Builds one search record for every view of the explorer with the given slug.
 *
 * A "view" is one entry of the decision matrix's `allDecisionsAsQueryParams()`.
 * Views whose selected row has a `grapherId` get their title and subtitle from the
 * `charts` table; only published charts are considered for that lookup.
 *
 * @param trx - read-only transaction used for all queries
 * @param slug - slug of the explorer to process
 * @returns the records, in the order the decision matrix yields its views
 * @throws Error if no explorer with this slug exists, or if its config has no `graphers` block
 */
const getExplorerViewRecordsForExplorerSlug = async (
    trx: db.KnexReadonlyTransaction,
    slug: string
): Promise<ExplorerViewEntry[]> => {
    const explorerRow = await trx
        .table("explorers")
        .select("config")
        .where({ slug })
        .first()

    // `.first()` yields `undefined` when nothing matches. Fail with a clear message
    // rather than with a TypeError from reading `config` off `undefined`.
    if (explorerRow === undefined) throw new Error(`Explorer ${slug} not found`)

    const explorerConfig = JSON.parse(explorerRow.config) as any

    const explorerGrapherBlock: ExplorerBlockGraphers | undefined = (
        explorerConfig.blocks ?? []
    ).find((block: any) => block.type === "graphers")

    if (explorerGrapherBlock === undefined)
        throw new Error(`Explorer ${slug} has no grapher block`)

    // TODO: Maybe make DecisionMatrix accept JSON directly
    const tsv = tsvFormat(explorerGrapherBlock.block)
    const explorerDecisionMatrix = new DecisionMatrix(tsv)

    console.log(
        `Processing explorer ${slug} (${explorerDecisionMatrix.numRows} rows)`
    )

    const defaultSettings = explorerDecisionMatrix.defaultSettings

    const records = explorerDecisionMatrix
        .allDecisionsAsQueryParams()
        .map((choice, i) => {
            // Selecting the choice updates `selectedRow` and `availableChoiceOptions`,
            // which are read below.
            explorerDecisionMatrix.setValuesFromChoiceParams(choice)

            // Keep only choices which are not the default, which is:
            // - either the option marked as `default` in the decision matrix
            // - or the first available option in the decision matrix
            // Choices with a single available option can never be "non-default".
            const nonDefaultSettings = Object.entries(
                explorerDecisionMatrix.availableChoiceOptions
            ).filter(([choiceName, choiceOptions]) => {
                if (choiceOptions.length <= 1) return false
                const defaultValue =
                    defaultSettings[choiceName] !== undefined
                        ? defaultSettings[choiceName]
                        : choiceOptions[0]
                return choice[choiceName] !== defaultValue
            })

            const record: ExplorerViewEntry = {
                viewTitle: explorerDecisionMatrix.selectedRow.title,
                viewSubtitle: explorerDecisionMatrix.selectedRow.subtitle,
                viewSettings: explorerChoiceToViewSettings(
                    choice,
                    explorerDecisionMatrix
                ),
                viewGrapherId: explorerDecisionMatrix.selectedRow.grapherId,
                viewQueryParams: explorerDecisionMatrix.toString(),

                viewIndexWithinExplorer: i,
                // Rows without a title (e.g. grapher-backed rows before enrichment) count
                // as length 0, so that scores computed from this stay numeric.
                titleLength:
                    explorerDecisionMatrix.selectedRow.title?.length ?? 0,
                numNonDefaultSettings: nonDefaultSettings.length,
            }
            return record
        })

    // Enrich `grapherId`-powered views with title/subtitle
    const grapherIds = records.flatMap((record) =>
        record.viewGrapherId !== undefined ? [record.viewGrapherId] : []
    )

    if (grapherIds.length) {
        console.log(
            `Fetching grapher info from ${grapherIds.length} graphers for explorer ${slug}`
        )
        const grapherRows = await trx
            .table("charts")
            .select(
                "id",
                trx.raw("config->>'$.title' as title"),
                trx.raw("config->>'$.subtitle' as subtitle")
            )
            .whereIn("id", grapherIds)
            .andWhereRaw("config->>'$.isPublished' = 'true'")
        const grapherIdToTitle = keyBy(grapherRows, "id")

        for (const record of records) {
            if (record.viewGrapherId === undefined) continue

            const grapherInfo = grapherIdToTitle[record.viewGrapherId]
            if (grapherInfo === undefined) {
                // Also hit for charts that exist but are unpublished, because of the
                // `isPublished` filter in the query above.
                console.warn(
                    `Grapher id ${record.viewGrapherId} not found for explorer ${slug}`
                )
                continue
            }
            record.viewTitle = grapherInfo.title
            record.viewSubtitle = grapherInfo.subtitle
            record.titleLength = grapherInfo.title?.length ?? 0
        }
    }

    // TODO: Handle indicator-based explorers

    return records
}

/**
 * Collects the search records for the views of all published explorers.
 *
 * Explorers are processed one after another on the given transaction. Every view
 * record is extended with information about its explorer, a ranking score, and an
 * `objectID` of the form `<explorerSlug>-<index of the view within its explorer>`.
 *
 * @param trx - read-only transaction used for all queries
 * @returns the records of all explorers, concatenated in processing order
 */
const getExplorerViewRecords = async (
    trx: db.KnexReadonlyTransaction
): Promise<ExplorerViewEntryWithExplorerInfo[]> => {
    const explorersBySlug = await db.getPublishedExplorersBySlug(trx)
    const publishedExplorers = Object.values(explorersBySlug)

    const pageviewsByUrl = await getAnalyticsPageviewsByUrlObj(trx)

    const allRecords: ExplorerViewEntryWithExplorerInfo[] = []

    for (const explorer of publishedExplorers) {
        const viewRecords = await getExplorerViewRecordsForExplorerSlug(
            trx,
            explorer.slug
        )

        // Explorers without a pageview entry count as having zero views
        const views7d =
            pageviewsByUrl[`/explorers/${explorer.slug}`]?.views_7d ?? 0

        viewRecords.forEach((viewRecord, viewIndex) => {
            // Scoring function: popular explorers rank higher; views far away from the
            // default settings and views with long titles rank lower.
            const score =
                views7d * 10 -
                viewRecord.numNonDefaultSettings * 50 -
                viewRecord.titleLength

            const enriched: ExplorerViewEntryWithExplorerInfo = {
                ...viewRecord,
                explorerSlug: explorer.slug,
                explorerTitle: explorer.title,
                explorerViews_7d: views7d,
                viewTitleAndExplorerSlug: `${viewRecord.viewTitle} | ${explorer.slug}`,
                score,

                objectID: `${explorer.slug}-${viewIndex}`,
            }
            allRecords.push(enriched)
        })
    }

    return allRecords
}

/**
 * Entry point of this script: regenerates the records for all explorer views and
 * replaces the contents of the explorer views Algolia index with them.
 *
 * Returns without doing anything when `ALGOLIA_INDEXING` is falsy, and logs an error
 * and returns when no Algolia client is available. Errors raised while generating or
 * uploading the records are logged, not rethrown.
 */
const indexExplorerViewsToAlgolia = async () => {
    if (!ALGOLIA_INDEXING) return

    const client = getAlgoliaClient()
    if (!client) {
        console.error(
            `Failed indexing explorer views (Algolia client not initialized)`
        )
        return
    }

    try {
        const index = client.initIndex(
            getIndexName(SearchIndexName.ExplorerViews)
        )

        const records = await db.knexReadonlyTransaction(
            getExplorerViewRecords,
            db.TransactionCloseMode.Close
        )
        await index.replaceAllObjects(records)
    } catch (e) {
        // Report failures on stderr, like the missing-client failure above, so that
        // they are not lost among the regular progress output on stdout.
        console.error("Error indexing explorer views to Algolia:", e)
    }
}

// A promise rejection that nobody handled is treated as fatal for this script:
// print the reason and terminate with a non-zero exit code.
const exitOnUnhandledRejection = (reason: unknown): void => {
    console.error(reason)
    process.exit(1)
}

process.on("unhandledRejection", exitOnUnhandledRejection)

// Kick off indexing; `void` marks the promise as intentionally not awaited.
void indexExplorerViewsToAlgolia()
2 changes: 1 addition & 1 deletion baker/algolia/indexExplorersToAlgolia.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ type ExplorerBlockColumns = {
block: { name: string; additionalInfo?: string }[]
}

type ExplorerBlockGraphers = {
export type ExplorerBlockGraphers = {
type: "graphers"
block: {
title?: string
Expand Down
12 changes: 6 additions & 6 deletions explorer/ExplorerDecisionMatrix.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ export class DecisionMatrix {
table: CoreTable
@observable currentParams: ExplorerChoiceParams = {}
constructor(delimited: string, hash = "") {
this.choices = makeChoicesMap(delimited)
this.choiceNameToControlTypeMap = makeChoicesMap(delimited)
this.table = new CoreTable(parseDelimited(dropColumnTypes(delimited)), [
// todo: remove col def?
{
Expand Down Expand Up @@ -141,7 +141,7 @@ export class DecisionMatrix {
)
}

private choices: Map<ChoiceName, ExplorerControlType>
choiceNameToControlTypeMap: Map<ChoiceName, ExplorerControlType>
hash: string

toConstrainedOptions(): ExplorerChoiceParams {
Expand Down Expand Up @@ -243,7 +243,7 @@ export class DecisionMatrix {
}

@computed private get choiceNames(): ChoiceName[] {
return Array.from(this.choices.keys())
return Array.from(this.choiceNameToControlTypeMap.keys())
}

@computed private get allChoiceOptions(): ChoiceMap {
Expand All @@ -256,7 +256,7 @@ export class DecisionMatrix {
return choiceMap
}

@computed private get availableChoiceOptions(): ChoiceMap {
@computed get availableChoiceOptions(): ChoiceMap {
const result: ChoiceMap = {}
this.choiceNames.forEach((choiceName) => {
result[choiceName] = this.allChoiceOptions[choiceName].filter(
Expand Down Expand Up @@ -317,7 +317,7 @@ export class DecisionMatrix {
}

// The first row with defaultView column value of "true" determines the default view to use
private get defaultSettings() {
get defaultSettings() {
const hits = this.rowsWith({
[GrapherGrammar.defaultView.keyword]: "true",
})
Expand Down Expand Up @@ -373,7 +373,7 @@ export class DecisionMatrix {
constrainedOptions
)
)
const type = this.choices.get(title)!
const type = this.choiceNameToControlTypeMap.get(title)!

return {
title,
Expand Down
36 changes: 29 additions & 7 deletions site/search/Autocomplete.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,12 @@ const getItemUrl: AutocompleteSource<BaseItem>["getItemUrl"] = ({ item }) =>
/**
 * Builds the URL of an autocomplete hit by prefixing it with the subdirectory that
 * belongs to the index the hit came from.
 *
 * Explorer view hits have no `slug` of their own: they link to their explorer
 * (`explorerSlug`) followed by the query params that select the view
 * (`viewQueryParams`). Hits from every other index link to `<subdirectory>/<slug>`.
 */
const prependSubdirectoryToAlgoliaItemUrl = (item: BaseItem): string => {
    const indexName = parseIndexName(item.__autocomplete_indexName as string)
    const subdirectory = indexNameToSubdirectoryMap[indexName]
    // No unconditional `return` before this switch: it would make the explorer view
    // branch unreachable.
    switch (indexName) {
        case SearchIndexName.ExplorerViews:
            return `${subdirectory}/${item.explorerSlug}${item.viewQueryParams}`
        default:
            return `${subdirectory}/${item.slug}`
    }
}

const FeaturedSearchesSource: AutocompleteSource<BaseItem> = {
Expand Down Expand Up @@ -133,6 +138,14 @@ const AlgoliaSource: AutocompleteSource<BaseItem> = {
distinct: true,
},
},
{
indexName: getIndexName(SearchIndexName.ExplorerViews),
query,
params: {
hitsPerPage: 1,
distinct: true,
},
},
{
indexName: getIndexName(SearchIndexName.Explorers),
query,
Expand All @@ -152,11 +165,20 @@ const AlgoliaSource: AutocompleteSource<BaseItem> = {
item.__autocomplete_indexName as string
)
const indexLabel =
index === SearchIndexName.Charts
? "Chart"
: index === SearchIndexName.Explorers
? "Explorer"
: pageTypeDisplayNames[item.type as PageType]
index === SearchIndexName.Charts ? (
"Chart"
) : index === SearchIndexName.Explorers ? (
"Explorer"
) : index === SearchIndexName.ExplorerViews ? (
<>
in <em>{item.explorerTitle} Data Explorer</em>
</>
) : (
pageTypeDisplayNames[item.type as PageType]
)

const mainAttribute =
index === SearchIndexName.ExplorerViews ? "viewTitle" : "title"

return (
<div
Expand All @@ -167,7 +189,7 @@ const AlgoliaSource: AutocompleteSource<BaseItem> = {
<span>
<components.Highlight
hit={item}
attribute="title"
attribute={mainAttribute}
tagName="strong"
/>
</span>
Expand Down
Loading