-
-
Notifications
You must be signed in to change notification settings - Fork 229
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3418 from owid/available-entities-accurate
feat: index chart entities into `charts_x_entities` table
- Loading branch information
Showing
6 changed files
with
331 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,240 @@ | ||
/** | ||
* Updates the charts_x_entities table with the available entities for all published charts. | ||
* This is useful in search, where we want to be able to filter charts by entities that can be selected. | ||
* To do this, we need to instantiate a grapher, download its data, and then look at the available entities. | ||
*/ | ||
|
||
import { Grapher } from "@ourworldindata/grapher" | ||
import { | ||
ChartsTableName, | ||
ChartsXEntitiesTableName, | ||
DbRawChart, | ||
GrapherInterface, | ||
GrapherTabOption, | ||
MultipleOwidVariableDataDimensionsMap, | ||
OwidVariableDataMetadataDimensions, | ||
} from "@ourworldindata/types" | ||
import * as db from "../db/db.js" | ||
import pMap from "p-map" | ||
import { mapEntityNamesToEntityIds } from "../db/model/Entity.js" | ||
import { getVariableData } from "../db/model/Variable.js" | ||
import { uniq } from "@ourworldindata/utils" | ||
import yargs from "yargs" | ||
import { hideBin } from "yargs/helpers" | ||
|
||
// Maximum number of concurrent tasks handed to pMap, both when pre-fetching
// variables and when processing published charts.
const FETCH_CONCURRENCY = 10 | ||
// Number of most-used variables that are loaded into the in-memory cache up front.
const VARIABLES_TO_PREFETCH = 300 | ||
|
||
// Cache of pre-fetched variable data keyed by variable id.
// Stays undefined until preFetchCommonVariables has populated it.
let _commonVariablesMap: | ||
| Map<number, OwidVariableDataMetadataDimensions> | ||
| undefined = undefined | ||
|
||
// Counts how many variable loads were served from the cache vs. fetched individually;
// reported in the "Fetch stats" log output at the end of a run.
const _fetchVariablesCounters = { cached: 0, fetched: 0 } | ||
|
||
/**
 * Poor-man's cache warm-up for variable data.
 *
 * It is unrealistic to cache all variables in memory - at the time of writing,
 * there are about 8000 distinct variables. Instead, the VARIABLES_TO_PREFETCH
 * variables that occur most often in the dimensions of published charts are
 * fetched once and kept in `_commonVariablesMap`.
 * These include very common variables: Continents, Population, GDP per capita, etc.
 *
 * @param trx Read-only transaction used to rank variables by how often they are used.
 */
const preFetchCommonVariables = async (
    trx: db.KnexReadonlyTransaction
): Promise<void> => {
    // `?` is Knex's placeholder for a *value* binding. `??` is meant for identifiers
    // (table/column names) and must not be used for the numeric LIMIT.
    const commonVariables = (await db.knexRaw(
        trx,
        `-- sql
        SELECT variableId, COUNT(variableId) AS useCount
        FROM chart_dimensions cd
        JOIN charts c ON cd.chartId = c.id
        WHERE config ->> "$.isPublished" = "true"
        GROUP BY variableId
        ORDER BY COUNT(variableId) DESC
        LIMIT ?`,
        [VARIABLES_TO_PREFETCH]
    )) as { variableId: number; useCount: number }[]

    // Download the selected variables with bounded concurrency and index them by id
    _commonVariablesMap = new Map(
        await pMap(
            commonVariables,
            async ({ variableId, useCount }) => {
                const variableData = await getVariableData(variableId)
                console.log(
                    `Pre-fetched variable ${variableId}: ${variableData.metadata.name} (${useCount} uses)`
                )
                return [variableId, variableData]
            },
            { concurrency: FETCH_CONCURRENCY }
        )
    )
}
|
||
/**
 * Returns the data for a variable, preferring the pre-fetched in-memory copy.
 * Every call is tallied in `_fetchVariablesCounters` as either a cache hit or a fetch.
 *
 * @param variableId Id of the variable to load.
 * @returns The variable's data, taken from the cache or freshly fetched.
 */
const getVariableDataUsingCache = async (
    variableId: number
): Promise<OwidVariableDataMetadataDimensions> => {
    // Cache miss (or cache not populated yet): fall back to a regular fetch
    if (!_commonVariablesMap?.has(variableId)) {
        _fetchVariablesCounters.fetched++
        return getVariableData(variableId)
    }

    _fetchVariablesCounters.cached++
    return _commonVariablesMap.get(variableId)!
}
|
||
/**
 * Instantiates a grapher for the given config, feeds it its variable data and
 * reports the entity names that should be indexed for that chart.
 *
 * @param grapherConfig Config of the chart to inspect.
 * @returns Entity names for the chart tab if there is one, otherwise for the map
 *          tab, otherwise an empty array.
 */
const obtainAvailableEntitiesForGrapherConfig = async (
    grapherConfig: GrapherInterface
) => {
    const grapher = new Grapher({ ...grapherConfig, manuallyProvideData: true })

    // Supply the data ourselves so that loads can go through the variable cache
    const distinctVariableIds = uniq(
        grapher.dimensions.map((dimension) => dimension.variableId)
    )
    const variableData: MultipleOwidVariableDataDimensionsMap = new Map(
        await pMap(distinctVariableIds, async (id) => [
            id,
            await getVariableDataUsingCache(id),
        ])
    )
    grapher.receiveOwidData(variableData)

    // A chart tab takes precedence: its entities are the "most interesting" ones to us
    if (grapher.hasChartTab) {
        grapher.tab = GrapherTabOption.chart

        // Can the user switch to, or additionally select, other entities?
        const entitiesAreSwitchable =
            grapher.canChangeEntity || grapher.canSelectMultipleEntities

        // These chart types show entities even when they are not selected
        const unselectedEntitiesAreVisible =
            grapher.isScatter || grapher.isSlopeChart || grapher.isMarimekko

        // Index everything the user can choose from or see; otherwise only
        // the default-selected entities
        return entitiesAreSwitchable || unselectedEntitiesAreVisible
            ? (grapher.tableForSelection.availableEntityNames as string[])
            : grapher.selectedEntityNames
    }

    if (grapher.hasMapTab) {
        grapher.tab = GrapherTabOption.map
        // On a map tab, this table contains all mappable entities for which
        // data is available
        const mappableTable =
            grapher.tableAfterAuthorTimelineAndActiveChartTransform
        return mappableTable.availableEntityNames as string[]
    }

    return []
}
|
||
/**
 * Computes the available entity ids for every published chart.
 *
 * Entity names reported by the grapher are translated to entity ids; names that
 * have no matching entity are logged and skipped.
 *
 * @param trx Read-only transaction used to load entities and published chart configs.
 * @returns Map from chart id to the distinct entity ids available for that chart.
 */
const obtainAvailableEntitiesForAllGraphers = async (
    trx: db.KnexReadonlyTransaction
) => {
    const entityNameToIdMap = await mapEntityNamesToEntityIds(trx)

    const allPublishedGraphers = (await trx
        .select("id", "config")
        .from(ChartsTableName)
        .whereRaw("config ->> '$.isPublished' = 'true'")) as Pick<
        DbRawChart,
        "id" | "config"
    >[]

    const availableEntitiesByChartId = new Map<number, number[]>()
    await pMap(
        allPublishedGraphers,
        async (grapher) => {
            const config = JSON.parse(grapher.config) as GrapherInterface
            const availableEntities =
                await obtainAvailableEntitiesForGrapherConfig(config)

            // De-duplicate the ids: each (chartId, entityId) pair may only be stored
            // once, so a repeated entity name would otherwise make the later insert
            // fail with a duplicate-key error.
            const availableEntityIds = uniq(
                availableEntities.flatMap((entityName) => {
                    const entityId = entityNameToIdMap.get(entityName)
                    if (entityId === undefined) {
                        console.error(
                            `Entity not found for chart ${grapher.id}: "${entityName}"`
                        )
                        return []
                    }
                    return [entityId]
                })
            )
            availableEntitiesByChartId.set(grapher.id, availableEntityIds)

            console.log(
                grapher.id,
                config.slug,
                `[${availableEntities.length} entities]`
            )
        },
        { concurrency: FETCH_CONCURRENCY }
    )

    return availableEntitiesByChartId
}
|
||
/**
 * Rebuilds the charts_x_entities table from scratch: determines the available
 * entities for ALL published graphers, wipes the table and re-inserts every entry.
 *
 * @param trx Read-write transaction in which the table is cleared and refilled.
 */
const updateAvailableEntitiesForAllGraphers = async (
    trx: db.KnexReadWriteTransaction
) => {
    console.log(
        `--- Pre-fetching ${VARIABLES_TO_PREFETCH} most common variables ---`
    )
    await preFetchCommonVariables(trx)

    console.log(
        "--- Obtaining available entity ids for all published graphers ---"
    )
    const entityIdsByChartId = await obtainAvailableEntitiesForAllGraphers(trx)

    console.log("--- Fetch stats ---")
    const { fetched, cached } = _fetchVariablesCounters
    console.log(
        `Fetched ${fetched} variables; cached ${cached} variable loads using ${VARIABLES_TO_PREFETCH} pre-fetched variables`
    )

    console.log("--- Updating charts_x_entities ---")

    // Remove ALL existing rows before writing the freshly computed ones
    await trx.delete().from(ChartsXEntitiesTableName)
    for (const [chartId, entityIds] of entityIdsByChartId) {
        // Nothing to insert for charts without any available entities
        if (!entityIds.length) continue

        await trx(ChartsXEntitiesTableName).insert(
            entityIds.map((entityId) => ({ chartId, entityId }))
        )
    }

    console.log("--- ✅ Done ---")
}
|
||
// An unhandled promise rejection is fatal for this script: log it and exit non-zero
process.on("unhandledRejection", (reason) => {
    console.error(reason)
    process.exit(1)
})

// CLI entry point; only active when this file is run directly rather than imported
if (require.main === module) {
    void yargs(hideBin(process.argv))
        .command(
            "$0",
            "Update charts_x_entities table",
            (cmd) => {
                cmd.option("all", {
                    boolean: true,
                    default: false,
                    description:
                        "Update available entities for all published charts",
                }).check(({ all }) => {
                    // --all is the only supported mode, so reject anything else
                    if (all) return true

                    console.error(
                        "Please use --all. Currently, no other mode is supported."
                    )
                    return false
                })
            },
            async ({ all }) => {
                if (!all) return

                await db.knexReadWriteTransaction(
                    updateAvailableEntitiesForAllGraphers,
                    db.TransactionCloseMode.Close
                )
            }
        )
        .help()
        .strict().argv
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import { MigrationInterface, QueryRunner } from "typeorm" | ||
|
||
/**
 * Migration that introduces the charts_x_entities table, which links charts to entities.
 */
export class CreateChartEntitiesTable1711549786507
    implements MigrationInterface
{
    /**
     * Creates charts_x_entities with a composite (chartId, entityId) primary key,
     * foreign keys to charts and entities, and an extra index on entityId.
     */
    public async up(queryRunner: QueryRunner): Promise<void> {
        const createTableSql = `-- sql
            CREATE TABLE charts_x_entities (
                chartId integer NOT NULL,
                entityId integer NOT NULL,
                FOREIGN KEY (chartId) REFERENCES charts (id) ON DELETE CASCADE ON UPDATE CASCADE,
                FOREIGN KEY (entityId) REFERENCES entities (id) ON DELETE RESTRICT ON UPDATE RESTRICT,
                PRIMARY KEY (chartId, entityId),
                -- we can use the primary key to look up by chartId, but might also want fast
                -- lookups by entityId, so we add an index explicitly
                INDEX (entityId)
            )
        `
        await queryRunner.query(createTableSql)
    }

    /** Reverts the migration by dropping the table again. */
    public async down(queryRunner: QueryRunner): Promise<void> {
        const dropTableSql = `DROP TABLE charts_x_entities`
        await queryRunner.query(dropTableSql)
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import { DbPlainEntity, EntitiesTableName } from "@ourworldindata/types" | ||
import * as db from "../db" | ||
|
||
/**
 * Loads the id and name of every row in the entities table and indexes the ids by name.
 *
 * @param knex Read-only transaction to query with.
 * @returns Map from entity name to entity id.
 */
export async function mapEntityNamesToEntityIds(
    knex: db.KnexReadonlyTransaction
): Promise<Map<string, number>> {
    const rows = (await knex(EntitiesTableName).select("id", "name")) as Pick<
        DbPlainEntity,
        "id" | "name"
    >[]

    const idsByName = new Map<string, number>()
    for (const { id, name } of rows) {
        idsByName.set(name, id)
    }
    return idsByName
}
|
||
/**
 * Loads the id and name of every row in the entities table and indexes the names by id.
 *
 * @param knex Read-only transaction to query with.
 * @returns Map from entity id to entity name.
 */
export async function mapEntityIdsToEntityNames(
    knex: db.KnexReadonlyTransaction
): Promise<Map<number, string>> {
    const rows = (await knex(EntitiesTableName).select("id", "name")) as Pick<
        DbPlainEntity,
        "id" | "name"
    >[]

    const namesById = new Map<number, string>()
    for (const { id, name } of rows) {
        namesById.set(id, name)
    }
    return namesById
}
8 changes: 8 additions & 0 deletions
8
packages/@ourworldindata/types/src/dbTypes/ChartsXEntities.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
// Name of the database table that links charts to entities.
export const ChartsXEntitiesTableName = "charts_x_entities" | ||
|
||
// Shape of a row when inserting into charts_x_entities.
// chartId references charts.id and entityId references entities.id; both are NOT NULL
// and together they form the table's primary key.
export interface DbInsertChartXEntity { | ||
chartId: number | ||
entityId: number | ||
} | ||
|
||
// Shape of a row as read from charts_x_entities, with every column required.
export type DbPlainChartXEntity = Required<DbInsertChartXEntity> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters