Skip to content

Commit

Permalink
🔨 migrate PostLink to knex (#3259)
Browse files Browse the repository at this point in the history
Migrates access to the posts_links table from TypeORM to knex
  • Loading branch information
danyx23 authored Mar 26, 2024
2 parents c4128e2 + d45e80e commit c031fae
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 79 deletions.
31 changes: 17 additions & 14 deletions baker/postUpdatedHook.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,19 @@ import {
getLinksToAddAndRemoveForPost,
} from "../db/syncPostsToGrapher.js"
import { postsTable, select } from "../db/model/Post.js"
import { PostLink } from "../db/model/PostLink.js"
import {
deleteManyPostLinks,
getPostLinksBySourceId,
insertManyPostLinks,
} from "../db/model/PostLink.js"
import { Knex } from "knex"
const argv = parseArgs(process.argv.slice(2))

const zeroDateString = "0000-00-00 00:00:00"

// Sync post from the wordpress database to OWID database
const syncPostToGrapher = async (
knex: Knex<any, any[]>,
postId: number
): Promise<string | undefined> => {
const rows = await wpdb.singleton.query(
Expand Down Expand Up @@ -163,9 +169,10 @@ const syncPostToGrapher = async (
)[0]

if (postRow) {
const existingLinksForPost = await PostLink.findBy({
sourceId: wpPost.ID,
})
const existingLinksForPost = await getPostLinksBySourceId(
knex,
wpPost.ID
)

const { linksToAdd, linksToDelete } = getLinksToAddAndRemoveForPost(
postRow,
Expand All @@ -177,19 +184,15 @@ const syncPostToGrapher = async (
// TODO: unify our DB access and then do everything in one transaction
if (linksToAdd.length) {
console.log("linksToAdd", linksToAdd.length)
await PostLink.createQueryBuilder()
.insert()
.into(PostLink)
.values(linksToAdd)
.execute()
await insertManyPostLinks(knex, linksToAdd)
}

if (linksToDelete.length) {
console.log("linksToDelete", linksToDelete.length)
await PostLink.createQueryBuilder()
.where("id in (:ids)", { ids: linksToDelete.map((x) => x.id) })
.delete()
.execute()
await deleteManyPostLinks(
knex,
linksToDelete.map((x) => x.id)
)
}
}
return newPost ? newPost.slug : undefined
Expand All @@ -203,7 +206,7 @@ const main = async (
) => {
console.log(email, name, postId)
try {
const slug = await syncPostToGrapher(postId)
const slug = await syncPostToGrapher(db.knexInstance(), postId)

if (BAKE_ON_CHANGE)
await new DeployQueueServer().enqueueChange({
Expand Down
125 changes: 85 additions & 40 deletions db/model/PostLink.ts
Original file line number Diff line number Diff line change
@@ -1,47 +1,92 @@
import { Entity, PrimaryGeneratedColumn, Column, BaseEntity } from "typeorm"
import { formatUrls } from "../../site/formatting.js"
import { Url } from "@ourworldindata/utils"
import {
DbInsertPostLink,
DbPlainPostLink,
PostsLinksTableName,
Url,
} from "@ourworldindata/utils"
import { getLinkType, getUrlTarget } from "@ourworldindata/components"
import { Knex } from "knex"
/**
 * Builds the column values (everything except `id`) of a post link row
 * from a URL found in a post.
 *
 * The URL is first passed through `formatUrls`; the link type, target,
 * query string and hash are all derived from that formatted URL.
 *
 * @param url - The URL as it appears in the post.
 * @param sourceId - Id of the post that contains the link.
 * @param text - Text associated with the link; defaults to "".
 * @param componentType - Component the link was found in; defaults to "".
 * @returns A plain object ready to be inserted as a post link.
 */
export function postLinkCreateFromUrl({
    url,
    sourceId,
    text = "",
    componentType = "",
}: {
    url: string
    sourceId: number
    text?: string
    componentType?: string
}): Omit<DbPlainPostLink, "id"> {
    const cleanUrl = formatUrls(url)
    const parsed = Url.fromURL(cleanUrl)
    const linkType = getLinkType(cleanUrl)
    return {
        target: getUrlTarget(cleanUrl),
        linkType,
        queryString: parsed.queryStr,
        hash: parsed.hash,
        sourceId,
        text,
        componentType,
    }
}

@Entity("posts_links")
export class PostLink extends BaseEntity {
@PrimaryGeneratedColumn() id!: number
// TODO: posts is not a TypeORM but a Knex class so we can't use a TypeORM relationship here yet
/**
 * Fetches a single post link row by its `id`.
 *
 * @param knex - Knex instance used to run the query.
 * @param id - Value of the `id` column to match.
 * @returns The first matching row, or `undefined` when there is none.
 */
export async function getPostLinkById(
    knex: Knex<any, any[]>,
    id: number
): Promise<DbPlainPostLink | undefined> {
    const matchingRow = await knex<DbPlainPostLink>(PostsLinksTableName)
        .where({ id })
        .first()
    return matchingRow
}

@Column({ type: "int", nullable: false }) sourceId!: number
/**
 * Loads every row of the post links table.
 *
 * @param knex - Knex instance used to run the query.
 * @returns All post link rows.
 */
export async function getAllPostLinks(
    knex: Knex<any, any[]>
): Promise<DbPlainPostLink[]> {
    const allRows = await knex<DbPlainPostLink>(PostsLinksTableName)
    return allRows
}

@Column() linkType!: "gdoc" | "url" | "grapher" | "explorer"
@Column() target!: string
@Column() queryString!: string
@Column() hash!: string
@Column() componentType!: string
@Column() text!: string
/**
 * Loads all post link rows whose `sourceId` column equals the given id.
 *
 * @param knex - Knex instance used to run the query.
 * @param sourceId - Value of the `sourceId` column to match.
 * @returns The matching rows (empty when none match).
 */
export async function getPostLinksBySourceId(
    knex: Knex<any, any[]>,
    sourceId: number
): Promise<DbPlainPostLink[]> {
    const rowsForSource = await knex<DbPlainPostLink>(
        PostsLinksTableName
    ).where({ sourceId })
    return rowsForSource
}

static createFromUrl({
url,
sourceId,
text = "",
componentType = "",
}: {
url: string
sourceId: number
text?: string
componentType?: string
}): PostLink {
const formattedUrl = formatUrls(url)
const urlObject = Url.fromURL(formattedUrl)
const linkType = getLinkType(formattedUrl)
const target = getUrlTarget(formattedUrl)
const queryString = urlObject.queryStr
const hash = urlObject.hash
return PostLink.create({
target,
linkType,
queryString,
hash,
sourceId,
text,
componentType,
})
}
/**
 * Inserts a single post link row and reports the id it was stored under.
 *
 * A knex insert resolves with an array, not a single object: `[insertId]`
 * on MySQL (where `returning` has no effect), and `[{ id }]` or `[id]` on
 * dialects that honour `returning`. The first element is normalised here so
 * that callers really receive the declared `{ id }` shape.
 *
 * @param knex - Knex instance used to run the query.
 * @param postLink - Column values of the row to insert.
 * @returns An object holding the id of the inserted row.
 */
export async function insertPostLink(
    knex: Knex<any, any[]>,
    postLink: DbInsertPostLink
): Promise<{ id: number }> {
    const inserted: unknown[] = await knex(PostsLinksTableName)
        .returning("id")
        .insert(postLink)
    const first = inserted[0]
    // Depending on dialect the element is either the bare id or `{ id }`.
    const id =
        typeof first === "object" && first !== null
            ? (first as { id: number }).id
            : (first as number)
    return { id }
}

/**
 * Inserts several post link rows using knex's `batchInsert`.
 *
 * Resolves with no value, as the signature promises, instead of leaking the
 * result of `batchInsert`. An empty list is a no-op and does not touch the
 * database.
 *
 * @param knex - Knex instance used to run the insert.
 * @param postLinks - Rows to insert.
 */
export async function insertManyPostLinks(
    knex: Knex<any, any[]>,
    postLinks: DbInsertPostLink[]
): Promise<void> {
    if (postLinks.length === 0) return
    await knex.batchInsert(PostsLinksTableName, postLinks)
}

/**
 * Overwrites the columns of the post link row with the given `id`.
 *
 * Resolves with no value, as the signature promises; the affected-row count
 * produced by knex is deliberately not passed on.
 *
 * @param knex - Knex instance used to run the update.
 * @param id - Value of the `id` column identifying the row.
 * @param postLink - New column values for the row.
 */
export async function updatePostLink(
    knex: Knex<any, any[]>,
    id: number,
    postLink: DbInsertPostLink
): Promise<void> {
    await knex(PostsLinksTableName).where({ id }).update(postLink)
}

/**
 * Deletes the post link row with the given `id`.
 *
 * Resolves with no value, as the signature promises; the deleted-row count
 * produced by knex is deliberately not passed on.
 *
 * @param knex - Knex instance used to run the delete.
 * @param id - Value of the `id` column identifying the row.
 */
export async function deletePostLink(
    knex: Knex<any, any[]>,
    id: number
): Promise<void> {
    await knex(PostsLinksTableName).where({ id }).delete()
}

/**
 * Deletes all post link rows whose `id` is contained in `ids`.
 *
 * Resolves with no value, as the signature promises. An empty id list is a
 * no-op and does not issue a query.
 *
 * @param knex - Knex instance used to run the delete.
 * @param ids - Ids of the rows to remove.
 */
export async function deleteManyPostLinks(
    knex: Knex<any, any[]>,
    ids: number[]
): Promise<void> {
    if (ids.length === 0) return
    await knex(PostsLinksTableName).whereIn("id", ids).delete()
}
62 changes: 37 additions & 25 deletions db/syncPostsToGrapher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,19 @@ import {
DbEnrichedPost,
sortBy,
serializePostRow,
DbPlainPostLink,
DbInsertPostLink,
} from "@ourworldindata/utils"
import { postsTable, select } from "./model/Post.js"
import { PostLink } from "./model/PostLink.js"
import {
deleteManyPostLinks,
getAllPostLinks,
insertManyPostLinks,
postLinkCreateFromUrl,
} from "./model/PostLink.js"
import { renderTablePress } from "../site/Tablepress.js"
import pMap from "p-map"
import { Knex } from "knex"

const zeroDateString = "0000-00-00 00:00:00"

Expand Down Expand Up @@ -157,15 +165,18 @@ export async function buildTablePressResolver(): Promise<BlockResolveFunction> {
replaceTablePressShortcodes(content, replacerFunction)
}

export const postLinkCompareStringGenerator = (item: PostLink): string =>
/**
 * Produces the key used to decide whether two post links are "the same":
 * link type, target, hash and query string, joined with " - ".
 * Other fields (id, sourceId, text, componentType) are ignored.
 */
export const postLinkCompareStringGenerator = (item: DbPlainPostLink): string => {
    const keyParts = [item.linkType, item.target, item.hash, item.queryString]
    // String() stringifies each part the same way a template literal would.
    return keyParts.map((part) => String(part)).join(" - ")
}

export function getLinksToAddAndRemoveForPost(
post: DbEnrichedPost,
existingLinksForPost: PostLink[],
existingLinksForPost: DbPlainPostLink[],
content: string,
postId: number
): { linksToAdd: PostLink[]; linksToDelete: PostLink[] } {
): {
linksToAdd: Omit<DbPlainPostLink, "id">[]
linksToDelete: DbPlainPostLink[]
} {
const linksInDb = groupBy(
existingLinksForPost,
postLinkCompareStringGenerator
Expand Down Expand Up @@ -206,30 +217,31 @@ export function getLinksToAddAndRemoveForPost(
)
const linksInDocument = keyBy(
[
...allHrefs.map((link) => PostLink.createFromUrl(link)),
...allSrcs.map((link) => PostLink.createFromUrl(link)),
...allProminentLinks.map((link) => PostLink.createFromUrl(link)),
...allHrefs.map((link) => postLinkCreateFromUrl(link)),
...allSrcs.map((link) => postLinkCreateFromUrl(link)),
...allProminentLinks.map((link) => postLinkCreateFromUrl(link)),
],
postLinkCompareStringGenerator
)

const linksToAdd: PostLink[] = []
const linksToDelete: PostLink[] = []
const linksToAdd: Omit<DbPlainPostLink, "id">[] = []
const linksToDelete: DbPlainPostLink[] = []

// This is doing a set difference, but we want to do the set operation on a subset
// of fields (the ones we stringify into the compare key) while retaining the full
// object so that we can e.g. delete efficiently by id later on.
for (const [linkInDocCompareKey, linkInDoc] of Object.entries(
linksInDocument
))
if (!(linkInDocCompareKey in linksInDb)) linksToAdd.push(linkInDoc)
if (!(linkInDocCompareKey in linksInDb))
linksToAdd.push(linkInDoc as Omit<DbPlainPostLink, "id">)
for (const [linkInDbCompareKey, linkInDb] of Object.entries(linksInDb))
if (!(linkInDbCompareKey in linksInDocument))
linksToDelete.push(...linkInDb)
return { linksToAdd, linksToDelete }
}

const syncPostsToGrapher = async (): Promise<void> => {
const syncPostsToGrapher = async (knex: Knex<any, any[]>): Promise<void> => {
const dereferenceReusableBlocksFn = await buildReusableBlocksResolver()
const dereferenceTablePressFn = await buildTablePressResolver()

Expand Down Expand Up @@ -372,11 +384,14 @@ const syncPostsToGrapher = async (): Promise<void> => {
},
{ concurrency: 20 }
)) as DbEnrichedPost[]
const postLinks = await PostLink.find()
const postLinksById = groupBy(postLinks, (link: PostLink) => link.sourceId)
const postLinks = await getAllPostLinks(knex)
const postLinksById = groupBy(
postLinks,
(link: DbPlainPostLink) => link.sourceId
)

const linksToAdd: PostLink[] = []
const linksToDelete: PostLink[] = []
const linksToAdd: DbInsertPostLink[] = []
const linksToDelete: DbPlainPostLink[] = []

for (const post of rows) {
const existingLinksForPost = postLinksById[post.ID]
Expand Down Expand Up @@ -409,26 +424,23 @@ const syncPostsToGrapher = async (): Promise<void> => {
// TODO: unify our DB access and then do everything in one transaction
if (linksToAdd.length) {
console.log("linksToAdd", linksToAdd.length)
await PostLink.createQueryBuilder()
.insert()
.into(PostLink)
.values(linksToAdd)
.execute()
// Insert only the newly found links; `postLinks` holds the rows already in the table.
await insertManyPostLinks(knex, linksToAdd)
}

if (linksToDelete.length) {
console.log("linksToDelete", linksToDelete.length)
await PostLink.createQueryBuilder()
.where("id in (:ids)", { ids: linksToDelete.map((x) => x.id) })
.delete()
.execute()
await deleteManyPostLinks(
knex,
linksToDelete.map((link) => link.id)
)
}
}

const main = async (): Promise<void> => {
try {
await db.getConnection()
await syncPostsToGrapher()
const knex = db.knexInstance()
await syncPostsToGrapher(knex)
} finally {
await wpdb.singleton.end()
await db.closeTypeOrmAndKnexConnections()
Expand Down

0 comments on commit c031fae

Please sign in to comment.