From 8dbca8bb0d67ac9d2c1aa94958078c84655255ec Mon Sep 17 00:00:00 2001 From: Matthieu Bergel Date: Tue, 7 May 2024 12:23:22 +0000 Subject: [PATCH 1/9] feat(author): relax r&w block constraint for alternate author tpl --- db/model/Gdoc/rawToEnriched.ts | 38 +--------------------------------- 1 file changed, 1 insertion(+), 37 deletions(-) diff --git a/db/model/Gdoc/rawToEnriched.ts b/db/model/Gdoc/rawToEnriched.ts index 9557b7d4c1f..01bd1e2ce68 100644 --- a/db/model/Gdoc/rawToEnriched.ts +++ b/db/model/Gdoc/rawToEnriched.ts @@ -1636,40 +1636,6 @@ function parseExpandableParagraph( function parseResearchAndWritingBlock( raw: RawBlockResearchAndWriting ): EnrichedBlockResearchAndWriting { - const createError = ( - error: ParseError, - heading = "", - hideAuthors = false, - primary = [ - { - value: { url: "" }, - }, - ], - secondary = [ - { - value: { url: "" }, - }, - ], - more: EnrichedBlockResearchAndWritingRow = { - heading: "", - articles: [], - }, - latest: EnrichedBlockResearchAndWritingRow = { - heading: "", - articles: [], - }, - rows: EnrichedBlockResearchAndWritingRow[] = [] - ): EnrichedBlockResearchAndWriting => ({ - type: "research-and-writing", - heading, - "hide-authors": hideAuthors, - primary, - secondary, - more, - latest, - rows, - parseErrors: [error], - }) const parseErrors: ParseError[] = [] function enrichLink( @@ -1729,12 +1695,10 @@ function parseResearchAndWritingBlock( }) } - if (!raw.value.primary) - return createError({ message: "Missing primary link" }) const primary: EnrichedBlockResearchAndWritingLink[] = [] if (isArray(raw.value.primary)) { primary.push(...raw.value.primary.map((link) => enrichLink(link))) - } else { + } else if (raw.value.primary) { primary.push(enrichLink(raw.value.primary)) } From a78ea12da40baa5ae843cc9d132afbf26ad47f93 Mon Sep 17 00:00:00 2001 From: Matthieu Bergel Date: Tue, 7 May 2024 12:23:52 +0000 Subject: [PATCH 2/9] feat(author): add all work export --- adminSiteServer/apiRouter.ts | 93 ++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index b3f18a9794d..2184c837ac6 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -76,6 +76,7 @@ import { PostsGdocsTableName, DbPlainDataset, DbInsertUser, + OwidGdocContent, } from "@ourworldindata/types" import { getVariableDataRoute, @@ -148,6 +149,7 @@ import { match } from "ts-pattern" import { GdocDataInsight } from "../db/model/Gdoc/GdocDataInsight.js" import { GdocHomepage } from "../db/model/Gdoc/GdocHomepage.js" import { GdocAuthor } from "../db/model/Gdoc/GdocAuthor.js" +import path from "path" const apiRouter = new FunctionalRouter() @@ -2543,4 +2545,95 @@ deleteRouteWithRWTransaction( } ) +// Get an ArchieML output of all the work produced by an author. This includes +// gdoc articles, gdoc modular/linear topic pages and wordpress modular topic +// pages. Data insights are excluded. This is used to manually populate the +// [.secondary] section of the {.research-and-writing} block of author pages +// using the alternate template, which highlights topics rather than articles. +getRouteWithROTransaction(apiRouter, "/all-work", async (req, res, trx) => { + type WordpressPageRecord = Record< + "slug" | "title" | "type" | "thumbnail" | "authors", + string + > + type GdocRecord = Pick & + Pick + + const author = req.query.author || "Max Roser" + const gdocs = await db.knexRaw( + trx, + `-- sql + SELECT id, content->>'$.title' as title, content->>'$.type' + FROM posts_gdocs + WHERE JSON_CONTAINS(content->'$.authors', '"${author}"') + AND type NOT IN ("data-insight", "article", "fragment") + AND published = 1 + ORDER BY publishedAt DESC + ` + ) + + // type: page + const wpModularTopicPages = await db.knexRaw( + trx, + `-- sql + SELECT + wpApiSnapshot->>"$.slug" as slug, + wpApiSnapshot->>"$.title.rendered" as title, + wpApiSnapshot->>"$.type" as type, + wpApiSnapshot->>"$.authors_name" as authors, + wpApiSnapshot->>"$.featured_media_paths.medium_large" as thumbnail + FROM posts p + WHERE wpApiSnapshot->>"$.content" LIKE '%topic-page%' + AND JSON_CONTAINS(wpApiSnapshot->'$.authors_name', '"${author}"') + AND wpApiSnapshot->>"$.status" = 'publish' + AND NOT EXISTS ( + SELECT 1 FROM posts_gdocs pg + WHERE pg.slug = p.slug + AND pg.content->>'$.type' LIKE '%topic-page' + ) + ORDER BY wpApiSnapshot->>"$.date" DESC + ` + ) + + const isWordpressPage = ( + post: WordpressPageRecord | GdocRecord + ): post is WordpressPageRecord => post.type === "page" + + function* generateProperty(key: string, value: string) { + yield `${key}: ${value}\n` + } + + function* generateAllWorkArchieMl() { + for (const post of [...gdocs, ...wpModularTopicPages]) { + if (isWordpressPage(post)) { + yield* generateProperty("url", post.slug) + yield* generateProperty("title", post.title) + yield* generateProperty( + "authors", + JSON.parse(post.authors).join(", ") + ) + const parsedPath = path.parse(post.thumbnail) + yield* generateProperty( + "filename", + // /app/uploads/2021/09/reducing-fertilizer-768x301.png -> reducing-fertilizer.png + path.format({ + name: parsedPath.name.replace(/-\d+x\d+$/, ""), + ext: parsedPath.ext, + }) + ) + yield "\n" + } else { + // this is a gdoc + yield* generateProperty( + "url", + `https://docs.google.com/document/d/${post.id}/edit` + ) + yield "\n" + } + } + } + + res.type("text/plain") + res.send([...generateAllWorkArchieMl()].join("")) +}) + export { apiRouter } From c1a2efb264acbc1e7efad9a128cee8b01c1eecd1 Mon Sep 17 00:00:00 2001 From: Matthieu Bergel Date: Wed, 8 May 2024 12:43:19 +0000 Subject: [PATCH 3/9] enhance(author): add articles to export --- adminSiteServer/apiRouter.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index 2184c837ac6..15c6f244b48 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -2565,7 +2565,7 @@ getRouteWithROTransaction(apiRouter, "/all-work", async (req, res, trx) => { SELECT id, content->>'$.title' as title, content->>'$.type' FROM posts_gdocs WHERE JSON_CONTAINS(content->'$.authors', '"${author}"') - AND type NOT IN ("data-insight", "article", "fragment") + AND type NOT IN ("data-insight", "fragment") AND published = 1 ORDER BY publishedAt DESC ` From 29d2298422d3e298996985c480d52585871c28e0 Mon Sep 17 00:00:00 2001 From: Matthieu Bergel Date: Wed, 8 May 2024 13:25:12 +0000 Subject: [PATCH 4/9] fix(author): more representative type for RawSocialLink --- .../types/src/gdocTypes/ArchieMlComponents.ts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/packages/@ourworldindata/types/src/gdocTypes/ArchieMlComponents.ts b/packages/@ourworldindata/types/src/gdocTypes/ArchieMlComponents.ts index fcb2f19400a..9ef9f855798 100644 --- a/packages/@ourworldindata/types/src/gdocTypes/ArchieMlComponents.ts +++ b/packages/@ourworldindata/types/src/gdocTypes/ArchieMlComponents.ts @@ -852,8 +852,8 @@ export enum SocialLinkType { } export type RawSocialLink = { - text: string - url: string + text?: string + url?: string type?: SocialLinkType } @@ -862,7 +862,11 @@ export type RawBlockSocials = { value: RawSocialLink[] | ArchieMLUnexpectedNonObjectValue } -export type EnrichedSocialLink = RawSocialLink +export type EnrichedSocialLink = { + text: string + url: string + type?: SocialLinkType +} export type EnrichedBlockSocials = { type: "socials" From 7ecf0cca4479d375128e4567ae1a31b614654d86 Mon Sep 17 00:00:00 2001 From: Matthieu Bergel Date: Wed, 8 May 2024 14:02:07 +0000 Subject: [PATCH 5/9] fix(author): sorting a "all work" --- adminSiteServer/apiRouter.ts | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index 15c6f244b48..9dea5eba9cd 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -2552,22 +2552,21 @@ deleteRouteWithRWTransaction( // using the alternate template, which highlights topics rather than articles. getRouteWithROTransaction(apiRouter, "/all-work", async (req, res, trx) => { type WordpressPageRecord = Record< - "slug" | "title" | "type" | "thumbnail" | "authors", + "slug" | "title" | "type" | "thumbnail" | "authors" | "publishedAt", string > - type GdocRecord = Pick & + type GdocRecord = Pick & Pick const author = req.query.author || "Max Roser" const gdocs = await db.knexRaw( trx, `-- sql - SELECT id, content->>'$.title' as title, content->>'$.type' + SELECT id, content->>'$.title' as title, content->>'$.type', publishedAt FROM posts_gdocs WHERE JSON_CONTAINS(content->'$.authors', '"${author}"') AND type NOT IN ("data-insight", "fragment") AND published = 1 - ORDER BY publishedAt DESC ` ) @@ -2580,7 +2579,8 @@ getRouteWithROTransaction(apiRouter, "/all-work", async (req, res, trx) => { wpApiSnapshot->>"$.title.rendered" as title, wpApiSnapshot->>"$.type" as type, wpApiSnapshot->>"$.authors_name" as authors, - wpApiSnapshot->>"$.featured_media_paths.medium_large" as thumbnail + wpApiSnapshot->>"$.featured_media_paths.medium_large" as thumbnail, + wpApiSnapshot->>"$.date" as publishedAt FROM posts p WHERE wpApiSnapshot->>"$.content" LIKE '%topic-page%' AND JSON_CONTAINS(wpApiSnapshot->'$.authors_name', '"${author}"') @@ -2590,7 +2590,6 @@ getRouteWithROTransaction(apiRouter, "/all-work", async (req, res, trx) => { WHERE pg.slug = p.slug AND pg.content->>'$.type' LIKE '%topic-page' ) - ORDER BY wpApiSnapshot->>"$.date" DESC ` ) @@ -2602,8 +2601,21 @@ getRouteWithROTransaction(apiRouter, "/all-work", async (req, res, trx) => { yield `${key}: ${value}\n` } + const sortByDateDesc = ( + a: GdocRecord | WordpressPageRecord, + b: GdocRecord | WordpressPageRecord + ): number => { + if (!a.publishedAt || !b.publishedAt) return 0 + return ( + new Date(b.publishedAt).getTime() - + new Date(a.publishedAt).getTime() + ) + } + function* generateAllWorkArchieMl() { - for (const post of [...gdocs, ...wpModularTopicPages]) { + for (const post of [...gdocs, ...wpModularTopicPages].sort( + sortByDateDesc + )) { if (isWordpressPage(post)) { yield* generateProperty("url", post.slug) yield* generateProperty("title", post.title) From 0531f7c9ad02f8398b94484d8658ed4fb4d94cb1 Mon Sep 17 00:00:00 2001 From: Matthieu Bergel Date: Wed, 8 May 2024 14:15:01 +0000 Subject: [PATCH 6/9] fix(author): add subtitle to wp post export --- adminSiteServer/apiRouter.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index 9dea5eba9cd..03783527ee7 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -2552,7 +2552,13 @@ deleteRouteWithRWTransaction( // using the alternate template, which highlights topics rather than articles. getRouteWithROTransaction(apiRouter, "/all-work", async (req, res, trx) => { type WordpressPageRecord = Record< - "slug" | "title" | "type" | "thumbnail" | "authors" | "publishedAt", + | "slug" + | "title" + | "subtitle" + | "type" + | "thumbnail" + | "authors" + | "publishedAt", string > type GdocRecord = Pick & @@ -2577,6 +2583,7 @@ getRouteWithROTransaction(apiRouter, "/all-work", async (req, res, trx) => { SELECT wpApiSnapshot->>"$.slug" as slug, wpApiSnapshot->>"$.title.rendered" as title, + wpApiSnapshot->>"$.excerpt.rendered" as subtitle, wpApiSnapshot->>"$.type" as type, wpApiSnapshot->>"$.authors_name" as authors, wpApiSnapshot->>"$.featured_media_paths.medium_large" as thumbnail, @@ -2619,6 +2626,7 @@ getRouteWithROTransaction(apiRouter, "/all-work", async (req, res, trx) => { if (isWordpressPage(post)) { yield* generateProperty("url", post.slug) yield* generateProperty("title", post.title) + yield* generateProperty("subtitle", post.subtitle) yield* generateProperty( "authors", JSON.parse(post.authors).join(", ") From bbb244e797f46154ea84d34c1d65f84db1def45f Mon Sep 17 00:00:00 2001 From: Matthieu Bergel Date: Wed, 8 May 2024 14:17:26 +0000 Subject: [PATCH 7/9] fix(author): full production url in export --- adminSiteServer/apiRouter.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index 03783527ee7..612f9e1a0c7 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -2624,7 +2624,10 @@ getRouteWithROTransaction(apiRouter, "/all-work", async (req, res, trx) => { sortByDateDesc )) { if (isWordpressPage(post)) { - yield* generateProperty("url", post.slug) + yield* generateProperty( + "url", + `https://ourworldindata.org/${post.slug}` + ) yield* generateProperty("title", post.title) yield* generateProperty("subtitle", post.subtitle) yield* generateProperty( From 36cda48e216791b4efe421ed1bde52ca39dcb0e9 Mon Sep 17 00:00:00 2001 From: Ike Saunders Date: Fri, 10 May 2024 09:40:10 -0400 Subject: [PATCH 8/9] =?UTF-8?q?=F0=9F=90=9B=20fix=20getFilenameExtension?= =?UTF-8?q?=20for=20files=20with=20multiple=20periods?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/@ourworldindata/utils/src/image.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/@ourworldindata/utils/src/image.ts b/packages/@ourworldindata/utils/src/image.ts index 9579cd6e693..6f7aa55b1bf 100644 --- a/packages/@ourworldindata/utils/src/image.ts +++ b/packages/@ourworldindata/utils/src/image.ts @@ -48,7 +48,7 @@ export function getFilenameWithoutExtension( export function getFilenameExtension( filename: ImageMetadata["filename"] ): string { - return filename.slice(filename.indexOf(".") + 1) + return filename.slice(filename.lastIndexOf(".") + 1) } export function getFilenameAsPng(filename: ImageMetadata["filename"]): string { From e5edd0086a50724b0925a9256721c871abad26a7 Mon Sep 17 00:00:00 2001 From: Matthieu Bergel Date: Mon, 13 May 2024 09:15:15 +0000 Subject: [PATCH 9/9] refactor(author): cleaner implementation of all-work --- adminSiteServer/apiRouter.ts | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index 612f9e1a0c7..1a7745e2902 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -76,7 +76,6 @@ import { PostsGdocsTableName, DbPlainDataset, DbInsertUser, - OwidGdocContent, } from "@ourworldindata/types" import { getVariableDataRoute, @@ -2551,24 +2550,19 @@ deleteRouteWithRWTransaction( // [.secondary] section of the {.research-and-writing} block of author pages // using the alternate template, which highlights topics rather than articles. getRouteWithROTransaction(apiRouter, "/all-work", async (req, res, trx) => { - type WordpressPageRecord = Record< - | "slug" - | "title" - | "subtitle" - | "type" - | "thumbnail" - | "authors" - | "publishedAt", + type WordpressPageRecord = { + isWordpressPage: number + } & Record< + "slug" | "title" | "subtitle" | "thumbnail" | "authors" | "publishedAt", string > - type GdocRecord = Pick & - Pick + type GdocRecord = Pick const author = req.query.author || "Max Roser" const gdocs = await db.knexRaw( trx, `-- sql - SELECT id, content->>'$.title' as title, content->>'$.type', publishedAt + SELECT id, publishedAt FROM posts_gdocs WHERE JSON_CONTAINS(content->'$.authors', '"${author}"') AND type NOT IN ("data-insight", "fragment") @@ -2584,7 +2578,7 @@ getRouteWithROTransaction(apiRouter, "/all-work", async (req, res, trx) => { wpApiSnapshot->>"$.slug" as slug, wpApiSnapshot->>"$.title.rendered" as title, wpApiSnapshot->>"$.excerpt.rendered" as subtitle, - wpApiSnapshot->>"$.type" as type, + TRUE as isWordpressPage, wpApiSnapshot->>"$.authors_name" as authors, wpApiSnapshot->>"$.featured_media_paths.medium_large" as thumbnail, wpApiSnapshot->>"$.date" as publishedAt @@ -2602,7 +2596,8 @@ getRouteWithROTransaction(apiRouter, "/all-work", async (req, res, trx) => { const isWordpressPage = ( post: WordpressPageRecord | GdocRecord - ): post is WordpressPageRecord => post.type === "page" + ): post is WordpressPageRecord => + (post as WordpressPageRecord).isWordpressPage === 1 function* generateProperty(key: string, value: string) { yield `${key}: ${value}\n`