diff --git a/.eslintrc b/.eslintrc index 8b04c236..16eb6fb0 100644 --- a/.eslintrc +++ b/.eslintrc @@ -7,6 +7,7 @@ "@typescript-eslint" ], "rules": { + "@typescript-eslint/camelcase": 0, "@typescript-eslint/explicit-function-return-type": 0, "@typescript-eslint/member-delimiter-style": 0, "@typescript-eslint/no-explicit-any": 0, diff --git a/migrations/0028_episodes_ids.sql b/migrations/0028_episodes_ids.sql index d1529459..a49ce317 100644 --- a/migrations/0028_episodes_ids.sql +++ b/migrations/0028_episodes_ids.sql @@ -7,6 +7,12 @@ ALTER TABLE episodes ALTER SEQUENCE episodes_int_id_seq OWNED BY episodes.int_id; COMMIT; +/* + NOTE: the int_id columns exist mainly so that the Manticore index jobs have + a reliable numeric identifier for selecting ~10000 rows at a time as part + of the index operation. +*/ + CREATE UNIQUE INDEX CONCURRENTLY episodes_int_id_key ON episodes (int_id); -- Use a script to report the UPDATE and VACUUM commands until not episodes with int_ids are left diff --git a/migrations/0030_int_id_indexes.sql b/migrations/0030_int_id_indexes.sql index 97810d62..8168f8b4 100644 --- a/migrations/0030_int_id_indexes.sql +++ b/migrations/0030_int_id_indexes.sql @@ -1,3 +1,9 @@ +/* + NOTE: the int_id columns exist mainly so that the Manticore index jobs have + a reliable numeric identifier for selecting ~10000 rows at a time as part + of the index operation. 
+*/ + CREATE INDEX CONCURRENTLY "authors_int_id_index" ON "authors" ("int_id"); CREATE INDEX CONCURRENTLY "categories_int_id_index" ON "categories" ("int_id"); CREATE INDEX CONCURRENTLY "feedUrls_int_id_index" ON "feedUrls" ("int_id"); diff --git a/migrations/0055_podcasts_flag_status.sql b/migrations/0055_podcasts_flag_status.sql new file mode 100644 index 00000000..7c594f74 --- /dev/null +++ b/migrations/0055_podcasts_flag_status.sql @@ -0,0 +1,4 @@ +CREATE TYPE flag_status_enum AS ENUM ('none', 'spam', 'takedown', 'other', 'always-allow'); + +ALTER TABLE podcasts +ADD COLUMN flag_status flag_status_enum DEFAULT 'none'; diff --git a/package.json b/package.json index d4c276a0..6898b0c4 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "podverse-api", - "version": "4.16.5", + "version": "4.16.6", "description": "Data API, database migration scripts, and backend services for all Podverse models.", "contributors": [ "Mitch Downey" diff --git a/src/entities/podcast.ts b/src/entities/podcast.ts index e2d4dab1..0cf2a840 100644 --- a/src/entities/podcast.ts +++ b/src/entities/podcast.ts @@ -24,6 +24,8 @@ type Funding = { value?: string } +type PodcastFlagStatus = 'none' | 'spam' | 'takedown' | 'other' | 'always-allow' + @Index(['hasVideo', 'pastAllTimeTotalUniquePageviews']) @Index(['hasVideo', 'pastHourTotalUniquePageviews']) @Index(['hasVideo', 'pastDayTotalUniquePageviews']) @@ -88,6 +90,14 @@ export class Podcast { @Column({ nullable: true }) feedLastUpdated?: Date + @Index() + @Column({ + type: 'enum', + enum: ['none', 'spam', 'takedown', 'other', 'always-allow'], + default: 'none' + }) + flag_status: PodcastFlagStatus + @Column('simple-json', { nullable: true }) funding: Funding[] diff --git a/src/services/parser.ts b/src/services/parser.ts index 0b19ce8d..48428f40 100644 --- a/src/services/parser.ts +++ b/src/services/parser.ts @@ -86,6 +86,30 @@ export const parseFeedUrl = async (feedUrl, forceReparsing = false, cacheBust = }, abortTimeLimit) 
try { + let podcast = new Podcast() + if (feedUrl.podcast) { + logPerformance('feedUrl.podcast getPodcast', _logStart) + const savedPodcast = await getPodcast(feedUrl.podcast.id, false, allowNonPublic) + logPerformance('feedUrl.podcast getPodcast', _logEnd) + if (!savedPodcast) throw Error('Invalid podcast id provided.') + podcast = savedPodcast + } + + if (podcast.flag_status === 'always-allow') { + // do nothing + } else if (podcast.flag_status === 'spam') { + console.log(`Aborting parser: podcast id ${podcast.id} marked as flag_status = spam`) + return + } else if (podcast.flag_status === 'takedown') { + console.log(`Aborting parser: podcast id ${podcast.id} marked as flag_status = takedown`) + return + } else if (podcast.flag_status === 'other') { + console.log(`Aborting parser: podcast id ${podcast.id} marked as flag_status = other`) + return + } + + const podcastRepo = getRepository(Podcast) + /* Temporary: Stop parsing papi.qingting.fm domain until mediaUrl/guid switch is completed */ const isQingTing = feedUrl.url.indexOf('qingting.fm') > -1 if (isQingTing) { @@ -294,13 +318,14 @@ const parsedEpisodes = parsedFeed.items.map(itemCompat) const parsedLiveItemEpisodes = meta.liveItems.map(liveItemCompatToParsedEpisode) - let podcast = new Podcast() - if (feedUrl.podcast) { - logPerformance('feedUrl.podcast getPodcast', _logStart) - const savedPodcast = await getPodcast(feedUrl.podcast.id, false, allowNonPublic) - logPerformance('feedUrl.podcast getPodcast', _logEnd) - if (!savedPodcast) throw Error('Invalid podcast id provided.') - podcast = savedPodcast + if ( + podcast.flag_status !== 'always-allow' && + (parsedEpisodes?.length >= 10000 || parsedLiveItemEpisodes?.length >= 10000) + ) { + console.log('Aborting parser: too many episodes. 
Marking podcast as spam.') + podcast.flag_status = 'spam' + await podcastRepo.save(podcast) + return } logPerformance('podcast id', podcast.id) @@ -484,8 +509,6 @@ export const parseFeedUrl = async (feedUrl, forceReparsing = false, cacheBust = } } - const podcastRepo = getRepository(Podcast) - logPerformance('podcastRepo.save', _logStart) await podcastRepo.save(podcast) logPerformance('podcastRepo.save', _logEnd) diff --git a/src/services/stats.ts b/src/services/stats.ts index ef068649..9f6fff6b 100644 --- a/src/services/stats.ts +++ b/src/services/stats.ts @@ -64,7 +64,7 @@ export const queryUniquePageviews = async (pagePath: string, timeRange) => { const startDate = new Date(timeRange === 'allTime' ? '2017-01-01' : offsetDate(startDateOffset)) const endDate = new Date(offsetDate()) - const numberOfIntervals = ['allTime'].includes(timeRange) ? 60 : ['year'].includes(timeRange) ? 12 : 1 + const numberOfIntervals = ['allTime'].includes(timeRange) ? 120 : ['year'].includes(timeRange) ? 24 : 1 const dateIntervals = splitDateIntoEqualIntervals(startDate, endDate, numberOfIntervals) let data: any[] = []