diff --git a/backend/src/api/index.ts b/backend/src/api/index.ts index 05ef5df43e..054ae52304 100644 --- a/backend/src/api/index.ts +++ b/backend/src/api/index.ts @@ -147,6 +147,14 @@ setImmediate(async () => { app.use(bodyParser.urlencoded({ limit: '5mb', extended: true })) + app.use((err: any, req: any, res: any, next: any) => { + if (err.type === 'entity.parse.failed') { + res.status(400).json({ error: { code: 'BAD_REQUEST', message: 'Invalid JSON body' } }) + return + } + next(err) + }) + app.use((req, res, next) => { // @ts-ignore req.userData = { diff --git a/backend/src/api/public/index.ts b/backend/src/api/public/index.ts index 10b4b94abf..dfbcd078cb 100644 --- a/backend/src/api/public/index.ts +++ b/backend/src/api/public/index.ts @@ -1,18 +1,12 @@ import { Router } from 'express' -import { AUTH0_CONFIG } from '../../conf' - import { errorHandler } from './middlewares/errorHandler' -import { oauth2Middleware } from './middlewares/oauth2Middleware' -import { staticApiKeyMiddleware } from './middlewares/staticApiKeyMiddleware' import { v1Router } from './v1' -import { devStatsRouter } from './v1/dev-stats' export function publicRouter(): Router { const router = Router() - router.use('/v1/dev-stats', staticApiKeyMiddleware(), devStatsRouter()) - router.use('/v1', oauth2Middleware(AUTH0_CONFIG), v1Router()) + router.use('/v1', v1Router()) router.use(errorHandler) return router diff --git a/backend/src/api/public/middlewares/errorHandler.ts b/backend/src/api/public/middlewares/errorHandler.ts index 8b514093b0..4da3552876 100644 --- a/backend/src/api/public/middlewares/errorHandler.ts +++ b/backend/src/api/public/middlewares/errorHandler.ts @@ -35,7 +35,13 @@ export const errorHandler: ErrorRequestHandler = ( } req.log.error( - { error, url: req.url, method: req.method, query: req.query, body: req.body }, + { + error: { name: error?.name, message: error?.message, stack: error?.stack }, + url: req.url, + method: req.method, + query: req.query, + body: req.body, + }, 'Unhandled error in public API', ) diff --git a/backend/src/api/public/v1/dev-stats/getAffiliations.ts b/backend/src/api/public/v1/dev-stats/getAffiliations.ts new file mode 100644 index 0000000000..9179081a13 --- /dev/null +++ b/backend/src/api/public/v1/dev-stats/getAffiliations.ts @@ -0,0 +1,89 @@ +import type { Request, Response } from 'express' +import { z } from 'zod' + +import { + findMembersByGithubHandles, + findVerifiedEmailsByMemberIds, + optionsQx, + resolveAffiliationsByMemberIds, +} from '@crowd/data-access-layer' + +import { ok } from '@/utils/api' +import { validateOrThrow } from '@/utils/validation' + +const MAX_HANDLES = 100 +const DEFAULT_PAGE_SIZE = 20 + +const bodySchema = z.object({ + githubHandles: z + .array(z.string().trim().min(1).toLowerCase()) + .min(1) + .max(MAX_HANDLES, `Maximum ${MAX_HANDLES} handles per request`), +}) + +const querySchema = z.object({ + page: z.coerce.number().int().min(1).default(1), + pageSize: z.coerce.number().int().min(1).max(MAX_HANDLES).default(DEFAULT_PAGE_SIZE), +}) + +export async function getAffiliations(req: Request, res: Response): Promise { + const { githubHandles } = validateOrThrow(bodySchema, req.body) + const { page, pageSize } = validateOrThrow(querySchema, req.query) + const qx = optionsQx(req) + + const offset = (page - 1) * pageSize + + // Step 1: find all verified members across all handles + const allMemberRows = await findMembersByGithubHandles(qx, githubHandles) + + const foundHandles = new Set(allMemberRows.map((r) => r.githubHandle.toLowerCase())) + const notFound = githubHandles.filter((h) => !foundHandles.has(h)) + + const pageMemberRows = allMemberRows.slice(offset, offset + pageSize) + + if (pageMemberRows.length === 0) { + ok(res, { + total: githubHandles.length, + totalFound: allMemberRows.length, + page, + pageSize, + contributorsInPage: 0, + contributors: [], + notFound, + }) + return + } + + const memberIds = pageMemberRows.map((r) => r.memberId) + + // Step 2: fetch verified emails for current page + const emailRows = await findVerifiedEmailsByMemberIds(qx, memberIds) + + const emailsByMember = new Map() + for (const row of emailRows) { + const list = emailsByMember.get(row.memberId) ?? [] + list.push(row.email) + emailsByMember.set(row.memberId, list) + } + + // Step 3: resolve affiliations for current page only + const affiliationsByMember = await resolveAffiliationsByMemberIds(qx, memberIds) + + // Step 4: build response + const contributors = pageMemberRows.map((member) => ({ + githubHandle: member.githubHandle, + name: member.displayName, + emails: emailsByMember.get(member.memberId) ?? [], + affiliations: affiliationsByMember.get(member.memberId) ?? [], + })) + + ok(res, { + total: githubHandles.length, + totalFound: allMemberRows.length, + page, + pageSize, + contributorsInPage: contributors.length, + contributors, + notFound, + }) +} diff --git a/backend/src/api/public/v1/dev-stats/index.ts b/backend/src/api/public/v1/dev-stats/index.ts index 3dc77716a3..4fc7ca5ffa 100644 --- a/backend/src/api/public/v1/dev-stats/index.ts +++ b/backend/src/api/public/v1/dev-stats/index.ts @@ -2,8 +2,11 @@ import { Router } from 'express' import { createRateLimiter } from '@/api/apiRateLimiter' import { requireScopes } from '@/api/public/middlewares/requireScopes' +import { safeWrap } from '@/middlewares/errorMiddleware' import { SCOPES } from '@/security/scopes' +import { getAffiliations } from './getAffiliations' + const rateLimiter = createRateLimiter({ max: 60, windowMs: 60 * 1000 }) export function devStatsRouter(): Router { @@ -11,9 +14,7 @@ export function devStatsRouter(): Router { router.use(rateLimiter) - router.post('/affiliations', requireScopes([SCOPES.READ_AFFILIATIONS]), (_req, res) => { - res.json({ status: 'ok' }) - }) + router.post('/', requireScopes([SCOPES.READ_AFFILIATIONS]), safeWrap(getAffiliations)) return router } diff --git a/backend/src/api/public/v1/index.ts b/backend/src/api/public/v1/index.ts index 43ae7c7988..7bbed58f32 100644 --- a/backend/src/api/public/v1/index.ts +++ b/backend/src/api/public/v1/index.ts @@ -1,13 +1,25 @@ import { Router } from 'express' +import { NotFoundError } from '@crowd/common' + +import { AUTH0_CONFIG } from '../../../conf' +import { oauth2Middleware } from '../middlewares/oauth2Middleware' +import { staticApiKeyMiddleware } from '../middlewares/staticApiKeyMiddleware' + +import { devStatsRouter } from './dev-stats' import { membersRouter } from './members' import { organizationsRouter } from './organizations' export function v1Router(): Router { const router = Router() - router.use('/members', membersRouter()) - router.use('/organizations', organizationsRouter()) + router.use('/members', oauth2Middleware(AUTH0_CONFIG), membersRouter()) + router.use('/organizations', oauth2Middleware(AUTH0_CONFIG), organizationsRouter()) + router.use('/member-organization-affiliations', staticApiKeyMiddleware(), devStatsRouter()) + + router.use(() => { + throw new NotFoundError() + }) return router } diff --git a/services/libs/data-access-layer/src/affiliations/index.ts b/services/libs/data-access-layer/src/affiliations/index.ts new file mode 100644 index 0000000000..92b4936baa --- /dev/null +++ b/services/libs/data-access-layer/src/affiliations/index.ts @@ -0,0 +1,343 @@ +import { QueryExecutor } from '../queryExecutor' + +const BLACKLISTED_TITLES = ['investor', 'mentor', 'board member'] + +export interface IAffiliationPeriod { + organization: string + startDate: string | null + endDate: string | null +} + +interface IWorkRow { + id: string + memberId: string + organizationId: string + organizationName: string + title: string | null + dateStart: string | null + dateEnd: string | null + createdAt: Date | string + isPrimaryWorkExperience: boolean + memberCount: number + segmentId: string | null +} + +/** + * this intentionally differs from the equivalent query in member-organization-affiliation/index.ts + * which uses organizationSegmentsAgg to compute memberCount. This is because the api should be faster + */ +export async function findWorkExperiencesBulk( + qx: QueryExecutor, + memberIds: string[], +): Promise { + const rows: IWorkRow[] = await qx.select( + ` + WITH relevant_orgs AS ( + SELECT DISTINCT "organizationId" + FROM "memberOrganizations" + WHERE "memberId" IN ($(memberIds:csv)) + AND "deletedAt" IS NULL + ), + aggs AS ( + SELECT "organizationId", COUNT(DISTINCT "memberId") AS total_count + FROM "memberOrganizations" + WHERE "organizationId" IN (SELECT "organizationId" FROM relevant_orgs) + AND "deletedAt" IS NULL + GROUP BY "organizationId" + ) + SELECT + mo.id, + mo."memberId", + mo."organizationId", + o."displayName" AS "organizationName", + mo.title, + mo."dateStart", + mo."dateEnd", + mo."createdAt", + COALESCE(ovr."isPrimaryWorkExperience", false) AS "isPrimaryWorkExperience", + COALESCE(a.total_count, 0) AS "memberCount", + NULL::text AS "segmentId" + FROM "memberOrganizations" mo + JOIN organizations o ON mo."organizationId" = o.id + LEFT JOIN "memberOrganizationAffiliationOverrides" ovr ON ovr."memberOrganizationId" = mo.id + LEFT JOIN aggs a ON a."organizationId" = mo."organizationId" + WHERE mo."memberId" IN ($(memberIds:csv)) + AND mo."deletedAt" IS NULL + AND COALESCE(ovr."allowAffiliation", true) = true + `, + { memberIds }, + ) + + return rows.filter( + (r) => !r.title || !BLACKLISTED_TITLES.some((t) => r.title?.toLowerCase().includes(t)), + ) +} + +export async function findManualAffiliationsBulk( + qx: QueryExecutor, + memberIds: string[], +): Promise { + return qx.select( + ` + SELECT + msa.id, + msa."memberId", + msa."organizationId", + o."displayName" AS "organizationName", + NULL AS title, + msa."dateStart", + msa."dateEnd", + NULL::timestamptz AS "createdAt", + false AS "isPrimaryWorkExperience", + 0 AS "memberCount", + msa."segmentId" + FROM "memberSegmentAffiliations" msa + JOIN organizations o ON msa."organizationId" = o.id + WHERE msa."memberId" IN ($(memberIds:csv)) + `, + { memberIds }, + ) +} + +function durationMs(org: IWorkRow): number { + const start = new Date(org.dateStart ?? '').getTime() + const end = new Date(org.dateEnd ?? '9999-12-31').getTime() + return end - start +} + +function longestDateRange(orgs: IWorkRow[]): IWorkRow { + const withDates = orgs.filter((r) => r.dateStart) + const candidates = withDates.length > 0 ? withDates : orgs + return candidates.reduce((best, org) => (durationMs(org) > durationMs(best) ? org : best)) +} + +function selectPrimaryWorkExperience(orgs: IWorkRow[]): IWorkRow { + if (orgs.length === 1) return orgs[0] + + // 1. Manual affiliations (segmentId non-null) always win + const manual = orgs.filter((r) => r.segmentId !== null) + if (manual.length > 0) { + if (manual.length === 1) return manual[0] + return longestDateRange(manual) + } + + // 2. isPrimaryWorkExperience = true — prefer those with a dateStart + const primary = orgs.filter((r) => r.isPrimaryWorkExperience) + if (primary.length > 0) return primary.find((r) => r.dateStart) ?? primary[0] + + // 3. Only one org has a dateStart — pick it + const withDates = orgs.filter((r) => r.dateStart) + if (withDates.length === 1) return withDates[0] + + // 4. Org with strictly more members wins; if tied, fall through + const sorted = [...orgs].sort((a, b) => b.memberCount - a.memberCount) + if (sorted.length >= 2 && sorted[0].memberCount > sorted[1].memberCount) { + return sorted[0] + } + + // 5. Longest date range as final tiebreaker + return longestDateRange(orgs) +} + +/** Returns the org used to fill gaps — primary undated wins, then earliest-created undated. */ +function findFallbackOrg(rows: IWorkRow[]): IWorkRow | null { + const primaryUndated = rows.find((r) => r.isPrimaryWorkExperience && !r.dateStart && !r.dateEnd) + if (primaryUndated) return primaryUndated + + return ( + rows + .filter((r) => !r.dateStart && !r.dateEnd) + .sort((a, b) => new Date(a.createdAt).getTime() - new Date(b.createdAt).getTime()) + .at(0) ?? null + ) +} + +/** + * Collects all date boundaries from the dated rows, capped at today. + * Each dateStart and (dateEnd + 1 day) marks a point where active orgs can change. + */ +function collectBoundaries(datedRows: IWorkRow[]): Date[] { + const today = startOfDay(new Date()) + + const ms = new Set([today.getTime()]) + + for (const row of datedRows) { + const start = startOfDay(row.dateStart ?? '') + if (start <= today) ms.add(start.getTime()) + + if (row.dateEnd) { + const afterEnd = startOfDay(row.dateEnd) + afterEnd.setUTCDate(afterEnd.getUTCDate() + 1) + if (afterEnd <= today) ms.add(afterEnd.getTime()) + } + } + + return Array.from(ms) + .sort((a, b) => a - b) + .map((t) => new Date(t)) +} + +function orgsActiveAt(rows: IWorkRow[], boundaryDate: Date): IWorkRow[] { + return rows.filter((role) => { + if (!role.dateStart && !role.dateEnd) return true // truly undated: active at every boundary + + const roleStart = role.dateStart ? startOfDay(role.dateStart) : null + const roleEnd = role.dateEnd ? startOfDay(role.dateEnd) : null + + // org is active if the boundary date falls within its employment period + return (!roleStart || boundaryDate >= roleStart) && (!roleEnd || boundaryDate <= roleEnd) + }) +} + +function startOfDay(date: Date | string): Date { + const d = new Date(date) + d.setUTCHours(0, 0, 0, 0) + return d +} + +function dayBefore(date: Date): Date { + const d = new Date(date) + d.setUTCDate(d.getUTCDate() - 1) + return d +} + +/** Iterates boundary intervals and builds non-overlapping affiliation windows. */ +function buildTimeline( + allRows: IWorkRow[], + fallbackOrg: IWorkRow | null, + boundaries: Date[], +): IAffiliationPeriod[] { + const affiliations: IAffiliationPeriod[] = [] + let currentOrg: IWorkRow = null + let currentWindowStart: Date = null + let uncoveredPeriodStart: Date = null + + for (const boundaryDate of boundaries) { + const activeOrgsAtBoundary = orgsActiveAt(allRows, boundaryDate) + + // No orgs active at this boundary — close the current window and start tracking a gap + if (activeOrgsAtBoundary.length === 0) { + if (currentOrg && currentWindowStart) { + affiliations.push({ + organization: currentOrg.organizationName, + startDate: currentWindowStart.toISOString(), + endDate: dayBefore(boundaryDate).toISOString(), + }) + currentOrg = null + currentWindowStart = null + } + + if (uncoveredPeriodStart === null) { + uncoveredPeriodStart = boundaryDate + } + + continue + } + + // Orgs are active again — close the uncovered period using the fallback org if available + if (uncoveredPeriodStart !== null) { + if (fallbackOrg) { + affiliations.push({ + organization: fallbackOrg.organizationName, + startDate: uncoveredPeriodStart.toISOString(), + endDate: dayBefore(boundaryDate).toISOString(), + }) + } + + uncoveredPeriodStart = null + } + + const winningAffiliation = selectPrimaryWorkExperience(activeOrgsAtBoundary) + + // No current window open — start a new one with the winning org + if (!currentOrg) { + currentOrg = winningAffiliation + currentWindowStart = boundaryDate + continue + } + + // Winning org changed — close the current window and open a new one + if (currentOrg.organizationId !== winningAffiliation.organizationId) { + affiliations.push({ + organization: currentOrg.organizationName, + startDate: currentWindowStart.toISOString(), + endDate: dayBefore(boundaryDate).toISOString(), + }) + currentOrg = winningAffiliation + currentWindowStart = boundaryDate + } + } + + // Close the last open window using the org's actual end date (null = ongoing) + if (currentOrg && currentWindowStart) { + affiliations.push({ + organization: currentOrg.organizationName, + startDate: currentWindowStart.toISOString(), + endDate: currentOrg.dateEnd ? new Date(currentOrg.dateEnd).toISOString() : null, + }) + } + + // Close a trailing uncovered period using the fallback org (ongoing, no end date) + if (uncoveredPeriodStart !== null && fallbackOrg) { + affiliations.push({ + organization: fallbackOrg.organizationName, + startDate: uncoveredPeriodStart.toISOString(), + endDate: null, + }) + } + + return affiliations +} + +function resolveAffiliationsForMember(rows: IWorkRow[]): IAffiliationPeriod[] { + // If one undated work-experience org is marked primary, drop other undated work-experience orgs + // to avoid infinite conflicts. Manual affiliations (segmentId !== null) are never dropped. + const primaryUndated = rows.find((r) => r.isPrimaryWorkExperience && !r.dateStart && !r.dateEnd) + const cleaned = primaryUndated + ? rows.filter((r) => r.segmentId !== null || r.dateStart || r.id === primaryUndated.id) + : rows + + const fallbackOrg = findFallbackOrg(cleaned) + const datedRows = cleaned.filter((r) => r.dateStart) + + if (datedRows.length === 0) { + if (fallbackOrg) { + return [{ organization: fallbackOrg.organizationName, startDate: null, endDate: null }] + } + return [] + } + + const boundaries = collectBoundaries(datedRows) + + // Pass all cleaned rows (not just dated) so undated orgs compete at every boundary (bug 2 fix) + const timeline = buildTimeline(cleaned, fallbackOrg, boundaries) + + return timeline.sort((a, b) => { + if (!a.startDate) return 1 + if (!b.startDate) return -1 + return new Date(b.startDate).getTime() - new Date(a.startDate).getTime() + }) +} + +export async function resolveAffiliationsByMemberIds( + qx: QueryExecutor, + memberIds: string[], +): Promise> { + const [workExperiences, manualAffiliations] = await Promise.all([ + findWorkExperiencesBulk(qx, memberIds), + findManualAffiliationsBulk(qx, memberIds), + ]) + + const byMember = new Map() + for (const row of [...workExperiences, ...manualAffiliations]) { + const list = byMember.get(row.memberId) ?? [] + list.push(row) + byMember.set(row.memberId, list) + } + + const result = new Map() + for (const id of memberIds) { + result.set(id, resolveAffiliationsForMember(byMember.get(id) ?? [])) + } + return result +} diff --git a/services/libs/data-access-layer/src/devStats/index.ts b/services/libs/data-access-layer/src/devStats/index.ts new file mode 100644 index 0000000000..166d1b9992 --- /dev/null +++ b/services/libs/data-access-layer/src/devStats/index.ts @@ -0,0 +1,56 @@ +import { MemberIdentityType, PlatformType } from '@crowd/types' + +import { QueryExecutor } from '../queryExecutor' + +export interface IDevStatsMemberRow { + githubHandle: string + memberId: string + displayName: string | null +} + +export async function findMembersByGithubHandles( + qx: QueryExecutor, + lowercasedHandles: string[], +): Promise { + return qx.select( + ` + SELECT + mi.value AS "githubHandle", + mi."memberId", + m."displayName" + FROM "memberIdentities" mi + JOIN members m ON m.id = mi."memberId" + WHERE mi.platform = $(platform) + AND mi.type = $(type) + AND mi.verified = true + AND lower(mi.value) IN ($(lowercasedHandles:csv)) + AND mi."deletedAt" IS NULL + AND m."deletedAt" IS NULL + `, + { + platform: PlatformType.GITHUB, + type: MemberIdentityType.USERNAME, + lowercasedHandles, + }, + ) +} + +export async function findVerifiedEmailsByMemberIds( + qx: QueryExecutor, + memberIds: string[], +): Promise<{ memberId: string; email: string }[]> { + return qx.select( + ` + SELECT "memberId", value AS email + FROM "memberIdentities" + WHERE "memberId" IN ($(memberIds:csv)) + AND type = $(type) + AND verified = true + AND "deletedAt" IS NULL + `, + { + memberIds, + type: MemberIdentityType.EMAIL, + }, + ) +} diff --git a/services/libs/data-access-layer/src/index.ts b/services/libs/data-access-layer/src/index.ts index 639f0547b8..1d092b26b6 100644 --- a/services/libs/data-access-layer/src/index.ts +++ b/services/libs/data-access-layer/src/index.ts @@ -1,4 +1,6 @@ export * from './activities' +export * from './affiliations' +export * from './devStats' export * from './activityRelations' export * from './apiKeys' export * from './dashboards'