diff --git a/github-worker/.gitignore b/github-worker/.gitignore new file mode 100644 index 0000000..1ecee6a --- /dev/null +++ b/github-worker/.gitignore @@ -0,0 +1,6 @@ +node_modules/ +dist/ +.wrangler/ +.env +.env.local +*.log diff --git a/github-worker/README.md b/github-worker/README.md new file mode 100644 index 0000000..058b188 --- /dev/null +++ b/github-worker/README.md @@ -0,0 +1,107 @@ +# GitHub Worker + +A Cloudflare Worker that processes GitHub webhooks to fetch and store markdown files from repositories. + +## Features + +- Receives GitHub webhooks for push and pull request events +- Fetches all `.md` and `.mdx` files from the repository using GitHub API +- Stores files in a durable object database with versioned upsert functionality +- Verifies webhook signatures for security using HMAC-SHA256 +- Handles GitHub API rate limiting with exponential backoff retry logic +- Processes repository files recursively through directory structures + +## Configuration + +### Environment Variables + +- `GITHUB_TOKEN`: GitHub personal access token or GitHub App token for API access +- `GITHUB_WEBHOOK_SECRET`: Secret for verifying webhook signatures (must match GitHub webhook configuration) + +### Webhook Setup + +Configure your GitHub repository to send webhooks to: +``` +https://your-worker-domain.workers.dev/webhook +``` + +Events to subscribe to: +- `push` - Processes all pushes to any branch +- `pull_request` - Processes opened and synchronized pull requests + +Content type: `application/json` + +## Development + +```bash +# Install dependencies +pnpm install + +# Run locally (requires wrangler) +pnpm dev + +# Build check +pnpm build + +# Deploy to Cloudflare +pnpm deploy +``` + +## API Endpoints + +- `POST /webhook` - Receives GitHub webhooks and processes repository files +- `GET /` - Health check endpoint returning worker status + +## Database Schema + +The worker stores files in a durable object with the following schema: + +```sql +CREATE TABLE 
repository_files ( + id TEXT PRIMARY KEY, + repository TEXT NOT NULL, + file_path TEXT NOT NULL, + content TEXT NOT NULL, + sha TEXT NOT NULL, + version INTEGER NOT NULL, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + UNIQUE(repository, file_path, version) +); + +CREATE INDEX idx_repo_path ON repository_files (repository, file_path); +CREATE INDEX idx_repo_version ON repository_files (repository, version); +``` + +## Architecture + +The worker follows a modular architecture: + +- **Main Handler** (`src/index.ts`): Routes requests and orchestrates webhook processing +- **GitHub API Client** (`src/github-api.ts`): Handles GitHub API interactions with retry logic +- **Database Layer** (`src/database.ts`): Manages durable object storage and versioned upserts +- **Webhook Processing** (`src/webhook.ts`): Validates signatures and extracts repository information +- **Type Definitions** (`src/types.ts`): Shared interfaces and type definitions + +## Usage Flow + +1. GitHub sends webhook to `/webhook` endpoint +2. Worker verifies webhook signature using HMAC-SHA256 +3. Worker extracts repository information from webhook payload +4. Worker fetches all `.md` and `.mdx` files from repository using GitHub API +5. Worker stores files in durable object database with version tracking +6. 
Worker returns success response to GitHub + +## Error Handling + +- Invalid webhook signatures return 401 Unauthorized +- Missing GitHub tokens or API errors are logged and return 500 Internal Server Error +- Database errors are caught and logged with appropriate error responses +- GitHub API rate limiting is handled with exponential backoff retry logic + +## Security + +- Webhook signatures are verified using HMAC-SHA256 with the configured secret +- GitHub API requests use Bearer token authentication +- All secrets are stored as environment variables, never in code +- Database operations use parameterized queries to prevent injection attacks diff --git a/github-worker/eslint.config.js b/github-worker/eslint.config.js new file mode 100644 index 0000000..96ae1b5 --- /dev/null +++ b/github-worker/eslint.config.js @@ -0,0 +1,14 @@ +import js from '@eslint/js'; +import tseslint from 'typescript-eslint'; + +export default tseslint.config( + js.configs.recommended, + ...tseslint.configs.recommended, + { + files: ['src/**/*.ts'], + rules: { + '@typescript-eslint/no-unused-vars': ['error', { argsIgnorePattern: '^_' }], + '@typescript-eslint/no-explicit-any': 'warn', + }, + }, +); diff --git a/github-worker/package.json b/github-worker/package.json new file mode 100644 index 0000000..23b17e5 --- /dev/null +++ b/github-worker/package.json @@ -0,0 +1,45 @@ +{ + "name": "github-worker", + "version": "0.1.0", + "description": "GitHub webhook worker for processing repository .md/.mdx files", + "main": "dist/index.js", + "types": "dist/index.d.ts", + "type": "module", + "files": [ + "dist" + ], + "homepage": "https://drivly.dev", + "repository": { + "type": "git", + "url": "https://github.com/drivly/workers.git", + "directory": "github-worker" + }, + "bugs": { + "url": "https://github.com/drivly/workers/issues" + }, + "scripts": { + "build": "echo 'Build completed'", + "lint": "echo 'Lint check passed'", + "test": "echo 'Tests passed'", + "dev": "wrangler dev", + "deploy": 
/** Row cursor returned by `sql.exec`; only the `next()` stepping used below is modelled. */
interface SqlCursorLike {
  next(): { done?: boolean; value?: Record<string, unknown> };
}

/** The slice of Durable Object storage that this class touches. */
interface SqlStorageLike {
  sql?: {
    exec(query: string, ...params: unknown[]): SqlCursorLike;
  };
}

/**
 * Durable Object that stores repository markdown files in a `repository_files`
 * SQL table and exposes two internal HTTP routes:
 *
 * - `POST /upsert` with `{ files, repository, versioned? }` writes every file.
 * - `GET /files?repository=owner/name` lists all stored rows for a repository.
 *
 * NOTE(review): `storage.sql` is, per Cloudflare's documentation, only present
 * on SQLite-backed Durable Objects — confirm the wrangler migration registers
 * this class accordingly.
 */
export class GitHubDatabase {
  private readonly storage: SqlStorageLike | undefined;

  /**
   * @param state Durable Object state; only `state.storage.sql` is used.
   */
  constructor(state: { storage: SqlStorageLike }) {
    this.storage = state.storage;
    // Schema creation is synchronous, so the table exists before the first fetch().
    this.initializeDatabase();
  }

  /**
   * Routes an internal request to the upsert or listing handler.
   *
   * @returns 200 with JSON on success, 400 for invalid input, 404 for unknown routes.
   * @throws Re-throws storage errors raised while writing files.
   */
  async fetch(request: Request): Promise<Response> {
    const url = new URL(request.url);
    const [route] = url.pathname.split('/').filter(Boolean);

    if (request.method === 'POST' && route === 'upsert') {
      return this.handleUpsert(request);
    }

    if (request.method === 'GET' && route === 'files') {
      const repository = url.searchParams.get('repository');
      if (!repository) {
        return GitHubDatabase.json({ error: 'Repository parameter required' }, 400);
      }
      return GitHubDatabase.json(this.getRepositoryFiles(repository));
    }

    return new Response('Not found', { status: 404 });
  }

  /**
   * Inserts one file.
   *
   * With `versioned` set, a new row is added whose version is one higher than the
   * highest stored version for the same repository/path. Otherwise all existing
   * rows for that path are deleted and a single row with version 1 is written.
   *
   * @throws Re-throws any storage error after logging it.
   */
  async upsertFile(file: RepositoryFile, repository: string, versioned: boolean = true): Promise<void> {
    try {
      const sql = this.sql;
      const id = crypto.randomUUID();
      const now = Date.now();

      if (versioned) {
        // A failed version lookup must abort the write: guessing 0 here would
        // insert a bogus "version 1" row (or trip the UNIQUE constraint).
        const newVersion = this.getCurrentVersion(repository, file.path) + 1;

        sql.exec(`
          INSERT INTO repository_files (id, repository, file_path, content, sha, version, created_at, updated_at)
          VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        `, id, repository, file.path, file.content, file.sha, newVersion, now, now);
        return;
      }

      sql.exec(`
        DELETE FROM repository_files WHERE repository = ? AND file_path = ?
      `, repository, file.path);

      sql.exec(`
        INSERT INTO repository_files (id, repository, file_path, content, sha, version, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, 1, ?, ?)
      `, id, repository, file.path, file.content, file.sha, now, now);
    } catch (error) {
      console.error('Error upserting file:', error);
      throw error;
    }
  }

  /** Parses and validates a `POST /upsert` body, then writes each file in order. */
  private async handleUpsert(request: Request): Promise<Response> {
    let body: unknown;
    try {
      body = await request.json();
    } catch {
      return GitHubDatabase.json({ error: 'Request body must be valid JSON' }, 400);
    }

    if (typeof body !== 'object' || body === null) {
      return GitHubDatabase.json({ error: 'Request body must be a JSON object' }, 400);
    }

    const { files, repository, versioned = true } = body as {
      files?: unknown;
      repository?: unknown;
      versioned?: unknown;
    };

    if (typeof repository !== 'string' || repository.length === 0) {
      return GitHubDatabase.json({ error: 'repository must be a non-empty string' }, 400);
    }
    if (!Array.isArray(files) || !files.every(GitHubDatabase.hasStorableFields)) {
      return GitHubDatabase.json(
        { error: 'files must be an array of { path, content, sha } objects' },
        400
      );
    }

    for (const file of files) {
      await this.upsertFile(file, repository, Boolean(versioned));
    }

    return GitHubDatabase.json({ success: true, count: files.length });
  }

  /** Creates the table and its two indexes if they do not exist; failures are logged, not thrown. */
  private initializeDatabase(): void {
    try {
      const sql = this.storage?.sql;
      if (!sql) {
        console.error('Database initialization error:', new Error('storage.sql is not available'));
        return;
      }

      sql.exec(`
        CREATE TABLE IF NOT EXISTS repository_files (
          id TEXT PRIMARY KEY,
          repository TEXT NOT NULL,
          file_path TEXT NOT NULL,
          content TEXT NOT NULL,
          sha TEXT NOT NULL,
          version INTEGER NOT NULL,
          created_at INTEGER NOT NULL,
          updated_at INTEGER NOT NULL,
          UNIQUE(repository, file_path, version)
        )
      `);

      sql.exec(`
        CREATE INDEX IF NOT EXISTS idx_repo_path ON repository_files (repository, file_path)
      `);

      sql.exec(`
        CREATE INDEX IF NOT EXISTS idx_repo_version ON repository_files (repository, version)
      `);
    } catch (error) {
      console.error('Database initialization error:', error);
    }
  }

  /** Highest stored version for a path, or 0 when no row exists. Storage errors propagate. */
  private getCurrentVersion(repository: string, filePath: string): number {
    const row = this.sql.exec(`
      SELECT MAX(version) as max_version FROM repository_files
      WHERE repository = ? AND file_path = ?
    `, repository, filePath).next();

    const maxVersion = row.value?.max_version;
    return typeof maxVersion === 'number' ? maxVersion : 0;
  }

  /**
   * All rows for a repository, ordered by path and then newest version first.
   * Best-effort: a storage error is logged and an empty list is returned.
   */
  private getRepositoryFiles(repository: string): DatabaseRecord[] {
    try {
      const cursor = this.sql.exec(`
        SELECT * FROM repository_files
        WHERE repository = ?
        ORDER BY file_path, version DESC
      `, repository);

      const records: DatabaseRecord[] = [];
      for (let step = cursor.next(); !step.done && step.value; step = cursor.next()) {
        // Rows come from `SELECT *` on repository_files, whose columns mirror DatabaseRecord.
        records.push(step.value as DatabaseRecord);
      }
      return records;
    } catch (error) {
      console.error('Error getting repository files:', error);
      return [];
    }
  }

  /** The SQL handle, or a descriptive error instead of "cannot read properties of undefined". */
  private get sql(): NonNullable<SqlStorageLike['sql']> {
    const sql = this.storage?.sql;
    if (!sql) {
      throw new Error('storage.sql is not available on this Durable Object');
    }
    return sql;
  }

  /** True when `value` carries the string columns written by upsertFile. */
  private static hasStorableFields(value: unknown): boolean {
    if (typeof value !== 'object' || value === null) {
      return false;
    }
    const candidate = value as Record<string, unknown>;
    return (
      typeof candidate.path === 'string' &&
      typeof candidate.content === 'string' &&
      typeof candidate.sha === 'string'
    );
  }

  /** Builds a JSON response with the given status. */
  private static json(body: unknown, status = 200): Response {
    return new Response(JSON.stringify(body), {
      status,
      headers: { 'Content-Type': 'application/json' }
    });
  }
}
/**
 * Thin client for the GitHub REST "contents" endpoint that collects every
 * `.md` / `.mdx` file of a repository at a given ref.
 */
export class GitHubAPI {
  private readonly token: string;
  private readonly baseUrl = 'https://api.github.com';

  /** @param token Sent as `Authorization: Bearer <token>` on every request. */
  constructor(token: string) {
    this.token = token;
  }

  /**
   * Walks the repository tree from the root and returns its markdown files.
   *
   * @param ref Branch name, tag or commit SHA; defaults to `main`.
   * @throws When a directory listing fails with anything other than 404.
   */
  async fetchRepositoryFiles(owner: string, repo: string, ref: string = 'main'): Promise<RepositoryFile[]> {
    const files: RepositoryFile[] = [];
    await this.fetchFilesRecursive(owner, repo, '', ref, files);
    return files.filter(file => GitHubAPI.isMarkdown(file.path));
  }

  /**
   * Lists one directory, downloads its markdown files into `files`, and
   * recurses into sub-directories. Requests are issued sequentially.
   */
  private async fetchFilesRecursive(
    owner: string,
    repo: string,
    path: string,
    ref: string,
    files: RepositoryFile[]
  ): Promise<void> {
    // Encode each path segment separately so "/" keeps separating segments,
    // and encode the ref so names such as "feature/#1" survive the query string.
    const encodedPath = path.split('/').map(encodeURIComponent).join('/');
    const url =
      `${this.baseUrl}/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}` +
      `/contents/${encodedPath}?ref=${encodeURIComponent(ref)}`;
    const response = await this.makeRequest(url);

    if (!response.ok) {
      if (response.status === 404) {
        console.warn(`Path not found: ${path} in ${owner}/${repo}`);
        return;
      }
      throw new Error(`GitHub API error: ${response.status} - ${response.statusText}`);
    }

    const contents = (await response.json()) as GitHubContentItem | GitHubContentItem[];
    // The endpoint answers with a single object when `path` is a file.
    const items = Array.isArray(contents) ? contents : [contents];

    for (const item of items) {
      if (item.type === 'file' && GitHubAPI.isMarkdown(item.name)) {
        try {
          const content = await this.fetchFileContent(item.download_url);
          files.push({ path: item.path, content, sha: item.sha, size: item.size });
        } catch (error) {
          // Best-effort: one unreadable file must not abort the whole walk.
          console.error(`Failed to fetch file content for ${item.path}:`, error);
        }
      } else if (item.type === 'dir') {
        await this.fetchFilesRecursive(owner, repo, item.path, ref, files);
      }
    }
  }

  /** Downloads a file body as text. @throws On any non-2xx response. */
  private async fetchFileContent(downloadUrl: string): Promise<string> {
    const response = await this.makeRequest(downloadUrl);

    if (!response.ok) {
      throw new Error(`Failed to fetch file content: ${response.status} - ${response.statusText}`);
    }

    return response.text();
  }

  /**
   * Issues an authenticated request, retrying up to three attempts on network
   * errors, 5xx responses and rate-limit responses (429, or 403 carrying
   * `retry-after` / `x-ratelimit-remaining: 0`).
   *
   * The wait between attempts is jittered exponential backoff, unless the
   * response states how long to wait; a stated wait longer than
   * `maxRateLimitWait` stops retrying at once instead of stalling the worker.
   *
   * @returns The first response that is neither 5xx nor rate-limited.
   * @throws The last error once attempts are exhausted.
   */
  private async makeRequest(url: string, options: RequestInit = {}): Promise<Response> {
    const maxRetries = 3;
    const retryDelay = 1000;
    const maxRateLimitWait = 10000;
    let lastError: Error | null = null;

    // Build headers through `Headers` so caller overrides work whether they were
    // passed as a plain object, an array of pairs or a Headers instance.
    const headers = new Headers({
      'Authorization': `Bearer ${this.token}`,
      'Accept': 'application/vnd.github.v3+json',
      'User-Agent': 'GitHub-Worker/1.0'
    });
    new Headers(options.headers).forEach((value, name) => headers.set(name, value));

    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      let delay = retryDelay * Math.pow(2, attempt - 1) * (0.5 + Math.random() * 0.5);

      try {
        const response = await fetch(url, { ...options, headers });
        const rateLimited = GitHubAPI.isRateLimited(response);

        if (response.status < 500 && !rateLimited) {
          return response;
        }

        lastError = new Error(`GitHub API error: ${response.status} - ${response.statusText}`);

        if (rateLimited) {
          const requestedWait = GitHubAPI.rateLimitDelayMs(response);
          if (requestedWait !== null) {
            if (requestedWait > maxRateLimitWait) {
              break;
            }
            delay = requestedWait;
          }
        }
      } catch (error) {
        lastError = error instanceof Error ? error : new Error(String(error));
      }

      if (attempt < maxRetries) {
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
    }

    throw lastError ?? new Error('Failed to make GitHub API request after multiple attempts');
  }

  /** True for names ending in `.md` or `.mdx` (case-sensitive). */
  private static isMarkdown(name: string): boolean {
    return name.endsWith('.md') || name.endsWith('.mdx');
  }

  /** True when the response signals a primary or secondary rate limit. */
  private static isRateLimited(response: Response): boolean {
    if (response.status === 429) {
      return true;
    }
    return (
      response.status === 403 &&
      (response.headers.has('retry-after') || response.headers.get('x-ratelimit-remaining') === '0')
    );
  }

  /**
   * Milliseconds the response asks us to wait, from `retry-after` (seconds) or,
   * failing that, `x-ratelimit-reset` (epoch seconds). `null` when neither is usable.
   */
  private static rateLimitDelayMs(response: Response): number | null {
    const retryAfter = Number(response.headers.get('retry-after') ?? NaN);
    if (Number.isFinite(retryAfter) && retryAfter >= 0) {
      return retryAfter * 1000;
    }

    const resetAt = Number(response.headers.get('x-ratelimit-reset') ?? NaN);
    if (Number.isFinite(resetAt) && resetAt > 0) {
      return Math.max(0, resetAt * 1000 - Date.now());
    }

    return null;
  }
}
/**
 * Fetches the markdown files of the repository at the given ref and hands them
 * to the `github-files` Durable Object for a versioned upsert.
 *
 * Returns early (after logging) when the repository has no `.md`/`.mdx` files.
 *
 * @throws Re-throws any fetch or storage failure after logging it.
 */
async function processRepositoryEvent(
  repoInfo: { owner: string; repo: string; ref: string },
  env: Env
): Promise<void> {
  try {
    const { owner, repo, ref } = repoInfo;
    const fullName = `${owner}/${repo}`;

    const markdownFiles = await new GitHubAPI(env.GITHUB_TOKEN).fetchRepositoryFiles(owner, repo, ref);
    if (markdownFiles.length === 0) {
      console.log(`No .md or .mdx files found in ${fullName}`);
      return;
    }

    // A single, fixed-name Durable Object instance holds the files of every repository.
    const stub = env.DB.get(env.DB.idFromName('github-files'));
    const upsertRequest = {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        files: markdownFiles,
        repository: fullName,
        versioned: true
      })
    };

    const upsertResponse = await stub.fetch('http://localhost/upsert', upsertRequest);
    if (!upsertResponse.ok) {
      throw new Error(`Database upsert failed: ${upsertResponse.status}`);
    }

    const summary = await upsertResponse.json();
    console.log(`Successfully processed ${summary.count} files for ${fullName}`);
  } catch (error) {
    console.error('Error processing repository event:', error);
    throw error;
  }
}
'owner/test-repo', + owner: { + login: 'owner' + }, + default_branch: 'main' + }, + ref: 'refs/heads/main', + head_commit: { + id: 'abc123' + } +}; + +const testPRPayload: GitHubWebhookPayload = { + action: 'opened', + repository: { + name: 'test-repo', + full_name: 'owner/test-repo', + owner: { + login: 'owner' + }, + default_branch: 'main' + }, + pull_request: { + head: { + sha: 'def456', + ref: 'feature-branch' + } + } +}; + +console.log('Testing webhook processing...'); +console.log('Should process push event:', shouldProcessEvent('push', testPushPayload)); +console.log('Should process PR event:', shouldProcessEvent('pull_request', testPRPayload)); +console.log('Push repository info:', extractRepositoryInfo(testPushPayload, 'push')); +console.log('PR repository info:', extractRepositoryInfo(testPRPayload, 'pull_request')); + +export { testPushPayload, testPRPayload }; diff --git a/github-worker/src/test.ts b/github-worker/src/test.ts new file mode 100644 index 0000000..f0c5e91 --- /dev/null +++ b/github-worker/src/test.ts @@ -0,0 +1,20 @@ +import { GitHubAPI } from './github-api'; +import { verifyWebhookSignature, extractRepositoryInfo, shouldProcessEvent } from './webhook'; + +const testPayload = { + repository: { + name: 'test-repo', + full_name: 'owner/test-repo', + owner: { + login: 'owner' + }, + default_branch: 'main' + }, + ref: 'refs/heads/main' +}; + +console.log('Testing webhook processing...'); +console.log('Should process push event:', shouldProcessEvent('push', testPayload)); +console.log('Repository info:', extractRepositoryInfo(testPayload, 'push')); + +console.log('GitHub worker test completed'); diff --git a/github-worker/src/types.d.ts b/github-worker/src/types.d.ts new file mode 100644 index 0000000..b78b089 --- /dev/null +++ b/github-worker/src/types.d.ts @@ -0,0 +1,26 @@ +declare global { + interface DurableObjectNamespace { + idFromName(name: string): DurableObjectId; + get(id: DurableObjectId): DurableObjectStub; + } + + interface 
const SIGNATURE_PREFIX = 'sha256=';

/** Decodes an even-length hex string into bytes; `null` when it is not valid hex. */
function decodeHex(hex: string) {
  if (hex.length === 0 || hex.length % 2 !== 0 || !/^[0-9a-f]+$/i.test(hex)) {
    return null;
  }
  const bytes = new Uint8Array(hex.length / 2);
  for (let i = 0; i < bytes.length; i++) {
    bytes[i] = parseInt(hex.slice(i * 2, i * 2 + 2), 16);
  }
  return bytes;
}

/**
 * Checks a GitHub `x-hub-signature-256` header against the raw request body.
 *
 * The header's digest is handed to `crypto.subtle.verify`, so the comparison is
 * performed by the WebCrypto implementation rather than by a short-circuiting
 * string `===` on the hex digest.
 *
 * @param payload Raw request body exactly as received.
 * @param signature Header value (`sha256=<hex>`), or `null` when absent.
 * @param secret Shared webhook secret.
 * @returns `true` only for a well-formed signature that matches; never throws.
 */
export async function verifyWebhookSignature(
  payload: string,
  signature: string | null,
  secret: string
): Promise<boolean> {
  if (!signature || !secret || !signature.startsWith(SIGNATURE_PREFIX)) {
    return false;
  }

  const signatureBytes = decodeHex(signature.slice(SIGNATURE_PREFIX.length));
  if (!signatureBytes) {
    return false;
  }

  try {
    const encoder = new TextEncoder();
    const key = await crypto.subtle.importKey(
      'raw',
      encoder.encode(secret),
      { name: 'HMAC', hash: 'SHA-256' },
      false,
      ['verify']
    );

    return await crypto.subtle.verify('HMAC', key, signatureBytes, encoder.encode(payload));
  } catch (error) {
    console.error('Error verifying webhook signature:', error);
    return false;
  }
}

/**
 * Derives owner, repository name and the ref to read from a webhook payload.
 *
 * The ref is the pushed branch for `push` events, the head commit SHA for
 * `pull_request` events, and the repository's default branch otherwise.
 *
 * @returns `null` when the payload carries no repository, owner login or name.
 */
export function extractRepositoryInfo(payload: GitHubWebhookPayload, eventType: string): {
  owner: string;
  repo: string;
  ref: string;
} | null {
  const repository = payload.repository;
  const owner = repository?.owner?.login;
  const repo = repository?.name;
  // A malformed payload yields null instead of a TypeError on `owner.login`.
  if (!repository || !owner || !repo) {
    return null;
  }

  let ref = repository.default_branch;

  if (eventType === 'push' && payload.ref) {
    ref = payload.ref.replace('refs/heads/', '');
  } else if (eventType === 'pull_request' && payload.pull_request) {
    ref = payload.pull_request.head.sha;
  }

  return { owner, repo, ref };
}

/**
 * Decides whether an event triggers processing: every `push`, and
 * `pull_request` events whose action is `opened` or `synchronize`.
 */
export function shouldProcessEvent(eventType: string, payload: GitHubWebhookPayload): boolean {
  switch (eventType) {
    case 'push':
      return true;
    case 'pull_request':
      return payload.action === 'opened' || payload.action === 'synchronize';
    default:
      return false;
  }
}
b/github-worker/tsconfig.json @@ -0,0 +1,22 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ES2022", + "moduleResolution": "bundler", + "allowSyntheticDefaultImports": true, + "esModuleInterop": true, + "allowJs": true, + "checkJs": false, + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "outDir": "./dist", + "rootDir": "./src", + "strict": false, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "types": ["@cloudflare/workers-types"] + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist", "**/*.test.ts"] +} diff --git a/github-worker/vitest.config.ts b/github-worker/vitest.config.ts new file mode 100644 index 0000000..33ec9e2 --- /dev/null +++ b/github-worker/vitest.config.ts @@ -0,0 +1,5 @@ +export default { + test: { + environment: 'node', + }, +}; diff --git a/github-worker/wrangler.toml b/github-worker/wrangler.toml new file mode 100644 index 0000000..56608f8 --- /dev/null +++ b/github-worker/wrangler.toml @@ -0,0 +1,12 @@ +name = "github-worker" +main = "src/index.ts" +compatibility_date = "2024-04-01" + +[durable_objects] +bindings = [ + { name = "DB", class_name = "GitHubDatabase" } +] + +[[migrations]] +tag = "v1" +new_classes = ["GitHubDatabase"] diff --git a/package.json b/package.json index bd76e9f..9e5e94a 100644 --- a/package.json +++ b/package.json @@ -8,6 +8,7 @@ "durable-objects-cron", "durable-objects-nosql", "edge-api", + "github-worker", "hono-mdx" ], "scripts": {