Add a streaming (generator / async) mode to the git-diff parser.

 * src/constants.ts: hoist the three hunk-header regexes into shared constants.
 * src/context.ts: Context now walks a line generator; the new AsyncContext
   does the same over an async generator or a node:readline interface.
 * src/parse-git-diff-async.ts: async parser yielding one file change at a time.
 * src/parse-git-diff.ts: accept a line generator and yield file changes
   lazily; export the helpers shared with the async parser.
 * src/utils.ts: runtime guards for async generators and readline interfaces.

Review notes:
 * Type arguments (Generator<...>, AsyncGenerator<...>, Promise<...>,
   Record<...>) were missing from the received copy of this patch and are
   filled in from how each value is used; confirm against the real types.
 * Empty input no longer throws: Context records EOF when priming the first
   line, and the async parseFileChange checks EOF after its first read.
 * getRange() uses the start line as the count when the count is omitted,
   whereas unified diff defines an omitted count as 1. Left unchanged here so
   the async parser keeps the behaviour shown; TODO confirm and fix together
   with the sync parser.
 * Hunk line counts are updated for the added comments and guards. The index
   hashes are carried over unchanged and no longer describe the new blobs.

diff --git a/src/constants.ts b/src/constants.ts
index 796196e..696ba40 100644
--- a/src/constants.ts
+++ b/src/constants.ts
@@ -25,3 +25,14 @@ export const ExtendedHeader = {
 } as const;
 
 export const ExtendedHeaderValues = Object.values(ExtendedHeader);
+
+/** Normal hunk header: `@@ -start[,count] +start[,count] @@ [section]`. */
+export const NORMAL_CHUNK_RE =
+  /^@@\s\-(\d+),?(\d+)?\s\+(\d+),?(\d+)?\s@@\s?(.+)?/;
+
+/** Combined hunk header: two `-` ranges and one `+` range between `@@@`. */
+export const COMBINED_CHUNK_RE =
+  /^@@@\s\-(\d+),?(\d+)?\s\-(\d+),?(\d+)?\s\+(\d+),?(\d+)?\s@@@\s?(.+)?/;
+
+/** `Binary files <a> and <b> differ` line; captures both paths. */
+export const BINARY_CHUNK_RE = /^Binary\sfiles\s(.*)\sand\s(.*)\sdiffer$/;
diff --git a/src/context.ts b/src/context.ts
index 35f2cd2..77d178d 100644
--- a/src/context.ts
+++ b/src/context.ts
@@ -1,27 +1,106 @@
+import type { Interface } from 'node:readline';
 import { FilledGitDiffOptions, GitDiffOptions } from './types';
+import { isReadlineInterface } from './utils';
 
 export default class Context {
-  private line: number = 1;
-  private lines: string[] = [];
+  private lines: Generator<string>;
   public options: FilledGitDiffOptions = {
     noPrefix: false,
   };
-  public constructor(diff: string, options?: GitDiffOptions) {
-    this.lines = diff.split('\n');
+  private _currentLine: string;
+  private _isEof = false;
+
+  public constructor(
+    diff: string | Generator<string>,
+    options?: GitDiffOptions
+  ) {
+    if (typeof diff === 'string') {
+      this.lines = this.getGeneratorFromString(diff);
+    } else {
+      this.lines = diff;
+    }
+
+    // Prime the first line and record EOF right away, so a generator that
+    // yields nothing is reported as exhausted instead of being parsed.
+    const first = this.lines.next();
+    this._isEof = Boolean(first.done);
+    this._currentLine = first.value;
 
     this.options.noPrefix = !!options?.noPrefix;
   }
 
+  private *getGeneratorFromString(text: string) {
+    for (const line of text.split('\n')) {
+      yield line;
+    }
+  }
+
   public getCurLine(): string {
-    return this.lines[this.line - 1];
+    return this._currentLine;
   }
 
   public nextLine(): string | undefined {
-    this.line++;
+    const next = this.lines.next();
+    this._isEof = Boolean(next.done);
+    this._currentLine = next.value;
+    return this.getCurLine();
+  }
+
+  public isEof(): boolean {
+    return this._isEof;
+  }
+}
+
+/**
+ * Line cursor over an asynchronous source: an async generator of lines or a
+ * `node:readline` interface. The first line is fetched lazily by the first
+ * `getCurLine()`/`nextLine()` call, so `isEof()` is only meaningful after one
+ * of them has been awaited.
+ */
+export class AsyncContext {
+  public options: FilledGitDiffOptions = {
+    noPrefix: false,
+  };
+  private _currentLine: string = '';
+  private _isEof = false;
+  private opened = false;
+  private lines: AsyncGenerator<string>;
+
+  public constructor(
+    diff: AsyncGenerator<string> | Interface,
+    options?: GitDiffOptions
+  ) {
+    if (isReadlineInterface(diff)) {
+      this.lines = this.getGenerator(diff);
+    } else {
+      this.lines = diff;
+    }
+
+    this.options.noPrefix = !!options?.noPrefix;
+  }
+
+  async *getGenerator(stream: Interface) {
+    for await (const line of stream) {
+      yield line;
+    }
+  }
+
+  /** Resolves to the current line, reading the first one on first use. */
+  public async getCurLine(): Promise<string> {
+    if (!this.opened) await this.nextLine();
+    return this._currentLine;
+  }
+
+  /** Advances to the next line and updates the EOF flag. */
+  public async nextLine(): Promise<string | undefined> {
+    this.opened = true;
+    const next = await this.lines.next();
+    this._isEof = Boolean(next.done);
+    this._currentLine = next.value;
     return this.getCurLine();
   }
 
   public isEof(): boolean {
-    return this.line > this.lines.length;
+    return this._isEof;
   }
 }
diff --git a/src/parse-git-diff-async.ts b/src/parse-git-diff-async.ts
new file mode 100644
index 0000000..2aa8819
--- /dev/null
+++ b/src/parse-git-diff-async.ts
@@ -0,0 +1,385 @@
+import type { Interface } from 'node:readline';
+import {
+  BINARY_CHUNK_RE,
+  COMBINED_CHUNK_RE,
+  ExtendedHeader,
+  ExtendedHeaderValues,
+  FileType,
+  LineType,
+  NORMAL_CHUNK_RE,
+} from './constants.js';
+import { AsyncContext } from './context.js';
+import {
+  getFilePath,
+  getLineType,
+  isComparisonInputLine,
+} from './parse-git-diff.js';
+import type {
+  AnyChunk,
+  AnyFileChange,
+  AnyLineChange,
+  ChunkRange,
+  GitDiffOptions,
+} from './types.js';
+
+/**
+ * Streaming variant of the parser: consumes diff lines from an async generator
+ * or a `node:readline` interface and yields one file change at a time.
+ *
+ * @param diff - Source of diff lines, one line per item.
+ * @param options - Parser options (`noPrefix`).
+ * @returns An async generator of parsed file changes.
+ */
+export default function parseGitDiff(
+  diff: AsyncGenerator<string> | Interface,
+  options?: GitDiffOptions
+): AsyncGenerator<AnyFileChange> {
+  const ctx = new AsyncContext(diff, options);
+
+  return parseFileChanges(ctx);
+}
+
+/** Yields file changes until EOF or until a section fails to parse. */
+async function* parseFileChanges(
+  ctx: AsyncContext
+): AsyncGenerator<AnyFileChange> {
+  while (!ctx.isEof()) {
+    const changed = await parseFileChange(ctx);
+    if (!changed) {
+      break;
+    }
+    yield changed;
+  }
+}
+
+/** Parses one `diff ...` section; resolves to `undefined` if none starts. */
+async function parseFileChange(
+  ctx: AsyncContext
+): Promise<AnyFileChange | undefined> {
+  const firstLine = await ctx.getCurLine();
+  // An empty stream hits EOF on its very first read and has no line to test.
+  if (ctx.isEof() || !isComparisonInputLine(firstLine)) {
+    return;
+  }
+  await ctx.nextLine();
+
+  let isDeleted = false;
+  let isNew = false;
+  let isRename = false;
+  let pathBefore = '';
+  let pathAfter = '';
+  while (!ctx.isEof()) {
+    const extHeader = await parseExtendedHeader(ctx);
+    if (!extHeader) {
+      break;
+    }
+    if (extHeader.type === ExtendedHeader.Deleted) isDeleted = true;
+    if (extHeader.type === ExtendedHeader.NewFile) isNew = true;
+    if (extHeader.type === ExtendedHeader.RenameFrom) {
+      isRename = true;
+      pathBefore = extHeader.path as string;
+    }
+    if (extHeader.type === ExtendedHeader.RenameTo) {
+      isRename = true;
+      pathAfter = extHeader.path as string;
+    }
+  }
+
+  const changeMarkers = await parseChangeMarkers(ctx);
+  const chunks = await parseChunks(ctx);
+
+  if (isDeleted && changeMarkers) {
+    return {
+      type: FileType.Deleted,
+      chunks,
+      path: changeMarkers.deleted,
+    };
+  } else if (
+    isDeleted &&
+    chunks.length &&
+    chunks[0].type === 'BinaryFilesChunk'
+  ) {
+    return {
+      type: FileType.Deleted,
+      chunks,
+      path: chunks[0].pathBefore,
+    };
+  } else if (isNew && changeMarkers) {
+    return {
+      type: FileType.Added,
+      chunks,
+      path: changeMarkers.added,
+    };
+  } else if (isNew && chunks.length && chunks[0].type === 'BinaryFilesChunk') {
+    return {
+      type: FileType.Added,
+      chunks,
+      path: chunks[0].pathAfter,
+    };
+  } else if (isRename) {
+    return {
+      type: FileType.Renamed,
+      pathAfter,
+      pathBefore,
+      chunks,
+    };
+  } else if (changeMarkers) {
+    return {
+      type: FileType.Changed,
+      chunks,
+      path: changeMarkers.added,
+    };
+  } else if (
+    chunks.length &&
+    chunks[0].type === 'BinaryFilesChunk' &&
+    chunks[0].pathAfter
+  ) {
+    return {
+      type: FileType.Changed,
+      chunks,
+      path: chunks[0].pathAfter,
+    };
+  }
+
+  return;
+}
+
+/** Collects consecutive chunks until EOF or until no chunk can be parsed. */
+async function parseChunks(context: AsyncContext): Promise<AnyChunk[]> {
+  const chunks: AnyChunk[] = [];
+
+  while (!context.isEof()) {
+    const chunk = await parseChunk(context);
+    if (!chunk) {
+      break;
+    }
+    chunks.push(chunk);
+  }
+  return chunks;
+}
+
+/** Parses one chunk header plus its change lines (none for binary chunks). */
+async function parseChunk(
+  context: AsyncContext
+): Promise<AnyChunk | undefined> {
+  const chunkHeader = await parseChunkHeader(context);
+  if (!chunkHeader) {
+    return;
+  }
+
+  if (chunkHeader.type === 'Normal') {
+    const changes = await parseChanges(
+      context,
+      chunkHeader.fromFileRange,
+      chunkHeader.toFileRange
+    );
+    return {
+      ...chunkHeader,
+      type: 'Chunk',
+      changes,
+    };
+  } else if (
+    chunkHeader.type === 'Combined' &&
+    chunkHeader.fromFileRangeA &&
+    chunkHeader.fromFileRangeB
+  ) {
+    const changes = await parseChanges(
+      context,
+      chunkHeader.fromFileRangeA.start < chunkHeader.fromFileRangeB.start
+        ? chunkHeader.fromFileRangeA
+        : chunkHeader.fromFileRangeB,
+      chunkHeader.toFileRange
+    );
+    return {
+      ...chunkHeader,
+      type: 'CombinedChunk',
+      changes,
+    };
+  } else if (
+    chunkHeader.type === 'BinaryFiles' &&
+    chunkHeader.fileA &&
+    chunkHeader.fileB
+  ) {
+    return {
+      type: 'BinaryFilesChunk',
+      pathBefore: chunkHeader.fileA,
+      pathAfter: chunkHeader.fileB,
+    };
+  }
+}
+
+/** Consumes the current line if it starts with a known extended header. */
+async function parseExtendedHeader(ctx: AsyncContext) {
+  const line = await ctx.getCurLine();
+  const type = ExtendedHeaderValues.find((v) => line.startsWith(v));
+
+  if (type) {
+    await ctx.nextLine();
+  }
+
+  if (type === ExtendedHeader.RenameFrom || type === ExtendedHeader.RenameTo) {
+    return {
+      type,
+      path: line.slice(`${type} `.length),
+    } as const;
+  } else if (type) {
+    return {
+      type,
+    } as const;
+  }
+
+  return null;
+}
+
+/** Tries the normal, combined and binary patterns on the current line. */
+async function parseChunkHeader(ctx: AsyncContext) {
+  const line = await ctx.getCurLine();
+  const normalChunkExec = NORMAL_CHUNK_RE.exec(line);
+  if (!normalChunkExec) {
+    const combinedChunkExec = COMBINED_CHUNK_RE.exec(line);
+
+    if (!combinedChunkExec) {
+      const binaryChunkExec = BINARY_CHUNK_RE.exec(line);
+      if (binaryChunkExec) {
+        const [all, fileA, fileB] = binaryChunkExec;
+        await ctx.nextLine();
+        return {
+          type: 'BinaryFiles',
+          fileA: getFilePath(ctx, fileA, 'src'),
+          fileB: getFilePath(ctx, fileB, 'dst'),
+        } as const;
+      }
+
+      return null;
+    }
+
+    const [
+      all,
+      delStartA,
+      delLinesA,
+      delStartB,
+      delLinesB,
+      addStart,
+      addLines,
+      context,
+    ] = combinedChunkExec;
+    await ctx.nextLine();
+    return {
+      context,
+      type: 'Combined',
+      fromFileRangeA: getRange(delStartA, delLinesA),
+      fromFileRangeB: getRange(delStartB, delLinesB),
+      toFileRange: getRange(addStart, addLines),
+    } as const;
+  }
+
+  const [all, delStart, delLines, addStart, addLines, context] =
+    normalChunkExec;
+  await ctx.nextLine();
+  return {
+    context,
+    type: 'Normal',
+    toFileRange: getRange(addStart, addLines),
+    fromFileRange: getRange(delStart, delLines),
+  };
+}
+
+/** Converts the captured `start`/`lines` strings of a range to numbers. */
+function getRange(start: string, lines?: string) {
+  const startNum = parseInt(start, 10);
+  return {
+    start: startNum,
+    lines: lines === undefined ? startNum : parseInt(lines, 10),
+  };
+}
+
+/** Reads the `--- `/`+++ ` pair; `null` unless both give a non-empty path. */
+async function parseChangeMarkers(context: AsyncContext): Promise<{
+  deleted: string;
+  added: string;
+} | null> {
+  const deleterMarker = await parseMarker(context, '--- ');
+  const deleted = deleterMarker
+    ? getFilePath(context, deleterMarker, 'src')
+    : deleterMarker;
+
+  const addedMarker = await parseMarker(context, '+++ ');
+  const added = addedMarker
+    ? getFilePath(context, addedMarker, 'dst')
+    : addedMarker;
+  return added && deleted ? { added, deleted } : null;
+}
+
+/** Consumes the current line if it starts with `marker`; returns the rest. */
+async function parseMarker(
+  context: AsyncContext,
+  marker: string
+): Promise<string | null> {
+  const line = await context.getCurLine();
+  if (line?.startsWith(marker)) {
+    await context.nextLine();
+    return line.replace(marker, '');
+  }
+  return null;
+}
+
+type LineType = AnyLineChange['type'];
+
+/** Collects change lines, numbering them from the two range starts. */
+async function parseChanges(
+  ctx: AsyncContext,
+  rangeBefore: ChunkRange,
+  rangeAfter: ChunkRange
+): Promise<AnyLineChange[]> {
+  const changes: AnyLineChange[] = [];
+  let lineBefore = rangeBefore.start;
+  let lineAfter = rangeAfter.start;
+
+  while (!ctx.isEof()) {
+    const line = await ctx.getCurLine();
+    const type = getLineType(line);
+    if (!type) {
+      break;
+    }
+    await ctx.nextLine();
+
+    let change: AnyLineChange;
+    const content = line.slice(1);
+    switch (type) {
+      case LineType.Added: {
+        change = {
+          type,
+          lineAfter: lineAfter++,
+          content,
+        };
+        break;
+      }
+      case LineType.Deleted: {
+        change = {
+          type,
+          lineBefore: lineBefore++,
+          content,
+        };
+        break;
+      }
+      case LineType.Unchanged: {
+        change = {
+          type,
+          lineBefore: lineBefore++,
+          lineAfter: lineAfter++,
+          content,
+        };
+        break;
+      }
+      case LineType.Message: {
+        change = {
+          type,
+          content: content.trim(),
+        };
+        break;
+      }
+    }
+    changes.push(change);
+  }
+  return changes;
+}
diff --git a/src/parse-git-diff.ts b/src/parse-git-diff.ts
index f7c7dad..9e5ed2d 100644
--- a/src/parse-git-diff.ts
+++ b/src/parse-git-diff.ts
@@ -1,45 +1,58 @@
-import Context from './context.js';
-import type {
-  GitDiff,
-  AnyFileChange,
-  AnyLineChange,
-  Chunk,
-  ChunkRange,
-  CombinedChunk,
-  AnyChunk,
-  FilledGitDiffOptions,
-  GitDiffOptions,
-} from './types.js';
 import {
+  BINARY_CHUNK_RE,
+  COMBINED_CHUNK_RE,
   ExtendedHeader,
   ExtendedHeaderValues,
   FileType,
   LineType,
+  NORMAL_CHUNK_RE,
 } from './constants.js';
+import Context, { AsyncContext } from './context.js';
+import type {
+  AnyChunk,
+  AnyFileChange,
+  AnyLineChange,
+  ChunkRange,
+  GitDiff,
+  GitDiffOptions,
+} from './types.js';
 
+export default function parseGitDiff(
+  diff: Generator<string>,
+  options?: GitDiffOptions
+): Generator<AnyFileChange>;
 export default function parseGitDiff(
   diff: string,
   options?: GitDiffOptions
-): GitDiff {
+): GitDiff;
+export default function parseGitDiff(
+  diff: string | Generator<string>,
+  options?: GitDiffOptions
+): GitDiff | Generator<AnyFileChange> {
   const ctx = new Context(diff, options);
+
   const files = parseFileChanges(ctx);
 
-  return {
-    type: 'GitDiff',
-    files,
-  };
+  if (typeof diff === 'string') {
+    return {
+      type: 'GitDiff',
+      files: Array.from(files),
+    };
+  }
+
+  return files;
 }
 
-function parseFileChanges(ctx: Context): AnyFileChange[] {
-  const changedFiles: AnyFileChange[] = [];
+function* parseFileChanges(
+  ctx: Context
+): Generator<AnyFileChange> {
   while (!ctx.isEof()) {
     const changed = parseFileChange(ctx);
     if (!changed) {
       break;
     }
-    changedFiles.push(changed);
+    yield changed;
   }
-  return changedFiles;
 }
 
 function parseFileChange(ctx: Context): AnyFileChange | undefined {
@@ -129,7 +142,7 @@ function parseFileChange(ctx: Context): AnyFileChange | undefined {
   return;
 }
 
-function isComparisonInputLine(line: string): boolean {
+export function isComparisonInputLine(line: string): boolean {
   return line.indexOf('diff') === 0;
 }
 
@@ -161,7 +174,7 @@ function parseChunk(context: Context): AnyChunk | undefined {
     return {
       ...chunkHeader,
       type: 'Chunk',
-      changes,
+      changes: Array.from(changes),
     };
   } else if (
     chunkHeader.type === 'Combined' &&
@@ -178,7 +191,7 @@ function parseChunk(context: Context): AnyChunk | undefined {
     return {
       ...chunkHeader,
       type: 'CombinedChunk',
-      changes,
+      changes: Array.from(changes),
     };
   } else if (
     chunkHeader.type === 'BinaryFiles' &&
@@ -217,18 +230,12 @@ function parseExtendedHeader(ctx: Context) {
 
 function parseChunkHeader(ctx: Context) {
   const line = ctx.getCurLine();
-  const normalChunkExec =
-    /^@@\s\-(\d+),?(\d+)?\s\+(\d+),?(\d+)?\s@@\s?(.+)?/.exec(line);
+  const normalChunkExec = NORMAL_CHUNK_RE.exec(line);
   if (!normalChunkExec) {
-    const combinedChunkExec =
-      /^@@@\s\-(\d+),?(\d+)?\s\-(\d+),?(\d+)?\s\+(\d+),?(\d+)?\s@@@\s?(.+)?/.exec(
-        line
-      );
+    const combinedChunkExec = COMBINED_CHUNK_RE.exec(line);
 
     if (!combinedChunkExec) {
-      const binaryChunkExec = /^Binary\sfiles\s(.*)\sand\s(.*)\sdiffer$/.exec(
-        line
-      );
+      const binaryChunkExec = BINARY_CHUNK_RE.exec(line);
       if (binaryChunkExec) {
         const [all, fileA, fileB] = binaryChunkExec;
         ctx.nextLine();
@@ -315,12 +322,11 @@ const CHAR_TYPE_MAP: Record<string, LineType> = {
   '\\': LineType.Message,
 };
 
-function parseChanges(
+function* parseChanges(
   ctx: Context,
   rangeBefore: ChunkRange,
   rangeAfter: ChunkRange
-): AnyLineChange[] {
-  const changes: AnyLineChange[] = [];
+): Generator<AnyLineChange> {
   let lineBefore = rangeBefore.start;
   let lineAfter = rangeAfter.start;
 
@@ -368,16 +374,19 @@ function parseChanges(
         break;
       }
     }
-    changes.push(change);
+    yield change;
   }
-  return changes;
 }
 
-function getLineType(line: string): LineType | null {
+export function getLineType(line: string): LineType | null {
   return CHAR_TYPE_MAP[line[0]] || null;
 }
 
-function getFilePath(ctx: Context, input: string, type: 'src' | 'dst') {
+export function getFilePath(
+  ctx: Context | AsyncContext,
+  input: string,
+  type: 'src' | 'dst'
+) {
   if (ctx.options.noPrefix) {
     return input;
   }
diff --git a/src/utils.ts b/src/utils.ts
new file mode 100644
index 0000000..55eae6b
--- /dev/null
+++ b/src/utils.ts
@@ -0,0 +1,21 @@
+import type { Interface } from 'node:readline';
+
+/** True for any non-null object that has `Symbol.asyncIterator`. */
+export function isAsyncGenerator(value: unknown): value is AsyncGenerator {
+  return (
+    typeof value === 'object' && value !== null && Symbol.asyncIterator in value
+  );
+}
+
+/**
+ * True for async-iterable objects that also have a `getPrompt` member, which
+ * is how a `readline.Interface` is told apart from a plain async generator.
+ */
+export function isReadlineInterface(value: unknown): value is Interface {
+  return (
+    typeof value === 'object' &&
+    value !== null &&
+    Symbol.asyncIterator in value &&
+    'getPrompt' in value
+  );
+}