@@ -4,10 +4,10 @@ const fs = require('fs');
 const path = require('path');

 // Configuration constants
-const MAX_TOKENS_PER_REQUEST = 100000; // Conservative limit for Gemini 2.5 Flash
+const MAX_TOKENS_PER_REQUEST = 80000; // Conservative limit for Gemini 2.5 Flash
 const CHARS_PER_TOKEN = 4; // Rough estimation
-const MAX_CHARS_PER_CHUNK = MAX_TOKENS_PER_REQUEST * CHARS_PER_TOKEN;
-const MAX_CHUNKS = 10; // Limit to prevent excessive API calls
+// const MAX_CHARS_PER_CHUNK = MAX_TOKENS_PER_REQUEST * CHARS_PER_TOKEN;
+const MAX_CHUNKS = 3; // Limit to prevent excessive API calls

 /**
  * Estimate token count for text (rough approximation)
@@ -16,35 +16,80 @@ function estimateTokens(text) {
   return Math.ceil(text.length / CHARS_PER_TOKEN);
 }

+
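+/**
+ * Promise-based delay helper, used to pause between Gemini API calls.
+ */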
+function sleep(ms) {
+  return new Promise(resolve => setTimeout(resolve, ms));
+}
+
+
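+/**
+ * Split a long string into pieces that each stay under maxTokens,
+ * packing whitespace-separated words greedily using estimateTokens().
+ */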
+function splitStringByTokens(str, maxTokens) {
+  console.error('splitStringByTokens');
+  const words = str.split(' ');
+  const result = [];
+  let currentLine = '';
+
+  for (const word of words) {
+    if (estimateTokens(currentLine + word) <= maxTokens) {
+      currentLine += (currentLine ? ' ' : '') + word;
+    } else {
+      if (currentLine) result.push(currentLine);
+      currentLine = word;
+    }
+  }
+
+  if (currentLine) result.push(currentLine);
+
+  return result;
+}
+
+
 /**
  * Split diff into chunks by file boundaries
  */
 function chunkDiffByFiles(diffContent) {
+  console.error('chunkDiffByFiles');
   const fileChunks = [];
   const lines = diffContent.split('\n');
   let currentChunk = '';
   let currentFile = '';
+  let tokenCount = 0;

   for (const line of lines) {
     // Check if this is a new file header
+    //console.error(`Line is estimated at ${estimateTokens(line)} tokens`);
+    tokenCount += estimateTokens(line);
+    //console.error(`Total tokens for this chunk is ${tokenCount}`);
     if (line.startsWith('diff --git') || line.startsWith('+++') || line.startsWith('---')) {
       // If we have content and it's getting large, save current chunk
-      if (currentChunk && estimateTokens(currentChunk) > MAX_CHARS_PER_CHUNK / 2) {
+      if (currentChunk && tokenCount > MAX_TOKENS_PER_REQUEST) {
         fileChunks.push({
           content: currentChunk.trim(),
           file: currentFile,
           type: 'file-chunk'
         });
         currentChunk = '';
+        tokenCount = 0;
       }

       // Start new chunk
       currentChunk = line + '\n';

+
       // Extract filename for reference
       if (line.startsWith('+++')) {
         currentFile = line.replace('+++ b/', '').replace('+++ a/', '');
       }
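+      // If the running estimate already exceeds the per-request budget,
+      // split the oversized chunk on word boundaries, push the full pieces,
+      // and keep the last piece as the in-progress chunk.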
+      if (tokenCount > MAX_TOKENS_PER_REQUEST) {
+        const split_chunk = splitStringByTokens(currentChunk, MAX_TOKENS_PER_REQUEST);
+        currentChunk = split_chunk[split_chunk.length - 1];
+        for (let i = 0; i < split_chunk.length - 1; i++) {
+          fileChunks.push({
+            content: split_chunk[i].trim(),
+            file: currentFile,
+            type: 'file-chunk'
+          });
+        }
+      }
     } else {
       currentChunk += line + '\n';
     }
@@ -107,7 +152,8 @@ ${diffContent}`;
  * Call Gemini API with the given prompt
  */
 async function callGeminiAPI(prompt, apiKey) {
-  const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?key=${apiKey}`, {
+  console.error(`Sending prompt with an estimated ${estimateTokens(prompt)} tokens`);
+  const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key=${apiKey}`, {
     method: 'POST',
     headers: { 'Content-Type': 'application/json' },
     body: JSON.stringify({
@@ -147,6 +193,7 @@ async function callGeminiAPI(prompt, apiKey) {
  * Process diff chunks and combine results
  */
 async function processChunks(chunks, apiKey) {
+  console.error('processChunks');
   if (chunks.length === 1) {
     // Single chunk, process normally
     return await callGeminiAPI(createPRPrompt(chunks[0].content), apiKey);
@@ -157,6 +204,10 @@ async function processChunks(chunks, apiKey) {

   for (let i = 0; i < Math.min(chunks.length, MAX_CHUNKS); i++) {
     const chunk = chunks[i];
+    if (i > 0) {
+      // pause for 5 seconds between requests
+      await sleep(5 * 1000);
+    }
     console.error(`Processing chunk ${i + 1}/${Math.min(chunks.length, MAX_CHUNKS)} (${chunk.file || 'unknown file'})`);

     try {
@@ -174,7 +225,7 @@ async function processChunks(chunks, apiKey) {
   if (chunkResults.length === 0) {
     throw new Error('Failed to process any chunks');
   }
-
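+  // Brief pause before the final combine request.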
+  await sleep(5 * 1000);
   // Combine results from multiple chunks
   const combinedPrompt = `Combine these pull request descriptions into a single, coherent PR description. Use the same format:

@@ -215,42 +266,12 @@ Create a unified description that captures the overall changes across all files.
   if (estimatedTokens > MAX_TOKENS_PER_REQUEST) {
     console.error('Large diff detected, using chunking strategy...');

-    // For extremely large diffs, first try to summarize
-    if (estimatedTokens > MAX_TOKENS_PER_REQUEST * 5) {
-      console.error('Extremely large diff detected, using summary approach...');
-      const summaryPrompt = createSummaryPrompt(diffContent);
-      result = await callGeminiAPI(summaryPrompt, apiKey);
-
-      // Create a simplified PR description based on the summary
-      const prPrompt = `Based on this summary of changes, create a pull request description using this format:
-
-## Description
-Brief summary of changes (1-2 sentences max).
-
-## Changes
-- [ ] Key change 1
-- [ ] Key change 2
-- [ ] Key change 3 (max 5 items)
-
-## Verification
-- [ ] Test step 1
-- [ ] Test step 2
-- [ ] Test step 3 (max 3 items)
-
-Summary: ${result}`;
-
-      result = await callGeminiAPI(prPrompt, apiKey);
-    } else {
-      // Chunk the diff and process
-      const chunks = chunkDiffByFiles(diffContent);
-      console.error(`Split diff into ${chunks.length} chunks`);
-
-      if (chunks.length > MAX_CHUNKS) {
-        console.error(`Warning: Too many chunks (${chunks.length}), processing first ${MAX_CHUNKS} chunks only`);
-      }
-
-      result = await processChunks(chunks, apiKey);
+    const chunks = chunkDiffByFiles(diffContent);
+    console.error(`Split diff into ${chunks.length} chunks`);
+    if (chunks.length > MAX_CHUNKS) {
+      console.error(`Warning: Too many chunks (${chunks.length}), processing first ${MAX_CHUNKS} chunks only`);
     }
+    result = await processChunks(chunks, apiKey);
   } else {
     // Small diff, process normally
     result = await callGeminiAPI(createPRPrompt(diffContent), apiKey);