-
Notifications
You must be signed in to change notification settings - Fork 1
/
github_diff.go
516 lines (453 loc) · 18.8 KB
/
github_diff.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
package github
import (
"bufio"
"context"
"errors"
"io"
"log"
"net/http"
"path/filepath"
"regexp"
"strconv"
"strings"
"github.com/google/go-github/v57/github"
)
// PullRequestURL holds the components extracted from a GitHub pull request
// URL of the form https://github.com/[owner]/[repo]/pull/[prNumber].
type PullRequestURL struct {
// Owner is the user or organization segment of the URL.
Owner string
// Repo is the repository name segment of the URL.
Repo string
// PRNumber is the pull request number parsed from the URL's number segment.
PRNumber int
}
// GitDiff is the parsed form of one file's section of a combined Git diff,
// as produced by parseGitDiffFileString.
type GitDiff struct {
// FilePathOld is the first of the two paths on the section's
// "diff --git" header line, stored verbatim (so it typically still
// carries Git's "a/" prefix). It is the file path before the changes
// were made.
FilePathOld string
// FilePathNew is the second of the two paths on the section's
// "diff --git" header line, stored verbatim (typically with Git's "b/"
// prefix). It is the file path after the changes were made and is the
// path the ignore filter is matched against.
FilePathNew string
// Index is the text that follows "index " on the section's index line,
// with surrounding whitespace trimmed. It usually contains the hash
// values before and after the changes, along with some additional
// metadata.
Index string
// DiffContents holds every line of the section that is neither the
// "diff --git" header nor the index line, joined with newlines. Besides
// the "+" (addition) and "-" (deletion) lines this includes whatever
// else the section contains, such as the "---"/"+++" lines and hunk
// headers.
DiffContents string
}
// ParsePullRequestURL parses a GitHub pull request URL and returns the owner,
// repository, and pull request number.
//
// The expected URL format is: https://github.com/[owner]/[repo]/pull/[prNumber]
// where [owner] is the GitHub username or organization name, [repo] is the
// repository name, and [prNumber] is the pull request number. A single
// trailing slash is tolerated. The host is not inspected, so URLs on other
// hosts with the same path layout are accepted as well.
//
// Parameters:
//   - pullRequestURL: the full URL of a GitHub pull request.
//
// Returns:
//   - A pointer to a PullRequestURL holding the extracted owner, repo and
//     pull request number.
//   - An error if the URL does not have the expected shape (wrong number of
//     segments, empty owner or repo, or a segment other than "pull" before
//     the number), if the number is not an integer (the strconv error is
//     returned unchanged), or if the number is not positive.
//
// Example:
//
//	prURL, err := ParsePullRequestURL("https://github.com/username/repository/pull/123")
//	if err != nil {
//		// Handle error
//	}
//	// Use prURL.Owner, prURL.Repo, and prURL.PRNumber
func ParsePullRequestURL(pullRequestURL string) (*PullRequestURL, error) {
	// "https://host/owner/repo/pull/N" splits into exactly seven segments:
	// "https:", "", host, owner, repo, "pull", N. Dropping one trailing slash
	// first keeps URLs copied from a browser address bar parseable.
	parts := strings.Split(strings.TrimSuffix(pullRequestURL, "/"), "/")
	if len(parts) != 7 {
		return nil, errors.New("invalid pull request URL")
	}

	owner, repo, kind := parts[3], parts[4], parts[5]
	if owner == "" || repo == "" || kind != "pull" {
		// Without this check any seven-segment string (for example an
		// issues URL) would be silently accepted as a pull request.
		return nil, errors.New("invalid pull request URL")
	}

	prNumber, err := strconv.Atoi(parts[6])
	if err != nil {
		return nil, err
	}
	if prNumber <= 0 {
		// Atoi accepts "0" and "-5", which can never be a pull request.
		return nil, errors.New("invalid pull request number")
	}

	return &PullRequestURL{
		Owner:    owner,
		Repo:     repo,
		PRNumber: prNumber,
	}, nil
}
// GetPullRequest retrieves the contents of a pull request's Git diff from
// GitHub using the supplied go-github client.
//
// It looks the pull request up with client.PullRequests.Get and then passes
// the pull request's diff URL to getDiffContents to download the raw diff.
//
// Parameters:
//   - ctx: context passed to the GitHub API call. getDiffContents takes no
//     context, so the subsequent diff download is not governed by ctx.
//   - pr: identifies the pull request (owner, repository, number). Must not
//     be nil.
//   - client: the GitHub API client used for the lookup. Must not be nil.
//
// Returns:
//   - A string containing the raw contents of the Git diff for the specified
//     pull request.
//   - An error if pr or client is nil, if the pull request lookup fails, or
//     if the diff contents cannot be obtained.
//
// Example:
//
//	prURL := &PullRequestURL{Owner: "username", Repo: "repository", PRNumber: 123}
//	diff, err := GetPullRequest(context.Background(), prURL, github.NewClient(nil))
//	if err != nil {
//		// Handle error
//	}
//	// Use diff as a string containing the Git diff
//
// Deprecated: Use GetPullRequestWithClient or GetPullRequestFromGithub instead.
// GetPullRequest will be removed in a future version.
func GetPullRequest(ctx context.Context, pr *PullRequestURL, client *github.Client) (string, error) {
	// Fail with an error rather than a nil-pointer panic on bad arguments.
	if pr == nil {
		return "", errors.New("pull request URL is nil")
	}
	if client == nil {
		return "", errors.New("github client is nil")
	}

	pullRequest, _, err := client.PullRequests.Get(ctx, pr.Owner, pr.Repo, pr.PRNumber)
	if err != nil {
		return "", err
	}
	return getDiffContents(pullRequest.GetDiffURL())
}
// GetPullRequestWithClient retrieves the contents of a pull request's Git diff
// from GitHub using an injected client. Accepting a GitHubClientInterface
// rather than a concrete client lets callers substitute a different
// implementation, such as a mock in tests.
//
// It looks the pull request up with client.Get and then passes the pull
// request's diff URL to getDiffContents to download the raw diff.
//
// Parameters:
//   - ctx: context passed to client.Get. getDiffContents takes no context, so
//     the subsequent diff download is not governed by ctx.
//   - pr: identifies the pull request (owner, repository, number). Must not
//     be nil.
//   - client: the GitHubClientInterface implementation used for the lookup.
//     Must not be nil.
//
// Returns:
//   - A string containing the raw contents of the Git diff for the specified
//     pull request.
//   - An error if pr or client is nil, if the pull request lookup fails, or
//     if the diff contents cannot be obtained.
//
// Example:
//
//	prURL := &PullRequestURL{Owner: "username", Repo: "repository", PRNumber: 123}
//	diff, err := GetPullRequestWithClient(context.Background(), prURL, injectedClient)
//	if err != nil {
//		// Handle error
//	}
//	// Use diff as a string containing the Git diff
func GetPullRequestWithClient(ctx context.Context, pr *PullRequestURL, client GitHubClientInterface) (string, error) {
	// Fail with an error rather than a nil-pointer panic on bad arguments.
	if pr == nil {
		return "", errors.New("pull request URL is nil")
	}
	if client == nil {
		return "", errors.New("github client is nil")
	}

	pullRequest, _, err := client.Get(ctx, pr.Owner, pr.Repo, pr.PRNumber)
	if err != nil {
		return "", err
	}
	return getDiffContents(pullRequest.GetDiffURL())
}
// GetPullRequestFromGithub fetches the Git diff of a pull request using a
// client that the function constructs itself, for callers that do not need to
// inject their own.
//
// The client is created with github.NewClient(nil), wrapped in a
// GitHubClientWrapper, and handed to GetPullRequestWithClient, which performs
// the lookup and the diff download.
//
// Parameters:
//   - ctx: context forwarded to GetPullRequestWithClient.
//   - pr: identifies the pull request (owner, repository, number).
//
// Returns:
//   - The raw contents of the pull request's Git diff.
//   - Whatever error GetPullRequestWithClient reports.
//
// Example:
//
//	prURL := &PullRequestURL{Owner: "username", Repo: "repository", PRNumber: 123}
//	diff, err := GetPullRequestFromGithub(context.Background(), prURL)
//	if err != nil {
//		// Handle error
//	}
//	// Use diff as a string containing the Git diff
func GetPullRequestFromGithub(ctx context.Context, pr *PullRequestURL) (string, error) {
	// NOTE(review): NewClient(nil) supplies no custom HTTP client, so requests
	// are presumably unauthenticated — confirm against the go-github docs.
	wrapped := &GitHubClientWrapper{Client: github.NewClient(nil)}
	return GetPullRequestWithClient(ctx, pr, wrapped)
}
// GetPullRequestWithDetails retrieves the full pull request object for a
// specific pull request, for callers that need more than its diff content.
//
// Parameters:
//   - ctx: context passed to client.Get.
//   - pr: identifies the pull request (owner, repository, number). Must not
//     be nil.
//   - client: the GitHubClientInterface implementation used for the lookup.
//     Must not be nil.
//
// Returns:
//   - The *github.PullRequest returned by client.Get.
//   - An error if pr or client is nil, or if client.Get fails; in that case
//     the returned pull request is always nil.
//
// Example:
//
//	prDetails, err := GetPullRequestWithDetails(context.Background(), prURL, githubClient)
//	if err != nil {
//		// Handle error
//	}
//	// Use prDetails to access detailed information about the pull request
func GetPullRequestWithDetails(
	ctx context.Context,
	pr *PullRequestURL,
	client GitHubClientInterface) (*github.PullRequest, error) {
	// Fail with an error rather than a nil-pointer panic on bad arguments.
	if pr == nil {
		return nil, errors.New("pull request URL is nil")
	}
	if client == nil {
		return nil, errors.New("github client is nil")
	}

	pullRequest, _, err := client.Get(ctx, pr.Owner, pr.Repo, pr.PRNumber)
	if err != nil {
		return nil, err
	}
	return pullRequest, nil
}
// ParseGitDiff turns a combined Git diff into one GitDiff per file, leaving
// out the files selected by ignoreList.
//
// The work is done in three steps:
//  1. splitDiffIntoFiles cuts the combined diff into per-file sections.
//  2. parseGitDiffFileString parses each section; a section that fails to
//     parse is silently skipped.
//  3. matchIgnoreFilter tests each parsed section against ignoreList; a
//     section it reports as matching is skipped.
//
// Parameters:
//   - diff: the combined Git diff.
//   - ignoreList: patterns handed to matchIgnoreFilter, which tests them
//     against each file's new path (FilePathNew).
//
// Returns:
//   - The parsed, non-ignored file diffs in their original order. The result
//     is nil when nothing is kept.
func ParseGitDiff(diff string, ignoreList []string) []*GitDiff {
	var kept []*GitDiff
	for _, section := range splitDiffIntoFiles(diff) {
		parsed, err := parseGitDiffFileString(section)
		// The filter is only consulted for sections that parsed cleanly.
		if err == nil && !matchIgnoreFilter(parsed, ignoreList) {
			kept = append(kept, parsed)
		}
	}
	return kept
}
// getDiffContents retrieves the contents of a Git diff from the given URL by
// issuing an HTTP GET request and returning the response body as a string.
//
// Parameters:
//   - diffURL: the URL from which the Git diff contents are to be retrieved.
//
// Returns:
//   - A string containing the contents of the Git diff.
//   - An error if the HTTP request fails, if the response status is not
//     200 OK, or if reading the response body fails. The string is empty
//     whenever an error is returned.
//
// The response body is always closed before the function returns; a failure
// to close it is logged rather than returned.
//
// Example:
//
//	diff, err := getDiffContents("https://github.com/user/repo/pull/123.diff")
//	if err != nil {
//		// Handle error
//	}
//	// Use diff as a string containing the Git diff
func getDiffContents(diffURL string) (string, error) {
	// NOTE(review): http.Get uses http.DefaultClient, which sets no timeout,
	// and this function takes no context — consider threading one through.
	resp, err := http.Get(diffURL)
	if err != nil {
		return "", err
	}
	// Register the close right after the error check so the body is released
	// on every return path, including a failed read.
	defer func() {
		if closeErr := resp.Body.Close(); closeErr != nil {
			log.Printf("Error closing response body: %v", closeErr)
		}
	}()

	// Check the status before reading so an error page is never buffered.
	if resp.StatusCode != http.StatusOK {
		return "", errors.New("failed to get diff contents")
	}

	bodyBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	return string(bodyBytes), nil
}
// matchIgnoreFilter reports whether the new path of file (FilePathNew) matches
// at least one pattern in ignoreList, as decided by matchFile.
//
// A pattern for which matchFile returns an error is skipped, so one bad
// pattern cannot disable the patterns that follow it. A nil file never
// matches.
func matchIgnoreFilter(file *GitDiff, ignoreList []string) bool {
	if file == nil {
		return false
	}
	for _, pattern := range ignoreList {
		match, err := matchFile(pattern, file.FilePathNew)
		if err != nil {
			// consider finding a way to notify the caller
			// an error has occurred. Until then, treat the pattern as a
			// non-match and keep evaluating the remaining patterns.
			continue
		}
		if match {
			return true
		}
	}
	return false
}
// matchFile reports whether file matches the regular expression pattern.
// An empty pattern never matches. If pattern is not a valid regular
// expression, the result is false together with the compile error.
func matchFile(pattern, file string) (bool, error) {
	// An empty regular expression would match every path, so it is rejected
	// up front.
	if len(pattern) == 0 {
		return false, nil
	}
	// regexp.MatchString compiles the pattern and yields (false, err) when
	// compilation fails.
	return regexp.MatchString(pattern, file)
}
// splitDiffIntoFiles splits a single diff string into a slice of strings,
// where each string represents the diff of an individual file.
//
// A line starting with "diff --git" begins a new file section. Any text that
// precedes the first such line is returned as a section of its own. Every
// section has its leading and trailing whitespace trimmed. An empty diff
// yields a nil slice.
func splitDiffIntoFiles(diff string) []string {
	var (
		files   []string
		curFile strings.Builder
	)
	// flush stores the section collected so far, if any, and starts a new one.
	flush := func() {
		if curFile.Len() > 0 {
			files = append(files, strings.TrimSpace(curFile.String()))
			curFile.Reset()
		}
	}

	scanner := bufio.NewScanner(strings.NewReader(diff))
	// The default Scanner limit is 64 KiB per line; beyond it Scan stops and
	// the rest of the diff would be dropped silently. Allow a line as long as
	// the whole input so that can never happen.
	scanner.Buffer(make([]byte, 0, 4096), len(diff)+1)
	for scanner.Scan() {
		line := scanner.Text()
		if strings.HasPrefix(line, "diff --git") {
			// Detected start of new file
			flush()
		}
		curFile.WriteString(line)
		curFile.WriteByte('\n')
	}
	// Add the last file diff to the list
	flush()
	return files
}
// parseGitDiffFileString takes a string representing the Git diff of a single
// file and returns a GitDiff struct containing the parsed information.
//
// The input is read line by line and each line is classified by its prefix:
//   - A line starting with "diff --git" supplies the file paths: the
//     whitespace-separated fields after "diff" and "--git" must be exactly
//     two, the old path followed by the new path. Because the line is split
//     on whitespace, paths that contain spaces are rejected.
//   - A line starting with "index " supplies the index information, which is
//     the rest of the line with surrounding whitespace trimmed.
//   - Every other line is collected as diff content.
//
// If a prefix occurs more than once, the last occurrence wins.
//
// Parameters:
//   - input: the Git diff of a single file.
//
// Returns:
//   - A pointer to a GitDiff holding the parsed file paths, the index, and
//     the diff content lines joined with newlines.
//   - An error if the "diff --git" line does not carry exactly two paths
//     ("invalid file paths"), if reading the input fails, or if the file
//     paths, the index, or the diff content is missing
//     ("invalid git diff format").
func parseGitDiffFileString(input string) (*GitDiff, error) {
	scanner := bufio.NewScanner(strings.NewReader(input))
	// The default Scanner limit is 64 KiB per line; beyond it Scan stops and
	// the remaining diff content would be dropped silently. Allow a line as
	// long as the whole input so that can never happen.
	scanner.Buffer(make([]byte, 0, 4096), len(input)+1)

	var (
		filePaths []string
		index     string
		diff      []string
	)
	for scanner.Scan() {
		line := scanner.Text()
		switch {
		case strings.HasPrefix(line, "diff --git"):
			// The prefix guarantees at least the two fields "diff" and
			// "--git...", so slicing from 2 cannot go out of range.
			filePaths = strings.Fields(line)[2:]
			if len(filePaths) != 2 {
				return nil, errors.New("invalid file paths")
			}
		case strings.HasPrefix(line, "index "):
			index = strings.TrimSpace(strings.TrimPrefix(line, "index "))
		default:
			diff = append(diff, line)
		}
	}
	// Never hand back a silently truncated diff.
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	if len(filePaths) == 0 || len(index) == 0 || len(diff) == 0 {
		return nil, errors.New("invalid git diff format")
	}
	return &GitDiff{
		FilePathOld:  filePaths[0],
		FilePathNew:  filePaths[1],
		Index:        index,
		DiffContents: strings.Join(diff, "\n"),
	}, nil
}
// getFileExtension returns the extension of the file named by path.
//
// The result is empty when path ends with the OS path separator or when its
// base name is ".", "/" or empty. A dot file whose only dot is the leading
// one (for example ".gitignore") is returned whole. In every other case the
// result is filepath.Ext of the base name, i.e. the suffix starting at the
// last dot, or "" when there is no dot.
func getFileExtension(path string) string {
	// A trailing separator marks a directory, which has no extension.
	separator := string(filepath.Separator)
	if strings.HasSuffix(path, separator) {
		return ""
	}

	base := filepath.Base(path)
	switch base {
	case ".", "/", "":
		// Base reports these for empty, current-directory and root paths.
		return ""
	}

	// The last dot sitting at index 0 means the name starts with a dot and
	// contains no other dot: a plain dot file, returned as its own extension.
	if len(base) > 1 && strings.LastIndex(base, ".") == 0 {
		return base
	}
	return filepath.Ext(base)
}