Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 35 additions & 32 deletions pr_agent/algo/git_patch_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,15 +212,11 @@ def check_if_hunk_lines_matches_to_file(i, original_lines, patch_lines, start1):


def extract_hunk_headers(match):
    """Extract the numeric fields and section header from a hunk-header match.

    Args:
        match: A regex match object whose first four groups hold the old
            start, old size, new start and new size of a hunk header, and
            whose fifth group holds the trailing section header text.
            (Presumably produced by the file's hunk-header pattern for lines
            such as ``@@ -212,15 +212,11 @@ def foo():`` - confirm at caller.)

    Returns:
        A tuple ``(section_header, size1, size2, start1, start2)``. Any of
        the four numeric groups that did not participate in the match
        (``None``) is reported as ``0``, which covers headers with omitted
        sizes such as ``@@ -0,0 +1 @@``.
    """
    groups = match.groups()
    # Optional groups come back as None when absent from the header; treat
    # them as 0 instead of relying on a try/except fallback.
    start1, size1, start2, size2 = (
        int(value) if value is not None else 0 for value in groups[:4]
    )
    section_header = groups[4]
    return section_header, size1, size2, start1, start2

Expand Down Expand Up @@ -336,9 +332,9 @@ def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
if hasattr(file, 'edit_type') and file.edit_type == EDIT_TYPE.DELETED:
return f"\n\n## File '{file.filename.strip()}' was deleted\n"

patch_with_lines_str = f"\n\n## File: '{file.filename.strip()}'\n"
patch_with_lines_str = [f"\n\n## File: '{file.filename.strip()}'\n"]
else:
patch_with_lines_str = ""
patch_with_lines_str = []

patch_lines = patch.splitlines()
RE_HUNK_HEADER = re.compile(
Expand All @@ -349,6 +345,19 @@ def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
start1, size1, start2, size2 = -1, -1, -1, -1
prev_header_line = []
header_line = []

def any_plus_lines(lines):
    """Return True if at least one entry in *lines* starts with '+'."""
    # any() short-circuits on the first hit, same as the explicit loop.
    return any(line.startswith('+') for line in lines)

def any_minus_lines(lines):
    """Return True if at least one entry in *lines* starts with '-'."""
    # any() short-circuits on the first hit, same as the explicit loop.
    return any(line.startswith('-') for line in lines)

for line_i, line in enumerate(patch_lines):
if 'no newline at end of file' in line.lower():
continue
Expand All @@ -358,20 +367,17 @@ def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
match = RE_HUNK_HEADER.match(line)
if match and (new_content_lines or old_content_lines): # found a new hunk, split the previous lines
if prev_header_line:
patch_with_lines_str += f'\n{prev_header_line}\n'
is_plus_lines = is_minus_lines = False
if new_content_lines:
is_plus_lines = any([line.startswith('+') for line in new_content_lines])
if old_content_lines:
is_minus_lines = any([line.startswith('-') for line in old_content_lines])
patch_with_lines_str.append(f'\n{prev_header_line}\n')
is_plus_lines = any_plus_lines(new_content_lines) if new_content_lines else False
is_minus_lines = any_minus_lines(old_content_lines) if old_content_lines else False
if is_plus_lines or is_minus_lines: # notice 'True' here - we always present __new hunk__ for section, otherwise LLM gets confused
patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__new hunk__\n'
patch_with_lines_str.append('__new hunk__\n')
for i, line_new in enumerate(new_content_lines):
patch_with_lines_str += f"{start2 + i} {line_new}\n"
patch_with_lines_str.append(f"{start2 + i} {line_new}\n")
if is_minus_lines:
patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n'
patch_with_lines_str.append('__old hunk__\n')
for line_old in old_content_lines:
patch_with_lines_str += f"{line_old}\n"
patch_with_lines_str.append(f"{line_old}\n")
new_content_lines = []
old_content_lines = []
if match:
Expand All @@ -394,22 +400,19 @@ def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:

# finishing last hunk
if match and new_content_lines:
patch_with_lines_str += f'\n{header_line}\n'
is_plus_lines = is_minus_lines = False
if new_content_lines:
is_plus_lines = any([line.startswith('+') for line in new_content_lines])
if old_content_lines:
is_minus_lines = any([line.startswith('-') for line in old_content_lines])
patch_with_lines_str.append(f'\n{header_line}\n')
is_plus_lines = any_plus_lines(new_content_lines) if new_content_lines else False
is_minus_lines = any_minus_lines(old_content_lines) if old_content_lines else False
if is_plus_lines or is_minus_lines: # notice 'True' here - we always present __new hunk__ for section, otherwise LLM gets confused
patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__new hunk__\n'
patch_with_lines_str.append('__new hunk__\n')
for i, line_new in enumerate(new_content_lines):
patch_with_lines_str += f"{start2 + i} {line_new}\n"
patch_with_lines_str.append(f"{start2 + i} {line_new}\n")
if is_minus_lines:
patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n'
patch_with_lines_str.append('__old hunk__\n')
for line_old in old_content_lines:
patch_with_lines_str += f"{line_old}\n"
patch_with_lines_str.append(f"{line_old}\n")

return patch_with_lines_str.rstrip()
return ''.join(patch_with_lines_str).rstrip()


def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, side, remove_trailing_chars: bool = True) -> tuple[str, str]:
Expand Down