Skip to content

Commit db5138d

Browse files
committed
Improve YAML parsing with additional fallback strategies for AI predictions
1 parent 9a9feb4 commit db5138d

File tree

1 file changed

+61
-8
lines changed

1 file changed

+61
-8
lines changed

pr_agent/algo/utils.py

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -731,8 +731,9 @@ def try_fix_yaml(response_text: str,
731731
response_text_original="") -> dict:
732732
response_text_lines = response_text.split('\n')
733733

734-
keys_yaml = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:', 'improved code:']
734+
keys_yaml = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:', 'improved code:', 'label:']
735735
keys_yaml = keys_yaml + keys_fix_yaml
736+
736737
# first fallback - try to convert 'relevant line: ...' to 'relevant line: |-\n ...'
737738
response_text_lines_copy = response_text_lines.copy()
738739
for i in range(0, len(response_text_lines_copy)):
@@ -747,8 +748,29 @@ def try_fix_yaml(response_text: str,
747748
except:
748749
pass
749750

750-
# second fallback - try to extract only range from first ```yaml to ````
751-
snippet_pattern = r'```(yaml)?[\s\S]*?```'
751+
# 1.5 fallback - try to convert '|' to '|2'. This handles cases where the indentation decreases in the middle of a code block
752+
response_text_copy = copy.deepcopy(response_text)
753+
response_text_copy = response_text_copy.replace('|\n', '|2\n')
754+
try:
755+
data = yaml.safe_load(response_text_copy)
756+
get_logger().info(f"Successfully parsed AI prediction after replacing | with |2")
757+
return data
758+
except:
759+
# if it fails, we can try to add spaces to the lines that are not indented properly, and contain '}'.
760+
response_text_lines_copy = response_text_copy.split('\n')
761+
for i in range(0, len(response_text_lines_copy)):
762+
initial_space = len(response_text_lines_copy[i]) - len(response_text_lines_copy[i].lstrip())
763+
if initial_space == 2 and '|2' not in response_text_lines_copy[i] and '}' in response_text_lines_copy[i]:
764+
response_text_lines_copy[i] = ' ' + response_text_lines_copy[i].lstrip()
765+
try:
766+
data = yaml.safe_load('\n'.join(response_text_lines_copy))
767+
get_logger().info(f"Successfully parsed AI prediction after replacing | with |2 and adding spaces")
768+
return data
769+
except:
770+
pass
771+
772+
# second fallback - try to extract only range from first ```yaml to the last ```
773+
snippet_pattern = r'```yaml([\s\S]*?)```(?=\s*$|")'
752774
snippet = re.search(snippet_pattern, '\n'.join(response_text_lines_copy))
753775
if not snippet:
754776
snippet = re.search(snippet_pattern, response_text_original) # before we removed the "```"
@@ -803,16 +825,47 @@ def try_fix_yaml(response_text: str,
803825
except:
804826
pass
805827

806-
# sixth fallback - try to remove last lines
807-
for i in range(1, len(response_text_lines)):
808-
response_text_lines_tmp = '\n'.join(response_text_lines[:-i])
828+
# sixth fallback - replace tabs with spaces
829+
if '\t' in response_text:
830+
response_text_copy = copy.deepcopy(response_text)
831+
response_text_copy = response_text_copy.replace('\t', ' ')
809832
try:
810-
data = yaml.safe_load(response_text_lines_tmp)
811-
get_logger().info(f"Successfully parsed AI prediction after removing {i} lines")
833+
data = yaml.safe_load(response_text_copy)
834+
get_logger().info(f"Successfully parsed AI prediction after replacing tabs with spaces")
812835
return data
813836
except:
814837
pass
815838

839+
# seventh fallback - add indent for sections of code blocks
840+
response_text_copy = copy.deepcopy(response_text)
841+
response_text_copy_lines = response_text_copy.split('\n')
842+
start_line = -1
843+
for i, line in enumerate(response_text_copy_lines):
844+
if 'existing_code:' in line or 'improved_code:' in line:
845+
start_line = i
846+
elif line.endswith(': |') or line.endswith(': |-') or line.endswith(': |2') or line.endswith(':'):
847+
start_line = -1
848+
elif start_line != -1:
849+
response_text_copy_lines[i] = ' ' + line
850+
response_text_copy = '\n'.join(response_text_copy_lines)
851+
try:
852+
data = yaml.safe_load(response_text_copy)
853+
get_logger().info(f"Successfully parsed AI prediction after adding indent for sections of code blocks")
854+
return data
855+
except:
856+
pass
857+
858+
# # former sixth fallback (disabled) - try to remove last lines
859+
# for i in range(1, len(response_text_lines)):
860+
# response_text_lines_tmp = '\n'.join(response_text_lines[:-i])
861+
# try:
862+
# data = yaml.safe_load(response_text_lines_tmp)
863+
# get_logger().info(f"Successfully parsed AI prediction after removing {i} lines")
864+
# return data
865+
# except:
866+
# pass
867+
868+
816869

817870
def set_custom_labels(variables, git_provider=None):
818871
if not get_settings().config.enable_custom_labels:

0 commit comments

Comments
 (0)