From 45be1f42d5016d810d8986cf21cb1068a35a858e Mon Sep 17 00:00:00 2001 From: Doriane Olewicki Date: Wed, 25 Jun 2025 09:12:01 -0400 Subject: [PATCH 1/6] adding the typing generation --- bugbug/tools/code_review.py | 117 ++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/bugbug/tools/code_review.py b/bugbug/tools/code_review.py index c099ec3d11..ef8a5747ec 100644 --- a/bugbug/tools/code_review.py +++ b/bugbug/tools/code_review.py @@ -53,6 +53,8 @@ class InlineComment: is_generated: bool | None = None explanation: str | None = None order: int | None = None + type: str | None = None + type_justification: str | None = None class ModelResultError(Exception): @@ -326,6 +328,85 @@ class LargeDiffError(Exception): """ +PROMPT_TEMPLATE_LABELLING = """You are an expert in source code reviews. + +Your task is to review each comment in the "Review Comments List" and assign it one of the following five categories, along with a justification based on its subcategory. + +Categories and Subcategories +1. Readability: +Focus: Making the code easier to read and understand. +Subcategories include: + * Refactoring - Consistency: Uniform coding styles and practices. + * Refactoring - Naming Convention: Clear, descriptive identifiers. + * Refactoring - Readability: General clarity improvements. + * Refactoring - Simplification: Reducing unnecessary complexity. + * Refactoring - Visual Representation: Improving code layout and formatting. + +2. Design and Maintainability: +Focus: Improving structure and long-term upkeep. +Subcategories include: + * Discussion - Design discussion: Architectural or structural decisions. + * Functional - Support: Adding or enhancing support functionality. + * Refactoring - Alternate Output: Changing what the code returns or prints. + * Refactoring - Code Duplication: Removing repeated code. + * Refactoring - Code Simplification: Streamlining logic. + * Refactoring - Magic Numbers: Replacing hard-coded values with named constants. + * Refactoring - Organization of the code: Logical structuring of code. + * Refactoring - Solution approach: Rethinking problem-solving approaches. + * Refactoring - Unused Variables: Removing variables not in use. + * Refactoring - Variable Declarations: Improving how variables are declared or initialized. + +3. Performance: +Focus: Making the code faster or more efficient. +Subcategories include: + * Functional - Performance: General performance improvements. + * Functional - Performance Optimization: Specific performance-focused refactoring. + * Functional - Performance and Safety: Balancing speed and reliability. + * Functional - Resource: Efficient use of memory, CPU, etc. + * Refactoring - Performance Optimization: Improving performance through code changes. + +4. Defect: +Focus: Fixing bugs and potential issues. +Subcategories include: + * Functional - Conditional Compilation + * Functional - Consistency and Thread Safety + * Functional - Error Handling + * Functional - Exception Handling + * Functional - Initialization + * Functional - Interface + * Functional - Lambda Usage + * Functional - Logical + * Functional - Null Handling + * Functional - Security + * Functional - Serialization + * Functional - Syntax + * Functional - Timing + * Functional - Type Safety + * Functional - Validation + +5. Other: +Use only if none of the above apply: +Subcategories include: + * None of the above + * Does not apply + +Format: +Use the following JSON format to label and justify each comment. Do not modify the comment content—only append the "label" and "label_justification" fields. +[ + {{ + "file": "com/br/main/Pressure.java", + "code_line": 458, + "comment" : "In the third code block, you are using `nsAutoStringN<256>` instead of `nsString`. This is a good change as `nsAutoStringN<256>` is more efficient for small strings. However, you should ensure that the size of `tempString` does not exceed 256 characters, as `nsAutoStringN<256>` has a fixed size." + "label": "Performance", + "label_justification": "Functional - Performance Optimization: Specific performance-focused refactoring." + }} +] + +Review Comments List: +{comments} +""" + + class ReviewRequest: patch_id: str @@ -1122,6 +1203,10 @@ def generate_processed_output(output: str, patch: PatchSet) -> Iterable[InlineCo on_removed_code=not scope["has_added_lines"], explanation=comment["explanation"], order=comment["order"], + type=comment["type"] if "type" in comment else None, + type_justification=comment["type_justification"] + if "type_justification" in comment + else None, ) @@ -1202,6 +1287,12 @@ def __init__( verbose=verbose, ) + self.com_type_labelling = LLMChain( + prompt=PromptTemplate.from_template(PROMPT_TEMPLATE_LABELLING), + llm=self.llm, + verbose=verbose, + ) + self.function_search = function_search self.review_comments_db = review_comments_db @@ -1350,6 +1441,30 @@ def _generate_suggestions(self, patch: Patch): return output + def _get_comment_type(self, comments): + output = self.com_type_labelling.invoke( + { + "comments": comments, + }, + return_only_outputs=True, + )["text"] + labelling = parse_model_output(output) + dir_labels = { + f"{lab['file']}{lab['code_line']}{lab['comment']}": { + "type": lab["label"], + "type_justification": lab["label_justification"], + } + for lab in labelling + } + + for com in comments: + if f"{com['file']}{com['code_line']}{com['comment']}" in dir_labels: + com.update( + dir_labels[f"{com['file']}{com['code_line']}{com['comment']}"] + ) + + return json.dumps(comments) + @retry(retry=retry_if_exception_type(ModelResultError), stop=stop_after_attempt(3)) def run(self, patch: Patch) -> list[InlineComment] | None: if self.count_tokens(patch.raw_diff) > 21000: @@ -1376,6 +1491,8 @@ def run(self, patch: Patch) -> list[InlineComment] | None: return_only_outputs=True, )["text"] + raw_output = self._get_comment_type(raw_output) + if self.verbose: GenerativeModelTool._print_answer(raw_output) From 3f808b567cc040458d244f56447ca0c6e2b324ea Mon Sep 17 00:00:00 2001 From: olewicki Date: Wed, 16 Jul 2025 08:25:05 -0400 Subject: [PATCH 2/6] Update bugbug/tools/code_review.py Co-authored-by: Suhaib Mujahid --- bugbug/tools/code_review.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/bugbug/tools/code_review.py b/bugbug/tools/code_review.py index ef8a5747ec..9d211d9131 100644 --- a/bugbug/tools/code_review.py +++ b/bugbug/tools/code_review.py @@ -1203,10 +1203,8 @@ def generate_processed_output(output: str, patch: PatchSet) -> Iterable[InlineCo on_removed_code=not scope["has_added_lines"], explanation=comment["explanation"], order=comment["order"], - type=comment["type"] if "type" in comment else None, - type_justification=comment["type_justification"] - if "type_justification" in comment - else None, + type=comment.get("type"), + type_justification=comment.get("type_justification"), ) From dda069961c6f403d7400b0e183b81eaf8a912ea0 Mon Sep 17 00:00:00 2001 From: olewicki Date: Wed, 16 Jul 2025 08:25:15 -0400 Subject: [PATCH 3/6] Update bugbug/tools/code_review.py Co-authored-by: Suhaib Mujahid --- bugbug/tools/code_review.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bugbug/tools/code_review.py b/bugbug/tools/code_review.py index 9d211d9131..240d4b8007 100644 --- a/bugbug/tools/code_review.py +++ b/bugbug/tools/code_review.py @@ -1285,7 +1285,7 @@ def __init__( verbose=verbose, ) - self.com_type_labelling = LLMChain( + self.type_labelling_chain = LLMChain( prompt=PromptTemplate.from_template(PROMPT_TEMPLATE_LABELLING), llm=self.llm, verbose=verbose, From de593e460cd227e242fd4a1c039e63de9fcd2351 Mon Sep 17 00:00:00 2001 From: olewicki Date: Wed, 16 Jul 2025 08:25:26 -0400 Subject: [PATCH 4/6] Update bugbug/tools/code_review.py Co-authored-by: Suhaib Mujahid --- bugbug/tools/code_review.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bugbug/tools/code_review.py b/bugbug/tools/code_review.py index 240d4b8007..834930ed1c 100644 --- a/bugbug/tools/code_review.py +++ b/bugbug/tools/code_review.py @@ -1448,7 +1448,7 @@ def _get_comment_type(self, comments): )["text"] labelling = parse_model_output(output) dir_labels = { - f"{lab['file']}{lab['code_line']}{lab['comment']}": { + (lab["file"], lab["code_line"], lab["comment"]): { "type": lab["label"], "type_justification": lab["label_justification"], } From eae4a99198c08369ecb326d9a47db39930cb750b Mon Sep 17 00:00:00 2001 From: olewicki Date: Wed, 16 Jul 2025 08:27:45 -0400 Subject: [PATCH 5/6] Update bugbug/tools/code_review.py Co-authored-by: Suhaib Mujahid --- bugbug/tools/code_review.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/bugbug/tools/code_review.py b/bugbug/tools/code_review.py index 834930ed1c..0e297f03b5 100644 --- a/bugbug/tools/code_review.py +++ b/bugbug/tools/code_review.py @@ -1456,10 +1456,9 @@ def _get_comment_type(self, comments): } for com in comments: - if f"{com['file']}{com['code_line']}{com['comment']}" in dir_labels: - com.update( - dir_labels[f"{com['file']}{com['code_line']}{com['comment']}"] - ) + label_info = dir_labels.get((com["file"], com["code_line"], com["comment"])) + if label: + com.update(label_info) return json.dumps(comments) From fe90105da2b2a2bb9b7c04138478f65e0e06291d Mon Sep 17 00:00:00 2001 From: olewicki Date: Wed, 16 Jul 2025 08:55:52 -0400 Subject: [PATCH 6/6] label_justification to label_subcategory + shift get_comment_type to end of the pipeline --- bugbug/tools/code_review.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/bugbug/tools/code_review.py b/bugbug/tools/code_review.py index 0e297f03b5..73727c0cd2 100644 --- a/bugbug/tools/code_review.py +++ b/bugbug/tools/code_review.py @@ -391,14 +391,14 @@ class LargeDiffError(Exception): * Does not apply Format: -Use the following JSON format to label and justify each comment. Do not modify the comment content—only append the "label" and "label_justification" fields. +Use the following JSON format to label and justify each comment. Do not modify the comment content—only append the "label" and "label_subcategory" fields. [ {{ "file": "com/br/main/Pressure.java", "code_line": 458, "comment" : "In the third code block, you are using `nsAutoStringN<256>` instead of `nsString`. This is a good change as `nsAutoStringN<256>` is more efficient for small strings. However, you should ensure that the size of `tempString` does not exceed 256 characters, as `nsAutoStringN<256>` has a fixed size." "label": "Performance", - "label_justification": "Functional - Performance Optimization: Specific performance-focused refactoring." + "label_subcategory": "Functional - Performance Optimization: Specific performance-focused refactoring." }} ] @@ -1442,7 +1442,7 @@ def _generate_suggestions(self, patch: Patch): def _get_comment_type(self, comments): output = self.com_type_labelling.invoke( { - "comments": comments, + "comments": json.dumps(comments), }, return_only_outputs=True, )["text"] @@ -1450,17 +1450,17 @@ def _get_comment_type(self, comments): dir_labels = { (lab["file"], lab["code_line"], lab["comment"]): { "type": lab["label"], - "type_justification": lab["label_justification"], + "label_subcategory": lab["label_subcategory"], } for lab in labelling } for com in comments: label_info = dir_labels.get((com["file"], com["code_line"], com["comment"])) - if label: + if label_info: com.update(label_info) - return json.dumps(comments) + return comments @retry(retry=retry_if_exception_type(ModelResultError), stop=stop_after_attempt(3)) def run(self, patch: Patch) -> list[InlineComment] | None: @@ -1488,12 +1488,14 @@ def run(self, patch: Patch) -> list[InlineComment] | None: return_only_outputs=True, )["text"] - raw_output = self._get_comment_type(raw_output) + processed_output = list(generate_processed_output(raw_output, patch.patch_set)) + + typed_output = self._get_comment_type(processed_output) if self.verbose: - GenerativeModelTool._print_answer(raw_output) + GenerativeModelTool._print_answer(typed_output) - return list(generate_processed_output(raw_output, patch.patch_set)) + return typed_output def _get_generated_examples(self, patch, created_before: datetime | None = None): """Get examples of comments that were generated by an LLM.