JVM: Refine JVM fixer prompt builder #836

Merged: 2 commits, Mar 5, 2025
10 changes: 3 additions & 7 deletions agent/enhancer.py
@@ -17,8 +17,7 @@
import logger
from agent.prototyper import Prototyper
#from experiment.workdir import WorkDirs
from llm_toolkit.prompt_builder import (DefaultTemplateBuilder,
JvmErrorFixingBuilder)
from llm_toolkit.prompt_builder import DefaultTemplateBuilder, JvmFixingBuilder
from llm_toolkit.prompts import Prompt
from results import AnalysisResult, Result

@@ -39,11 +38,8 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:

if benchmark.language == 'jvm':
# TODO: Do this in a separate agent for JVM coverage.
jvm_coverage_fix = True
error_desc, errors = '', []
builder = JvmErrorFixingBuilder(self.llm, benchmark,
last_result.run_result.fuzz_target_source,
errors, jvm_coverage_fix)
builder = JvmFixingBuilder(self.llm, benchmark,
last_result.run_result.fuzz_target_source, [])
prompt = builder.build([], None, None)
else:
error_desc, errors = last_result.semantic_result.get_error_info()
10 changes: 3 additions & 7 deletions agent/one_prompt_enhancer.py
@@ -17,8 +17,7 @@
import logger
from agent.one_prompt_prototyper import OnePromptPrototyper
from experiment.workdir import WorkDirs
from llm_toolkit.prompt_builder import (DefaultTemplateBuilder,
JvmErrorFixingBuilder)
from llm_toolkit.prompt_builder import DefaultTemplateBuilder, JvmFixingBuilder
from llm_toolkit.prompts import Prompt
from results import AnalysisResult, BuildResult, Result

@@ -39,11 +38,8 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:

if benchmark.language == 'jvm':
# TODO: Do this in a separate agent for JVM coverage.
jvm_coverage_fix = True
error_desc, errors = '', []
builder = JvmErrorFixingBuilder(self.llm, benchmark,
last_result.run_result.fuzz_target_source,
errors, jvm_coverage_fix)
builder = JvmFixingBuilder(self.llm, benchmark,
last_result.run_result.fuzz_target_source, [])
prompt = builder.build([], None, None)
else:
error_desc, errors = last_result.semantic_result.get_error_info()
4 changes: 2 additions & 2 deletions agent/one_prompt_prototyper.py
@@ -139,9 +139,9 @@ def _advice_fuzz_target(self, build_result: BuildResult,
build_result.benchmark.language)
build_result.compile_error = '\n'.join(errors)
if build_result.benchmark.language == 'jvm':
builder = prompt_builder.JvmErrorFixingBuilder(
builder = prompt_builder.JvmFixingBuilder(
fixer_model, build_result.benchmark, build_result.fuzz_target_source,
build_result.compile_error.split('\n'), False)
build_result.compile_error.split('\n'))
prompt = builder.build([], None, None)
else:
builder = prompt_builder.DefaultTemplateBuilder(fixer_model)
9 changes: 3 additions & 6 deletions experiment/evaluator.py
@@ -320,12 +320,9 @@ def _fix_generated_fuzz_target(self, ai_binary: str,
run_result: Optional[RunResult],
dual_logger: _Logger, language: str):
"""Fixes the generated fuzz target."""
jvm_coverage_fix = False
error_desc, errors = '', []
if build_result.succeeded:
if language == 'jvm':
jvm_coverage_fix = True
else:
if language != 'jvm':
if run_result:
error_desc, errors = run_result.semantic_check.get_error_info()
else:
@@ -336,7 +333,7 @@ def _fix_generated_fuzz_target(self, ai_binary: str,

code_fixer.llm_fix(ai_binary, target_path, self.benchmark, iteration,
error_desc, errors, self.builder_runner.fixer_model_name,
language, jvm_coverage_fix)
language)
shutil.copyfile(
target_path,
os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR, 'projects',
@@ -486,7 +483,7 @@ def check_target(self, ai_binary, target_path: str) -> Result:
# it may make a good fuzz target bad.
# Should concern run_result.succeeded for analyses to know semantic
# errors
gen_succ = build_result.succeeded # and run_result and run_result.succeeded
gen_succ = build_result.succeeded

if gen_succ or llm_fix_count >= LLM_FIX_LIMIT:
# Exit cond 1: successfully generate the fuzz target.
9 changes: 3 additions & 6 deletions llm_toolkit/code_fixer.py
@@ -392,7 +392,7 @@ def group_error_messages(error_lines: list[str]) -> list[str]:

def llm_fix(ai_binary: str, target_path: str, benchmark: benchmarklib.Benchmark,
llm_fix_id: int, error_desc: Optional[str], errors: list[str],
fixer_model_name: str, language: str, jvm_cov_fix: bool) -> None:
fixer_model_name: str, language: str) -> None:
"""Reads and fixes |target_path| in place with LLM based on |error_log|."""
fuzz_target_source_code = parser.parse_code(target_path)

@@ -409,7 +409,6 @@ def llm_fix(ai_binary: str, target_path: str, benchmark: benchmarklib.Benchmark,
prompt_path,
response_dir,
language,
jvm_cov_fix,
fixer_model_name,
temperature=0.5 - llm_fix_id * 0.04)

@@ -452,7 +451,6 @@ def apply_llm_fix(ai_binary: str,
prompt_path: str,
response_dir: str,
language: str,
jvm_cov_fix: bool,
fixer_model_name: str = models.DefaultModel.name,
temperature: float = 0.4):
"""Queries LLM to fix the code."""
@@ -464,9 +462,8 @@ def apply_llm_fix(ai_binary: str,
)

if language == 'jvm':
builder = prompt_builder.JvmErrorFixingBuilder(fixer_model, benchmark,
fuzz_target_source_code,
errors, jvm_cov_fix)
builder = prompt_builder.JvmFixingBuilder(fixer_model, benchmark,
fuzz_target_source_code, errors)
prompt = builder.build([], None, None)
prompt.save(prompt_path)
else:
59 changes: 28 additions & 31 deletions llm_toolkit/prompt_builder.py
@@ -1207,30 +1207,24 @@ def post_process_generated_code(self, generated_code: str) -> str:
return generated_code


class JvmErrorFixingBuilder(PromptBuilder):
"""Prompt builder for fixing JVM harness with complication error."""
class JvmFixingBuilder(PromptBuilder):
"""Prompt builder for fixing JVM harness with complication error or
to increase code coverage."""

def __init__(self,
model: models.LLM,
benchmark: Benchmark,
generated_harness: str,
errors: list[str],
jvm_cov_fix: bool,
template_dir: str = DEFAULT_TEMPLATE_DIR):
super().__init__(model)
self._template_dir = template_dir
self.benchmark = benchmark
self.generated_harness = generated_harness
self.error_str = '\n'.join(errors)
self.jvm_cov_fix = jvm_cov_fix

# Load templates.
if self.jvm_cov_fix:
self.template_file = self._find_template(
template_dir, 'jvm_requirement_coverage_fixing.txt')
else:
self.template_file = self._find_template(
template_dir, 'jvm_requirement_error_fixing.txt')
self.template_file = self._find_template(template_dir, 'jvm_fixer.txt')

def _find_template(self, template_dir: str, template_name: str) -> str:
"""Finds template file based on |template_dir|."""
@@ -1263,34 +1257,37 @@ def build(self,
# Format the repository
target_repository = oss_fuzz_checkout.get_project_repository(
self.benchmark.project)

# Add information
prompt_text = prompt_text.replace('{TARGET_REPO}', target_repository)
prompt_text = prompt_text.replace('{HARNESS_NAME}',
self.benchmark.target_name)

# Add the generated harness to prompt
prompt_text = prompt_text.replace('{GENERATED_HARNESS}',
self.generated_harness)

if self.jvm_cov_fix:
# Add source code of all existing harnesses to prompt
source_list = []
harnesses = introspector.query_introspector_for_harness_intrinsics(proj)
for pair in harnesses:
path = pair.get('source', '')
if path:
source = introspector.query_introspector_source_code(proj, path)
if source:
source_list.append(source)

prompt_text = prompt_text.replace('{EXISTING_HARNESS}',
'\n---\n'.join(source_list))

# Add all public candidates to prompt
methods = introspector.query_introspector_all_public_candidates(proj)
name = [method['function_name'] for method in methods]
prompt_text = prompt_text.replace('{PUBLIC_METHODS}', ','.join(name))
# Add all public candidates to prompt
methods = introspector.query_introspector_all_public_candidates(proj)
name = [method['function_name'] for method in methods]
prompt_text = prompt_text.replace('{PUBLIC_METHODS}', ','.join(name))

# Add source code of all existing harnesses to prompt
source_list = []
harnesses = introspector.query_introspector_for_harness_intrinsics(proj)
for pair in harnesses:
path = pair.get('source', '')
if path:
source = introspector.query_introspector_source_code(proj, path)
if source:
source_list.append(source)

prompt_text = prompt_text.replace('{EXISTING_HARNESS}',
'\n---\n'.join(source_list))

if self.error_str:
prompt_text = prompt_text.replace('{ERRORS}',
('There are no errors, please consider '
'increasing the code coverage.'))
else:
# Add the error string to prompt
prompt_text = prompt_text.replace('{ERRORS}', self.error_str)

self._prompt.add_priming(prompt_text)
40 changes: 40 additions & 0 deletions prompts/template_xml/jvm_fixer.txt
@@ -0,0 +1,40 @@
I'm a security engineer looking to write a good fuzzing harness. I want you to help me fix some compilation errors and improve my fuzzing harness so it covers more of the code in the target project.

The target library is {TARGET_REPO}.
The target project is written in the Java programming language.
Since the project is written in Java, the harness should also be written in Java.
The fuzzing harness should be executable under the Jazzer fuzzing framework.

Here is the fuzzing harness that has compilation errors.
<code>
{GENERATED_HARNESS}
</code>

I got the following errors from the compiler. Please help me fix them while keeping the format and all other logic unchanged.
<error>
{ERRORS}
</error>

For reference, the source code for all existing harnesses of the project is provided below, separated by `---`:
<code>
{EXISTING_HARNESS}
</code>

Additionally, a list of all public methods and constructors of the project is included for your reference. You should try to expand the fuzzing harness to call these targets in order to improve the overall fuzzing coverage:
{PUBLIC_METHODS}

REQUIREMENTS THAT YOU MUST FOLLOW:
1. In your response, include ONLY the code for the harness, nothing more. You should wrap the code in <code></code> tags.
2. YOU MUST first carefully analyze the error messages together with the fuzz target and the build script to identify the root cause.
3. YOU MUST NOT make any assumptions about the source code or build environment. Always confirm assumptions with source code evidence, obtained via Bash commands.
4. Once you are absolutely certain of the error root cause, output the FULL SOURCE CODE of the fuzz target, assuming the build script cannot be changed.
5. ONLY consider coverage increase AFTER you are certain that the fuzzing harness can compile successfully.

TIPS:
1. If necessary, add additional import statements for missing class dependencies.
2. Consult existing cross-referencing code or unit tests to help you identify the error root cause.
3. After you have collected information and understood the error root cause, YOU MUST take at least one step to validate your theory with source code evidence.
4. Always use the source code from the project source code directory `{PROJECT_DIR}/` to understand errors and how to fix them. For example, search for the key words (e.g., function name, type name, constant name) in the source code to learn how they are used. Similarly, learn from the other fuzz targets and the build script to understand how to include the correct imports.
5. Once you have verified the error root cause, output the FULL SOURCE CODE of the fuzz target.
6. Focus on writing a compilable fuzzing harness that calls the target method {FUNCTION_SIGNATURE}. First ensure that it calls the target method {FUNCTION_SIGNATURE} and compiles successfully, then consider code coverage.
7. If an error happens repeatedly and cannot be fixed, try to mitigate it. For example, replace or remove the line.
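
For illustration, a minimal sketch of the kind of Jazzer harness this template asks the model to produce might look like the following; it fuzzes java.net.URI parsing as a hypothetical stand-in for the project-specific target method referenced by {FUNCTION_SIGNATURE}:

<code>
import com.code_intelligence.jazzer.api.FuzzedDataProvider;

import java.net.URI;
import java.net.URISyntaxException;

// Minimal Jazzer harness sketch. java.net.URI is a placeholder target;
// a real harness would call the project's {FUNCTION_SIGNATURE} instead.
public class ExampleFuzzer {
  public static void fuzzerTestOneInput(FuzzedDataProvider data) {
    // Consume the fuzzer-provided bytes as a string and feed the target.
    String input = data.consumeRemainingAsString();
    try {
      new URI(input);
    } catch (URISyntaxException e) {
      // Malformed input is expected; only unexpected exceptions or crashes
      // should surface to the fuzzer.
    }
  }
}
</code>

In the coverage-improvement case, the template expects the model to extend such a harness so it also exercises more of the public methods and constructors listed under {PUBLIC_METHODS}.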
94 changes: 0 additions & 94 deletions prompts/template_xml/jvm_requirement_coverage_fixing.txt

This file was deleted.
