JVM: Refine JVM fixer prompt builder #836

Merged: 2 commits, Mar 5, 2025
10 changes: 3 additions & 7 deletions agent/enhancer.py
@@ -17,8 +17,7 @@
import logger
from agent.prototyper import Prototyper
#from experiment.workdir import WorkDirs
from llm_toolkit.prompt_builder import (DefaultTemplateBuilder,
JvmErrorFixingBuilder)
from llm_toolkit.prompt_builder import DefaultTemplateBuilder, JvmFixingBuilder
from llm_toolkit.prompts import Prompt
from results import AnalysisResult, Result

@@ -39,11 +38,8 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:

if benchmark.language == 'jvm':
# TODO: Do this in a separate agent for JVM coverage.
jvm_coverage_fix = True
error_desc, errors = '', []
builder = JvmErrorFixingBuilder(self.llm, benchmark,
last_result.run_result.fuzz_target_source,
errors, jvm_coverage_fix)
builder = JvmFixingBuilder(self.llm, benchmark,
last_result.run_result.fuzz_target_source, [])
prompt = builder.build([], None, None)
else:
error_desc, errors = last_result.semantic_result.get_error_info()
10 changes: 3 additions & 7 deletions agent/one_prompt_enhancer.py
@@ -17,8 +17,7 @@
import logger
from agent.one_prompt_prototyper import OnePromptPrototyper
from experiment.workdir import WorkDirs
from llm_toolkit.prompt_builder import (DefaultTemplateBuilder,
JvmErrorFixingBuilder)
from llm_toolkit.prompt_builder import DefaultTemplateBuilder, JvmFixingBuilder
from llm_toolkit.prompts import Prompt
from results import AnalysisResult, BuildResult, Result

@@ -39,11 +38,8 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:

if benchmark.language == 'jvm':
# TODO: Do this in a separate agent for JVM coverage.
jvm_coverage_fix = True
error_desc, errors = '', []
builder = JvmErrorFixingBuilder(self.llm, benchmark,
last_result.run_result.fuzz_target_source,
errors, jvm_coverage_fix)
builder = JvmFixingBuilder(self.llm, benchmark,
last_result.run_result.fuzz_target_source, [])
prompt = builder.build([], None, None)
else:
error_desc, errors = last_result.semantic_result.get_error_info()
4 changes: 2 additions & 2 deletions agent/one_prompt_prototyper.py
@@ -139,9 +139,9 @@ def _advice_fuzz_target(self, build_result: BuildResult,
build_result.benchmark.language)
build_result.compile_error = '\n'.join(errors)
if build_result.benchmark.language == 'jvm':
builder = prompt_builder.JvmErrorFixingBuilder(
builder = prompt_builder.JvmFixingBuilder(
fixer_model, build_result.benchmark, build_result.fuzz_target_source,
build_result.compile_error.split('\n'), False)
build_result.compile_error.split('\n'))
prompt = builder.build([], None, None)
else:
builder = prompt_builder.DefaultTemplateBuilder(fixer_model)
9 changes: 3 additions & 6 deletions experiment/evaluator.py
@@ -320,12 +320,9 @@ def _fix_generated_fuzz_target(self, ai_binary: str,
run_result: Optional[RunResult],
dual_logger: _Logger, language: str):
"""Fixes the generated fuzz target."""
jvm_coverage_fix = False
error_desc, errors = '', []
if build_result.succeeded:
if language == 'jvm':
jvm_coverage_fix = True
else:
if language != 'jvm':
if run_result:
error_desc, errors = run_result.semantic_check.get_error_info()
else:
@@ -336,7 +333,7 @@ def _fix_generated_fuzz_target(self, ai_binary: str,

code_fixer.llm_fix(ai_binary, target_path, self.benchmark, iteration,
error_desc, errors, self.builder_runner.fixer_model_name,
language, jvm_coverage_fix)
language)
shutil.copyfile(
target_path,
os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR, 'projects',
@@ -486,7 +483,7 @@ def check_target(self, ai_binary, target_path: str) -> Result:
# it may make a good fuzz target bad.
# Should concern run_result.succeeded for analyses to know semantic
# errors
gen_succ = build_result.succeeded # and run_result and run_result.succeeded
gen_succ = build_result.succeeded

if gen_succ or llm_fix_count >= LLM_FIX_LIMIT:
# Exit cond 1: successfully generate the fuzz target.
9 changes: 3 additions & 6 deletions llm_toolkit/code_fixer.py
@@ -392,7 +392,7 @@ def group_error_messages(error_lines: list[str]) -> list[str]:

def llm_fix(ai_binary: str, target_path: str, benchmark: benchmarklib.Benchmark,
llm_fix_id: int, error_desc: Optional[str], errors: list[str],
fixer_model_name: str, language: str, jvm_cov_fix: bool) -> None:
fixer_model_name: str, language: str) -> None:
"""Reads and fixes |target_path| in place with LLM based on |error_log|."""
fuzz_target_source_code = parser.parse_code(target_path)

@@ -409,7 +409,6 @@ def llm_fix(ai_binary: str, target_path: str, benchmark: benchmarklib.Benchmark,
prompt_path,
response_dir,
language,
jvm_cov_fix,
fixer_model_name,
temperature=0.5 - llm_fix_id * 0.04)

@@ -452,7 +451,6 @@ def apply_llm_fix(ai_binary: str,
prompt_path: str,
response_dir: str,
language: str,
jvm_cov_fix: bool,
fixer_model_name: str = models.DefaultModel.name,
temperature: float = 0.4):
"""Queries LLM to fix the code."""
@@ -464,9 +462,8 @@ def apply_llm_fix(ai_binary: str,
)

if language == 'jvm':
builder = prompt_builder.JvmErrorFixingBuilder(fixer_model, benchmark,
fuzz_target_source_code,
errors, jvm_cov_fix)
builder = prompt_builder.JvmFixingBuilder(fixer_model, benchmark,
fuzz_target_source_code, errors)
prompt = builder.build([], None, None)
prompt.save(prompt_path)
else:
59 changes: 28 additions & 31 deletions llm_toolkit/prompt_builder.py
@@ -1207,30 +1207,24 @@ def post_process_generated_code(self, generated_code: str) -> str:
return generated_code


class JvmErrorFixingBuilder(PromptBuilder):
"""Prompt builder for fixing JVM harness with complication error."""
class JvmFixingBuilder(PromptBuilder):
"""Prompt builder for fixing JVM harness with complication error or
to increase code coverage."""

def __init__(self,
model: models.LLM,
benchmark: Benchmark,
generated_harness: str,
errors: list[str],
jvm_cov_fix: bool,
template_dir: str = DEFAULT_TEMPLATE_DIR):
super().__init__(model)
self._template_dir = template_dir
self.benchmark = benchmark
self.generated_harness = generated_harness
self.error_str = '\n'.join(errors)
self.jvm_cov_fix = jvm_cov_fix

# Load templates.
if self.jvm_cov_fix:
self.template_file = self._find_template(
template_dir, 'jvm_requirement_coverage_fixing.txt')
else:
self.template_file = self._find_template(
template_dir, 'jvm_requirement_error_fixing.txt')
self.template_file = self._find_template(template_dir, 'jvm_fixer.txt')

def _find_template(self, template_dir: str, template_name: str) -> str:
"""Finds template file based on |template_dir|."""
@@ -1263,34 +1257,37 @@ def build(self,
# Format the repository
target_repository = oss_fuzz_checkout.get_project_repository(
self.benchmark.project)

# Add information
prompt_text = prompt_text.replace('{TARGET_REPO}', target_repository)
prompt_text = prompt_text.replace('{HARNESS_NAME}',
self.benchmark.target_name)

# Add the generated harness to prompt
prompt_text = prompt_text.replace('{GENERATED_HARNESS}',
self.generated_harness)

if self.jvm_cov_fix:
# Add source code of all existing harnesses to prompt
source_list = []
harnesses = introspector.query_introspector_for_harness_intrinsics(proj)
for pair in harnesses:
path = pair.get('source', '')
if path:
source = introspector.query_introspector_source_code(proj, path)
if source:
source_list.append(source)

prompt_text = prompt_text.replace('{EXISTING_HARNESS}',
'\n---\n'.join(source_list))

# Add all public candidates to prompt
methods = introspector.query_introspector_all_public_candidates(proj)
name = [method['function_name'] for method in methods]
prompt_text = prompt_text.replace('{PUBLIC_METHODS}', ','.join(name))
# Add all public candidates to prompt
methods = introspector.query_introspector_all_public_candidates(proj)
name = [method['function_name'] for method in methods]
prompt_text = prompt_text.replace('{PUBLIC_METHODS}', ','.join(name))

# Add source code of all existing harnesses to prompt
source_list = []
harnesses = introspector.query_introspector_for_harness_intrinsics(proj)
for pair in harnesses:
path = pair.get('source', '')
if path:
source = introspector.query_introspector_source_code(proj, path)
if source:
source_list.append(source)

prompt_text = prompt_text.replace('{EXISTING_HARNESS}',
'\n---\n'.join(source_list))

if self.error_str:
prompt_text = prompt_text.replace('{ERRORS}',
('There are no errors, please consider '
'increasing the code coverage.'))
else:
# Add the error string to prompt
prompt_text = prompt_text.replace('{ERRORS}', self.error_str)

self._prompt.add_priming(prompt_text)
40 changes: 40 additions & 0 deletions prompts/template_xml/jvm_fixer.txt
@@ -0,0 +1,40 @@
I'm a security engineer looking to write a good fuzzing harness. I want you to help me fix some compilation errors and improve my fuzzing harness so it covers more of the code in the target project.

The target library is {TARGET_REPO}.
The target project is written in the Java programming language.
Since the project is written in Java, the harness should also be written in Java.
The fuzzing harness should be executable under the Jazzer fuzzing framework.

Here is the fuzzing harness that has compilation errors.
<code>
{GENERATED_HARNESS}
</code>

I got the following errors from the compiler. Please help me fix them while keeping the format and all other logic unchanged.
<error>
{ERRORS}
</error>

For reference, the source code for all existing harnesses of the project is provided below, separated by `---`:
<code>
{EXISTING_HARNESS}
</code>

Additionally, a list of all public methods and constructors of the project is included for your reference. You should try to expand the fuzzing harness to call these targets in order to improve the overall fuzzing coverage:
{PUBLIC_METHODS}

REQUIREMENTS THAT YOU MUST FOLLOW:
1. In your response, include ONLY the code for the harness, nothing more. You should wrap the code in <code></code> tags.
2. YOU MUST first carefully analyze the error messages together with the fuzz target and the build script to identify the root cause.
3. YOU MUST NOT make any assumptions about the source code or build environment. Always confirm assumptions with source code evidence, obtained via Bash commands.
4. Once you are absolutely certain of the error root cause, output the FULL SOURCE CODE of the fuzz target, assuming the build script cannot be changed.
5. ONLY consider coverage increase AFTER you are certain that the fuzzing harness can compile successfully.

TIPS:
1. If necessary, add additional import statements for missing class dependencies.
2. Consult existing cross-referencing code or unit tests to help you identify the error root cause.
3. After you have collected information and understood the error root cause, YOU MUST take at least one step to validate your theory with source code evidence.
4. Always use the source code from the project source code directory `{PROJECT_DIR}/` to understand errors and how to fix them. For example, search for the key words (e.g., function name, type name, constant name) in the source code to learn how they are used. Similarly, learn from the other fuzz targets and the build script to understand how to include the correct imports.
5. Once you have verified the error root cause, output the FULL SOURCE CODE of the fuzz target.
6. Focus on writing a compilable fuzzing harness that calls the target method {FUNCTION_SIGNATURE}. First ensure that it calls the target method {FUNCTION_SIGNATURE} and compiles successfully, then consider code coverage.
7. If an error happens repeatedly and cannot be fixed, try to mitigate it. For example, replace or remove the line.
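
For illustration, a minimal sketch of the kind of Jazzer harness this template asks the model to produce might look like the following; it fuzzes java.net.URI parsing as a hypothetical stand-in for the project-specific target method referenced by {FUNCTION_SIGNATURE}:

<code>
import com.code_intelligence.jazzer.api.FuzzedDataProvider;

import java.net.URI;
import java.net.URISyntaxException;

// Minimal Jazzer harness sketch. java.net.URI is a placeholder target;
// a real harness would call the project's {FUNCTION_SIGNATURE} instead.
public class ExampleFuzzer {
  public static void fuzzerTestOneInput(FuzzedDataProvider data) {
    // Consume the fuzzer-provided bytes as a string and feed the target.
    String input = data.consumeRemainingAsString();
    try {
      new URI(input);
    } catch (URISyntaxException e) {
      // Malformed input is expected; only unexpected exceptions or crashes
      // should surface to the fuzzer.
    }
  }
}
</code>

In the coverage-improvement case, the template expects the model to extend such a harness so it also exercises more of the public methods and constructors listed under {PUBLIC_METHODS}.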
94 changes: 0 additions & 94 deletions prompts/template_xml/jvm_requirement_coverage_fixing.txt

This file was deleted.
