README.md (18 changes: 14 additions & 4 deletions)
@@ -181,15 +181,25 @@ python merge.py --base-dataset-path $dataset_path/ --decompiled-datasets $datase

This section describes the evaluation of decompiled code.

Before evaluation, integrate all decompiler outputs, including those from LLMs, into a single dataset saved at `./decompiled_ds_all`. Then, execute:
Before evaluation, integrate all decompiler outputs, including those from LLMs, into a single dataset saved at `./decompiled_ds_all`.
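A minimal sketch (assuming the merged dataset was saved at `./decompiled_ds_all` as above) to confirm it carries the `include` and `opt` columns the evaluation scripts check for:

```python
# Sketch: verify the merged dataset has the columns the evaluators require.
import datasets

ds = datasets.Dataset.load_from_disk("./decompiled_ds_all")
missing = {"include", "opt"} - set(ds.column_names)
assert not missing, f"not a merged dataset, missing columns: {missing}"
print(ds)
```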

**Step 1: Generate base libfunction.so files (required for CER evaluation)**

```shell
python evaluate_rsr.py --config ./config.yaml --decompiled-dataset $dataset_path/decompiled_ds --decompilers func
```

This compiles the original extracted functions into ground-truth shared libraries at `{oss_fuzz_path}/build/challenges/{project}/{function}/libfunction.so`.
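As a quick sanity check, you can confirm a ground-truth library was built and exports its function; `my_project` and `my_function` are hypothetical placeholders, and `$oss_fuzz_path` stands for the `oss_fuzz_path` value from `config.yaml`:

```shell
ls "$oss_fuzz_path"/build/challenges/my_project/my_function/libfunction.so
nm -D "$oss_fuzz_path"/build/challenges/my_project/my_function/libfunction.so | grep " my_function$"
```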

**Step 2: Evaluate decompiler outputs**

```shell
python evaluate_rsr.py --decompiled-dataset $dataset_path/decompiled_ds --decompilers hexrays
python evaluate_rsr.py --config ./config.yaml --decompiled-dataset $dataset_path/decompiled_ds --decompilers hexrays
```

Enable the debug parameter to print error messages for specific data. This script recompiles the specified decompiler outputs in Docker, applies fixes, and reports success rates across different optimization levels. Successfully compiled functions are stored as shared libraries in `{oss_fuzz_path}/build/challenges` for further evaluation.
Enable the debug parameter to print error messages for individual samples. This script recompiles the specified decompiler outputs in Docker, applies fixes, and reports compile success rates across the different optimization levels.
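Since `--decompilers` is declared with `nargs='*'`, several decompilers can be evaluated in one invocation; `ghidra` below is only a placeholder for any other decompiler listed in your config:

```shell
python evaluate_rsr.py --config ./config.yaml --decompiled-dataset $dataset_path/decompiled_ds --decompilers hexrays ghidra
```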

To assess coverage differences before and after replacing with decompiled code, run:
**Step 3: Assess coverage differences**

```shell
python evaluate_cer.py --dataset $dataset_path/decompiled_ds
```
compile_ossfuzz.py (22 changes: 11 additions & 11 deletions)
@@ -1,4 +1,5 @@
import argparse
import functools
import os
import pathlib
import re
@@ -168,7 +169,7 @@ def process_project_linearly(project_path):
OUTPUT_BINARY_PATH = OUTPUT_PATH / "binary"
OUTPUT_BINARY_PATH.mkdir(exist_ok=True, parents=True)

extra_flags = ' '.join([
extra_flags = [
"-mno-sse",
"-fno-eliminate-unused-debug-types",
"-fno-lto",
@@ -177,7 +178,7 @@ def process_project_linearly(project_path):
# "-fno-inline-functions-called-once", # not supported in clang
"-fno-inline",
# "-fno-reorder-blocks-and-partition", # not supported in clang
])
]
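Keeping `extra_flags` as a list (rather than a pre-joined string) lets it be concatenated directly into the argv list executed with `shell=False` below. A standalone sketch of the two styles, not code from this repo (`input.c` and `out.so` are hypothetical):

```python
# Sketch: argv-list invocation vs. shell-string invocation.
import subprocess

extra_flags = ["-mno-sse", "-fno-lto", "-fno-inline"]

# List form: each element reaches clang verbatim, with no shell re-parsing.
cmd = ["clang", "input.c", "-O0", "-shared", "-fPIC", "-o", "out.so"] + extra_flags + ["-lm"]
subprocess.run(cmd, shell=False, check=True)

# String form (the old style) needs manual joining and careful quoting instead.
subprocess.run("clang input.c -O0 -shared -fPIC -o out.so "
               + " ".join(extra_flags) + " -lm", shell=True, check=True)
```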


def compile(row, container: DockerContainer):
@@ -197,12 +198,11 @@ def compile(row, container: DockerContainer):
f.write(func)

output_file = OUTPUT_BINARY_PATH / f'task-{idx}-{opt}.so'
output_file_indocker = pathlib.Path(
'/challenges') / f'task-{idx}-{opt}.so'
output_file_indocker = pathlib.Path('/challenges/binary') / f'task-{idx}-{opt}.so'
cmd = ['clang', filepath, f'-{opt}', '-shared', '-fPIC',
'-o', output_file_indocker, extra_flags, '-lm']
container.exec_in_container(
cmd, cwd='/challenges', shell=True, check=True)
'-o', str(output_file_indocker)] + extra_flags + ['-lm']
out = container.exec_in_container(
Copilot AI (Dec 15, 2025):

The variable `out` is assigned but never used. If the intention is to capture and suppress the output from the Docker container execution, consider either using the result (e.g., for logging or debugging) or removing the assignment and just calling the function directly.

Suggested change:
out = container.exec_in_container(
container.exec_in_container(
cmd, cwd='/challenges', shell=False, check=True, capture_output=True)

ret = subprocess.run(
f'nm {output_file} | egrep " {function_name}$"', stdout=subprocess.PIPE, shell=True, check=True)
@@ -216,18 +216,18 @@ def compile(row, container: DockerContainer):
'path': str(output_file.relative_to(OUTPUT_PATH)),
})
except subprocess.CalledProcessError as e:
logger.error(f"Error compiling {idx} with {opt}: {e}")
logger.error(f"Error compiling {idx}: {e}")
finally:
# os.remove(filepath)
pass

return challenge


def tqdm_progress_map(func, iterable, num_workers, container):
def tqdm_progress_map(func, iterable, num_workers):
results = []
with Pool(num_workers) as pool:
for result in tqdm(pool.imap_unordered(func, iterable, container), total=len(iterable)):
for result in tqdm(pool.imap_unordered(func, iterable), total=len(iterable)):
results.append(result)
return results
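For context on this fix: the third positional argument of `Pool.imap_unordered(func, iterable, chunksize)` is `chunksize`, so passing `container` there was incorrect; binding it with `functools.partial` routes it to the worker instead. A minimal self-contained sketch of the pattern (toy `work` function standing in for `compile`):

```python
import functools
from multiprocessing import Pool

def work(row, container):
    # Stand-in for compile(row, container).
    return f"row {row} handled by {container}"

if __name__ == "__main__":
    # partial objects pickle (lambdas do not), so Pool workers can receive them.
    bound = functools.partial(work, container="ctr-0")
    with Pool(4) as pool:
        for result in pool.imap_unordered(bound, range(8)):
            print(result)
```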

@@ -236,7 +236,7 @@ def tqdm_progress_map(func, iterable, num_workers, container):
f'{OUTPUT_PATH}': '/challenges',
'/dev/shm': '/dev/shm'
}) as container:
res = tqdm_progress_map(compile, ds, args.num_workers, container)
res = tqdm_progress_map(functools.partial(compile, container=container), ds, args.num_workers)
res = list(chain(*res))
ds = datasets.Dataset.from_list(res)
print(len(ds))
evaluate_cer.py (40 changes: 25 additions & 15 deletions)
@@ -110,6 +110,7 @@ def get_func_offsets(so_path: pathlib.Path,


WORKER_COUNT = os.cpu_count()
TIMEOUT = 300


class ReexecutableRateEvaluator(OSSFuzzDatasetGenerator):
@@ -210,10 +211,10 @@ def diff_base_for_function(self, fuzzer: str, function_name: str):
f'OUTPUT_TXT=/challenges/{function_name}/{fuzzer}/base.txt',
f'MAPPING_TXT=/challenges/{function_name}/address_mapping.txt',
f'LD_PRELOAD=/oss-fuzz/ld.so'
], timeout=30, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
# result.check_returncode()
], timeout=TIMEOUT, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# Stream file line-by-line to reduce memory usage
with open(str(base_txt_path), 'r') as f:
base_result = f.read().split('\n')
base_result = [line.rstrip('\n') for line in f]
if txt_length != 0 and len(base_result) != txt_length:
logger.error(
f"base txt length mismatch, expected {txt_length}, got {len(base_result)}")
@@ -230,6 +231,11 @@ def diff_base_for_function(self, fuzzer: str, function_name: str):
if idx < max_trails - 1:
prev_diff_length = diff_length

except subprocess.CalledProcessError as e:
logger.error(f"Base coverage generation failed with exit code {e.returncode}")
logger.error(f"stdout: {e.stdout.decode('utf-8', errors='replace') if e.stdout else ''}")
logger.error(f"stderr: {e.stderr.decode('utf-8', errors='replace') if e.stderr else ''}")
return (fuzzer, function_name, {})
except Exception as e:
logger.error(
f"base txt generation failed:{e}")
@@ -253,32 +259,37 @@ def diff_base_for_function(self, fuzzer: str, function_name: str):
target_txt_path = pathlib.Path(self.oss_fuzz_path) / 'build' / 'challenges' / \
self.project / function_name / fuzzer / f'{options}.txt'
try:
self.exec_in_container(cmd=cmd, envs=[
result = self.exec_in_container(cmd=cmd, envs=[
f'LD_LIBRARY_PATH={target_lib_path}:/work/lib/',
f'LLVM_PROFILE_FILE=/challenges/{function_name}/{fuzzer}/{options}.profraw',
f'OUTPUT_PROFDATA=/challenges/{function_name}/{fuzzer}/{options}.profdata',
f'OUTPUT_TXT=/challenges/{function_name}/{fuzzer}/{options}.txt',
f'MAPPING_TXT=/challenges/{function_name}/address_mapping.txt',
f'LD_PRELOAD=/oss-fuzz/ld.so',
], timeout=30, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
# result.check_returncode()
with open(str(target_txt_path), 'r') as f:
target_result = f.read().split('\n')
], timeout=TIMEOUT, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# Stream and compare line-by-line to reduce memory usage
target_difference = []
for i, line in enumerate(target_result):
if len(log_set[i]) == 1 and line not in log_set[i]:
target_difference.append(i)
with open(str(target_txt_path), 'r') as f:
for i, line in enumerate(f):
line = line.rstrip('\n')
if len(log_set[i]) == 1 and line not in log_set[i]:
target_difference.append(i)
if len(target_difference) == 0:
logger.info(
f"--- target txt diff {self.project} {function_name} {fuzzer} {options}")
f"--- target txt diff {self.project} {function_name} {fuzzer} {options} length:0")
diff_result[options] = True
else:
logger.error(
f"--- target txt diff {self.project} {function_name} {fuzzer} {options}, differences length:{len(target_difference)}")
diff_result[options] = False
except subprocess.CalledProcessError as e:
logger.error(f"Target coverage generation failed for {options} with exit code {e.returncode}")
logger.error(f"stdout: {e.stdout.decode('utf-8', errors='replace') if e.stdout else ''}")
logger.error(f"stderr: {e.stderr.decode('utf-8', errors='replace') if e.stderr else ''}")
diff_result[options] = False
except Exception as e:
logger.error(
f"--- target txt diff {self.project} {function_name} {fuzzer} {options}: target txt generation failed", e)
f"--- target txt diff {self.project} {function_name} {fuzzer} {options}: target txt generation failed {e}")
diff_result[options] = False

self.exec_in_container(
@@ -430,8 +441,7 @@ def main():
try:
show_statistics(all_project_results, dataset, decompilers, opts)
except Exception as e:
import ipdb
ipdb.set_trace()
logger.exception("Error while showing statistics")


if __name__ == '__main__':
evaluate_rsr.py (108 changes: 64 additions & 44 deletions)
@@ -19,32 +19,19 @@

repo_path = pathlib.Path(__file__).resolve().parent

parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str, default="./config.yaml",
help='Path to the configuration file')
parser.add_argument("--decompiled-dataset", type=str)
parser.add_argument("--decompilers", type=str, nargs='*',
help="Decompilers to evaluate, leave empty to evaluate all decompilers specified in the config")
args = parser.parse_args()
oss_fuzz_path: pathlib.Path | None = None
decompilers: Set[str] = set()

with open(args.config, 'r') as f:
config = yaml.safe_load(f)

oss_fuzz_path = pathlib.Path(config['oss_fuzz_path'])
decompilers: Set[str] = set(config['decompilers'])

if args.decompilers:
decompilers = decompilers.intersection(set(args.decompilers))

ds_with_decompile_code = datasets.Dataset.load_from_disk(
args.decompiled_dataset)

for col in ['include', 'opt']:
if col not in ds_with_decompile_code.column_names:
raise ValueError(f"Column {col} not found in the dataset, please make sure the dataset is a merged dataset")

df = ds_with_decompile_code.to_pandas()
assert isinstance(df, pd.DataFrame)
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str, default="./config.yaml",
help='Path to the configuration file')
parser.add_argument("--decompiled-dataset", type=str, required=True,
help="Path to the merged decompiled dataset produced earlier")
parser.add_argument("--decompilers", type=str, nargs='*',
help="Decompilers to evaluate, leave empty to evaluate all decompilers specified in the config")
return parser.parse_args()


class DockerContainer:
@@ -329,28 +316,61 @@ def decompile_pass_rate(gen_results, compiler, num_workers, container):
return ret


for d in decompilers:
print(f'Decompiler: {d}')
def main():
global oss_fuzz_path, decompilers

args = parse_args()

with open(args.config, 'r') as f:
config = yaml.safe_load(f)

oss_fuzz_path = pathlib.Path(config['oss_fuzz_path'])
decompilers = set(config['decompilers'])

if args.decompilers:
decompilers = decompilers.intersection(set(args.decompilers))

if d not in df.columns:
continue
if not args.decompiled_dataset:
raise ValueError(
"--decompiled-dataset is required. Please provide the path to the merged dataset.")

Comment on lines +333 to 336

Copilot AI (Dec 15, 2025):

This validation check is redundant because the argument parser already has `required=True` for the `--decompiled-dataset` argument (line 30). The `argparse` module will raise an error if this argument is not provided, so this condition will never be true.

Suggested change:
if not args.decompiled_dataset:
raise ValueError(
"--decompiled-dataset is required. Please provide the path to the merged dataset.")
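In support of the comment above, a self-contained sketch showing that `argparse` itself rejects a missing required flag before any manual check could run:

```python
# Sketch: argparse exits with status 2 when a required argument is absent.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--decompiled-dataset", required=True)

try:
    parser.parse_args([])  # simulate invoking the script with no arguments
except SystemExit as exc:
    print("argparse exited with code", exc.code)  # -> 2
```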
with DockerContainer('evaluate_in_docker', {
f'{oss_fuzz_path}/build/challenges': '/challenges',
f'{repo_path}/fix': '/fix'
}) as container:
eval_result_df = pd.DataFrame(
decompile_pass_rate(df, d, 64, container))
ds_with_decompile_code = datasets.Dataset.load_from_disk(
args.decompiled_dataset)

for col in ['include', 'opt']:
if col not in ds_with_decompile_code.column_names:
raise ValueError(
f"Column {col} not found in the dataset, please make sure the dataset is a merged dataset")

df = ds_with_decompile_code.to_pandas()
assert isinstance(df, pd.DataFrame)

for d in decompilers:
print(f'Decompiler: {d}')

if d not in df.columns:
continue

with DockerContainer('evaluate_in_docker', {
f'{oss_fuzz_path}/build/challenges': '/challenges',
f'{repo_path}/fix': '/fix'
}) as container:
eval_result_df = pd.DataFrame(
decompile_pass_rate(df, d, 64, container))

for opt, per_opt_df in eval_result_df.groupby('opt'):
compile_rate = per_opt_df['flag_compile'].mean()

print(
f"{d} Optimization {opt}: Compile Rate: {compile_rate:.4f}")
print('-' * 30)

for opt, per_opt_df in eval_result_df.groupby('opt'):
compile_rate = per_opt_df['flag_compile'].mean()
rm_docker_cmd = "docker rm -f evaluate_in_docker"
result = subprocess.run(rm_docker_cmd, shell=True,
capture_output=True, text=True)
if result.returncode == 0:
print("Container evaluate_in_docker removed successfully")

print(
f"Optimization {opt}: Compile Rate: {compile_rate:.4f}")
print('-' * 30)

rm_docker_cmd = "docker rm -f evaluate_in_docker"
result = subprocess.run(rm_docker_cmd, shell=True,
capture_output=True, text=True)
if result.returncode == 0:
print("Container evaluate_in_docker removed successfully")
if __name__ == "__main__":
main()
extract_functions.py (2 changes: 1 addition & 1 deletion)
@@ -445,7 +445,7 @@ def main():
break
except Exception as e:
logger.error(f"Error in {project}: {e}")
raise
#raise


if __name__ == '__main__':