diff --git a/README.md b/README.md index cb58408..a43d074 100644 --- a/README.md +++ b/README.md @@ -181,15 +181,25 @@ python merge.py --base-dataset-path $dataset_path/ --decompiled-datasets $datase This section describes the evaluation of decompiled code. -Before evaluation, integrate all decompiler outputs, including those from LLMs, into a single dataset saved at `./decompiled_ds_all`. Then, execute: +Before evaluation, integrate all decompiler outputs, including those from LLMs, into a single dataset saved at `./decompiled_ds_all`. + +**Step 1: Generate base libfunction.so files (required for CER evaluation)** + +```shell +python evaluate_rsr.py --config ./config.yaml --decompiled-dataset $dataset_path/decompiled_ds --decompilers func +``` + +This compiles the original extracted functions into ground-truth shared libraries at `{oss_fuzz_path}/build/challenges/{project}/{function}/libfunction.so`. + +**Step 2: Evaluate decompiler outputs** ```shell -python evaluate_rsr.py --decompiled-dataset $dataset_path/decompiled_ds --decompilers hexrays +python evaluate_rsr.py --config ./config.yaml --decompiled-dataset $dataset_path/decompiled_ds --decompilers hexrays ``` -Enable the debug parameter to print error messages for specific data. This script recompiles the specified decompiler outputs in Docker, applies fixes, and reports success rates across different optimization levels. Successfully compiled functions are stored as shared libraries in `{oss_fuzz_path}/build/challenges` for further evaluation. +Enable the debug parameter to print error messages for specific data. This script recompiles the specified decompiler outputs in Docker, applies fixes, and reports success rates across different optimization levels. -To assess coverage differences before and after replacing with decompiled code, run: +**Step 3: Assess coverage differences** ```shell python evaluate_cer.py --dataset $dataset_path/decompiled_ds diff --git a/compile_ossfuzz.py b/compile_ossfuzz.py index 438f617..f589927 100644 --- a/compile_ossfuzz.py +++ b/compile_ossfuzz.py @@ -1,4 +1,5 @@ import argparse +import functools import os import pathlib import re @@ -168,7 +169,7 @@ def process_project_linearly(project_path): OUTPUT_BINARY_PATH = OUTPUT_PATH / "binary" OUTPUT_BINARY_PATH.mkdir(exist_ok=True, parents=True) -extra_flags = ' '.join([ +extra_flags = [ "-mno-sse", "-fno-eliminate-unused-debug-types", "-fno-lto", @@ -177,7 +178,7 @@ def process_project_linearly(project_path): # "-fno-inline-functions-called-once", # not supported in clang "-fno-inline", # "-fno-reorder-blocks-and-partition", # not supported in clang -]) +] def compile(row, container: DockerContainer): @@ -197,12 +198,11 @@ def compile(row, container: DockerContainer): f.write(func) output_file = OUTPUT_BINARY_PATH / f'task-{idx}-{opt}.so' - output_file_indocker = pathlib.Path( - '/challenges') / f'task-{idx}-{opt}.so' + output_file_indocker = pathlib.Path('/challenges/binary') / f'task-{idx}-{opt}.so' cmd = ['clang', filepath, f'-{opt}', '-shared', '-fPIC', - '-o', output_file_indocker, extra_flags, '-lm'] - container.exec_in_container( - cmd, cwd='/challenges', shell=True, check=True) + '-o', str(output_file_indocker)] + extra_flags + ['-lm'] + out = container.exec_in_container( + cmd, cwd='/challenges', shell=False, check=True, capture_output=True) ret = subprocess.run( f'nm {output_file} | egrep " {function_name}$"', stdout=subprocess.PIPE, shell=True, check=True) @@ -216,7 +216,7 @@ def compile(row, container: DockerContainer): 'path': str(output_file.relative_to(OUTPUT_PATH)), }) except subprocess.CalledProcessError as e: - logger.error(f"Error compiling {idx} with {opt}: {e}") + logger.error(f"Error compiling {idx}: {e}") finally: # os.remove(filepath) pass @@ -224,10 +224,10 @@ def compile(row, container: DockerContainer): return challenge -def tqdm_progress_map(func, iterable, num_workers, container): +def tqdm_progress_map(func, iterable, num_workers): results = [] with Pool(num_workers) as pool: - for result in tqdm(pool.imap_unordered(func, iterable, container), total=len(iterable)): + for result in tqdm(pool.imap_unordered(func, iterable), total=len(iterable)): results.append(result) return results @@ -236,7 +236,7 @@ def tqdm_progress_map(func, iterable, num_workers, container): f'{OUTPUT_PATH}': '/challenges', '/dev/shm': '/dev/shm' }) as container: - res = tqdm_progress_map(compile, ds, args.num_workers, container) + res = tqdm_progress_map(functools.partial(compile, container=container), ds, args.num_workers) res = list(chain(*res)) ds = datasets.Dataset.from_list(res) print(len(ds)) diff --git a/evaluate_cer.py b/evaluate_cer.py index d23e49e..148d37d 100644 --- a/evaluate_cer.py +++ b/evaluate_cer.py @@ -110,6 +110,7 @@ def get_func_offsets(so_path: pathlib.Path, WORKER_COUNT = os.cpu_count() +TIMEOUT = 300 class ReexecutableRateEvaluator(OSSFuzzDatasetGenerator): @@ -210,10 +211,10 @@ def diff_base_for_function(self, fuzzer: str, function_name: str): f'OUTPUT_TXT=/challenges/{function_name}/{fuzzer}/base.txt', f'MAPPING_TXT=/challenges/{function_name}/address_mapping.txt', f'LD_PRELOAD=/oss-fuzz/ld.so' - ], timeout=30, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - # result.check_returncode() + ], timeout=TIMEOUT, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + # Stream file line-by-line to reduce memory usage with open(str(base_txt_path), 'r') as f: - base_result = f.read().split('\n') + base_result = [line.rstrip('\n') for line in f] if txt_length != 0 and len(base_result) != txt_length: logger.error( f"base txt length mismatch, expected {txt_length}, got {len(base_result)}") @@ -230,6 +231,11 @@ def diff_base_for_function(self, fuzzer: str, function_name: str): if idx < max_trails - 1: prev_diff_length = diff_length + except subprocess.CalledProcessError as e: + logger.error(f"Base coverage generation failed with exit code {e.returncode}") + logger.error(f"stdout: {e.stdout.decode('utf-8', errors='replace') if e.stdout else ''}") + logger.error(f"stderr: {e.stderr.decode('utf-8', errors='replace') if e.stderr else ''}") + return (fuzzer, function_name, {}) except Exception as e: logger.error( f"base txt generation failed:{e}") @@ -253,32 +259,37 @@ def diff_base_for_function(self, fuzzer: str, function_name: str): target_txt_path = pathlib.Path(self.oss_fuzz_path) / 'build' / 'challenges' / \ self.project / function_name / fuzzer / f'{options}.txt' try: - self.exec_in_container(cmd=cmd, envs=[ + result = self.exec_in_container(cmd=cmd, envs=[ f'LD_LIBRARY_PATH={target_lib_path}:/work/lib/', f'LLVM_PROFILE_FILE=/challenges/{function_name}/{fuzzer}/{options}.profraw', f'OUTPUT_PROFDATA=/challenges/{function_name}/{fuzzer}/{options}.profdata', f'OUTPUT_TXT=/challenges/{function_name}/{fuzzer}/{options}.txt', f'MAPPING_TXT=/challenges/{function_name}/address_mapping.txt', f'LD_PRELOAD=/oss-fuzz/ld.so', - ], timeout=30, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - # result.check_returncode() - with open(str(target_txt_path), 'r') as f: - target_result = f.read().split('\n') + ], timeout=TIMEOUT, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + # Stream and compare line-by-line to reduce memory usage target_difference = [] - for i, line in enumerate(target_result): - if len(log_set[i]) == 1 and line not in log_set[i]: - target_difference.append(i) + with open(str(target_txt_path), 'r') as f: + for i, line in enumerate(f): + line = line.rstrip('\n') + if len(log_set[i]) == 1 and line not in log_set[i]: + target_difference.append(i) if len(target_difference) == 0: logger.info( - f"--- target txt diff {self.project} {function_name} {fuzzer} {options}") + f"--- target txt diff {self.project} {function_name} {fuzzer} {options} length:0") diff_result[options] = True else: logger.error( f"--- target txt diff {self.project} {function_name} {fuzzer} {options}, differences length:{len(target_difference)}") diff_result[options] = False + except subprocess.CalledProcessError as e: + logger.error(f"Target coverage generation failed for {options} with exit code {e.returncode}") + logger.error(f"stdout: {e.stdout.decode('utf-8', errors='replace') if e.stdout else ''}") + logger.error(f"stderr: {e.stderr.decode('utf-8', errors='replace') if e.stderr else ''}") + diff_result[options] = False except Exception as e: logger.error( - f"--- target txt diff {self.project} {function_name} {fuzzer} {options}: target txt generation failed", e) + f"--- target txt diff {self.project} {function_name} {fuzzer} {options}: target txt generation failed {e}") diff_result[options] = False self.exec_in_container( @@ -430,8 +441,7 @@ def main(): try: show_statistics(all_project_results, dataset, decompilers, opts) except Exception as e: - import ipdb - ipdb.set_trace() + logger.exception("Error while showing statistics") if __name__ == '__main__': diff --git a/evaluate_rsr.py b/evaluate_rsr.py index 7a9ec3c..0ffcff3 100644 --- a/evaluate_rsr.py +++ b/evaluate_rsr.py @@ -19,32 +19,19 @@ repo_path = pathlib.Path(__file__).resolve().parent -parser = argparse.ArgumentParser() -parser.add_argument('--config', type=str, default="./config.yaml", - help='Path to the configuration file') -parser.add_argument("--decompiled-dataset", type=str) -parser.add_argument("--decompilers", type=str, nargs='*', - help="Decompilers to evaluate, leave empty to evaluate all decompilers specified in the config") -args = parser.parse_args() +oss_fuzz_path: pathlib.Path | None = None +decompilers: Set[str] = set() -with open(args.config, 'r') as f: - config = yaml.safe_load(f) -oss_fuzz_path = pathlib.Path(config['oss_fuzz_path']) -decompilers: Set[str] = set(config['decompilers']) - -if args.decompilers: - decompilers = decompilers.intersection(set(args.decompilers)) - -ds_with_decompile_code = datasets.Dataset.load_from_disk( - args.decompiled_dataset) - -for col in ['include', 'opt']: - if col not in ds_with_decompile_code.column_names: - raise ValueError(f"Column {col} not found in the dataset, please make sure the dataset is a merged dataset") - -df = ds_with_decompile_code.to_pandas() -assert isinstance(df, pd.DataFrame) +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument('--config', type=str, default="./config.yaml", + help='Path to the configuration file') + parser.add_argument("--decompiled-dataset", type=str, required=True, + help="Path to the merged decompiled dataset produced earlier") + parser.add_argument("--decompilers", type=str, nargs='*', + help="Decompilers to evaluate, leave empty to evaluate all decompilers specified in the config") + return parser.parse_args() class DockerContainer: @@ -329,28 +316,61 @@ def decompile_pass_rate(gen_results, compiler, num_workers, container): return ret -for d in decompilers: - print(f'Decompiler: {d}') +def main(): + global oss_fuzz_path, decompilers + + args = parse_args() + + with open(args.config, 'r') as f: + config = yaml.safe_load(f) + + oss_fuzz_path = pathlib.Path(config['oss_fuzz_path']) + decompilers = set(config['decompilers']) + + if args.decompilers: + decompilers = decompilers.intersection(set(args.decompilers)) - if d not in df.columns: - continue + if not args.decompiled_dataset: + raise ValueError( + "--decompiled-dataset is required. Please provide the path to the merged dataset.") - with DockerContainer('evaluate_in_docker', { - f'{oss_fuzz_path}/build/challenges': '/challenges', - f'{repo_path}/fix': '/fix' - }) as container: - eval_result_df = pd.DataFrame( - decompile_pass_rate(df, d, 64, container)) + ds_with_decompile_code = datasets.Dataset.load_from_disk( + args.decompiled_dataset) + + for col in ['include', 'opt']: + if col not in ds_with_decompile_code.column_names: + raise ValueError( + f"Column {col} not found in the dataset, please make sure the dataset is a merged dataset") + + df = ds_with_decompile_code.to_pandas() + assert isinstance(df, pd.DataFrame) + + for d in decompilers: + print(f'Decompiler: {d}') + + if d not in df.columns: + continue + + with DockerContainer('evaluate_in_docker', { + f'{oss_fuzz_path}/build/challenges': '/challenges', + f'{repo_path}/fix': '/fix' + }) as container: + eval_result_df = pd.DataFrame( + decompile_pass_rate(df, d, 64, container)) + + for opt, per_opt_df in eval_result_df.groupby('opt'): + compile_rate = per_opt_df['flag_compile'].mean() + + print( + f"{d} Optimization {opt}: Compile Rate: {compile_rate:.4f}") + print('-' * 30) - for opt, per_opt_df in eval_result_df.groupby('opt'): - compile_rate = per_opt_df['flag_compile'].mean() + rm_docker_cmd = "docker rm -f evaluate_in_docker" + result = subprocess.run(rm_docker_cmd, shell=True, + capture_output=True, text=True) + if result.returncode == 0: + print("Container evaluate_in_docker removed successfully") - print( - f"Optimization {opt}: Compile Rate: {compile_rate:.4f}") - print('-' * 30) -rm_docker_cmd = "docker rm -f evaluate_in_docker" -result = subprocess.run(rm_docker_cmd, shell=True, - capture_output=True, text=True) -if result.returncode == 0: - print("Container evaluate_in_docker removed successfully") +if __name__ == "__main__": + main() diff --git a/extract_functions.py b/extract_functions.py index 0c23ebc..4115868 100644 --- a/extract_functions.py +++ b/extract_functions.py @@ -445,7 +445,7 @@ def main(): break except Exception as e: logger.error(f"Error in {project}: {e}") - raise + #raise if __name__ == '__main__': diff --git a/script.bash b/script.bash new file mode 100644 index 0000000..7f9dd59 --- /dev/null +++ b/script.bash @@ -0,0 +1,71 @@ +#!/bin/bash + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +DATASET_DIR="$DIR/dataset" +DECOMPILERS=("ghidra" "hexrays") +#PROJECTS=("minizip" "avahi" "qt" "libdwarf" "c-blosc" "unbound" "qpdf" "file" "wavpack" "libsrtp" "fribidi" "libconfig" "jansson" "strongswan" "pjsip" "croaring") +PROJECTS=("file") +WORKERS=4 +export LIBCLANG_PATH="/usr/lib/llvm-18/lib/libclang-18.so.1" + +test -f "$LIBCLANG_PATH" || { echo "Please set LIBCLANG_PATH correctly."; exit 1; } + +test -f "$DIR/libfunction.so" || { echo "Please build libfunction.so first."; exit 1; } + +test -d "$DIR/oss-fuzz/build" && { echo "Build directory already exists. Please remove it before running this script."; exit 1; } + +test -d "$DATASET_DIR" && { echo "Dataset directory already exists. Please remove it before running this script."; exit 1; } + +test -d "$DIR/tmp_results" && { echo "Temporary results directory already exists. Please remove it before running this script."; exit 1; } + +set -xeuo pipefail + +# create a comma-separated project list from the PROJECTS array +PROJECTS_CSV="$(IFS=, ; echo "${PROJECTS[*]}")" +echo "Using projects: $PROJECTS_CSV" +python extract_functions.py --worker-count "$WORKERS" --config "$DIR/config.yaml" --project "$PROJECTS_CSV" + +python compile_ossfuzz.py --config "$DIR/config.yaml" --output "$DATASET_DIR" + +# generate --with- flags from the DECOMPILERS array +DECOMPILER_FLAGS=() +for d in "${DECOMPILERS[@]}"; do + DECOMPILER_FLAGS+=("--with-$d") +done +echo "Using decompilers: ${DECOMPILERS[*]}" + +python "$DIR/decompiler-service/manage.py" "${DECOMPILER_FLAGS[@]}" build + +python "$DIR/decompiler-service/manage.py" "${DECOMPILER_FLAGS[@]}" start & +echo "Started decompiler service" +sleep 5 # wait for the decompiler service to start + +# gracefully stop the decompiler service on exit +stop_decompiler_service() { + python "$DIR/decompiler-service/manage.py" "${DECOMPILER_FLAGS[@]}" stop || true +} +trap 'stop_decompiler_service' EXIT INT TERM + +rm my_task_queue.json || true + +python "$DIR/decompiler-service/scripts/test_decompile_async.py" + +test -f my_task_queue.json || { echo "Decompilation task queue not found!"; exit 1; } + +# create a comma-separated decompiler list from the DECOMPILERS array +DECOMPILERS_CSV="$(IFS=, ; echo "${DECOMPILERS[*]}")" +echo "Using decompilers: $DECOMPILERS_CSV" +python decompile.py --base-dataset-path "$DATASET_DIR" --output "$DATASET_DIR/decompiled_ds" --decompilers "$DECOMPILERS_CSV" + +echo "Merging base dataset ($DATASET_DIR) with decompiled dataset ($DATASET_DIR/decompiled_ds)" +python merge.py --base-dataset-path "$DATASET_DIR" --decompiled-datasets "$DATASET_DIR/decompiled_ds" --output "$DATASET_DIR/decompiled_ds_all" + +# Generate base libfunction.so files required for CER evaluation +python evaluate_rsr.py --config "$DIR/config.yaml" --decompiled-dataset "$DATASET_DIR/decompiled_ds_all" --decompilers func + +# Evaluate RSR for the configured traditional decompilers (space-separated list) +python evaluate_rsr.py --config "$DIR/config.yaml" --decompiled-dataset "$DATASET_DIR/decompiled_ds_all" --decompilers "${DECOMPILERS[@]}" + +# Run CER evaluation (coverage) on the merged dataset +python evaluate_cer.py --dataset "$DATASET_DIR/decompiled_ds_all" --worker-count "$WORKERS" +