README.md (18 changes: 14 additions & 4 deletions)
@@ -181,15 +181,25 @@ python merge.py --base-dataset-path $dataset_path/ --decompiled-datasets $datase

This section describes the evaluation of decompiled code.

Before evaluation, integrate all decompiler outputs, including those from LLMs, into a single dataset saved at `./decompiled_ds_all`. Then, execute:
Before evaluation, integrate all decompiler outputs, including those from LLMs, into a single dataset saved at `./decompiled_ds_all`.
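A minimal sketch (assuming the merged dataset was saved at `./decompiled_ds_all` as above) to confirm it carries the `include` and `opt` columns the evaluation scripts check for:

```python
# Sketch: verify the merged dataset has the columns the evaluators require.
import datasets

ds = datasets.Dataset.load_from_disk("./decompiled_ds_all")
missing = {"include", "opt"} - set(ds.column_names)
assert not missing, f"not a merged dataset, missing columns: {missing}"
print(ds)
```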

**Step 1: Generate base libfunction.so files (required for CER evaluation)**

```shell
python evaluate_rsr.py --config ./config.yaml --decompiled-dataset $dataset_path/decompiled_ds --decompilers func
```

This compiles the original extracted functions into ground-truth shared libraries at `{oss_fuzz_path}/build/challenges/{project}/{function}/libfunction.so`.
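As a quick sanity check, you can confirm a ground-truth library was built and exports its function; `my_project` and `my_function` are hypothetical placeholders, and `$oss_fuzz_path` stands for the `oss_fuzz_path` value from `config.yaml`:

```shell
ls "$oss_fuzz_path"/build/challenges/my_project/my_function/libfunction.so
nm -D "$oss_fuzz_path"/build/challenges/my_project/my_function/libfunction.so | grep " my_function$"
```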

**Step 2: Evaluate decompiler outputs**

```shell
python evaluate_rsr.py --decompiled-dataset $dataset_path/decompiled_ds --decompilers hexrays
python evaluate_rsr.py --config ./config.yaml --decompiled-dataset $dataset_path/decompiled_ds --decompilers hexrays
```

Enable the debug parameter to print error messages for specific data. This script recompiles the specified decompiler outputs in Docker, applies fixes, and reports success rates across different optimization levels. Successfully compiled functions are stored as shared libraries in `{oss_fuzz_path}/build/challenges` for further evaluation.
Enable the debug parameter to print error messages for individual samples. This script recompiles the specified decompiler outputs in Docker, applies fixes, and reports compile success rates across the different optimization levels.
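Since `--decompilers` is declared with `nargs='*'`, several decompilers can be evaluated in one invocation; `ghidra` below is only a placeholder for any other decompiler listed in your config:

```shell
python evaluate_rsr.py --config ./config.yaml --decompiled-dataset $dataset_path/decompiled_ds --decompilers hexrays ghidra
```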

To assess coverage differences before and after replacing with decompiled code, run:
**Step 3: Assess coverage differences**

```shell
python evaluate_cer.py --dataset $dataset_path/decompiled_ds
```
compile_ossfuzz.py (22 changes: 11 additions & 11 deletions)
@@ -1,4 +1,5 @@
import argparse
import functools
import os
import pathlib
import re
@@ -168,7 +169,7 @@ def process_project_linearly(project_path):
OUTPUT_BINARY_PATH = OUTPUT_PATH / "binary"
OUTPUT_BINARY_PATH.mkdir(exist_ok=True, parents=True)

extra_flags = ' '.join([
extra_flags = [
"-mno-sse",
"-fno-eliminate-unused-debug-types",
"-fno-lto",
@@ -177,7 +178,7 @@ def process_project_linearly(project_path):
# "-fno-inline-functions-called-once", # not supported in clang
"-fno-inline",
# "-fno-reorder-blocks-and-partition", # not supported in clang
])
]
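Keeping `extra_flags` as a list (rather than a pre-joined string) lets it be concatenated directly into the argv list executed with `shell=False` below. A standalone sketch of the two styles, not code from this repo (`input.c` and `out.so` are hypothetical):

```python
# Sketch: argv-list invocation vs. shell-string invocation.
import subprocess

extra_flags = ["-mno-sse", "-fno-lto", "-fno-inline"]

# List form: each element reaches clang verbatim, with no shell re-parsing.
cmd = ["clang", "input.c", "-O0", "-shared", "-fPIC", "-o", "out.so"] + extra_flags + ["-lm"]
subprocess.run(cmd, shell=False, check=True)

# String form (the old style) needs manual joining and careful quoting instead.
subprocess.run("clang input.c -O0 -shared -fPIC -o out.so "
               + " ".join(extra_flags) + " -lm", shell=True, check=True)
```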


def compile(row, container: DockerContainer):
@@ -197,12 +198,11 @@ def compile(row, container: DockerContainer):
f.write(func)

output_file = OUTPUT_BINARY_PATH / f'task-{idx}-{opt}.so'
output_file_indocker = pathlib.Path(
'/challenges') / f'task-{idx}-{opt}.so'
output_file_indocker = pathlib.Path('/challenges/binary') / f'task-{idx}-{opt}.so'
cmd = ['clang', filepath, f'-{opt}', '-shared', '-fPIC',
'-o', output_file_indocker, extra_flags, '-lm']
container.exec_in_container(
cmd, cwd='/challenges', shell=True, check=True)
'-o', str(output_file_indocker)] + extra_flags + ['-lm']
out = container.exec_in_container(
Copilot AI (Dec 15, 2025):

The variable `out` is assigned but never used. If the intention is to capture and suppress the output from the Docker container execution, consider either using the result (e.g., for logging or debugging) or removing the assignment and just calling the function directly.

Suggested change:
out = container.exec_in_container(
container.exec_in_container(
cmd, cwd='/challenges', shell=False, check=True, capture_output=True)

ret = subprocess.run(
f'nm {output_file} | egrep " {function_name}$"', stdout=subprocess.PIPE, shell=True, check=True)
@@ -216,18 +216,18 @@ def compile(row, container: DockerContainer):
'path': str(output_file.relative_to(OUTPUT_PATH)),
})
except subprocess.CalledProcessError as e:
logger.error(f"Error compiling {idx} with {opt}: {e}")
logger.error(f"Error compiling {idx}: {e}")
finally:
# os.remove(filepath)
pass

return challenge


def tqdm_progress_map(func, iterable, num_workers, container):
def tqdm_progress_map(func, iterable, num_workers):
results = []
with Pool(num_workers) as pool:
for result in tqdm(pool.imap_unordered(func, iterable, container), total=len(iterable)):
for result in tqdm(pool.imap_unordered(func, iterable), total=len(iterable)):
results.append(result)
return results
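For context on this fix: the third positional argument of `Pool.imap_unordered(func, iterable, chunksize)` is `chunksize`, so passing `container` there was incorrect; binding it with `functools.partial` routes it to the worker instead. A minimal self-contained sketch of the pattern (toy `work` function standing in for `compile`):

```python
import functools
from multiprocessing import Pool

def work(row, container):
    # Stand-in for compile(row, container).
    return f"row {row} handled by {container}"

if __name__ == "__main__":
    # partial objects pickle (lambdas do not), so Pool workers can receive them.
    bound = functools.partial(work, container="ctr-0")
    with Pool(4) as pool:
        for result in pool.imap_unordered(bound, range(8)):
            print(result)
```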

@@ -236,7 +236,7 @@ def tqdm_progress_map(func, iterable, num_workers, container):
f'{OUTPUT_PATH}': '/challenges',
'/dev/shm': '/dev/shm'
}) as container:
res = tqdm_progress_map(compile, ds, args.num_workers, container)
res = tqdm_progress_map(functools.partial(compile, container=container), ds, args.num_workers)
res = list(chain(*res))
ds = datasets.Dataset.from_list(res)
print(len(ds))
evaluate_cer.py (40 changes: 25 additions & 15 deletions)
@@ -110,6 +110,7 @@ def get_func_offsets(so_path: pathlib.Path,


WORKER_COUNT = os.cpu_count()
TIMEOUT = 300


class ReexecutableRateEvaluator(OSSFuzzDatasetGenerator):
@@ -210,10 +211,10 @@ def diff_base_for_function(self, fuzzer: str, function_name: str):
f'OUTPUT_TXT=/challenges/{function_name}/{fuzzer}/base.txt',
f'MAPPING_TXT=/challenges/{function_name}/address_mapping.txt',
f'LD_PRELOAD=/oss-fuzz/ld.so'
], timeout=30, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
# result.check_returncode()
], timeout=TIMEOUT, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# Stream file line-by-line to reduce memory usage
with open(str(base_txt_path), 'r') as f:
base_result = f.read().split('\n')
base_result = [line.rstrip('\n') for line in f]
if txt_length != 0 and len(base_result) != txt_length:
logger.error(
f"base txt length mismatch, expected {txt_length}, got {len(base_result)}")
@@ -230,6 +231,11 @@ def diff_base_for_function(self, fuzzer: str, function_name: str):
if idx < max_trails - 1:
prev_diff_length = diff_length

except subprocess.CalledProcessError as e:
logger.error(f"Base coverage generation failed with exit code {e.returncode}")
logger.error(f"stdout: {e.stdout.decode('utf-8', errors='replace') if e.stdout else ''}")
logger.error(f"stderr: {e.stderr.decode('utf-8', errors='replace') if e.stderr else ''}")
return (fuzzer, function_name, {})
except Exception as e:
logger.error(
f"base txt generation failed:{e}")
@@ -253,32 +259,37 @@ def diff_base_for_function(self, fuzzer: str, function_name: str):
target_txt_path = pathlib.Path(self.oss_fuzz_path) / 'build' / 'challenges' / \
self.project / function_name / fuzzer / f'{options}.txt'
try:
self.exec_in_container(cmd=cmd, envs=[
result = self.exec_in_container(cmd=cmd, envs=[
f'LD_LIBRARY_PATH={target_lib_path}:/work/lib/',
f'LLVM_PROFILE_FILE=/challenges/{function_name}/{fuzzer}/{options}.profraw',
f'OUTPUT_PROFDATA=/challenges/{function_name}/{fuzzer}/{options}.profdata',
f'OUTPUT_TXT=/challenges/{function_name}/{fuzzer}/{options}.txt',
f'MAPPING_TXT=/challenges/{function_name}/address_mapping.txt',
f'LD_PRELOAD=/oss-fuzz/ld.so',
], timeout=30, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
# result.check_returncode()
with open(str(target_txt_path), 'r') as f:
target_result = f.read().split('\n')
], timeout=TIMEOUT, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# Stream and compare line-by-line to reduce memory usage
target_difference = []
for i, line in enumerate(target_result):
if len(log_set[i]) == 1 and line not in log_set[i]:
target_difference.append(i)
with open(str(target_txt_path), 'r') as f:
for i, line in enumerate(f):
line = line.rstrip('\n')
if len(log_set[i]) == 1 and line not in log_set[i]:
target_difference.append(i)
if len(target_difference) == 0:
logger.info(
f"--- target txt diff {self.project} {function_name} {fuzzer} {options}")
f"--- target txt diff {self.project} {function_name} {fuzzer} {options} length:0")
diff_result[options] = True
else:
logger.error(
f"--- target txt diff {self.project} {function_name} {fuzzer} {options}, differences length:{len(target_difference)}")
diff_result[options] = False
except subprocess.CalledProcessError as e:
logger.error(f"Target coverage generation failed for {options} with exit code {e.returncode}")
logger.error(f"stdout: {e.stdout.decode('utf-8', errors='replace') if e.stdout else ''}")
logger.error(f"stderr: {e.stderr.decode('utf-8', errors='replace') if e.stderr else ''}")
diff_result[options] = False
except Exception as e:
logger.error(
f"--- target txt diff {self.project} {function_name} {fuzzer} {options}: target txt generation failed", e)
f"--- target txt diff {self.project} {function_name} {fuzzer} {options}: target txt generation failed {e}")
diff_result[options] = False

self.exec_in_container(
@@ -430,8 +441,7 @@ def main():
try:
show_statistics(all_project_results, dataset, decompilers, opts)
except Exception as e:
import ipdb
ipdb.set_trace()
logger.exception("Error while showing statistics")


if __name__ == '__main__':
evaluate_rsr.py (108 changes: 64 additions & 44 deletions)
@@ -19,32 +19,19 @@

repo_path = pathlib.Path(__file__).resolve().parent

parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str, default="./config.yaml",
help='Path to the configuration file')
parser.add_argument("--decompiled-dataset", type=str)
parser.add_argument("--decompilers", type=str, nargs='*',
help="Decompilers to evaluate, leave empty to evaluate all decompilers specified in the config")
args = parser.parse_args()
oss_fuzz_path: pathlib.Path | None = None
decompilers: Set[str] = set()

with open(args.config, 'r') as f:
config = yaml.safe_load(f)

oss_fuzz_path = pathlib.Path(config['oss_fuzz_path'])
decompilers: Set[str] = set(config['decompilers'])

if args.decompilers:
decompilers = decompilers.intersection(set(args.decompilers))

ds_with_decompile_code = datasets.Dataset.load_from_disk(
args.decompiled_dataset)

for col in ['include', 'opt']:
if col not in ds_with_decompile_code.column_names:
raise ValueError(f"Column {col} not found in the dataset, please make sure the dataset is a merged dataset")

df = ds_with_decompile_code.to_pandas()
assert isinstance(df, pd.DataFrame)
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str, default="./config.yaml",
help='Path to the configuration file')
parser.add_argument("--decompiled-dataset", type=str, required=True,
help="Path to the merged decompiled dataset produced earlier")
parser.add_argument("--decompilers", type=str, nargs='*',
help="Decompilers to evaluate, leave empty to evaluate all decompilers specified in the config")
return parser.parse_args()


class DockerContainer:
@@ -329,28 +316,61 @@ def decompile_pass_rate(gen_results, compiler, num_workers, container):
return ret


for d in decompilers:
print(f'Decompiler: {d}')
def main():
global oss_fuzz_path, decompilers

args = parse_args()

with open(args.config, 'r') as f:
config = yaml.safe_load(f)

oss_fuzz_path = pathlib.Path(config['oss_fuzz_path'])
decompilers = set(config['decompilers'])

if args.decompilers:
decompilers = decompilers.intersection(set(args.decompilers))

if d not in df.columns:
continue
if not args.decompiled_dataset:
raise ValueError(
"--decompiled-dataset is required. Please provide the path to the merged dataset.")

Comment on lines +333 to 336

Copilot AI (Dec 15, 2025):

This validation check is redundant because the argument parser already has `required=True` for the `--decompiled-dataset` argument (line 30). The `argparse` module will raise an error if this argument is not provided, so this condition will never be true.

Suggested change:
if not args.decompiled_dataset:
raise ValueError(
"--decompiled-dataset is required. Please provide the path to the merged dataset.")
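In support of the comment above, a self-contained sketch showing that `argparse` itself rejects a missing required flag before any manual check could run:

```python
# Sketch: argparse exits with status 2 when a required argument is absent.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--decompiled-dataset", required=True)

try:
    parser.parse_args([])  # simulate invoking the script with no arguments
except SystemExit as exc:
    print("argparse exited with code", exc.code)  # -> 2
```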
with DockerContainer('evaluate_in_docker', {
f'{oss_fuzz_path}/build/challenges': '/challenges',
f'{repo_path}/fix': '/fix'
}) as container:
eval_result_df = pd.DataFrame(
decompile_pass_rate(df, d, 64, container))
ds_with_decompile_code = datasets.Dataset.load_from_disk(
args.decompiled_dataset)

for col in ['include', 'opt']:
if col not in ds_with_decompile_code.column_names:
raise ValueError(
f"Column {col} not found in the dataset, please make sure the dataset is a merged dataset")

df = ds_with_decompile_code.to_pandas()
assert isinstance(df, pd.DataFrame)

for d in decompilers:
print(f'Decompiler: {d}')

if d not in df.columns:
continue

with DockerContainer('evaluate_in_docker', {
f'{oss_fuzz_path}/build/challenges': '/challenges',
f'{repo_path}/fix': '/fix'
}) as container:
eval_result_df = pd.DataFrame(
decompile_pass_rate(df, d, 64, container))

for opt, per_opt_df in eval_result_df.groupby('opt'):
compile_rate = per_opt_df['flag_compile'].mean()

print(
f"{d} Optimization {opt}: Compile Rate: {compile_rate:.4f}")
print('-' * 30)

for opt, per_opt_df in eval_result_df.groupby('opt'):
compile_rate = per_opt_df['flag_compile'].mean()
rm_docker_cmd = "docker rm -f evaluate_in_docker"
result = subprocess.run(rm_docker_cmd, shell=True,
capture_output=True, text=True)
if result.returncode == 0:
print("Container evaluate_in_docker removed successfully")

print(
f"Optimization {opt}: Compile Rate: {compile_rate:.4f}")
print('-' * 30)

rm_docker_cmd = "docker rm -f evaluate_in_docker"
result = subprocess.run(rm_docker_cmd, shell=True,
capture_output=True, text=True)
if result.returncode == 0:
print("Container evaluate_in_docker removed successfully")
if __name__ == "__main__":
main()
extract_functions.py (2 changes: 1 addition & 1 deletion)
@@ -445,7 +445,7 @@ def main():
break
except Exception as e:
logger.error(f"Error in {project}: {e}")
raise
#raise


if __name__ == '__main__':