hgarrereyn · f0rki · Jan 16, 2024 · Jan 16, 2024 · Jan 16, 2024 · Jan 16, 2024
diff --git a/cli/gfuzz/commands/gen/cpp/gen_cpp.py b/cli/gfuzz/commands/gen/cpp/gen_cpp.py
@@ -5,6 +5,7 @@
 import json
 import re
 import yaml
+import codecs
 
 from gfuzz.commands.cliopt import CLIOpt
 from gfuzz.schema import Schema
@@ -53,7 +54,7 @@
 '''
 
 CPP_SAVE_REF = '''\
-    out_ref[{out_ref_idx}] = reinterpret_cast<void *>({arg_name});
+    out_ref[{out_ref_idx}] = (void*)({arg_name});
 '''
 
 CPP_LOAD_SIMPLE = '''\
@@ -90,6 +91,30 @@
     std::cout << "}};" << std::endl;
 '''
 
+CPP_PRINT_CHAR_ARRAY = '''\
+    std::cout << "// {var_type} {arg_name}[{count}] = \\"";
+    for (int i = 0; i < {count}; ++i) {{
+        // unsigned char: isprint() is undefined for negative char values
+        unsigned char c = (unsigned char){arg_name}[i];
+        if (isprint(c) && c != '"' && c != '\\\\' && c != '\\n') {{
+            std::cout << c;
+        }} else {{
+            char x[8] = {{0,}};
+            snprintf(x, sizeof(x), "\\\\x%02x", c);
+            std::cout << x;
+        }}
+    }}
+    std::cout << "\\";" << std::endl;
+    std::cout << "    {var_type} {arg_name}[{count}] = {{";
+    for (int i = 0; i < {count}; ++i) {{
+        if (i % 16 == 0 && {count} > 16) std::cout << std::endl << "        ";
+        std::cout << {print_cast}{arg_name}[i];
+        if (i < {count} - 1) std::cout << ", ";
+    }}
+    if ({count} > 16) std::cout << std::endl << "    ";
+    std::cout << "}};" << std::endl;
+'''
+
 CPP_LOAD_ENUM = '''\
     {var_type} {arg_name};
     switch (*(reinterpret_cast<const unsigned char *>(context + {context_offset})) % {enum_size}) {{
@@ -174,17 +199,11 @@
 #include <string.h>
 #include <string>
 #include <iostream>
+#include <cstdio>
 
 unsigned long CURR_ID = 0;
 
-extern "C" void __attribute__((visibility ("default"))) global_init(int *argc, char ***argv) {{
-    char **new_argv = (char **)malloc((*argc + 2) * sizeof(char *));
-    memcpy(new_argv, *argv, sizeof(*new_argv) * *argc);
-    new_argv[*argc] = (char *)"-detect_leaks=0";
-    new_argv[*argc + 1] = 0;
-    (*argc)++;
-    *argv = new_argv;
-}}
+extern "C" void __attribute__((visibility ("default"))) global_init(int *argc, char ***argv) {{ }}
 
 extern "C" void __attribute__((visibility ("default"))) shim_init() {{
     CURR_ID = 0;
@@ -193,9 +212,12 @@
     std::cout << "{macros}" << std::endl;
 
     std::cout << "int main() {{" << std::endl;
+    std::cout << "{global_init}" << std::endl;
+    std::cout << "{initializer}" << std::endl;
 }}
 
 extern "C" void __attribute__((visibility ("default"))) shim_finalize() {{
+    std::cout << "{finalizer}" << std::endl;
     std::cout << "}}" << std::endl;
 }}
 '''
@@ -851,7 +873,10 @@ def build_write_shim(self, shim_name) -> str:
                         context_offset=context_offset,
                         context_size=k['context_size']
                     ))
-                    inner_load_args.append(CPP_PRINT_SIMPLE_ARRAY.format(
+                    fmtstring = CPP_PRINT_SIMPLE_ARRAY
+                    if k['name'] == 'char':
+                        fmtstring = CPP_PRINT_CHAR_ARRAY
+                    inner_load_args.append(fmtstring.format(
                         var_type=k['name'],
                         count=self.arg_count[i],
                         arg_name=name,
@@ -1117,10 +1142,42 @@ def make_write_harness(schema: Schema, scopes: List[CPPScope]) -> str:
     includes = ''.join(includes)
     header_string = includes.replace('"', "\\\"").replace('\n', '\\n')
 
+    # Collect optional code hooks from any 'config' object in the schema.
+    global_init = ''
+    initializer = ''
+    finalizer = ''
+    for k in schema.objects:
+        if schema.objects[k]['type'] == 'config':
+            if 'global_init' in schema.objects[k]:
+                global_init = schema.objects[k]['global_init']
+            if 'initializer' in schema.objects[k]:
+                initializer = schema.objects[k]['initializer']
+            if 'finalizer' in schema.objects[k]:
+                finalizer = schema.objects[k]['finalizer']
+    # A 'config_writer' object replaces (does not merge with) the 'config' hooks.
+    for k in schema.objects:
+        if schema.objects[k]['type'] == 'config_writer':
+            global_init = ''
+            initializer = ''
+            finalizer = ''
+            if 'global_init' in schema.objects[k]:
+                global_init = schema.objects[k]['global_init']
+            if 'initializer' in schema.objects[k]:
+                initializer = schema.objects[k]['initializer']
+            if 'finalizer' in schema.objects[k]:
+                finalizer = schema.objects[k]['finalizer']
+    # Hooks land inside C++ "..." literals: escape backslashes/newlines first, then double quotes.
+    global_init = codecs.escape_encode(bytes(global_init, 'ascii'))[0].decode('ascii').replace('"', '\\"')
+    initializer = codecs.escape_encode(bytes(initializer, 'ascii'))[0].decode('ascii').replace('"', '\\"')
+    finalizer = codecs.escape_encode(bytes(finalizer, 'ascii'))[0].decode('ascii').replace('"', '\\"')
+
     code = FULL_HARNESS.format(
         includes='',
         header=SHIM_HEADER_WRITE.format(
             header_string=header_string,
+            global_init=global_init,
+            initializer=initializer,
+            finalizer=finalizer,
             macros=SHIM_MACROS.replace('"', "\\\"").replace('\n', '\\n')
         ),
         shim_code=shim_code,

diff --git a/cli/gfuzz/schema.py b/cli/gfuzz/schema.py
@@ -1,13 +1,36 @@
-
 from typing import Set
 import yaml
 
 
+# for input validation
+SCHEMA_OBJECT_ALLOWED_KEYS = { 'headers', 'c_headers', 'name', 'type', 'static_methods', 'methods', 'context_size', 'values', 'print_cast', 'load', 'finalizer', 'initializer', 'global_init', 'load_arr' }
+SCHEMA_METHOD_ALLOWED_KEYS = { 'inputs', 'outputs', 'args', 'exec' }
+
+
 def _validate_obj(name: str, obj: dict, orig_path: str) -> dict:
     if not 'type' in obj:
         print(f'[!] Error {name} has no attribute "type"')
         return None
 
+    for k in obj:
+        if k not in SCHEMA_OBJECT_ALLOWED_KEYS:
+            print(f'[!] Error {name} has invalid attribute {k!r}')
+            return None
+    for method_type in ('methods', 'static_methods'):
+        for method_specs in obj.get(method_type, []):
+            if isinstance(method_specs, dict):
+                for (method_name, d) in method_specs.items():
+                    for k in d:
+                        if k not in SCHEMA_METHOD_ALLOWED_KEYS:
+                            print(f'[!] Error {name}.{method_type} -> {method_name} has invalid attribute {k!r}')
+                            return None
+            elif isinstance(method_specs, str):
+                # ok like that
+                pass
+            else:
+                print(f'[!] Error {name}.{method_type} -> {method_specs!r} has invalid type {type(method_specs)} (expected str or dict)')
+                return None
+
     obj['orig_path'] = orig_path
     obj['headers'] = obj.get('headers') or []
     obj['c_headers'] = obj.get('c_headers') or []
@@ -17,12 +40,18 @@ def _validate_obj(name: str, obj: dict, orig_path: str) -> dict:
 
     obj['name'] = obj.get('name') or ''
 
-    if obj['type'] in ['struct', 'class', 'file']:
+    if obj['type'] in {'struct', 'class', 'file'}:
         obj['methods'] = obj.get('methods') or []
         obj['static_methods'] = obj.get('static_methods') or []
+    elif obj['type'] in {'config', 'config_writer', 'enum', 'simple', 'ignore'}:
+        pass  # known types with no method lists; 'config_writer' is read by the write-harness generator
+    else:
+        print(f'[!] Error {name} has invalid attribute "type": {obj["type"]!r}')
+        return None
 
     return obj
 
+
 class Schema(object):
     """A schema represents the API surface of a target."""
 
@@ -65,7 +94,7 @@ def load(path: str, loaded: Set[str] = None) -> 'Schema':
         # Process include list.
         if loaded is None:
             loaded = set()
-            
+
         include = objects.get('include') or []
         for sub_path in include:
             if sub_path in loaded:

diff --git a/core/graph.hpp b/core/graph.hpp
@@ -347,11 +347,18 @@ class TGraph {
      * Recursively sample and append nodes from a type tree.
      */
     void AppendTree(unsigned int node_idx, unsigned int conn_idx, int layer, bool forward, TypeTree tree) {
-        int sample = ((unsigned int)rand()) % tree.num_subtrees;
-        unsigned int sample_idx = -1;
-        while (sample >= 0) {
+        // fail loud when assertions are enabled
+        assert(tree.num_subtrees > 0);
+        assert(tree.children.size() > 0);
+        // return early in case the assertions are disabled
+        if (tree.num_subtrees == 0 || tree.children.size() == 0) return;
+        // pick the first child whose cumulative num_subtrees exceeds sample; never index past the last child
+        unsigned int sample = ((unsigned int)rand()) % tree.num_subtrees;
+        unsigned int sample_idx = 0;
+        unsigned int subtree_sum = tree.children[sample_idx].num_subtrees;
+        while (subtree_sum <= sample && sample_idx + 1 < tree.children.size()) {
             sample_idx += 1;
-            sample -= tree.children[sample_idx].num_subtrees;
+            subtree_sum += tree.children[sample_idx].num_subtrees;
         }
 
         ScopeTree stree = tree.children[sample_idx];

diff --git a/core/harness.cpp b/core/harness.cpp
@@ -255,13 +255,24 @@ extern "C" int InitCorpus(const char *init_corpus_dir) {
         g.CreateWithScope(def);
 
         std::string fpath = base_dir + "/seed_" + to_string(i);
+        if (graphfuzz_debug) {
+          cerr << "\tattempting to write to " << fpath << endl;
+        }
 
         bool err = false;
         string out_str = g.Write(&err);
-        if (err) return -1;
+        if (err) {
+          if (graphfuzz_debug) {
+            cerr << "\tFailed to write graph - aborting" << endl;
+          }
+          return -1;
+        }
 
         ofstream out(fpath);
         out.write(out_str.data(), out_str.size());
+        if (graphfuzz_debug) {
+          cerr << "\twrote graph to " << fpath << endl;
+        }
     }
 
     cerr << "[*] Done" << endl;
@@ -426,12 +437,24 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
 
     vector<Node> nodes = g.GetOrderedNodes();
     void *ref[nodes.size()][MAX_CONN];
+    if (graphfuzz_debug) {
+        // catch uninitialized reads early on
+        memset(ref, 0xca, sizeof(ref));
+    }
 
     for (Node n : nodes) {
-        void *in_ref[n.in_ref_size()];
-        void *out_ref[n.out_ref_size()];
+        // allocate at least 1 even if we use only 0 to avoid zero-sized stack
+        // arrays that are UB.
+        void *in_ref[n.in_ref_size() + 1];
+        void *out_ref[n.out_ref_size() + 1];
         const char *context = n.context().data();
 
+        if (graphfuzz_debug) {
+            // fail more obviously if we have an uninit read for some reason
+            memset(in_ref, 0xca, sizeof(in_ref));
+            memset(out_ref, 0xca, sizeof(out_ref));
+        }
+
         // Load inputs.
         for (int i = 0; i < n.in_ref_size(); ++i) {
             in_ref[i] = ref[n.index()][i];
@@ -446,15 +469,20 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
         // Unset bail flag.
         will_bail = false;
 
+        if (graphfuzz_debug) {
+          cerr << "Invoking shim_" << n.type() << endl;
+        }
         // Invoke shim.
         void (*func)(void **, void **, const char *) = FUZZER_SHIMS[n.type()];
         func(in_ref, out_ref, context);
 
         if (will_bail) {
             // Target called graphfuzz_bail()
             if (graphfuzz_debug) {
-                cerr << "Bailing..." << endl;
+                cerr << "Bailing... (invalid? " << mark_invalid << ")" << endl;
             }
+
+            shim_finalize();
             return mark_invalid;
         }
 

diff --git a/install.sh b/install.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+
+set -e
+
+echo "[+] building core"
+rm -rf build
+mkdir build
+pushd build
+export CC=clang
+export CXX=clang++
+export CFLAGS="-flto=full -ggdb"
+export CXXFLAGS="-flto=full -ggdb"
+cmake -G Ninja ..
+ninja
+sudo ninja install
+popd
+
+echo "[+] building core with asan"
+rm -rf build.asan
+mkdir build.asan
+pushd build.asan
+export CC=clang
+export CXX=clang++
+export CFLAGS="-flto=full -ggdb -fsanitize=address,undefined"
+export CXXFLAGS="-flto=full -ggdb -fsanitize=address,undefined"
+cmake -G Ninja ..
+ninja
+popd
+
+echo "[+] building python tool"
+pushd cli
+poetry build
+poetry export > dist/requirements.txt
+pip install --user -r dist/requirements.txt
+pip install --user -e .