Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 67 additions & 10 deletions cli/gfuzz/commands/gen/cpp/gen_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json
import re
import yaml
import codecs

from gfuzz.commands.cliopt import CLIOpt
from gfuzz.schema import Schema
Expand Down Expand Up @@ -53,7 +54,7 @@
'''

CPP_SAVE_REF = '''\
out_ref[{out_ref_idx}] = reinterpret_cast<void *>({arg_name});
out_ref[{out_ref_idx}] = (void*)({arg_name});
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there was a reason I used reinterpret_cast here over C-style casts, but I can't remember the specifics. Can you explain the rationale for this change?

Copy link
Author

@f0rki f0rki Jan 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had trouble harnessing a C library unless I used this, because of casting a const pointer to a non-const pointer and vice versa (or something along those lines). I think a const to non-const cast is forbidden with reinterpret_cast.

'''

CPP_LOAD_SIMPLE = '''\
Expand Down Expand Up @@ -90,6 +91,30 @@
std::cout << "}};" << std::endl;
'''

CPP_PRINT_CHAR_ARRAY = '''\
std::cout << "// {var_type} {arg_name}[{count}] = \\"";
for (int i = 0; i < {count}; ++i) {{
char c = {arg_name}[i];
if (isprint(c) && c != '"' && c != '\\n') {{
std::cout << c;
}} else {{
char x[8] = {{0,}};
unsigned char _c = c;
snprintf(x, sizeof(x), "\\\\x%02x", _c);
std::cout << x;
}}
}}
std::cout << "\\";" << std::endl;
std::cout << " {var_type} {arg_name}[{count}] = {{";
for (int i = 0; i < {count}; ++i) {{
if (i % 16 == 0 && {count} > 16) std::cout << std::endl << " ";
std::cout << {print_cast}{arg_name}[i];
if (i < {count} - 1) std::cout << ", ";
}}
if ({count} > 16) std::cout << std::endl << " ";
std::cout << "}};" << std::endl;
'''

CPP_LOAD_ENUM = '''\
{var_type} {arg_name};
switch (*(reinterpret_cast<const unsigned char *>(context + {context_offset})) % {enum_size}) {{
Expand Down Expand Up @@ -174,17 +199,11 @@
#include <string.h>
#include <string>
#include <iostream>
#include <cstdio>

unsigned long CURR_ID = 0;

extern "C" void __attribute__((visibility ("default"))) global_init(int *argc, char ***argv) {{
char **new_argv = (char **)malloc((*argc + 2) * sizeof(char *));
memcpy(new_argv, *argv, sizeof(*new_argv) * *argc);
new_argv[*argc] = (char *)"-detect_leaks=0";
new_argv[*argc + 1] = 0;
(*argc)++;
*argv = new_argv;
}}
extern "C" void __attribute__((visibility ("default"))) global_init(int *argc, char ***argv) {{ }}

extern "C" void __attribute__((visibility ("default"))) shim_init() {{
CURR_ID = 0;
Expand All @@ -193,9 +212,12 @@
std::cout << "{macros}" << std::endl;

std::cout << "int main() {{" << std::endl;
std::cout << "{global_init}" << std::endl;
std::cout << "{initializer}" << std::endl;
}}

extern "C" void __attribute__((visibility ("default"))) shim_finalize() {{
std::cout << "{finalizer}" << std::endl;
std::cout << "}}" << std::endl;
}}
'''
Expand Down Expand Up @@ -851,7 +873,10 @@ def build_write_shim(self, shim_name) -> str:
context_offset=context_offset,
context_size=k['context_size']
))
inner_load_args.append(CPP_PRINT_SIMPLE_ARRAY.format(
fmtstring = CPP_PRINT_SIMPLE_ARRAY
if k['name'] == 'char':
fmtstring = CPP_PRINT_CHAR_ARRAY
inner_load_args.append(fmtstring.format(
var_type=k['name'],
count=self.arg_count[i],
arg_name=name,
Expand Down Expand Up @@ -1117,10 +1142,42 @@ def make_write_harness(schema: Schema, scopes: List[CPPScope]) -> str:
includes = ''.join(includes)
header_string = includes.replace('"', "\\\"").replace('\n', '\\n')

# Check for custom initializer/finalizer
global_init = ''
initializer = ''
finalizer = ''
for k in schema.objects:
if schema.objects[k]['type'] == 'config':
if 'global_init' in schema.objects[k]:
global_init = schema.objects[k]['global_init']
if 'initializer' in schema.objects[k]:
initializer = schema.objects[k]['initializer']
if 'finalizer' in schema.objects[k]:
finalizer = schema.objects[k]['finalizer']
# override the writer
for k in schema.objects:
if schema.objects[k]['type'] == 'config_writer':
global_init = ''
initializer = ''
finalizer = ''
if 'global_init' in schema.objects[k]:
global_init = schema.objects[k]['global_init']
if 'initializer' in schema.objects[k]:
initializer = schema.objects[k]['initializer']
if 'finalizer' in schema.objects[k]:
finalizer = schema.objects[k]['finalizer']

global_init = codecs.escape_encode(bytes(global_init, 'ascii'))[0].decode('ascii')
initializer = codecs.escape_encode(bytes(initializer, 'ascii'))[0].decode('ascii')
finalizer = codecs.escape_encode(bytes(finalizer, 'ascii'))[0].decode('ascii')

code = FULL_HARNESS.format(
includes='',
header=SHIM_HEADER_WRITE.format(
header_string=header_string,
global_init=global_init,
initializer=initializer,
finalizer=finalizer,
macros=SHIM_MACROS.replace('"', "\\\"").replace('\n', '\\n')
),
shim_code=shim_code,
Expand Down
35 changes: 32 additions & 3 deletions cli/gfuzz/schema.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,36 @@

from typing import Set
import yaml


# for input validation
SCHEMA_OBJECT_ALLOWED_KEYS = { 'headers', 'c_headers', 'name', 'type', 'static_methods', 'methods', 'context_size', 'values', 'print_cast', 'load', 'finalizer', 'initializer', 'global_init', 'load_arr' }
SCHEMA_METHOD_ALLOWED_KEYS = { 'inputs', 'outputs', 'args', 'exec' }


def _validate_obj(name: str, obj: dict, orig_path: str) -> dict:
if not 'type' in obj:
print(f'[!] Error {name} has no attribute "type"')
return None

for k in obj:
if k not in SCHEMA_OBJECT_ALLOWED_KEYS:
print(f'[!] Error {name} has invalid attribute {k!r}')
return None
for method_type in ('methods', 'static_methods'):
for method_specs in obj.get(method_type, []):
if isinstance(method_specs, dict):
for (method_name, d) in method_specs.items():
for k in d:
if k not in SCHEMA_METHOD_ALLOWED_KEYS:
print(f'[!] Error {name}.{method_type} -> {method_name} has invalid attribute {k!r}')
return None
elif isinstance(method_specs, str):
# ok like that
pass
else:
print(f'[!] Error {name}.{method_type} -> {method_specs!r} has invalid type {type(method_specs)} (expected str or dict)')
return None

obj['orig_path'] = orig_path
obj['headers'] = obj.get('headers') or []
obj['c_headers'] = obj.get('c_headers') or []
Expand All @@ -17,12 +40,18 @@ def _validate_obj(name: str, obj: dict, orig_path: str) -> dict:

obj['name'] = obj.get('name') or ''

if obj['type'] in ['struct', 'class', 'file']:
if obj['type'] in {'struct', 'class', 'file'}:
obj['methods'] = obj.get('methods') or []
obj['static_methods'] = obj.get('static_methods') or []
elif obj['type'] in {'config', 'enum', 'simple', 'ignore'}:
pass
else:
print(f'[!] Error {name} has invalid attribute "type": {obj["type"] !r}')
return None

return obj


class Schema(object):
"""A schema represents the API surface of a target."""

Expand Down Expand Up @@ -65,7 +94,7 @@ def load(path: str, loaded: Set[str] = None) -> 'Schema':
# Process include list.
if loaded is None:
loaded = set()

include = objects.get('include') or []
for sub_path in include:
if sub_path in loaded:
Expand Down
15 changes: 11 additions & 4 deletions core/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,11 +347,18 @@ class TGraph {
* Recursively sample and append nodes from a type tree.
*/
void AppendTree(unsigned int node_idx, unsigned int conn_idx, int layer, bool forward, TypeTree tree) {
int sample = ((unsigned int)rand()) % tree.num_subtrees;
unsigned int sample_idx = -1;
while (sample >= 0) {
// fail loud when assertions are enabled
assert(tree.num_subtrees > 0);
assert(tree.children.size() > 0);
// return early in case the assertions are disabled
if (tree.num_subtrees == 0 || tree.children.size() == 0) return;

unsigned int sample = ((unsigned int)rand()) % tree.num_subtrees;
unsigned int sample_idx = 0;
unsigned int subtree_sum = tree.children[sample_idx].num_subtrees;
while (subtree_sum < sample && sample_idx < tree.children.size()) {
sample_idx += 1;
sample -= tree.children[sample_idx].num_subtrees;
subtree_sum += tree.children[sample_idx].num_subtrees;
}

ScopeTree stree = tree.children[sample_idx];
Expand Down
36 changes: 32 additions & 4 deletions core/harness.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,13 +255,24 @@ extern "C" int InitCorpus(const char *init_corpus_dir) {
g.CreateWithScope(def);

std::string fpath = base_dir + "/seed_" + to_string(i);
if (graphfuzz_debug) {
cerr << "\tattempting to write to " << fpath << endl;
}

bool err = false;
string out_str = g.Write(&err);
if (err) return -1;
if (err) {
if (graphfuzz_debug) {
cerr << "\tFailed to write graph - aborting" << endl;
}
return -1;
}

ofstream out(fpath);
out.write(out_str.data(), out_str.size());
if (graphfuzz_debug) {
cerr << "\twrote graph to " << fpath << endl;
}
}

cerr << "[*] Done" << endl;
Expand Down Expand Up @@ -426,12 +437,24 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {

vector<Node> nodes = g.GetOrderedNodes();
void *ref[nodes.size()][MAX_CONN];
if (graphfuzz_debug) {
// catch uninitialized reads early on
memset(ref, 0xca, sizeof(ref));
}

for (Node n : nodes) {
void *in_ref[n.in_ref_size()];
void *out_ref[n.out_ref_size()];
// allocate at least 1 even if we use only 0 to avoid zero-sized stack
// arrays that are UB.
void *in_ref[n.in_ref_size() + 1];
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you explain this part?

Copy link
Author

@f0rki f0rki Jan 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

UBSan flags a zero-sized VLA on the stack as UB, so I just made sure the array always has at least one element, even if it is not used.

void *out_ref[n.out_ref_size() + 1];
const char *context = n.context().data();

if (graphfuzz_debug) {
// fail more obviously if we have an uninit read for some reason
memset(in_ref, 0xca, sizeof(in_ref));
memset(out_ref, 0xca, sizeof(out_ref));
}

// Load inputs.
for (int i = 0; i < n.in_ref_size(); ++i) {
in_ref[i] = ref[n.index()][i];
Expand All @@ -446,15 +469,20 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
// Unset bail flag.
will_bail = false;

if (graphfuzz_debug) {
cerr << "Invoking shim_" << n.type() << endl;
}
// Invoke shim.
void (*func)(void **, void **, const char *) = FUZZER_SHIMS[n.type()];
func(in_ref, out_ref, context);

if (will_bail) {
// Target called graphfuzz_bail()
if (graphfuzz_debug) {
cerr << "Bailing..." << endl;
cerr << "Bailing... (invalid? " << mark_invalid << ")" << endl;
}

shim_finalize();
return mark_invalid;
}

Expand Down
35 changes: 35 additions & 0 deletions install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env bash
#
# Build and install the project in three stages:
#   1. a regular build of the core in ./build, installed via `sudo ninja install`
#   2. a sanitizer-instrumented build of the core in ./build.asan (built only,
#      never installed)
#   3. the Python tool under ./cli, installed into the user site-packages
#
# The script uses relative paths (build, build.asan, cli), so it has to be
# started from the directory that contains them.

# Abort as soon as any command fails.
set -e

echo "[+] building core"
# Always start from an empty build directory.
rm -rf build
mkdir build
pushd build
# The compiler selection and flags are exported so that cmake picks them up
# from the environment when it configures the build.
export CC=clang
export CXX=clang++
export CFLAGS="-flto=full -ggdb"
export CXXFLAGS="-flto=full -ggdb"
cmake -G Ninja ..
ninja
# This is the only step in the script that invokes sudo.
sudo ninja install
popd

echo "[+] building core with asan"
# Second, separate build tree so the regular build above is left untouched.
rm -rf build.asan
mkdir build.asan
pushd build.asan
export CC=clang
export CXX=clang++
# Same flags as the regular build plus -fsanitize=address,undefined.
export CFLAGS="-flto=full -ggdb -fsanitize=address,undefined"
export CXXFLAGS="-flto=full -ggdb -fsanitize=address,undefined"
cmake -G Ninja ..
ninja
# No install step here: the sanitizer build stays inside build.asan.
popd

echo "[+] building python tool"
# NOTE: there is no matching popd for this pushd; the script simply ends while
# still inside cli/.
pushd cli
poetry build
# The export is redirected into dist/, so that directory must already exist
# (presumably created by `poetry build` above — TODO confirm).
poetry export > dist/requirements.txt
pip install --user -r dist/requirements.txt
# -e installs the cli package itself in editable mode from the current directory.
pip install --user -e .