author     Dimitry Andric <dim@FreeBSD.org>    2018-07-28 11:06:48 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2018-07-28 11:06:48 +0000
commit     93c1b73a09a52d4a265f683bf1954b08bb430049 (patch)
tree       5543464d74945196cc890e9d9099e5d0660df7eb /lib/fuzzer/scripts/collect_data_flow.py
parent     0d8e7490d6e8a13a8f0977d9b7771803b9f64ea0 (diff)
Diffstat (limited to 'lib/fuzzer/scripts/collect_data_flow.py')
-rwxr-xr-x | lib/fuzzer/scripts/collect_data_flow.py | 79
1 file changed, 79 insertions, 0 deletions
diff --git a/lib/fuzzer/scripts/collect_data_flow.py b/lib/fuzzer/scripts/collect_data_flow.py
new file mode 100755
index 000000000000..3edff66bb9d1
--- /dev/null
+++ b/lib/fuzzer/scripts/collect_data_flow.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+#===- lib/fuzzer/scripts/collect_data_flow.py ------------------------------===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+# Runs the data-flow tracer several times on the same input in order to collect
+# the complete trace for all input bytes (running it on all bytes at once
+# may fail if DFSan runs out of labels).
+# Usage:
+#
+#   # Collect dataflow for one input, store it in OUTPUT (default is stdout)
+#   collect_data_flow.py BINARY INPUT [OUTPUT]
+#
+#   # Collect dataflow for all inputs in CORPUS_DIR, store them in OUTPUT_DIR
+#   collect_data_flow.py BINARY CORPUS_DIR OUTPUT_DIR
+#===------------------------------------------------------------------------===#
+import atexit
+import hashlib
+import sys
+import os
+import subprocess
+import tempfile
+import shutil
+
+tmpdir = ""
+
+def cleanup(d):
+  print("removing: %s" % d)
+  shutil.rmtree(d)
+
+def collect_dataflow_for_corpus(self, exe, corpus_dir, output_dir):
+  print("Collecting dataflow for corpus: %s output_dir: %s" % (corpus_dir,
+        output_dir))
+  assert not os.path.exists(output_dir)
+  os.mkdir(output_dir)
+  for root, dirs, files in os.walk(corpus_dir):
+    for f in files:
+      path = os.path.join(root, f)
+      sha1 = hashlib.sha1(open(path).read()).hexdigest()
+      output = os.path.join(output_dir, sha1)
+      subprocess.call([self, exe, path, output])
+  functions_txt = open(os.path.join(output_dir, "functions.txt"), "w")
+  subprocess.call([exe], stdout=functions_txt)
+
+
+def main(argv):
+  exe = argv[1]
+  inp = argv[2]
+  if os.path.isdir(inp):
+    return collect_dataflow_for_corpus(argv[0], exe, inp, argv[3])
+  size = os.path.getsize(inp)
+  q = [[0, size]]
+  tmpdir = tempfile.mkdtemp(prefix="libfuzzer-tmp-")
+  atexit.register(cleanup, tmpdir)
+  print "tmpdir: ", tmpdir
+  outputs = []
+  while len(q):
+    r = q.pop()
+    print "******* Trying: ", r
+    tmpfile = os.path.join(tmpdir, str(r[0]) + "-" + str(r[1]))
+    ret = subprocess.call([exe, str(r[0]), str(r[1]), inp, tmpfile])
+    if ret and r[1] - r[0] >= 2:
+      q.append([r[0], (r[1] + r[0]) / 2])
+      q.append([(r[1] + r[0]) / 2, r[1]])
+    else:
+      outputs.append(tmpfile)
+      print "******* Success: ", r
+  f = sys.stdout
+  if len(argv) >= 4:
+    f = open(argv[3], "w")
+  merge = os.path.join(os.path.dirname(argv[0]), "merge_data_flow.py")
+  subprocess.call([merge] + outputs, stdout=f)
+
+if __name__ == '__main__':
+  main(sys.argv)
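
The heart of the added script is the range-bisection loop in `main`: it tries to trace data flow for the whole byte range `[0, size)`, and whenever the tracer invocation fails (for example because DFSan runs out of labels) and the range is still at least two bytes wide, it splits the range in half and retries each half; ranges that succeed, or that cannot be split further, are kept and later merged with `merge_data_flow.py`. The following is a minimal Python 3 sketch of just that strategy, not the committed script itself; `run_tracer` is a hypothetical stand-in for the `subprocess.call([exe, begin, end, inp, tmpfile])` invocation.

```python
# Sketch (assumption-laden) of the range-bisection used by collect_data_flow.py.
# `run_tracer(begin, end)` is a hypothetical callable returning True on success;
# in the real script this is a subprocess call to the DFSan-instrumented binary.
from typing import Callable, List, Tuple

def collect_ranges(size: int,
                   run_tracer: Callable[[int, int], bool]) -> List[Tuple[int, int]]:
    """Return the byte ranges that end up contributing trace output."""
    queue = [(0, size)]              # start with the whole input
    done = []
    while queue:
        begin, end = queue.pop()
        if run_tracer(begin, end) or end - begin < 2:
            # Success, or a range too small to split: keep it (as the
            # committed script does in its `else` branch).
            done.append((begin, end))
        else:
            mid = (begin + end) // 2  # tracer failed: bisect and retry both halves
            queue.append((begin, mid))
            queue.append((mid, end))
    return done

if __name__ == "__main__":
    # Toy run: pretend the tracer can only handle ranges of up to 3 bytes.
    print(collect_ranges(10, lambda b, e: e - b <= 3))
```

In the committed script the per-range results are written to temporary files under a `libfuzzer-tmp-` directory and finally piped through `merge_data_flow.py` into the requested output (stdout by default).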