aboutsummaryrefslogtreecommitdiff
path: root/lib/fuzzer/scripts/collect_data_flow.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/fuzzer/scripts/collect_data_flow.py')
-rwxr-xr-xlib/fuzzer/scripts/collect_data_flow.py79
1 files changed, 79 insertions, 0 deletions
diff --git a/lib/fuzzer/scripts/collect_data_flow.py b/lib/fuzzer/scripts/collect_data_flow.py
new file mode 100755
index 000000000000..3edff66bb9d1
--- /dev/null
+++ b/lib/fuzzer/scripts/collect_data_flow.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+#===- lib/fuzzer/scripts/collect_data_flow.py ------------------------------===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+# Runs the data-flow tracer several times on the same input in order to collect
+# the complete trace for all input bytes (running it on all bytes at once
+# may fail if DFSan runs out of labels).
+# Usage:
+#
+# # Collect dataflow for one input, store it in OUTPUT (default is stdout)
+# collect_data_flow.py BINARY INPUT [OUTPUT]
+#
+# # Collect dataflow for all inputs in CORPUS_DIR, store them in OUTPUT_DIR
+# collect_data_flow.py BINARY CORPUS_DIR OUTPUT_DIR
+#===------------------------------------------------------------------------===#
+import atexit
+import hashlib
+import sys
+import os
+import subprocess
+import tempfile
+import shutil
+
+tmpdir = ""
+
+def cleanup(d):
+ print("removing: %s" % d)
+ shutil.rmtree(d)
+
+def collect_dataflow_for_corpus(self, exe, corpus_dir, output_dir):
+ print("Collecting dataflow for corpus: %s output_dir: %s" % (corpus_dir,
+ output_dir))
+ assert not os.path.exists(output_dir)
+ os.mkdir(output_dir)
+ for root, dirs, files in os.walk(corpus_dir):
+ for f in files:
+ path = os.path.join(root, f)
+ sha1 = hashlib.sha1(open(path).read()).hexdigest()
+ output = os.path.join(output_dir, sha1)
+ subprocess.call([self, exe, path, output])
+ functions_txt = open(os.path.join(output_dir, "functions.txt"), "w")
+ subprocess.call([exe], stdout=functions_txt)
+
+
+def main(argv):
+ exe = argv[1]
+ inp = argv[2]
+ if os.path.isdir(inp):
+ return collect_dataflow_for_corpus(argv[0], exe, inp, argv[3])
+ size = os.path.getsize(inp)
+ q = [[0, size]]
+ tmpdir = tempfile.mkdtemp(prefix="libfuzzer-tmp-")
+ atexit.register(cleanup, tmpdir)
+ print "tmpdir: ", tmpdir
+ outputs = []
+ while len(q):
+ r = q.pop()
+ print "******* Trying: ", r
+ tmpfile = os.path.join(tmpdir, str(r[0]) + "-" + str(r[1]))
+ ret = subprocess.call([exe, str(r[0]), str(r[1]), inp, tmpfile])
+ if ret and r[1] - r[0] >= 2:
+ q.append([r[0], (r[1] + r[0]) / 2])
+ q.append([(r[1] + r[0]) / 2, r[1]])
+ else:
+ outputs.append(tmpfile)
+ print "******* Success: ", r
+ f = sys.stdout
+ if len(argv) >= 4:
+ f = open(argv[3], "w")
+ merge = os.path.join(os.path.dirname(argv[0]), "merge_data_flow.py")
+ subprocess.call([merge] + outputs, stdout=f)
+
+if __name__ == '__main__':
+ main(sys.argv)