summaryrefslogtreecommitdiff
path: root/tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp')
-rw-r--r--tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp143
1 files changed, 143 insertions, 0 deletions
diff --git a/tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp b/tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp
new file mode 100644
index 0000000000000..643afe64073e6
--- /dev/null
+++ b/tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp
@@ -0,0 +1,143 @@
+//===--- llvm-mc-disassemble-fuzzer.cpp - Fuzzer for the MC disassembler --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
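+//
+// This file implements a libFuzzer fuzz target that feeds raw input bytes to
+// a target's disassembler through the llvm-c disassembler API.
+//
+//===----------------------------------------------------------------------===//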
+
+#include "FuzzerInterface.h"
+#include "llvm-c/Disassembler.h"
+#include "llvm-c/Target.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// Size, in bytes, of the scratch buffer that receives the textual form of
+// each disassembled instruction. The text is never read back.
+const unsigned AssemblyTextBufSize = 80;
+
+// -triple: target whose disassembler is fuzzed. When left empty,
+// LLVMFuzzerInitialize() replaces it with the default target triple.
+static cl::opt<std::string>
+    TripleName("triple", cl::desc("Target triple to disassemble for, "
+                                  "see -version for available targets"));
+
+// -mcpu: CPU name forwarded to the disassembler context; empty by default.
+static cl::opt<std::string>
+    MCPU("mcpu",
+         cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+         cl::value_desc("cpu-name"), cl::init(""));
+
+// -insn-limit: requested cap on the number of instructions processed per
+// fuzz input (0 disables the cap).
+// This is useful for variable-length instruction sets.
+static cl::opt<unsigned> InsnLimit(
+    "insn-limit",
+    cl::desc("Limit the number of instructions to process (0 for no limit)"),
+    cl::value_desc("count"), cl::init(0));
+
+// -mattr: comma-separated list of subtarget features.
+static cl::list<std::string>
+    MAttrs("mattr", cl::CommaSeparated,
+           cl::desc("Target specific attributes (-mattr=help for details)"),
+           cl::value_desc("a1,+a2,-a3,..."));
+// The feature string derived from -mattr's values. It is filled in by
+// LLVMFuzzerInitialize() and read by DisassembleOneInput().
+std::string FeaturesStr;
+
+// -fuzzer-args: positional option that collects the arguments meant for the
+// fuzzer driver rather than for this tool.
+static cl::list<std::string>
+    FuzzerArgs("fuzzer-args", cl::Positional,
+               cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore,
+               cl::PositionalEatsArgs);
+// Backing storage for the argv array handed back to the fuzzer driver by
+// LLVMFuzzerInitialize(); it has static storage so the array stays valid
+// after that function returns.
+static std::vector<char *> ModifiedArgv;
+
+// Disassembles the bytes in [Data, Data + Size) one instruction at a time,
+// using the target selected by -triple / -mcpu / -mattr. The decoded text is
+// written to a scratch buffer and ignored; the only purpose is to exercise
+// the disassembler.
+//
+// Processing stops when the disassembler consumes no bytes, or once
+// -insn-limit instructions have been processed (when that limit is non-zero).
+// Always returns 0.
+int DisassembleOneInput(const uint8_t *Data, size_t Size) {
+  char AssemblyText[AssemblyTextBufSize];
+
+  // LLVMDisasmInstruction() takes a non-const byte pointer, so operate on a
+  // private copy of the fuzzer-provided input.
+  std::vector<uint8_t> DataCopy(Data, Data + Size);
+
+  LLVMDisasmContextRef Ctx = LLVMCreateDisasmCPUFeatures(
+      TripleName.c_str(), MCPU.c_str(), FeaturesStr.c_str(), nullptr, 0,
+      nullptr, nullptr);
+  assert(Ctx);
+  uint8_t *p = DataCopy.data();
+  size_t Consumed;
+  unsigned InstructionsProcessed = 0;
+  do {
+    Consumed = LLVMDisasmInstruction(Ctx, p, Size, 0, AssemblyText,
+                                     AssemblyTextBufSize);
+    // Advance past the bytes that were just decoded.
+    Size -= Consumed;
+    p += Consumed;
+
+    ++InstructionsProcessed;
+    // Stop once the requested number of instructions has been reached. The
+    // comparison must be >=: with < the loop would bail out after the first
+    // instruction for any limit above 1 and never stop for a limit of 1.
+    if (InsnLimit != 0 && InstructionsProcessed >= InsnLimit)
+      break;
+  } while (Consumed != 0);
+  LLVMDisasmDispose(Ctx);
+  return 0;
+}
+
+// Per-input fuzzing callback: hands the raw input bytes to
+// DisassembleOneInput() and returns its result unchanged.
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  const int Status = DisassembleOneInput(Data, Size);
+  return Status;
+}
+
+// One-time setup hook for the fuzzer. Registers the LLVM targets, parses this
+// tool's own command-line options, and rewrites *argc / *argv so that the
+// fuzzer driver only sees the program name followed by the arguments that
+// were collected by -fuzzer-args. Also derives the subtarget feature string
+// from -mattr and defaults -triple when it was not given. Always returns 0.
+int LLVMFuzzerInitialize(int *argc, char ***argv) {
+  // The command line is unusual compared to other fuzzers due to the need to
+  // specify the target. Options like -triple, -mcpu, and -mattr work like
+  // their counterparts in llvm-mc, while -fuzzer-args collects options for the
+  // fuzzer itself.
+  //
+  // Examples:
+  //
+  // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
+  // 4-bytes each and use the contents of ./corpus as the test corpus:
+  //   llvm-mc-disassemble-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 \
+  //       -fuzzer-args -max_len=4 -runs=100000 ./corpus
+  //
+  // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
+  // feature enabled using up to 64-byte inputs:
+  //   llvm-mc-disassemble-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 \
+  //       -mattr=msa -fuzzer-args -max_len=64 ./corpus
+  //
+  // If your aim is to find instructions that are not tested, then it is
+  // advisable to constrain the maximum input size to a single instruction
+  // using -max_len as in the first example. This results in a test corpus of
+  // individual instructions that test unique paths. Without this constraint,
+  // there will be considerable redundancy in the corpus.
+
+  char **OriginalArgv = *argv;
+
+  LLVMInitializeAllTargetInfos();
+  LLVMInitializeAllTargetMCs();
+  LLVMInitializeAllDisassemblers();
+
+  cl::ParseCommandLineOptions(*argc, OriginalArgv);
+
+  // Rebuild the argv without the arguments this tool consumed so that the
+  // driver can parse its arguments.
+  //
+  // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
+  // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
+  // non-const buffer to avoid the need to clean up when the fuzzer terminates.
+  ModifiedArgv.push_back(OriginalArgv[0]);
+  for (const auto &FuzzerArg : FuzzerArgs) {
+    for (int i = 1; i < *argc; ++i) {
+      if (FuzzerArg == OriginalArgv[i]) {
+        ModifiedArgv.push_back(OriginalArgv[i]);
+        // Forward each collected argument exactly once. Any argv entry with
+        // the same text is an equally good source for the pointer, so the
+        // first match is enough; continuing would duplicate the argument
+        // whenever the same string appears more than once on the command line.
+        break;
+      }
+    }
+  }
+  *argc = static_cast<int>(ModifiedArgv.size());
+  // Keep the conventional argv[argc] == nullptr terminator for any consumer
+  // that walks the array until it finds a null pointer. The terminator is not
+  // counted in *argc.
+  ModifiedArgv.push_back(nullptr);
+  *argv = ModifiedArgv.data();
+
+  // Package up features to be passed to target/subtarget
+  // We have to pass it via a global since the callback doesn't
+  // permit any user data.
+  if (MAttrs.size()) {
+    SubtargetFeatures Features;
+    for (const std::string &Attr : MAttrs)
+      Features.AddFeature(Attr);
+    FeaturesStr = Features.getString();
+  }
+
+  // Fall back to the default target triple when -triple was not given.
+  if (TripleName.empty())
+    TripleName = sys::getDefaultTargetTriple();
+
+  return 0;
+}