summaryrefslogtreecommitdiff
path: root/lib/Fuzzer/FuzzerTraceState.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Fuzzer/FuzzerTraceState.cpp')
-rw-r--r--lib/Fuzzer/FuzzerTraceState.cpp210
1 files changed, 135 insertions, 75 deletions
diff --git a/lib/Fuzzer/FuzzerTraceState.cpp b/lib/Fuzzer/FuzzerTraceState.cpp
index b2006fa3aa4d0..d6e1f79791f5e 100644
--- a/lib/Fuzzer/FuzzerTraceState.cpp
+++ b/lib/Fuzzer/FuzzerTraceState.cpp
@@ -41,8 +41,8 @@
// __dfsw_HOOK(a, b, label(a), label(b)) so that __dfsw_HOOK
// gets all the taint labels for the arguments.
// * At the Fuzzer startup we assign a unique DFSan label
-// to every byte of the input string (Fuzzer::CurrentUnit) so that for any
-// chunk of data we know which input bytes it has derived from.
+// to every byte of the input string (Fuzzer::CurrentUnitData) so that
+// for any chunk of data we know which input bytes it has derived from.
// * The __dfsw_* functions (implemented in this file) record the
// parameters (i.e. the application data and the corresponding taint labels)
// in a global state.
@@ -77,6 +77,7 @@
#include <cstring>
#include <thread>
#include <map>
+#include <set>
#if !LLVM_FUZZER_SUPPORTS_DFSAN
// Stubs for dfsan for platforms where dfsan does not exist and weak
@@ -164,24 +165,20 @@ struct LabelRange {
// For now, very simple: put Size bytes of Data at position Pos.
struct TraceBasedMutation {
- static const size_t kMaxSize = 28;
- uint32_t Pos : 24;
- uint32_t Size : 8;
- uint8_t Data[kMaxSize];
+ uint32_t Pos;
+ Word W;
};
-const size_t TraceBasedMutation::kMaxSize;
+// Declared as static globals for faster checks inside the hooks.
+static bool RecordingTraces = false;
+static bool RecordingMemcmp = false;
+static bool RecordingMemmem = false;
class TraceState {
- public:
- TraceState(UserSuppliedFuzzer &USF,
- const Fuzzer::FuzzingOptions &Options, const Unit &CurrentUnit)
- : USF(USF), Options(Options), CurrentUnit(CurrentUnit) {
- // Current trace collection is not thread-friendly and it probably
- // does not have to be such, but at least we should not crash in presence
- // of threads. So, just ignore all traces coming from all threads but one.
- IsMyThread = true;
- }
+public:
+ TraceState(MutationDispatcher &MD, const FuzzingOptions &Options,
+ const Fuzzer *F)
+ : MD(MD), Options(Options), F(F) {}
LabelRange GetLabelRange(dfsan_label L);
void DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
@@ -200,28 +197,33 @@ class TraceState {
void TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits, uint64_t Val,
size_t NumCases, uint64_t *Cases);
int TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
- size_t DataSize);
+ size_t DataSize);
int TryToAddDesiredData(const uint8_t *PresentData,
const uint8_t *DesiredData, size_t DataSize);
void StartTraceRecording() {
- if (!Options.UseTraces) return;
- RecordingTraces = true;
+ if (!Options.UseTraces && !Options.UseMemcmp)
+ return;
+ RecordingTraces = Options.UseTraces;
+ RecordingMemcmp = Options.UseMemcmp;
+ RecordingMemmem = Options.UseMemmem;
NumMutations = 0;
- USF.GetMD().ClearAutoDictionary();
+ InterestingWords.clear();
+ MD.ClearAutoDictionary();
}
void StopTraceRecording() {
- if (!RecordingTraces) return;
+ if (!RecordingTraces && !RecordingMemcmp)
+ return;
RecordingTraces = false;
+ RecordingMemcmp = false;
for (size_t i = 0; i < NumMutations; i++) {
auto &M = Mutations[i];
- Unit U(M.Data, M.Data + M.Size);
if (Options.Verbosity >= 2) {
- AutoDictUnitCounts[U]++;
+ AutoDictUnitCounts[M.W]++;
AutoDictAdds++;
if ((AutoDictAdds & (AutoDictAdds - 1)) == 0) {
- typedef std::pair<size_t, Unit> CU;
+ typedef std::pair<size_t, Word> CU;
std::vector<CU> CountedUnits;
for (auto &I : AutoDictUnitCounts)
CountedUnits.push_back(std::make_pair(I.second, I.first));
@@ -235,17 +237,17 @@ class TraceState {
}
}
}
- USF.GetMD().AddWordToAutoDictionary(U, M.Pos);
+ MD.AddWordToAutoDictionary({M.W, M.Pos});
}
+ for (auto &W : InterestingWords)
+ MD.AddWordToAutoDictionary({W});
}
void AddMutation(uint32_t Pos, uint32_t Size, const uint8_t *Data) {
if (NumMutations >= kMaxMutations) return;
- assert(Size <= TraceBasedMutation::kMaxSize);
auto &M = Mutations[NumMutations++];
M.Pos = Pos;
- M.Size = Size;
- memcpy(M.Data, Data, Size);
+ M.W.Set(Data, Size);
}
void AddMutation(uint32_t Pos, uint32_t Size, uint64_t Data) {
@@ -253,26 +255,61 @@ class TraceState {
AddMutation(Pos, Size, reinterpret_cast<uint8_t*>(&Data));
}
+ void AddInterestingWord(const uint8_t *Data, size_t Size) {
+ if (!RecordingMemmem || !F->InFuzzingThread()) return;
+ if (Size <= 1) return;
+ Size = std::min(Size, Word::GetMaxSize());
+ Word W(Data, Size);
+ InterestingWords.insert(W);
+ }
+
+ void EnsureDfsanLabels(size_t Size) {
+ for (; LastDfsanLabel < Size; LastDfsanLabel++) {
+ dfsan_label L = dfsan_create_label("input", (void *)(LastDfsanLabel + 1));
+ // We assume that no one else has called dfsan_create_label before.
+ if (L != LastDfsanLabel + 1) {
+ Printf("DFSan labels are not starting from 1, exiting\n");
+ exit(1);
+ }
+ }
+ }
+
private:
bool IsTwoByteData(uint64_t Data) {
int64_t Signed = static_cast<int64_t>(Data);
Signed >>= 16;
return Signed == 0 || Signed == -1L;
}
- bool RecordingTraces = false;
+
+ // We don't want to create too many trace-based mutations as it is both
+ // expensive and useless. So after some number of mutations is collected,
+ // start rejecting some of them. The more there are mutations the more we
+ // reject.
+ bool WantToHandleOneMoreMutation() {
+ const size_t FirstN = 64;
+ // Gladly handle first N mutations.
+ if (NumMutations <= FirstN) return true;
+ size_t Diff = NumMutations - FirstN;
+ size_t DiffLog = sizeof(long) * 8 - __builtin_clzl((long)Diff);
+ assert(DiffLog > 0 && DiffLog < 64);
+ bool WantThisOne = MD.GetRand()(1 << DiffLog) == 0; // 1 out of DiffLog.
+ return WantThisOne;
+ }
+
static const size_t kMaxMutations = 1 << 16;
size_t NumMutations;
TraceBasedMutation Mutations[kMaxMutations];
+ // TODO: std::set is too inefficient, need to have a custom DS here.
+ std::set<Word> InterestingWords;
LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)];
- UserSuppliedFuzzer &USF;
- const Fuzzer::FuzzingOptions &Options;
- const Unit &CurrentUnit;
- std::map<Unit, size_t> AutoDictUnitCounts;
+ size_t LastDfsanLabel = 0;
+ MutationDispatcher &MD;
+ const FuzzingOptions Options;
+ const Fuzzer *F;
+ std::map<Word, size_t> AutoDictUnitCounts;
size_t AutoDictAdds = 0;
- static thread_local bool IsMyThread;
};
-thread_local bool TraceState::IsMyThread;
LabelRange TraceState::GetLabelRange(dfsan_label L) {
LabelRange &LR = LabelRanges[L];
@@ -288,7 +325,7 @@ void TraceState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
uint64_t Arg1, uint64_t Arg2, dfsan_label L1,
dfsan_label L2) {
assert(ReallyHaveDFSan());
- if (!RecordingTraces || !IsMyThread) return;
+ if (!RecordingTraces || !F->InFuzzingThread()) return;
if (L1 == 0 && L2 == 0)
return; // Not actionable.
if (L1 != 0 && L2 != 0)
@@ -303,7 +340,7 @@ void TraceState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
AddMutation(Pos, CmpSize, Data - 1);
}
- if (CmpSize > LR.End - LR.Beg)
+ if (CmpSize > (size_t)(LR.End - LR.Beg))
AddMutation(LR.Beg, (unsigned)(LR.End - LR.Beg), Data);
@@ -318,7 +355,7 @@ void TraceState::DFSanMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
dfsan_label L2) {
assert(ReallyHaveDFSan());
- if (!RecordingTraces || !IsMyThread) return;
+ if (!RecordingMemcmp || !F->InFuzzingThread()) return;
if (L1 == 0 && L2 == 0)
return; // Not actionable.
if (L1 != 0 && L2 != 0)
@@ -337,7 +374,7 @@ void TraceState::DFSanSwitchCallback(uint64_t PC, size_t ValSizeInBits,
uint64_t Val, size_t NumCases,
uint64_t *Cases, dfsan_label L) {
assert(ReallyHaveDFSan());
- if (!RecordingTraces || !IsMyThread) return;
+ if (!RecordingTraces || !F->InFuzzingThread()) return;
if (!L) return; // Not actionable.
LabelRange LR = GetLabelRange(L);
size_t ValSize = ValSizeInBits / 8;
@@ -362,15 +399,18 @@ void TraceState::DFSanSwitchCallback(uint64_t PC, size_t ValSizeInBits,
int TraceState::TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
size_t DataSize) {
+ if (NumMutations >= kMaxMutations || !WantToHandleOneMoreMutation()) return 0;
+ const uint8_t *UnitData;
+ auto UnitSize = F->GetCurrentUnitInFuzzingThead(&UnitData);
int Res = 0;
- const uint8_t *Beg = CurrentUnit.data();
- const uint8_t *End = Beg + CurrentUnit.size();
+ const uint8_t *Beg = UnitData;
+ const uint8_t *End = Beg + UnitSize;
for (const uint8_t *Cur = Beg; Cur < End; Cur++) {
Cur = (uint8_t *)memmem(Cur, End - Cur, &PresentData, DataSize);
if (!Cur)
break;
size_t Pos = Cur - Beg;
- assert(Pos < CurrentUnit.size());
+ assert(Pos < UnitSize);
AddMutation(Pos, DataSize, DesiredData);
AddMutation(Pos, DataSize, DesiredData + 1);
AddMutation(Pos, DataSize, DesiredData - 1);
@@ -382,15 +422,18 @@ int TraceState::TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
int TraceState::TryToAddDesiredData(const uint8_t *PresentData,
const uint8_t *DesiredData,
size_t DataSize) {
+ if (NumMutations >= kMaxMutations || !WantToHandleOneMoreMutation()) return 0;
+ const uint8_t *UnitData;
+ auto UnitSize = F->GetCurrentUnitInFuzzingThead(&UnitData);
int Res = 0;
- const uint8_t *Beg = CurrentUnit.data();
- const uint8_t *End = Beg + CurrentUnit.size();
+ const uint8_t *Beg = UnitData;
+ const uint8_t *End = Beg + UnitSize;
for (const uint8_t *Cur = Beg; Cur < End; Cur++) {
Cur = (uint8_t *)memmem(Cur, End - Cur, PresentData, DataSize);
if (!Cur)
break;
size_t Pos = Cur - Beg;
- assert(Pos < CurrentUnit.size());
+ assert(Pos < UnitSize);
AddMutation(Pos, DataSize, DesiredData);
Res++;
}
@@ -399,7 +442,7 @@ int TraceState::TryToAddDesiredData(const uint8_t *PresentData,
void TraceState::TraceCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
uint64_t Arg1, uint64_t Arg2) {
- if (!RecordingTraces || !IsMyThread) return;
+ if (!RecordingTraces || !F->InFuzzingThread()) return;
if ((CmpType == ICMP_EQ || CmpType == ICMP_NE) && Arg1 == Arg2)
return; // No reason to mutate.
int Added = 0;
@@ -415,8 +458,8 @@ void TraceState::TraceCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
void TraceState::TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
const uint8_t *Data2) {
- if (!RecordingTraces || !IsMyThread) return;
- CmpSize = std::min(CmpSize, TraceBasedMutation::kMaxSize);
+ if (!RecordingMemcmp || !F->InFuzzingThread()) return;
+ CmpSize = std::min(CmpSize, Word::GetMaxSize());
int Added2 = TryToAddDesiredData(Data1, Data2, CmpSize);
int Added1 = TryToAddDesiredData(Data2, Data1, CmpSize);
if ((Added1 || Added2) && Options.Verbosity >= 3) {
@@ -430,7 +473,7 @@ void TraceState::TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
void TraceState::TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits,
uint64_t Val, size_t NumCases,
uint64_t *Cases) {
- if (!RecordingTraces || !IsMyThread) return;
+ if (!RecordingTraces || !F->InFuzzingThread()) return;
size_t ValSize = ValSizeInBits / 8;
bool TryShort = IsTwoByteData(Val);
for (size_t i = 0; i < NumCases; i++)
@@ -451,9 +494,6 @@ static TraceState *TS;
void Fuzzer::StartTraceRecording() {
if (!TS) return;
- if (ReallyHaveDFSan())
- for (size_t i = 0; i < static_cast<size_t>(Options.MaxLen); i++)
- dfsan_set_label(i + 1, &CurrentUnit[i], 1);
TS->StartTraceRecording();
}
@@ -462,20 +502,17 @@ void Fuzzer::StopTraceRecording() {
TS->StopTraceRecording();
}
-void Fuzzer::InitializeTraceState() {
- if (!Options.UseTraces) return;
- TS = new TraceState(USF, Options, CurrentUnit);
- CurrentUnit.resize(Options.MaxLen);
- // The rest really requires DFSan.
+void Fuzzer::AssignTaintLabels(uint8_t *Data, size_t Size) {
+ if (!Options.UseTraces && !Options.UseMemcmp) return;
if (!ReallyHaveDFSan()) return;
- for (size_t i = 0; i < static_cast<size_t>(Options.MaxLen); i++) {
- dfsan_label L = dfsan_create_label("input", (void*)(i + 1));
- // We assume that no one else has called dfsan_create_label before.
- if (L != i + 1) {
- Printf("DFSan labels are not starting from 1, exiting\n");
- exit(1);
- }
- }
+ TS->EnsureDfsanLabels(Size);
+ for (size_t i = 0; i < Size; i++)
+ dfsan_set_label(i + 1, &Data[i], 1);
+}
+
+void Fuzzer::InitializeTraceState() {
+ if (!Options.UseTraces && !Options.UseMemcmp) return;
+ TS = new TraceState(MD, Options, this);
}
static size_t InternalStrnlen(const char *S, size_t MaxLen) {
@@ -487,12 +524,14 @@ static size_t InternalStrnlen(const char *S, size_t MaxLen) {
} // namespace fuzzer
using fuzzer::TS;
+using fuzzer::RecordingTraces;
+using fuzzer::RecordingMemcmp;
extern "C" {
void __dfsw___sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1,
uint64_t Arg2, dfsan_label L0,
dfsan_label L1, dfsan_label L2) {
- if (!TS) return;
+ if (!RecordingTraces) return;
assert(L0 == 0);
uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
uint64_t CmpSize = (SizeAndType >> 32) / 8;
@@ -502,7 +541,7 @@ void __dfsw___sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1,
void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases,
dfsan_label L1, dfsan_label L2) {
- if (!TS) return;
+ if (!RecordingTraces) return;
uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
TS->DFSanSwitchCallback(PC, Cases[1], Val, Cases[0], Cases+2, L1);
}
@@ -510,7 +549,7 @@ void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases,
void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2,
size_t n, dfsan_label s1_label,
dfsan_label s2_label, dfsan_label n_label) {
- if (!TS) return;
+ if (!RecordingMemcmp) return;
dfsan_label L1 = dfsan_read_label(s1, n);
dfsan_label L2 = dfsan_read_label(s2, n);
TS->DFSanMemcmpCallback(n, reinterpret_cast<const uint8_t *>(s1),
@@ -520,7 +559,7 @@ void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2,
void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2,
size_t n, dfsan_label s1_label,
dfsan_label s2_label, dfsan_label n_label) {
- if (!TS) return;
+ if (!RecordingMemcmp) return;
n = std::min(n, fuzzer::InternalStrnlen(s1, n));
n = std::min(n, fuzzer::InternalStrnlen(s2, n));
dfsan_label L1 = dfsan_read_label(s1, n);
@@ -531,7 +570,7 @@ void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2,
void dfsan_weak_hook_strcmp(void *caller_pc, const char *s1, const char *s2,
dfsan_label s1_label, dfsan_label s2_label) {
- if (!TS) return;
+ if (!RecordingMemcmp) return;
size_t Len1 = strlen(s1);
size_t Len2 = strlen(s2);
size_t N = std::min(Len1, Len2);
@@ -550,7 +589,7 @@ void dfsan_weak_hook_strcmp(void *caller_pc, const char *s1, const char *s2,
#if LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1,
const void *s2, size_t n, int result) {
- if (!TS) return;
+ if (!RecordingMemcmp) return;
if (result == 0) return; // No reason to mutate.
if (n <= 1) return; // Not interesting.
TS->TraceMemcmpCallback(n, reinterpret_cast<const uint8_t *>(s1),
@@ -559,7 +598,7 @@ void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1,
void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1,
const char *s2, size_t n, int result) {
- if (!TS) return;
+ if (!RecordingMemcmp) return;
if (result == 0) return; // No reason to mutate.
size_t Len1 = fuzzer::InternalStrnlen(s1, n);
size_t Len2 = fuzzer::InternalStrnlen(s2, n);
@@ -572,7 +611,7 @@ void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1,
void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1,
const char *s2, int result) {
- if (!TS) return;
+ if (!RecordingMemcmp) return;
if (result == 0) return; // No reason to mutate.
size_t Len1 = strlen(s1);
size_t Len2 = strlen(s2);
@@ -582,12 +621,33 @@ void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1,
reinterpret_cast<const uint8_t *>(s2));
}
+void __sanitizer_weak_hook_strncasecmp(void *called_pc, const char *s1,
+ const char *s2, size_t n, int result) {
+ return __sanitizer_weak_hook_strncmp(called_pc, s1, s2, n, result);
+}
+void __sanitizer_weak_hook_strcasecmp(void *called_pc, const char *s1,
+ const char *s2, int result) {
+ return __sanitizer_weak_hook_strcmp(called_pc, s1, s2, result);
+}
+void __sanitizer_weak_hook_strstr(void *called_pc, const char *s1,
+ const char *s2, char *result) {
+ TS->AddInterestingWord(reinterpret_cast<const uint8_t *>(s2), strlen(s2));
+}
+void __sanitizer_weak_hook_strcasestr(void *called_pc, const char *s1,
+ const char *s2, char *result) {
+ TS->AddInterestingWord(reinterpret_cast<const uint8_t *>(s2), strlen(s2));
+}
+void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1,
+ const void *s2, size_t len2, void *result) {
+ // TODO: can't hook memmem since memmem is used by libFuzzer.
+}
+
#endif // LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
__attribute__((visibility("default")))
void __sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1,
uint64_t Arg2) {
- if (!TS) return;
+ if (!RecordingTraces) return;
uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
uint64_t CmpSize = (SizeAndType >> 32) / 8;
uint64_t Type = (SizeAndType << 32) >> 32;
@@ -596,7 +656,7 @@ void __sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1,
__attribute__((visibility("default")))
void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) {
- if (!TS) return;
+ if (!RecordingTraces) return;
uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
TS->TraceSwitchCallback(PC, Cases[1], Val, Cases[0], Cases + 2);
}