diff options
Diffstat (limited to 'lib/Fuzzer/FuzzerInternal.h')
-rw-r--r-- | lib/Fuzzer/FuzzerInternal.h | 441 |
1 files changed, 364 insertions, 77 deletions
diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h index c1e9daac98089..08f8801ac5fdd 100644 --- a/lib/Fuzzer/FuzzerInternal.h +++ b/lib/Fuzzer/FuzzerInternal.h @@ -12,38 +12,108 @@ #ifndef LLVM_FUZZER_INTERNAL_H #define LLVM_FUZZER_INTERNAL_H +#include <algorithm> +#include <atomic> #include <cassert> -#include <climits> #include <chrono> +#include <climits> #include <cstddef> #include <cstdlib> +#include <random> +#include <string.h> #include <string> -#include <vector> #include <unordered_set> +#include <vector> +#include "FuzzerExtFunctions.h" #include "FuzzerInterface.h" +#include "FuzzerTracePC.h" + +// Platform detection. +#ifdef __linux__ +#define LIBFUZZER_LINUX 1 +#define LIBFUZZER_APPLE 0 +#elif __APPLE__ +#define LIBFUZZER_LINUX 0 +#define LIBFUZZER_APPLE 1 +#else +#error "Support for your platform has not been implemented" +#endif namespace fuzzer { + +typedef int (*UserCallback)(const uint8_t *Data, size_t Size); +int FuzzerDriver(int *argc, char ***argv, UserCallback Callback); + using namespace std::chrono; +typedef std::vector<uint8_t> Unit; +typedef std::vector<Unit> UnitVector; + +// A simple POD sized array of bytes. +template <size_t kMaxSize> class FixedWord { +public: + FixedWord() {} + FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); } + + void Set(const uint8_t *B, uint8_t S) { + assert(S <= kMaxSize); + memcpy(Data, B, S); + Size = S; + } + + bool operator==(const FixedWord<kMaxSize> &w) const { + return Size == w.Size && 0 == memcmp(Data, w.Data, Size); + } + + bool operator<(const FixedWord<kMaxSize> &w) const { + if (Size != w.Size) + return Size < w.Size; + return memcmp(Data, w.Data, Size) < 0; + } + + static size_t GetMaxSize() { return kMaxSize; } + const uint8_t *data() const { return Data; } + uint8_t size() const { return Size; } + +private: + uint8_t Size = 0; + uint8_t Data[kMaxSize]; +}; +typedef FixedWord<27> Word; // 28 bytes. + +bool IsFile(const std::string &Path); std::string FileToString(const std::string &Path); -Unit FileToVector(const std::string &Path); +Unit FileToVector(const std::string &Path, size_t MaxSize = 0); void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V, - long *Epoch); + long *Epoch, size_t MaxSize); void WriteToFile(const Unit &U, const std::string &Path); void CopyFileToErr(const std::string &Path); // Returns "Dir/FileName" or equivalent for the current OS. std::string DirPlusFile(const std::string &DirPath, const std::string &FileName); +void DupAndCloseStderr(); +void CloseStdout(); void Printf(const char *Fmt, ...); -void Print(const Unit &U, const char *PrintAfter = ""); +void PrintHexArray(const Unit &U, const char *PrintAfter = ""); +void PrintHexArray(const uint8_t *Data, size_t Size, + const char *PrintAfter = ""); void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter = ""); void PrintASCII(const Unit &U, const char *PrintAfter = ""); +void PrintASCII(const Word &W, const char *PrintAfter = ""); std::string Hash(const Unit &U); void SetTimer(int Seconds); +void SetSigSegvHandler(); +void SetSigBusHandler(); +void SetSigAbrtHandler(); +void SetSigIllHandler(); +void SetSigFpeHandler(); +void SetSigIntHandler(); +void SetSigTermHandler(); std::string Base64(const Unit &U); int ExecuteCommand(const std::string &Command); +size_t GetPeakRSSMb(); // Private copy of SHA1 implementation. static const int kSHA1NumBytes = 20; @@ -52,11 +122,24 @@ void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out); // Changes U to contain only ASCII (isprint+isspace) characters. // Returns true iff U has been changed. -bool ToASCII(Unit &U); +bool ToASCII(uint8_t *Data, size_t Size); bool IsASCII(const Unit &U); +bool IsASCII(const uint8_t *Data, size_t Size); int NumberOfCpuCores(); int GetPid(); +void SleepSeconds(int Seconds); + +class Random { + public: + Random(unsigned int seed) : R(seed) {} + size_t Rand() { return R(); } + size_t RandBool() { return Rand() % 2; } + size_t operator()(size_t n) { return n ? Rand() % n : 0; } + std::mt19937 &Get_mt19937() { return R; } + private: + std::mt19937 R; +}; // Dictionary. @@ -68,50 +151,240 @@ bool ParseOneDictionaryEntry(const std::string &Str, Unit *U); // were parsed succesfully. bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units); -class Fuzzer { +class DictionaryEntry { + public: + DictionaryEntry() {} + DictionaryEntry(Word W) : W(W) {} + DictionaryEntry(Word W, size_t PositionHint) : W(W), PositionHint(PositionHint) {} + const Word &GetW() const { return W; } + + bool HasPositionHint() const { return PositionHint != std::numeric_limits<size_t>::max(); } + size_t GetPositionHint() const { + assert(HasPositionHint()); + return PositionHint; + } + void IncUseCount() { UseCount++; } + void IncSuccessCount() { SuccessCount++; } + size_t GetUseCount() const { return UseCount; } + size_t GetSuccessCount() const {return SuccessCount; } + +private: + Word W; + size_t PositionHint = std::numeric_limits<size_t>::max(); + size_t UseCount = 0; + size_t SuccessCount = 0; +}; + +class Dictionary { public: - struct FuzzingOptions { - int Verbosity = 1; - int MaxLen = 0; - int UnitTimeoutSec = 300; - int MaxTotalTimeSec = 0; - bool DoCrossOver = true; - int MutateDepth = 5; - bool ExitOnFirst = false; - bool UseCounters = false; - bool UseIndirCalls = true; - bool UseTraces = false; - bool UseFullCoverageSet = false; - bool Reload = true; - bool ShuffleAtStartUp = true; - int PreferSmallDuringInitialShuffle = -1; - size_t MaxNumberOfRuns = ULONG_MAX; - int SyncTimeout = 600; - int ReportSlowUnits = 10; - bool OnlyASCII = false; - std::string OutputCorpus; - std::string SyncCommand; - std::string ArtifactPrefix = "./"; - std::string ExactArtifactPath; - bool SaveArtifacts = true; - bool PrintNEW = true; // Print a status line when new units are found; - bool OutputCSV = false; - bool PrintNewCovPcs = false; + static const size_t kMaxDictSize = 1 << 14; + + bool ContainsWord(const Word &W) const { + return std::any_of(begin(), end(), [&](const DictionaryEntry &DE) { + return DE.GetW() == W; + }); + } + const DictionaryEntry *begin() const { return &DE[0]; } + const DictionaryEntry *end() const { return begin() + Size; } + DictionaryEntry & operator[] (size_t Idx) { + assert(Idx < Size); + return DE[Idx]; + } + void push_back(DictionaryEntry DE) { + if (Size < kMaxDictSize) + this->DE[Size++] = DE; + } + void clear() { Size = 0; } + bool empty() const { return Size == 0; } + size_t size() const { return Size; } + +private: + DictionaryEntry DE[kMaxDictSize]; + size_t Size = 0; +}; + +struct FuzzingOptions { + int Verbosity = 1; + size_t MaxLen = 0; + int UnitTimeoutSec = 300; + int TimeoutExitCode = 77; + int ErrorExitCode = 77; + int MaxTotalTimeSec = 0; + int RssLimitMb = 0; + bool DoCrossOver = true; + int MutateDepth = 5; + bool UseCounters = false; + bool UseIndirCalls = true; + bool UseTraces = false; + bool UseMemcmp = true; + bool UseMemmem = true; + bool UseFullCoverageSet = false; + bool Reload = true; + bool ShuffleAtStartUp = true; + bool PreferSmall = true; + size_t MaxNumberOfRuns = ULONG_MAX; + int ReportSlowUnits = 10; + bool OnlyASCII = false; + std::string OutputCorpus; + std::string ArtifactPrefix = "./"; + std::string ExactArtifactPath; + bool SaveArtifacts = true; + bool PrintNEW = true; // Print a status line when new units are found; + bool OutputCSV = false; + bool PrintNewCovPcs = false; + bool PrintFinalStats = false; + bool DetectLeaks = true; + bool TruncateUnits = false; + bool PruneCorpus = true; +}; + +class MutationDispatcher { +public: + MutationDispatcher(Random &Rand, const FuzzingOptions &Options); + ~MutationDispatcher() {} + /// Indicate that we are about to start a new sequence of mutations. + void StartMutationSequence(); + /// Print the current sequence of mutations. + void PrintMutationSequence(); + /// Indicate that the current sequence of mutations was successfull. + void RecordSuccessfulMutationSequence(); + /// Mutates data by invoking user-provided mutator. + size_t Mutate_Custom(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by invoking user-provided crossover. + size_t Mutate_CustomCrossOver(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by shuffling bytes. + size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by erasing a byte. + size_t Mutate_EraseByte(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by inserting a byte. + size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by chanding one byte. + size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by chanding one bit. + size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Mutates data by adding a word from the manual dictionary. + size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size, + size_t MaxSize); + + /// Mutates data by adding a word from the temporary automatic dictionary. + size_t Mutate_AddWordFromTemporaryAutoDictionary(uint8_t *Data, size_t Size, + size_t MaxSize); + + /// Mutates data by adding a word from the persistent automatic dictionary. + size_t Mutate_AddWordFromPersistentAutoDictionary(uint8_t *Data, size_t Size, + size_t MaxSize); + + /// Tries to find an ASCII integer in Data, changes it to another ASCII int. + size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize); + + /// CrossOver Data with some other element of the corpus. + size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Applies one of the configured mutations. + /// Returns the new size of data which could be up to MaxSize. + size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); + /// Applies one of the default mutations. Provided as a service + /// to mutation authors. + size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Creates a cross-over of two pieces of Data, returns its size. + size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, + size_t Size2, uint8_t *Out, size_t MaxOutSize); + + void AddWordToManualDictionary(const Word &W); + + void AddWordToAutoDictionary(DictionaryEntry DE); + void ClearAutoDictionary(); + void PrintRecommendedDictionary(); + + void SetCorpus(const std::vector<Unit> *Corpus) { this->Corpus = Corpus; } + + Random &GetRand() { return Rand; } + +private: + + struct Mutator { + size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max); + const char *Name; + }; + + size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size, + size_t MaxSize); + size_t MutateImpl(uint8_t *Data, size_t Size, size_t MaxSize, + const std::vector<Mutator> &Mutators); + + Random &Rand; + const FuzzingOptions Options; + + // Dictionary provided by the user via -dict=DICT_FILE. + Dictionary ManualDictionary; + // Temporary dictionary modified by the fuzzer itself, + // recreated periodically. + Dictionary TempAutoDictionary; + // Persistent dictionary modified by the fuzzer, consists of + // entries that led to successfull discoveries in the past mutations. + Dictionary PersistentAutoDictionary; + std::vector<Mutator> CurrentMutatorSequence; + std::vector<DictionaryEntry *> CurrentDictionaryEntrySequence; + const std::vector<Unit> *Corpus = nullptr; + std::vector<uint8_t> MutateInPlaceHere; + + std::vector<Mutator> Mutators; + std::vector<Mutator> DefaultMutators; +}; + +class Fuzzer { +public: + + // Aggregates all available coverage measurements. + struct Coverage { + Coverage() { Reset(); } + + void Reset() { + BlockCoverage = 0; + CallerCalleeCoverage = 0; + PcMapBits = 0; + CounterBitmapBits = 0; + PcBufferLen = 0; + CounterBitmap.clear(); + PCMap.Reset(); + } + + std::string DebugString() const; + + size_t BlockCoverage; + size_t CallerCalleeCoverage; + + size_t PcBufferLen; + // Precalculated number of bits in CounterBitmap. + size_t CounterBitmapBits; + std::vector<uint8_t> CounterBitmap; + // Precalculated number of bits in PCMap. + size_t PcMapBits; + PcCoverageMap PCMap; }; - Fuzzer(UserSuppliedFuzzer &USF, FuzzingOptions Options); - void AddToCorpus(const Unit &U) { Corpus.push_back(U); } + + Fuzzer(UserCallback CB, MutationDispatcher &MD, FuzzingOptions Options); + void AddToCorpus(const Unit &U) { + Corpus.push_back(U); + UpdateCorpusDistribution(); + } size_t ChooseUnitIdxToMutate(); const Unit &ChooseUnitToMutate() { return Corpus[ChooseUnitIdxToMutate()]; }; + void TruncateUnits(std::vector<Unit> *NewCorpus); void Loop(); void Drill(); void ShuffleAndMinimize(); void InitializeTraceState(); + void AssignTaintLabels(uint8_t *Data, size_t Size); size_t CorpusSize() const { return Corpus.size(); } - void ReadDir(const std::string &Path, long *Epoch) { + size_t MaxUnitSizeInCorpus() const; + void ReadDir(const std::string &Path, long *Epoch, size_t MaxSize) { Printf("Loading corpus: %s\n", Path.c_str()); - ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch); + ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch, MaxSize); } - void RereadOutputCorpus(); + void RereadOutputCorpus(size_t MaxSize); // Save the current corpus to OutputCorpus. void SaveCorpus(); @@ -119,35 +392,56 @@ class Fuzzer { return duration_cast<seconds>(system_clock::now() - ProcessStartTime) .count(); } + size_t execPerSec() { + size_t Seconds = secondsSinceProcessStartUp(); + return Seconds ? TotalNumberOfRuns / Seconds : 0; + } size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; } static void StaticAlarmCallback(); + static void StaticCrashSignalCallback(); + static void StaticInterruptCallback(); - void ExecuteCallback(const Unit &U); + void ExecuteCallback(const uint8_t *Data, size_t Size); + bool RunOne(const uint8_t *Data, size_t Size); // Merge Corpora[1:] into Corpora[0]. void Merge(const std::vector<std::string> &Corpora); + // Returns a subset of 'Extra' that adds coverage to 'Initial'. + UnitVector FindExtraUnits(const UnitVector &Initial, const UnitVector &Extra); + MutationDispatcher &GetMD() { return MD; } + void PrintFinalStats(); + void SetMaxLen(size_t MaxLen); + void RssLimitCallback(); - private: + // Public for tests. + void ResetCoverage(); + + bool InFuzzingThread() const { return IsMyThread; } + size_t GetCurrentUnitInFuzzingThead(const uint8_t **Data) const; + +private: void AlarmCallback(); + void CrashCallback(); + void InterruptCallback(); void MutateAndTestOne(); void ReportNewCoverage(const Unit &U); - bool RunOne(const Unit &U); - void RunOneAndUpdateCorpus(Unit &U); + bool RunOne(const Unit &U) { return RunOne(U.data(), U.size()); } + void RunOneAndUpdateCorpus(const uint8_t *Data, size_t Size); void WriteToOutputCorpus(const Unit &U); void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); void PrintStats(const char *Where, const char *End = "\n"); void PrintStatusForNewUnit(const Unit &U); - void PrintUnitInASCII(const Unit &U, const char *PrintAfter = ""); + void ShuffleCorpus(UnitVector *V); + void TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size, + bool DuringInitialCorpusExecution); - void SyncCorpus(); - - size_t RecordBlockCoverage(); - size_t RecordCallerCalleeCoverage(); - void PrepareCoverageBeforeRun(); - bool CheckCoverageAfterRun(); + // Updates the probability distribution for the units in the corpus. + // Must be called whenever the corpus or unit weights are changed. + void UpdateCorpusDistribution(); + bool UpdateMaxCoverage(); // Trace-based fuzzing: we run a unit with some kind of tracing // enabled and record potentially useful mutations. Then @@ -160,48 +454,41 @@ class Fuzzer { void SetDeathCallback(); static void StaticDeathCallback(); + void DumpCurrentUnit(const char *Prefix); void DeathCallback(); - Unit CurrentUnit; + + void LazyAllocateCurrentUnitData(); + uint8_t *CurrentUnitData = nullptr; + std::atomic<size_t> CurrentUnitSize; size_t TotalNumberOfRuns = 0; - size_t TotalNumberOfExecutedTraceBasedMutations = 0; + size_t NumberOfNewUnitsAdded = 0; + + bool HasMoreMallocsThanFrees = false; + size_t NumberOfLeakDetectionAttempts = 0; std::vector<Unit> Corpus; std::unordered_set<std::string> UnitHashesAddedToCorpus; - // For UseCounters - std::vector<uint8_t> CounterBitmap; - size_t TotalBits() { // Slow. Call it only for printing stats. - size_t Res = 0; - for (auto x : CounterBitmap) Res += __builtin_popcount(x); - return Res; - } - - UserSuppliedFuzzer &USF; + std::piecewise_constant_distribution<double> CorpusDistribution; + UserCallback CB; + MutationDispatcher &MD; FuzzingOptions Options; system_clock::time_point ProcessStartTime = system_clock::now(); - system_clock::time_point LastExternalSync = system_clock::now(); system_clock::time_point UnitStartTime; long TimeOfLongestUnitInSeconds = 0; long EpochOfLastReadOfOutputCorpus = 0; - size_t LastRecordedBlockCoverage = 0; - size_t LastRecordedCallerCalleeCoverage = 0; - size_t LastCoveragePcBufferLen = 0; -}; - -class SimpleUserSuppliedFuzzer: public UserSuppliedFuzzer { - public: - SimpleUserSuppliedFuzzer(FuzzerRandomBase *Rand, UserCallback Callback) - : UserSuppliedFuzzer(Rand), Callback(Callback) {} - virtual int TargetFunction(const uint8_t *Data, size_t Size) override { - return Callback(Data, Size); - } + // Maximum recorded coverage. + Coverage MaxCoverage; - private: - UserCallback Callback = nullptr; + // Need to know our own thread. + static thread_local bool IsMyThread; }; -}; // namespace fuzzer +// Global interface to functions that may or may not be available. +extern ExternalFunctions *EF; + +}; // namespace fuzzer #endif // LLVM_FUZZER_INTERNAL_H |