diff options
Diffstat (limited to 'include/clang/Analysis/CloneDetection.h')
-rw-r--r-- | include/clang/Analysis/CloneDetection.h | 245 |
1 files changed, 32 insertions, 213 deletions
diff --git a/include/clang/Analysis/CloneDetection.h b/include/clang/Analysis/CloneDetection.h index 6339deef41bd..051b9236658c 100644 --- a/include/clang/Analysis/CloneDetection.h +++ b/include/clang/Analysis/CloneDetection.h @@ -7,19 +7,15 @@ // //===----------------------------------------------------------------------===// /// -/// /file -/// This file defines classes for searching and anlyzing source code clones. +/// \file +/// This file defines classes for searching and analyzing source code clones. /// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_AST_CLONEDETECTION_H #define LLVM_CLANG_AST_CLONEDETECTION_H -#include "clang/AST/DeclTemplate.h" #include "clang/AST/StmtVisitor.h" -#include "clang/Basic/SourceLocation.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" #include "llvm/Support/Regex.h" #include <vector> @@ -31,192 +27,6 @@ class VarDecl; class ASTContext; class CompoundStmt; -namespace clone_detection { - -/// Returns a string that represents all macro expansions that expanded into the -/// given SourceLocation. -/// -/// If 'getMacroStack(A) == getMacroStack(B)' is true, then the SourceLocations -/// A and B are expanded from the same macros in the same order. -std::string getMacroStack(SourceLocation Loc, ASTContext &Context); - -/// Collects the data of a single Stmt. -/// -/// This class defines what a code clone is: If it collects for two statements -/// the same data, then those two statements are considered to be clones of each -/// other. -/// -/// All collected data is forwarded to the given data consumer of the type T. -/// The data consumer class needs to provide a member method with the signature: -/// update(StringRef Str) -template <typename T> -class StmtDataCollector : public ConstStmtVisitor<StmtDataCollector<T>> { - - ASTContext &Context; - /// The data sink to which all data is forwarded. - T &DataConsumer; - -public: - /// Collects data of the given Stmt. - /// \param S The given statement. - /// \param Context The ASTContext of S. - /// \param DataConsumer The data sink to which all data is forwarded. - StmtDataCollector(const Stmt *S, ASTContext &Context, T &DataConsumer) - : Context(Context), DataConsumer(DataConsumer) { - this->Visit(S); - } - - typedef unsigned DataPiece; - - // Below are utility methods for appending different data to the vector. - - void addData(DataPiece Integer) { - DataConsumer.update( - StringRef(reinterpret_cast<char *>(&Integer), sizeof(Integer))); - } - - void addData(llvm::StringRef Str) { DataConsumer.update(Str); } - - void addData(const QualType &QT) { addData(QT.getAsString()); } - -// The functions below collect the class specific data of each Stmt subclass. - -// Utility macro for defining a visit method for a given class. This method -// calls back to the ConstStmtVisitor to visit all parent classes. -#define DEF_ADD_DATA(CLASS, CODE) \ - void Visit##CLASS(const CLASS *S) { \ - CODE; \ - ConstStmtVisitor<StmtDataCollector>::Visit##CLASS(S); \ - } - - DEF_ADD_DATA(Stmt, { - addData(S->getStmtClass()); - // This ensures that macro generated code isn't identical to macro-generated - // code. - addData(getMacroStack(S->getLocStart(), Context)); - addData(getMacroStack(S->getLocEnd(), Context)); - }) - DEF_ADD_DATA(Expr, { addData(S->getType()); }) - - //--- Builtin functionality ----------------------------------------------// - DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); }) - DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); }) - DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); }) - DEF_ADD_DATA(TypeTraitExpr, { - addData(S->getTrait()); - for (unsigned i = 0; i < S->getNumArgs(); ++i) - addData(S->getArg(i)->getType()); - }) - - //--- Calls --------------------------------------------------------------// - DEF_ADD_DATA(CallExpr, { - // Function pointers don't have a callee and we just skip hashing it. - if (const FunctionDecl *D = S->getDirectCallee()) { - // If the function is a template specialization, we also need to handle - // the template arguments as they are not included in the qualified name. - if (auto Args = D->getTemplateSpecializationArgs()) { - std::string ArgString; - - // Print all template arguments into ArgString - llvm::raw_string_ostream OS(ArgString); - for (unsigned i = 0; i < Args->size(); ++i) { - Args->get(i).print(Context.getLangOpts(), OS); - // Add a padding character so that 'foo<X, XX>()' != 'foo<XX, X>()'. - OS << '\n'; - } - OS.flush(); - - addData(ArgString); - } - addData(D->getQualifiedNameAsString()); - } - }) - - //--- Exceptions ---------------------------------------------------------// - DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); }) - - //--- C++ OOP Stmts ------------------------------------------------------// - DEF_ADD_DATA(CXXDeleteExpr, { - addData(S->isArrayFormAsWritten()); - addData(S->isGlobalDelete()); - }) - - //--- Casts --------------------------------------------------------------// - DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); }) - - //--- Miscellaneous Exprs ------------------------------------------------// - DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); }) - DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); }) - - //--- Control flow -------------------------------------------------------// - DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); }) - DEF_ADD_DATA(IndirectGotoStmt, { - if (S->getConstantTarget()) - addData(S->getConstantTarget()->getName()); - }) - DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); }) - DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); }) - DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); }) - - //--- Objective-C --------------------------------------------------------// - DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); }) - DEF_ADD_DATA(ObjCPropertyRefExpr, { - addData(S->isSuperReceiver()); - addData(S->isImplicitProperty()); - }) - DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); }) - - //--- Miscellaneous Stmts ------------------------------------------------// - DEF_ADD_DATA(CXXFoldExpr, { - addData(S->isRightFold()); - addData(S->getOperator()); - }) - DEF_ADD_DATA(GenericSelectionExpr, { - for (unsigned i = 0; i < S->getNumAssocs(); ++i) { - addData(S->getAssocType(i)); - } - }) - DEF_ADD_DATA(LambdaExpr, { - for (const LambdaCapture &C : S->captures()) { - addData(C.isPackExpansion()); - addData(C.getCaptureKind()); - if (C.capturesVariable()) - addData(C.getCapturedVar()->getType()); - } - addData(S->isGenericLambda()); - addData(S->isMutable()); - }) - DEF_ADD_DATA(DeclStmt, { - auto numDecls = std::distance(S->decl_begin(), S->decl_end()); - addData(static_cast<DataPiece>(numDecls)); - for (const Decl *D : S->decls()) { - if (const VarDecl *VD = dyn_cast<VarDecl>(D)) { - addData(VD->getType()); - } - } - }) - DEF_ADD_DATA(AsmStmt, { - addData(S->isSimple()); - addData(S->isVolatile()); - addData(S->generateAsmString(Context)); - for (unsigned i = 0; i < S->getNumInputs(); ++i) { - addData(S->getInputConstraint(i)); - } - for (unsigned i = 0; i < S->getNumOutputs(); ++i) { - addData(S->getOutputConstraint(i)); - } - for (unsigned i = 0; i < S->getNumClobbers(); ++i) { - addData(S->getClobber(i)); - } - }) - DEF_ADD_DATA(AttributedStmt, { - for (const Attr *A : S->getAttrs()) { - addData(std::string(A->getSpelling())); - } - }) -}; -} // namespace clone_detection - /// Identifies a list of statements. /// /// Can either identify a single arbitrary Stmt object, a continuous sequence of @@ -423,9 +233,9 @@ public: /// filtered. /// \param Filter The filter function that should return true for all groups /// that should be removed from the list. - static void - filterGroups(std::vector<CloneDetector::CloneGroup> &CloneGroups, - std::function<bool(const CloneDetector::CloneGroup &)> Filter) { + static void filterGroups( + std::vector<CloneDetector::CloneGroup> &CloneGroups, + llvm::function_ref<bool(const CloneDetector::CloneGroup &)> Filter) { CloneGroups.erase( std::remove_if(CloneGroups.begin(), CloneGroups.end(), Filter), CloneGroups.end()); @@ -439,25 +249,29 @@ public: /// to the same CloneGroup. static void splitCloneGroups( std::vector<CloneDetector::CloneGroup> &CloneGroups, - std::function<bool(const StmtSequence &, const StmtSequence &)> Compare); + llvm::function_ref<bool(const StmtSequence &, const StmtSequence &)> + Compare); }; -/// Searches all children of the given clones for type II clones (i.e. they are -/// identical in every aspect beside the used variable names). -class RecursiveCloneTypeIIConstraint { - - /// Generates and saves a hash code for the given Stmt. - /// \param S The given Stmt. - /// \param D The Decl containing S. - /// \param StmtsByHash Output parameter that will contain the hash codes for - /// each StmtSequence in the given Stmt. - /// \return The hash code of the given Stmt. - /// - /// If the given Stmt is a CompoundStmt, this method will also generate - /// hashes for all possible StmtSequences in the children of this Stmt. - size_t saveHash(const Stmt *S, const Decl *D, - std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash); +/// This constraint moves clones into clone groups of type II via hashing. +/// +/// Clones with different hash values are moved into separate clone groups. +/// Collisions are possible, and this constraint does nothing to address this +/// them. Add the slower RecursiveCloneTypeIIVerifyConstraint later in the +/// constraint chain, not necessarily immediately, to eliminate hash collisions +/// through a more detailed analysis. +class RecursiveCloneTypeIIHashConstraint { +public: + void constrain(std::vector<CloneDetector::CloneGroup> &Sequences); +}; +/// This constraint moves clones into clone groups of type II by comparing them. +/// +/// Clones that aren't type II clones are moved into separate clone groups. +/// In contrast to the RecursiveCloneTypeIIHashConstraint, all clones in a clone +/// group are guaranteed to be be type II clones of each other, but it is too +/// slow to efficiently handle large amounts of clones. +class RecursiveCloneTypeIIVerifyConstraint { public: void constrain(std::vector<CloneDetector::CloneGroup> &Sequences); }; @@ -474,14 +288,19 @@ public: MinComplexityConstraint(unsigned MinComplexity) : MinComplexity(MinComplexity) {} - size_t calculateStmtComplexity(const StmtSequence &Seq, + /// Calculates the complexity of the given StmtSequence. + /// \param Limit The limit of complexity we probe for. After reaching + /// this limit during calculation, this method is exiting + /// early to improve performance and returns this limit. + size_t calculateStmtComplexity(const StmtSequence &Seq, std::size_t Limit, const std::string &ParentMacroStack = ""); void constrain(std::vector<CloneDetector::CloneGroup> &CloneGroups) { CloneConstraint::filterGroups( CloneGroups, [this](const CloneDetector::CloneGroup &A) { if (!A.empty()) - return calculateStmtComplexity(A.front()) < MinComplexity; + return calculateStmtComplexity(A.front(), MinComplexity) < + MinComplexity; else return false; }); |