diff options
Diffstat (limited to 'lib/Tooling/Core/Replacement.cpp')
| -rw-r--r-- | lib/Tooling/Core/Replacement.cpp | 501 |
1 files changed, 312 insertions, 189 deletions
diff --git a/lib/Tooling/Core/Replacement.cpp b/lib/Tooling/Core/Replacement.cpp index 4f130709ac16d..e194b59a6e2b1 100644 --- a/lib/Tooling/Core/Replacement.cpp +++ b/lib/Tooling/Core/Replacement.cpp @@ -20,6 +20,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" #include "clang/Rewrite/Core/Rewriter.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_os_ostream.h" @@ -29,8 +30,7 @@ namespace tooling { static const char * const InvalidLocation = ""; -Replacement::Replacement() - : FilePath(InvalidLocation) {} +Replacement::Replacement() : FilePath(InvalidLocation) {} Replacement::Replacement(StringRef FilePath, unsigned Offset, unsigned Length, StringRef ReplacementText) @@ -84,11 +84,8 @@ bool operator<(const Replacement &LHS, const Replacement &RHS) { if (LHS.getOffset() != RHS.getOffset()) return LHS.getOffset() < RHS.getOffset(); - // Apply longer replacements first, specifically so that deletions are - // executed before insertions. It is (hopefully) never the intention to - // delete parts of newly inserted code. if (LHS.getLength() != RHS.getLength()) - return LHS.getLength() > RHS.getLength(); + return LHS.getLength() < RHS.getLength(); if (LHS.getFilePath() != RHS.getFilePath()) return LHS.getFilePath() < RHS.getFilePath(); @@ -138,200 +135,196 @@ void Replacement::setFromSourceRange(const SourceManager &Sources, ReplacementText); } -template <typename T> -unsigned shiftedCodePositionInternal(const T &Replaces, unsigned Position) { - unsigned Offset = 0; - for (const auto& R : Replaces) { - if (R.getOffset() + R.getLength() <= Position) { - Offset += R.getReplacementText().size() - R.getLength(); - continue; - } - if (R.getOffset() < Position && - R.getOffset() + R.getReplacementText().size() <= Position) { - Position = R.getOffset() + R.getReplacementText().size() - 1; - } - break; - } - return Position + Offset; +Replacement +Replacements::getReplacementInChangedCode(const Replacement &R) const { + unsigned NewStart = getShiftedCodePosition(R.getOffset()); + unsigned NewEnd = getShiftedCodePosition(R.getOffset() + R.getLength()); + return Replacement(R.getFilePath(), NewStart, NewEnd - NewStart, + R.getReplacementText()); } -unsigned shiftedCodePosition(const Replacements &Replaces, unsigned Position) { - return shiftedCodePositionInternal(Replaces, Position); +static std::string getReplacementErrString(replacement_error Err) { + switch (Err) { + case replacement_error::fail_to_apply: + return "Failed to apply a replacement."; + case replacement_error::wrong_file_path: + return "The new replacement's file path is different from the file path of " + "existing replacements"; + case replacement_error::overlap_conflict: + return "The new replacement overlaps with an existing replacement."; + case replacement_error::insert_conflict: + return "The new insertion has the same insert location as an existing " + "replacement."; + } + llvm_unreachable("A value of replacement_error has no message."); } -// FIXME: Remove this function when Replacements is implemented as std::vector -// instead of std::set. -unsigned shiftedCodePosition(const std::vector<Replacement> &Replaces, - unsigned Position) { - return shiftedCodePositionInternal(Replaces, Position); +std::string ReplacementError::message() const { + std::string Message = getReplacementErrString(Err); + if (NewReplacement.hasValue()) + Message += "\nNew replacement: " + NewReplacement->toString(); + if (ExistingReplacement.hasValue()) + Message += "\nExisting replacement: " + ExistingReplacement->toString(); + return Message; } -void deduplicate(std::vector<Replacement> &Replaces, - std::vector<Range> &Conflicts) { - if (Replaces.empty()) - return; - - auto LessNoPath = [](const Replacement &LHS, const Replacement &RHS) { - if (LHS.getOffset() != RHS.getOffset()) - return LHS.getOffset() < RHS.getOffset(); - if (LHS.getLength() != RHS.getLength()) - return LHS.getLength() < RHS.getLength(); - return LHS.getReplacementText() < RHS.getReplacementText(); - }; - - auto EqualNoPath = [](const Replacement &LHS, const Replacement &RHS) { - return LHS.getOffset() == RHS.getOffset() && - LHS.getLength() == RHS.getLength() && - LHS.getReplacementText() == RHS.getReplacementText(); - }; +char ReplacementError::ID = 0; - // Deduplicate. We don't want to deduplicate based on the path as we assume - // that all replacements refer to the same file (or are symlinks). - std::sort(Replaces.begin(), Replaces.end(), LessNoPath); - Replaces.erase(std::unique(Replaces.begin(), Replaces.end(), EqualNoPath), - Replaces.end()); - - // Detect conflicts - Range ConflictRange(Replaces.front().getOffset(), - Replaces.front().getLength()); - unsigned ConflictStart = 0; - unsigned ConflictLength = 1; - for (unsigned i = 1; i < Replaces.size(); ++i) { - Range Current(Replaces[i].getOffset(), Replaces[i].getLength()); - if (ConflictRange.overlapsWith(Current)) { - // Extend conflicted range - ConflictRange = Range(ConflictRange.getOffset(), - std::max(ConflictRange.getLength(), - Current.getOffset() + Current.getLength() - - ConflictRange.getOffset())); - ++ConflictLength; - } else { - if (ConflictLength > 1) - Conflicts.push_back(Range(ConflictStart, ConflictLength)); - ConflictRange = Current; - ConflictStart = i; - ConflictLength = 1; +Replacements Replacements::getCanonicalReplacements() const { + std::vector<Replacement> NewReplaces; + // Merge adjacent replacements. + for (const auto &R : Replaces) { + if (NewReplaces.empty()) { + NewReplaces.push_back(R); + continue; } - } - - if (ConflictLength > 1) - Conflicts.push_back(Range(ConflictStart, ConflictLength)); -} - -bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite) { - bool Result = true; - for (Replacements::const_iterator I = Replaces.begin(), - E = Replaces.end(); - I != E; ++I) { - if (I->isApplicable()) { - Result = I->apply(Rewrite) && Result; + auto &Prev = NewReplaces.back(); + unsigned PrevEnd = Prev.getOffset() + Prev.getLength(); + if (PrevEnd < R.getOffset()) { + NewReplaces.push_back(R); } else { - Result = false; + assert(PrevEnd == R.getOffset() && + "Existing replacements must not overlap."); + Replacement NewR( + R.getFilePath(), Prev.getOffset(), Prev.getLength() + R.getLength(), + (Prev.getReplacementText() + R.getReplacementText()).str()); + Prev = NewR; } } - return Result; + ReplacementsImpl NewReplacesImpl(NewReplaces.begin(), NewReplaces.end()); + return Replacements(NewReplacesImpl.begin(), NewReplacesImpl.end()); } -// FIXME: Remove this function when Replacements is implemented as std::vector -// instead of std::set. -bool applyAllReplacements(const std::vector<Replacement> &Replaces, - Rewriter &Rewrite) { - bool Result = true; - for (std::vector<Replacement>::const_iterator I = Replaces.begin(), - E = Replaces.end(); - I != E; ++I) { - if (I->isApplicable()) { - Result = I->apply(Rewrite) && Result; - } else { - Result = false; - } - } - return Result; +// `R` and `Replaces` are order-independent if applying them in either order +// has the same effect, so we need to compare replacements associated to +// applying them in either order. +llvm::Expected<Replacements> +Replacements::mergeIfOrderIndependent(const Replacement &R) const { + Replacements Rs(R); + // A Replacements set containg a single replacement that is `R` referring to + // the code after the existing replacements `Replaces` are applied. + Replacements RsShiftedByReplaces(getReplacementInChangedCode(R)); + // A Replacements set that is `Replaces` referring to the code after `R` is + // applied. + Replacements ReplacesShiftedByRs; + for (const auto &Replace : Replaces) + ReplacesShiftedByRs.Replaces.insert( + Rs.getReplacementInChangedCode(Replace)); + // This is equivalent to applying `Replaces` first and then `R`. + auto MergeShiftedRs = merge(RsShiftedByReplaces); + // This is equivalent to applying `R` first and then `Replaces`. + auto MergeShiftedReplaces = Rs.merge(ReplacesShiftedByRs); + + // Since empty or segmented replacements around existing replacements might be + // produced above, we need to compare replacements in canonical forms. + if (MergeShiftedRs.getCanonicalReplacements() == + MergeShiftedReplaces.getCanonicalReplacements()) + return MergeShiftedRs; + return llvm::make_error<ReplacementError>(replacement_error::overlap_conflict, + R, *Replaces.begin()); } -llvm::Expected<std::string> applyAllReplacements(StringRef Code, - const Replacements &Replaces) { - if (Replaces.empty()) - return Code.str(); +llvm::Error Replacements::add(const Replacement &R) { + // Check the file path. + if (!Replaces.empty() && R.getFilePath() != Replaces.begin()->getFilePath()) + return llvm::make_error<ReplacementError>( + replacement_error::wrong_file_path, R, *Replaces.begin()); - IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem( - new vfs::InMemoryFileSystem); - FileManager Files(FileSystemOptions(), InMemoryFileSystem); - DiagnosticsEngine Diagnostics( - IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), - new DiagnosticOptions); - SourceManager SourceMgr(Diagnostics, Files); - Rewriter Rewrite(SourceMgr, LangOptions()); - InMemoryFileSystem->addFile( - "<stdin>", 0, llvm::MemoryBuffer::getMemBuffer(Code, "<stdin>")); - FileID ID = SourceMgr.createFileID(Files.getFile("<stdin>"), SourceLocation(), - clang::SrcMgr::C_User); - for (Replacements::const_iterator I = Replaces.begin(), E = Replaces.end(); - I != E; ++I) { - Replacement Replace("<stdin>", I->getOffset(), I->getLength(), - I->getReplacementText()); - if (!Replace.apply(Rewrite)) - return llvm::make_error<llvm::StringError>( - "Failed to apply replacement: " + Replace.toString(), - llvm::inconvertibleErrorCode()); + // Special-case header insertions. + if (R.getOffset() == UINT_MAX) { + Replaces.insert(R); + return llvm::Error::success(); } - std::string Result; - llvm::raw_string_ostream OS(Result); - Rewrite.getEditBuffer(ID).write(OS); - OS.flush(); - return Result; -} -// Merge and sort overlapping ranges in \p Ranges. -static std::vector<Range> mergeAndSortRanges(std::vector<Range> Ranges) { - std::sort(Ranges.begin(), Ranges.end(), - [](const Range &LHS, const Range &RHS) { - if (LHS.getOffset() != RHS.getOffset()) - return LHS.getOffset() < RHS.getOffset(); - return LHS.getLength() < RHS.getLength(); - }); - std::vector<Range> Result; - for (const auto &R : Ranges) { - if (Result.empty() || - Result.back().getOffset() + Result.back().getLength() < R.getOffset()) { - Result.push_back(R); - } else { - unsigned NewEnd = - std::max(Result.back().getOffset() + Result.back().getLength(), - R.getOffset() + R.getLength()); - Result[Result.size() - 1] = - Range(Result.back().getOffset(), NewEnd - Result.back().getOffset()); + // This replacement cannot conflict with replacements that end before + // this replacement starts or start after this replacement ends. + // We also know that there currently are no overlapping replacements. + // Thus, we know that all replacements that start after the end of the current + // replacement cannot overlap. + Replacement AtEnd(R.getFilePath(), R.getOffset() + R.getLength(), 0, ""); + + // Find the first entry that starts after or at the end of R. Note that + // entries that start at the end can still be conflicting if R is an + // insertion. + auto I = Replaces.lower_bound(AtEnd); + // If `I` starts at the same offset as `R`, `R` must be an insertion. + if (I != Replaces.end() && R.getOffset() == I->getOffset()) { + assert(R.getLength() == 0); + // `I` is also an insertion, `R` and `I` conflict. + if (I->getLength() == 0) { + // Check if two insertions are order-indepedent: if inserting them in + // either order produces the same text, they are order-independent. + if ((R.getReplacementText() + I->getReplacementText()).str() != + (I->getReplacementText() + R.getReplacementText()).str()) + return llvm::make_error<ReplacementError>( + replacement_error::insert_conflict, R, *I); + // If insertions are order-independent, we can merge them. + Replacement NewR( + R.getFilePath(), R.getOffset(), 0, + (R.getReplacementText() + I->getReplacementText()).str()); + Replaces.erase(I); + Replaces.insert(std::move(NewR)); + return llvm::Error::success(); } + // Insertion `R` is adjacent to a non-insertion replacement `I`, so they + // are order-independent. It is safe to assume that `R` will not conflict + // with any replacement before `I` since all replacements before `I` must + // either end before `R` or end at `R` but has length > 0 (if the + // replacement before `I` is an insertion at `R`, it would have been `I` + // since it is a lower bound of `AtEnd` and ordered before the current `I` + // in the set). + Replaces.insert(R); + return llvm::Error::success(); } - return Result; -} -std::vector<Range> calculateChangedRanges(const Replacements &Replaces) { - std::vector<Range> ChangedRanges; - int Shift = 0; - for (const Replacement &R : Replaces) { - unsigned Offset = R.getOffset() + Shift; - unsigned Length = R.getReplacementText().size(); - Shift += Length - R.getLength(); - ChangedRanges.push_back(Range(Offset, Length)); + // `I` is the smallest iterator (after `R`) whose entry cannot overlap. + // If that is begin(), there are no overlaps. + if (I == Replaces.begin()) { + Replaces.insert(R); + return llvm::Error::success(); } - return mergeAndSortRanges(ChangedRanges); -} - -std::vector<Range> -calculateRangesAfterReplacements(const Replacements &Replaces, - const std::vector<Range> &Ranges) { - auto MergedRanges = mergeAndSortRanges(Ranges); - tooling::Replacements FakeReplaces; - for (const auto &R : MergedRanges) - FakeReplaces.insert(Replacement(Replaces.begin()->getFilePath(), - R.getOffset(), R.getLength(), - std::string(R.getLength(), ' '))); - tooling::Replacements NewReplaces = mergeReplacements(FakeReplaces, Replaces); - return calculateChangedRanges(NewReplaces); + --I; + auto Overlap = [](const Replacement &R1, const Replacement &R2) -> bool { + return Range(R1.getOffset(), R1.getLength()) + .overlapsWith(Range(R2.getOffset(), R2.getLength())); + }; + // If the previous entry does not overlap, we know that entries before it + // can also not overlap. + if (!Overlap(R, *I)) { + // If `R` and `I` do not have the same offset, it is safe to add `R` since + // it must come after `I`. Otherwise: + // - If `R` is an insertion, `I` must not be an insertion since it would + // have come after `AtEnd`. + // - If `R` is not an insertion, `I` must be an insertion; otherwise, `R` + // and `I` would have overlapped. + // In either case, we can safely insert `R`. + Replaces.insert(R); + } else { + // `I` overlaps with `R`. We need to check `R` against all overlapping + // replacements to see if they are order-indepedent. If they are, merge `R` + // with them and replace them with the merged replacements. + auto MergeBegin = I; + auto MergeEnd = std::next(I); + while (I != Replaces.begin()) { + --I; + // If `I` doesn't overlap with `R`, don't merge it. + if (!Overlap(R, *I)) + break; + MergeBegin = I; + } + Replacements OverlapReplaces(MergeBegin, MergeEnd); + llvm::Expected<Replacements> Merged = + OverlapReplaces.mergeIfOrderIndependent(R); + if (!Merged) + return Merged.takeError(); + Replaces.erase(MergeBegin, MergeEnd); + Replaces.insert(Merged->begin(), Merged->end()); + } + return llvm::Error::success(); } namespace { + // Represents a merged replacement, i.e. a replacement consisting of multiple // overlapping replacements from 'First' and 'Second' in mergeReplacements. // @@ -425,26 +418,19 @@ private: unsigned Length; std::string Text; }; -} // namespace -std::map<std::string, Replacements> -groupReplacementsByFile(const Replacements &Replaces) { - std::map<std::string, Replacements> FileToReplaces; - for (const auto &Replace : Replaces) { - FileToReplaces[Replace.getFilePath()].insert(Replace); - } - return FileToReplaces; -} +} // namespace -Replacements mergeReplacements(const Replacements &First, - const Replacements &Second) { - if (First.empty() || Second.empty()) - return First.empty() ? Second : First; +Replacements Replacements::merge(const Replacements &ReplacesToMerge) const { + if (empty() || ReplacesToMerge.empty()) + return empty() ? ReplacesToMerge : *this; + auto &First = Replaces; + auto &Second = ReplacesToMerge.Replaces; // Delta is the amount of characters that replacements from 'Second' need to // be shifted so that their offsets refer to the original text. int Delta = 0; - Replacements Result; + ReplacementsImpl Result; // Iterate over both sets and always add the next element (smallest total // Offset) from either 'First' or 'Second'. Merge that element with @@ -470,6 +456,143 @@ Replacements mergeReplacements(const Replacements &First, Delta -= Merged.deltaFirst(); Result.insert(Merged.asReplacement()); } + return Replacements(Result.begin(), Result.end()); +} + +// Combines overlapping ranges in \p Ranges and sorts the combined ranges. +// Returns a set of non-overlapping and sorted ranges that is equivalent to +// \p Ranges. +static std::vector<Range> combineAndSortRanges(std::vector<Range> Ranges) { + std::sort(Ranges.begin(), Ranges.end(), + [](const Range &LHS, const Range &RHS) { + if (LHS.getOffset() != RHS.getOffset()) + return LHS.getOffset() < RHS.getOffset(); + return LHS.getLength() < RHS.getLength(); + }); + std::vector<Range> Result; + for (const auto &R : Ranges) { + if (Result.empty() || + Result.back().getOffset() + Result.back().getLength() < R.getOffset()) { + Result.push_back(R); + } else { + unsigned NewEnd = + std::max(Result.back().getOffset() + Result.back().getLength(), + R.getOffset() + R.getLength()); + Result[Result.size() - 1] = + Range(Result.back().getOffset(), NewEnd - Result.back().getOffset()); + } + } + return Result; +} + +std::vector<Range> +calculateRangesAfterReplacements(const Replacements &Replaces, + const std::vector<Range> &Ranges) { + // To calculate the new ranges, + // - Turn \p Ranges into Replacements at (offset, length) with an empty + // (unimportant) replacement text of length "length". + // - Merge with \p Replaces. + // - The new ranges will be the affected ranges of the merged replacements. + auto MergedRanges = combineAndSortRanges(Ranges); + if (Replaces.empty()) + return MergedRanges; + tooling::Replacements FakeReplaces; + for (const auto &R : MergedRanges) { + auto Err = FakeReplaces.add(Replacement(Replaces.begin()->getFilePath(), + R.getOffset(), R.getLength(), + std::string(R.getLength(), ' '))); + assert(!Err && + "Replacements must not conflict since ranges have been merged."); + (void)Err; + } + return FakeReplaces.merge(Replaces).getAffectedRanges(); +} + +std::vector<Range> Replacements::getAffectedRanges() const { + std::vector<Range> ChangedRanges; + int Shift = 0; + for (const Replacement &R : Replaces) { + unsigned Offset = R.getOffset() + Shift; + unsigned Length = R.getReplacementText().size(); + Shift += Length - R.getLength(); + ChangedRanges.push_back(Range(Offset, Length)); + } + return combineAndSortRanges(ChangedRanges); +} + +unsigned Replacements::getShiftedCodePosition(unsigned Position) const { + unsigned Offset = 0; + for (const auto& R : Replaces) { + if (R.getOffset() + R.getLength() <= Position) { + Offset += R.getReplacementText().size() - R.getLength(); + continue; + } + if (R.getOffset() < Position && + R.getOffset() + R.getReplacementText().size() <= Position) { + Position = R.getOffset() + R.getReplacementText().size(); + if (R.getReplacementText().size() > 0) + Position--; + } + break; + } + return Position + Offset; +} + +bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite) { + bool Result = true; + for (auto I = Replaces.rbegin(), E = Replaces.rend(); I != E; ++I) { + if (I->isApplicable()) { + Result = I->apply(Rewrite) && Result; + } else { + Result = false; + } + } + return Result; +} + +llvm::Expected<std::string> applyAllReplacements(StringRef Code, + const Replacements &Replaces) { + if (Replaces.empty()) + return Code.str(); + + IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem( + new vfs::InMemoryFileSystem); + FileManager Files(FileSystemOptions(), InMemoryFileSystem); + DiagnosticsEngine Diagnostics( + IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), + new DiagnosticOptions); + SourceManager SourceMgr(Diagnostics, Files); + Rewriter Rewrite(SourceMgr, LangOptions()); + InMemoryFileSystem->addFile( + "<stdin>", 0, llvm::MemoryBuffer::getMemBuffer(Code, "<stdin>")); + FileID ID = SourceMgr.createFileID(Files.getFile("<stdin>"), SourceLocation(), + clang::SrcMgr::C_User); + for (auto I = Replaces.rbegin(), E = Replaces.rend(); I != E; ++I) { + Replacement Replace("<stdin>", I->getOffset(), I->getLength(), + I->getReplacementText()); + if (!Replace.apply(Rewrite)) + return llvm::make_error<ReplacementError>( + replacement_error::fail_to_apply, Replace); + } + std::string Result; + llvm::raw_string_ostream OS(Result); + Rewrite.getEditBuffer(ID).write(OS); + OS.flush(); + return Result; +} + +std::map<std::string, Replacements> groupReplacementsByFile( + FileManager &FileMgr, + const std::map<std::string, Replacements> &FileToReplaces) { + std::map<std::string, Replacements> Result; + llvm::SmallPtrSet<const FileEntry *, 16> ProcessedFileEntries; + for (const auto &Entry : FileToReplaces) { + const FileEntry *FE = FileMgr.getFile(Entry.first); + if (!FE) + llvm::errs() << "File path " << Entry.first << " is invalid.\n"; + else if (ProcessedFileEntries.insert(FE).second) + Result[Entry.first] = std::move(Entry.second); + } return Result; } |
