diff options
Diffstat (limited to 'lib/Lex/DependencyDirectivesSourceMinimizer.cpp')
| -rw-r--r-- | lib/Lex/DependencyDirectivesSourceMinimizer.cpp | 250 | 
1 files changed, 221 insertions, 29 deletions
diff --git a/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/lib/Lex/DependencyDirectivesSourceMinimizer.cpp index cfc37c5d3c62..f063ed711c44 100644 --- a/lib/Lex/DependencyDirectivesSourceMinimizer.cpp +++ b/lib/Lex/DependencyDirectivesSourceMinimizer.cpp @@ -59,6 +59,7 @@ private:    LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End);    LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);    LLVM_NODISCARD bool lexAt(const char *&First, const char *const End); +  LLVM_NODISCARD bool lexModule(const char *&First, const char *const End);    LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End);    LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);    LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End); @@ -184,26 +185,58 @@ static void skipRawString(const char *&First, const char *const End) {    }  } +// Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n) +static unsigned isEOL(const char *First, const char *const End) { +  if (First == End) +    return 0; +  if (End - First > 1 && isVerticalWhitespace(First[0]) && +      isVerticalWhitespace(First[1]) && First[0] != First[1]) +    return 2; +  return !!isVerticalWhitespace(First[0]); +} +  static void skipString(const char *&First, const char *const End) { -  assert(*First == '\'' || *First == '"'); -  const char Terminator = *First; -  for (++First; First != End && *First != Terminator; ++First) -    if (*First == '\\') -      if (++First == End) -        return; +  assert(*First == '\'' || *First == '"' || *First == '<'); +  const char Terminator = *First == '<' ? '>' : *First; +  for (++First; First != End && *First != Terminator; ++First) { +    // String and character literals don't extend past the end of the line. +    if (isVerticalWhitespace(*First)) +      return; +    if (*First != '\\') +      continue; +    // Skip past backslash to the next character. This ensures that the +    // character right after it is skipped as well, which matters if it's +    // the terminator. +    if (++First == End) +      return; +    if (!isWhitespace(*First)) +      continue; +    // Whitespace after the backslash might indicate a line continuation. +    const char *FirstAfterBackslashPastSpace = First; +    skipOverSpaces(FirstAfterBackslashPastSpace, End); +    if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) { +      // Advance the character pointer to the next line for the next +      // iteration. +      First = FirstAfterBackslashPastSpace + NLSize - 1; +    } +  }    if (First != End)      ++First; // Finish off the string.  } -static void skipNewline(const char *&First, const char *End) { -  assert(isVerticalWhitespace(*First)); -  ++First; +// Returns the length of the skipped newline +static unsigned skipNewline(const char *&First, const char *End) {    if (First == End) -    return; +    return 0; +  assert(isVerticalWhitespace(*First)); +  unsigned Len = isEOL(First, End); +  assert(Len && "expected newline"); +  First += Len; +  return Len; +} -  // Check for "\n\r" and "\r\n". -  if (LLVM_UNLIKELY(isVerticalWhitespace(*First) && First[-1] != First[0])) -    ++First; +static bool wasLineContinuation(const char *First, unsigned EOLLen) { +  return *(First - (int)EOLLen - 1) == '\\';  }  static void skipToNewlineRaw(const char *&First, const char *const End) { @@ -211,27 +244,40 @@ static void skipToNewlineRaw(const char *&First, const char *const End) {      if (First == End)        return; -    if (isVerticalWhitespace(*First)) +    unsigned Len = isEOL(First, End); +    if (Len)        return; -    while (!isVerticalWhitespace(*First)) +    do {        if (++First == End)          return; +      Len = isEOL(First, End); +    } while (!Len);      if (First[-1] != '\\')        return; -    ++First; // Keep going... +    First += Len; +    // Keep skipping lines...    }  } -static const char *reverseOverSpaces(const char *First, const char *Last) { +static const char *findLastNonSpace(const char *First, const char *Last) {    assert(First <= Last);    while (First != Last && isHorizontalWhitespace(Last[-1]))      --Last;    return Last;  } +static const char *findFirstTrailingSpace(const char *First, +                                          const char *Last) { +  const char *LastNonSpace = findLastNonSpace(First, Last); +  if (Last == LastNonSpace) +    return Last; +  assert(isHorizontalWhitespace(LastNonSpace[0])); +  return LastNonSpace + 1; +} +  static void skipLineComment(const char *&First, const char *const End) {    assert(First[0] == '/' && First[1] == '/');    First += 2; @@ -276,7 +322,7 @@ static bool isQuoteCppDigitSeparator(const char *const Start,  }  static void skipLine(const char *&First, const char *const End) { -  do { +  for (;;) {      assert(First <= End);      if (First == End)        return; @@ -321,9 +367,10 @@ static void skipLine(const char *&First, const char *const End) {        return;      // Skip over the newline. -    assert(isVerticalWhitespace(*First)); -    skipNewline(First, End); -  } while (First[-2] == '\\'); // Continue past line-continuations. +    unsigned Len = skipNewline(First, End); +    if (!wasLineContinuation(First, Len)) // Continue past line-continuations. +      break; +  }  }  static void skipDirective(StringRef Name, const char *&First, @@ -343,7 +390,8 @@ void Minimizer::printToNewline(const char *&First, const char *const End) {      const char *Last = First;      do {        // Iterate over strings correctly to avoid comments and newlines. -      if (*Last == '"' || *Last == '\'') { +      if (*Last == '"' || *Last == '\'' || +          (*Last == '<' && top() == pp_include)) {          if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))            skipRawString(Last, End);          else @@ -361,7 +409,7 @@ void Minimizer::printToNewline(const char *&First, const char *const End) {        }        // Deal with "//..." and "/*...*/". -      append(First, reverseOverSpaces(First, Last)); +      append(First, findFirstTrailingSpace(First, Last));        First = Last;        if (Last[1] == '/') { @@ -376,13 +424,20 @@ void Minimizer::printToNewline(const char *&First, const char *const End) {      } while (Last != End && !isVerticalWhitespace(*Last));      // Print out the string. -    if (Last == End || Last == First || Last[-1] != '\\') { -      append(First, reverseOverSpaces(First, Last)); +    const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last); +    if (Last == End || LastBeforeTrailingSpace == First || +        LastBeforeTrailingSpace[-1] != '\\') { +      append(First, LastBeforeTrailingSpace); +      First = Last; +      skipNewline(First, End);        return;      } -    // Print up to the backslash, backing up over spaces. -    append(First, reverseOverSpaces(First, Last - 1)); +    // Print up to the backslash, backing up over spaces. Preserve at least one +    // space, as the space matters when tokens are separated by a line +    // continuation. +    append(First, findFirstTrailingSpace( +                      First, LastBeforeTrailingSpace - 1));      First = Last;      skipNewline(First, End); @@ -576,6 +631,59 @@ bool Minimizer::lexAt(const char *&First, const char *const End) {    return false;  } +bool Minimizer::lexModule(const char *&First, const char *const End) { +  IdInfo Id = lexIdentifier(First, End); +  First = Id.Last; +  bool Export = false; +  if (Id.Name == "export") { +    Export = true; +    skipWhitespace(First, End); +    if (!isIdentifierBody(*First)) { +      skipLine(First, End); +      return false; +    } +    Id = lexIdentifier(First, End); +    First = Id.Last; +  } + +  if (Id.Name != "module" && Id.Name != "import") { +    skipLine(First, End); +    return false; +  } + +  skipWhitespace(First, End); + +  // Ignore this as a module directive if the next character can't be part of +  // an import. + +  switch (*First) { +  case ':': +  case '<': +  case '"': +    break; +  default: +    if (!isIdentifierBody(*First)) { +      skipLine(First, End); +      return false; +    } +  } + +  if (Export) { +    makeToken(cxx_export_decl); +    append("export "); +  } + +  if (Id.Name == "module") +    makeToken(cxx_module_decl); +  else +    makeToken(cxx_import_decl); +  append(Id.Name); +  append(" "); +  printToNewline(First, End); +  append("\n"); +  return false; +} +  bool Minimizer::lexDefine(const char *&First, const char *const End) {    makeToken(pp_define);    append("#define "); @@ -612,7 +720,21 @@ bool Minimizer::lexDefine(const char *&First, const char *const End) {  bool Minimizer::lexPragma(const char *&First, const char *const End) {    // #pragma. -  if (!isNextIdentifier("clang", First, End)) { +  skipWhitespace(First, End); +  if (First == End || !isIdentifierHead(*First)) +    return false; + +  IdInfo FoundId = lexIdentifier(First, End); +  First = FoundId.Last; +  if (FoundId.Name == "once") { +    // #pragma once +    skipLine(First, End); +    makeToken(pp_pragma_once); +    append("#pragma once\n"); +    return false; +  } + +  if (FoundId.Name != "clang") {      skipLine(First, End);      return false;    } @@ -663,6 +785,18 @@ bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive,    return false;  } +static bool isStartOfRelevantLine(char First) { +  switch (First) { +  case '#': +  case '@': +  case 'i': +  case 'e': +  case 'm': +    return true; +  } +  return false; +} +  bool Minimizer::lexPPLine(const char *&First, const char *const End) {    assert(First != End); @@ -671,7 +805,7 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) {    if (First == End)      return false; -  if (*First != '#' && *First != '@') { +  if (!isStartOfRelevantLine(*First)) {      skipLine(First, End);      assert(First <= End);      return false; @@ -681,6 +815,9 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) {    if (*First == '@')      return lexAt(First, End); +  if (*First == 'i' || *First == 'e' || *First == 'm') +    return lexModule(First, End); +    // Handle preprocessing directives.    ++First; // Skip over '#'.    skipWhitespace(First, End); @@ -729,7 +866,14 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) {    return lexDefault(Kind, Id.Name, First, End);  } +static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { +  if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' && +      First[2] == '\xbf') +    First += 3; +} +  bool Minimizer::minimizeImpl(const char *First, const char *const End) { +  skipUTF8ByteOrderMark(First, End);    while (First != End)      if (lexPPLine(First, End))        return true; @@ -753,6 +897,54 @@ bool Minimizer::minimize() {    return Error;  } +bool clang::minimize_source_to_dependency_directives::computeSkippedRanges( +    ArrayRef<Token> Input, llvm::SmallVectorImpl<SkippedRange> &Range) { +  struct Directive { +    enum DirectiveKind { +      If,  // if/ifdef/ifndef +      Else // elif,else +    }; +    int Offset; +    DirectiveKind Kind; +  }; +  llvm::SmallVector<Directive, 32> Offsets; +  for (const Token &T : Input) { +    switch (T.K) { +    case pp_if: +    case pp_ifdef: +    case pp_ifndef: +      Offsets.push_back({T.Offset, Directive::If}); +      break; + +    case pp_elif: +    case pp_else: { +      if (Offsets.empty()) +        return true; +      int PreviousOffset = Offsets.back().Offset; +      Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); +      Offsets.push_back({T.Offset, Directive::Else}); +      break; +    } + +    case pp_endif: { +      if (Offsets.empty()) +        return true; +      int PreviousOffset = Offsets.back().Offset; +      Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); +      do { +        Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind; +        if (Kind == Directive::If) +          break; +      } while (!Offsets.empty()); +      break; +    } +    default: +      break; +    } +  } +  return false; +} +  bool clang::minimizeSourceToDependencyDirectives(      StringRef Input, SmallVectorImpl<char> &Output,      SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags,  | 
