diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2013-12-22 00:04:03 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2013-12-22 00:04:03 +0000 | 
| commit | f8af5cf600354830d4ccf59732403f0f073eccb9 (patch) | |
| tree | 2ba0398b4c42ad4f55561327538044fd2c925a8b /lib/Support/CommandLine.cpp | |
| parent | 59d6cff90eecf31cb3dd860c4e786674cfdd42eb (diff) | |
Notes
Diffstat (limited to 'lib/Support/CommandLine.cpp')
| -rw-r--r-- | lib/Support/CommandLine.cpp | 352 | 
1 files changed, 264 insertions, 88 deletions
| diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 18d3db527bec4..44a88d81e3a0a 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -17,12 +17,14 @@  //===----------------------------------------------------------------------===//  #include "llvm/Support/CommandLine.h" +#include "llvm/ADT/ArrayRef.h"  #include "llvm/ADT/OwningPtr.h"  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/ADT/SmallString.h"  #include "llvm/ADT/StringMap.h"  #include "llvm/ADT/Twine.h"  #include "llvm/Config/config.h" +#include "llvm/Support/ConvertUTF.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/Host.h" @@ -58,6 +60,7 @@ TEMPLATE_INSTANTIATION(class opt<char>);  TEMPLATE_INSTANTIATION(class opt<bool>);  } } // end namespace llvm::cl +// Pin the vtables to this file.  void GenericOptionValue::anchor() {}  void OptionValue<boolOrDefault>::anchor() {}  void OptionValue<std::string>::anchor() {} @@ -72,6 +75,7 @@ void parser<double>::anchor() {}  void parser<float>::anchor() {}  void parser<std::string>::anchor() {}  void parser<char>::anchor() {} +void StringSaver::anchor() {}  //===----------------------------------------------------------------------===// @@ -435,39 +439,248 @@ static bool EatsUnboundedNumberOfValues(const Option *O) {           O->getNumOccurrencesFlag() == cl::OneOrMore;  } -/// ParseCStringVector - Break INPUT up wherever one or more -/// whitespace characters are found, and store the resulting tokens in -/// OUTPUT. The tokens stored in OUTPUT are dynamically allocated -/// using strdup(), so it is the caller's responsibility to free() -/// them later. +static bool isWhitespace(char C) { +  return strchr(" \t\n\r\f\v", C); +} + +static bool isQuote(char C) { +  return C == '\"' || C == '\''; +} + +static bool isGNUSpecial(char C) { +  return strchr("\\\"\' ", C); +} + +void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver, +                                SmallVectorImpl<const char *> &NewArgv) { +  SmallString<128> Token; +  for (size_t I = 0, E = Src.size(); I != E; ++I) { +    // Consume runs of whitespace. +    if (Token.empty()) { +      while (I != E && isWhitespace(Src[I])) +        ++I; +      if (I == E) break; +    } + +    // Backslashes can escape backslashes, spaces, and other quotes.  Otherwise +    // they are literal.  This makes it much easier to read Windows file paths. +    if (I + 1 < E && Src[I] == '\\' && isGNUSpecial(Src[I + 1])) { +      ++I;  // Skip the escape. +      Token.push_back(Src[I]); +      continue; +    } + +    // Consume a quoted string. +    if (isQuote(Src[I])) { +      char Quote = Src[I++]; +      while (I != E && Src[I] != Quote) { +        // Backslashes are literal, unless they escape a special character. +        if (Src[I] == '\\' && I + 1 != E && isGNUSpecial(Src[I + 1])) +          ++I; +        Token.push_back(Src[I]); +        ++I; +      } +      if (I == E) break; +      continue; +    } + +    // End the token if this is whitespace. +    if (isWhitespace(Src[I])) { +      if (!Token.empty()) +        NewArgv.push_back(Saver.SaveString(Token.c_str())); +      Token.clear(); +      continue; +    } + +    // This is a normal character.  Append it. +    Token.push_back(Src[I]); +  } + +  // Append the last token after hitting EOF with no whitespace. +  if (!Token.empty()) +    NewArgv.push_back(Saver.SaveString(Token.c_str())); +} + +/// Backslashes are interpreted in a rather complicated way in the Windows-style +/// command line, because backslashes are used both to separate path and to +/// escape double quote. This method consumes runs of backslashes as well as the +/// following double quote if it's escaped. +/// +///  * If an even number of backslashes is followed by a double quote, one +///    backslash is output for every pair of backslashes, and the last double +///    quote remains unconsumed. The double quote will later be interpreted as +///    the start or end of a quoted string in the main loop outside of this +///    function. +/// +///  * If an odd number of backslashes is followed by a double quote, one +///    backslash is output for every pair of backslashes, and a double quote is +///    output for the last pair of backslash-double quote. The double quote is +///    consumed in this case.  /// -static void ParseCStringVector(std::vector<char *> &OutputVector, -                               const char *Input) { -  // Characters which will be treated as token separators: -  StringRef Delims = " \v\f\t\r\n"; - -  StringRef WorkStr(Input); -  while (!WorkStr.empty()) { -    // If the first character is a delimiter, strip them off. -    if (Delims.find(WorkStr[0]) != StringRef::npos) { -      size_t Pos = WorkStr.find_first_not_of(Delims); -      if (Pos == StringRef::npos) Pos = WorkStr.size(); -      WorkStr = WorkStr.substr(Pos); +///  * Otherwise, backslashes are interpreted literally. +static size_t parseBackslash(StringRef Src, size_t I, SmallString<128> &Token) { +  size_t E = Src.size(); +  int BackslashCount = 0; +  // Skip the backslashes. +  do { +    ++I; +    ++BackslashCount; +  } while (I != E && Src[I] == '\\'); + +  bool FollowedByDoubleQuote = (I != E && Src[I] == '"'); +  if (FollowedByDoubleQuote) { +    Token.append(BackslashCount / 2, '\\'); +    if (BackslashCount % 2 == 0) +      return I - 1; +    Token.push_back('"'); +    return I; +  } +  Token.append(BackslashCount, '\\'); +  return I - 1; +} + +void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver, +                                    SmallVectorImpl<const char *> &NewArgv) { +  SmallString<128> Token; + +  // This is a small state machine to consume characters until it reaches the +  // end of the source string. +  enum { INIT, UNQUOTED, QUOTED } State = INIT; +  for (size_t I = 0, E = Src.size(); I != E; ++I) { +    // INIT state indicates that the current input index is at the start of +    // the string or between tokens. +    if (State == INIT) { +      if (isWhitespace(Src[I])) +        continue; +      if (Src[I] == '"') { +        State = QUOTED; +        continue; +      } +      if (Src[I] == '\\') { +        I = parseBackslash(Src, I, Token); +        State = UNQUOTED; +        continue; +      } +      Token.push_back(Src[I]); +      State = UNQUOTED;        continue;      } -    // Find position of first delimiter. -    size_t Pos = WorkStr.find_first_of(Delims); -    if (Pos == StringRef::npos) Pos = WorkStr.size(); +    // UNQUOTED state means that it's reading a token not quoted by double +    // quotes. +    if (State == UNQUOTED) { +      // Whitespace means the end of the token. +      if (isWhitespace(Src[I])) { +        NewArgv.push_back(Saver.SaveString(Token.c_str())); +        Token.clear(); +        State = INIT; +        continue; +      } +      if (Src[I] == '"') { +        State = QUOTED; +        continue; +      } +      if (Src[I] == '\\') { +        I = parseBackslash(Src, I, Token); +        continue; +      } +      Token.push_back(Src[I]); +      continue; +    } -    // Everything from 0 to Pos is the next word to copy. -    char *NewStr = (char*)malloc(Pos+1); -    memcpy(NewStr, WorkStr.data(), Pos); -    NewStr[Pos] = 0; -    OutputVector.push_back(NewStr); +    // QUOTED state means that it's reading a token quoted by double quotes. +    if (State == QUOTED) { +      if (Src[I] == '"') { +        State = UNQUOTED; +        continue; +      } +      if (Src[I] == '\\') { +        I = parseBackslash(Src, I, Token); +        continue; +      } +      Token.push_back(Src[I]); +    } +  } +  // Append the last token after hitting EOF with no whitespace. +  if (!Token.empty()) +    NewArgv.push_back(Saver.SaveString(Token.c_str())); +} -    WorkStr = WorkStr.substr(Pos); +static bool ExpandResponseFile(const char *FName, StringSaver &Saver, +                               TokenizerCallback Tokenizer, +                               SmallVectorImpl<const char *> &NewArgv) { +  OwningPtr<MemoryBuffer> MemBuf; +  if (MemoryBuffer::getFile(FName, MemBuf)) +    return false; +  StringRef Str(MemBuf->getBufferStart(), MemBuf->getBufferSize()); + +  // If we have a UTF-16 byte order mark, convert to UTF-8 for parsing. +  ArrayRef<char> BufRef(MemBuf->getBufferStart(), MemBuf->getBufferEnd()); +  std::string UTF8Buf; +  if (hasUTF16ByteOrderMark(BufRef)) { +    if (!convertUTF16ToUTF8String(BufRef, UTF8Buf)) +      return false; +    Str = StringRef(UTF8Buf);    } + +  // Tokenize the contents into NewArgv. +  Tokenizer(Str, Saver, NewArgv); + +  return true; +} + +/// \brief Expand response files on a command line recursively using the given +/// StringSaver and tokenization strategy. +bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, +                             SmallVectorImpl<const char *> &Argv) { +  unsigned RspFiles = 0; +  bool AllExpanded = false; + +  // Don't cache Argv.size() because it can change. +  for (unsigned I = 0; I != Argv.size(); ) { +    const char *Arg = Argv[I]; +    if (Arg[0] != '@') { +      ++I; +      continue; +    } + +    // If we have too many response files, leave some unexpanded.  This avoids +    // crashing on self-referential response files. +    if (RspFiles++ > 20) +      return false; + +    // Replace this response file argument with the tokenization of its +    // contents.  Nested response files are expanded in subsequent iterations. +    // FIXME: If a nested response file uses a relative path, is it relative to +    // the cwd of the process or the response file? +    SmallVector<const char *, 0> ExpandedArgv; +    if (!ExpandResponseFile(Arg + 1, Saver, Tokenizer, ExpandedArgv)) { +      AllExpanded = false; +      continue; +    } +    Argv.erase(Argv.begin() + I); +    Argv.insert(Argv.begin() + I, ExpandedArgv.begin(), ExpandedArgv.end()); +  } +  return AllExpanded; +} + +namespace { +  class StrDupSaver : public StringSaver { +    std::vector<char*> Dups; +  public: +    ~StrDupSaver() { +      for (std::vector<char *>::iterator I = Dups.begin(), E = Dups.end(); +           I != E; ++I) { +        char *Dup = *I; +        free(Dup); +      } +    } +    const char *SaveString(const char *Str) LLVM_OVERRIDE { +      char *Dup = strdup(Str); +      Dups.push_back(Dup); +      return Dup; +    } +  };  }  /// ParseEnvironmentOptions - An alternative entry point to the @@ -488,56 +701,15 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,    // Get program's "name", which we wouldn't know without the caller    // telling us. -  std::vector<char*> newArgv; -  newArgv.push_back(strdup(progName)); +  SmallVector<const char *, 20> newArgv; +  StrDupSaver Saver; +  newArgv.push_back(Saver.SaveString(progName));    // Parse the value of the environment variable into a "command line"    // and hand it off to ParseCommandLineOptions(). -  ParseCStringVector(newArgv, envValue); +  TokenizeGNUCommandLine(envValue, Saver, newArgv);    int newArgc = static_cast<int>(newArgv.size());    ParseCommandLineOptions(newArgc, &newArgv[0], Overview); - -  // Free all the strdup()ed strings. -  for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end(); -       i != e; ++i) -    free(*i); -} - - -/// ExpandResponseFiles - Copy the contents of argv into newArgv, -/// substituting the contents of the response files for the arguments -/// of type @file. -static void ExpandResponseFiles(unsigned argc, const char*const* argv, -                                std::vector<char*>& newArgv) { -  for (unsigned i = 1; i != argc; ++i) { -    const char *arg = argv[i]; - -    if (arg[0] == '@') { -      sys::PathWithStatus respFile(++arg); - -      // Check that the response file is not empty (mmap'ing empty -      // files can be problematic). -      const sys::FileStatus *FileStat = respFile.getFileStatus(); -      if (FileStat && FileStat->getSize() != 0) { - -        // If we could open the file, parse its contents, otherwise -        // pass the @file option verbatim. - -        // TODO: we should also support recursive loading of response files, -        // since this is how gcc behaves. (From their man page: "The file may -        // itself contain additional @file options; any such options will be -        // processed recursively.") - -        // Mmap the response file into memory. -        OwningPtr<MemoryBuffer> respFilePtr; -        if (!MemoryBuffer::getFile(respFile.c_str(), respFilePtr)) { -          ParseCStringVector(newArgv, respFilePtr->getBufferStart()); -          continue; -        } -      } -    } -    newArgv.push_back(strdup(arg)); -  }  }  void cl::ParseCommandLineOptions(int argc, const char * const *argv, @@ -552,9 +724,11 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv,           "No options specified!");    // Expand response files. -  std::vector<char*> newArgv; -  newArgv.push_back(strdup(argv[0])); -  ExpandResponseFiles(argc, argv, newArgv); +  SmallVector<const char *, 20> newArgv; +  for (int i = 0; i != argc; ++i) +    newArgv.push_back(argv[i]); +  StrDupSaver Saver; +  ExpandResponseFiles(Saver, TokenizeGNUCommandLine, newArgv);    argv = &newArgv[0];    argc = static_cast<int>(newArgv.size()); @@ -848,12 +1022,6 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv,    PositionalOpts.clear();    MoreHelp->clear(); -  // Free the memory allocated by ExpandResponseFiles. -  // Free all the strdup()ed strings. -  for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end(); -       i != e; ++i) -    free(*i); -    // If we had an error processing our arguments, don't let the program execute    if (ErrorParsing) exit(1);  } @@ -913,11 +1081,20 @@ size_t alias::getOptionWidth() const {    return std::strlen(ArgStr)+6;  } +static void printHelpStr(StringRef HelpStr, size_t Indent, +                         size_t FirstLineIndentedBy) { +  std::pair<StringRef, StringRef> Split = HelpStr.split('\n'); +  outs().indent(Indent - FirstLineIndentedBy) << " - " << Split.first << "\n"; +  while (!Split.second.empty()) { +    Split = Split.second.split('\n'); +    outs().indent(Indent) << Split.first << "\n"; +  } +} +  // Print out the option for the alias.  void alias::printOptionInfo(size_t GlobalWidth) const { -  size_t L = std::strlen(ArgStr);    outs() << "  -" << ArgStr; -  outs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n"; +  printHelpStr(HelpStr, GlobalWidth, std::strlen(ArgStr) + 6);  }  //===----------------------------------------------------------------------===// @@ -946,7 +1123,7 @@ void basic_parser_impl::printOptionInfo(const Option &O,    if (const char *ValName = getValueName())      outs() << "=<" << getValueStr(O, ValName) << '>'; -  outs().indent(GlobalWidth-getOptionWidth(O)) << " - " << O.HelpStr << '\n'; +  printHelpStr(O.HelpStr, GlobalWidth, getOptionWidth(O));  }  void basic_parser_impl::printOptionName(const Option &O, @@ -1087,9 +1264,8 @@ size_t generic_parser_base::getOptionWidth(const Option &O) const {  void generic_parser_base::printOptionInfo(const Option &O,                                            size_t GlobalWidth) const {    if (O.hasArgStr()) { -    size_t L = std::strlen(O.ArgStr);      outs() << "  -" << O.ArgStr; -    outs().indent(GlobalWidth-L-6) << " - " << O.HelpStr << '\n'; +    printHelpStr(O.HelpStr, GlobalWidth, std::strlen(O.ArgStr) + 6);      for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {        size_t NumSpaces = GlobalWidth-strlen(getOption(i))-8; @@ -1100,9 +1276,9 @@ void generic_parser_base::printOptionInfo(const Option &O,      if (O.HelpStr[0])        outs() << "  " << O.HelpStr << '\n';      for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { -      size_t L = std::strlen(getOption(i)); -      outs() << "    -" << getOption(i); -      outs().indent(GlobalWidth-L-8) << " - " << getDescription(i) << '\n'; +      const char *Option = getOption(i); +      outs() << "    -" << Option; +      printHelpStr(getDescription(i), GlobalWidth, std::strlen(Option) + 8);      }    }  } | 
