diff options
Diffstat (limited to 'clang/lib/AST/ScanfFormatString.cpp')
| -rw-r--r-- | clang/lib/AST/ScanfFormatString.cpp | 567 | 
1 files changed, 567 insertions, 0 deletions
diff --git a/clang/lib/AST/ScanfFormatString.cpp b/clang/lib/AST/ScanfFormatString.cpp new file mode 100644 index 000000000000..8d763f28e57f --- /dev/null +++ b/clang/lib/AST/ScanfFormatString.cpp @@ -0,0 +1,567 @@ +//= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Handling of format string in scanf and friends.  The structure of format +// strings for fscanf() are described in C99 7.19.6.2. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/FormatString.h" +#include "FormatStringParsing.h" +#include "clang/Basic/TargetInfo.h" + +using clang::analyze_format_string::ArgType; +using clang::analyze_format_string::FormatStringHandler; +using clang::analyze_format_string::LengthModifier; +using clang::analyze_format_string::OptionalAmount; +using clang::analyze_format_string::ConversionSpecifier; +using clang::analyze_scanf::ScanfConversionSpecifier; +using clang::analyze_scanf::ScanfSpecifier; +using clang::UpdateOnReturn; +using namespace clang; + +typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> +        ScanfSpecifierResult; + +static bool ParseScanList(FormatStringHandler &H, +                          ScanfConversionSpecifier &CS, +                          const char *&Beg, const char *E) { +  const char *I = Beg; +  const char *start = I - 1; +  UpdateOnReturn <const char*> UpdateBeg(Beg, I); + +  // No more characters? +  if (I == E) { +    H.HandleIncompleteScanList(start, I); +    return true; +  } + +  // Special case: ']' is the first character. +  if (*I == ']') { +    if (++I == E) { +      H.HandleIncompleteScanList(start, I - 1); +      return true; +    } +  } + +  // Special case: "^]" are the first characters. +  if (I + 1 != E && I[0] == '^' && I[1] == ']') { +    I += 2; +    if (I == E) { +      H.HandleIncompleteScanList(start, I - 1); +      return true; +    } +  } + +  // Look for a ']' character which denotes the end of the scan list. +  while (*I != ']') { +    if (++I == E) { +      H.HandleIncompleteScanList(start, I - 1); +      return true; +    } +  } + +  CS.setEndScanList(I); +  return false; +} + +// FIXME: Much of this is copy-paste from ParsePrintfSpecifier. +// We can possibly refactor. +static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, +                                                const char *&Beg, +                                                const char *E, +                                                unsigned &argIndex, +                                                const LangOptions &LO, +                                                const TargetInfo &Target) { +  using namespace clang::analyze_format_string; +  using namespace clang::analyze_scanf; +  const char *I = Beg; +  const char *Start = nullptr; +  UpdateOnReturn <const char*> UpdateBeg(Beg, I); + +    // Look for a '%' character that indicates the start of a format specifier. +  for ( ; I != E ; ++I) { +    char c = *I; +    if (c == '\0') { +        // Detect spurious null characters, which are likely errors. +      H.HandleNullChar(I); +      return true; +    } +    if (c == '%') { +      Start = I++;  // Record the start of the format specifier. +      break; +    } +  } + +    // No format specifier found? +  if (!Start) +    return false; + +  if (I == E) { +      // No more characters left? +    H.HandleIncompleteSpecifier(Start, E - Start); +    return true; +  } + +  ScanfSpecifier FS; +  if (ParseArgPosition(H, FS, Start, I, E)) +    return true; + +  if (I == E) { +      // No more characters left? +    H.HandleIncompleteSpecifier(Start, E - Start); +    return true; +  } + +  // Look for '*' flag if it is present. +  if (*I == '*') { +    FS.setSuppressAssignment(I); +    if (++I == E) { +      H.HandleIncompleteSpecifier(Start, E - Start); +      return true; +    } +  } + +  // Look for the field width (if any).  Unlike printf, this is either +  // a fixed integer or isn't present. +  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); +  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { +    assert(Amt.getHowSpecified() == OptionalAmount::Constant); +    FS.setFieldWidth(Amt); + +    if (I == E) { +      // No more characters left? +      H.HandleIncompleteSpecifier(Start, E - Start); +      return true; +    } +  } + +  // Look for the length modifier. +  if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) { +      // No more characters left? +    H.HandleIncompleteSpecifier(Start, E - Start); +    return true; +  } + +  // Detect spurious null characters, which are likely errors. +  if (*I == '\0') { +    H.HandleNullChar(I); +    return true; +  } + +  // Finally, look for the conversion specifier. +  const char *conversionPosition = I++; +  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; +  switch (*conversionPosition) { +    default: +      break; +    case '%': k = ConversionSpecifier::PercentArg;   break; +    case 'A': k = ConversionSpecifier::AArg; break; +    case 'E': k = ConversionSpecifier::EArg; break; +    case 'F': k = ConversionSpecifier::FArg; break; +    case 'G': k = ConversionSpecifier::GArg; break; +    case 'X': k = ConversionSpecifier::XArg; break; +    case 'a': k = ConversionSpecifier::aArg; break; +    case 'd': k = ConversionSpecifier::dArg; break; +    case 'e': k = ConversionSpecifier::eArg; break; +    case 'f': k = ConversionSpecifier::fArg; break; +    case 'g': k = ConversionSpecifier::gArg; break; +    case 'i': k = ConversionSpecifier::iArg; break; +    case 'n': k = ConversionSpecifier::nArg; break; +    case 'c': k = ConversionSpecifier::cArg; break; +    case 'C': k = ConversionSpecifier::CArg; break; +    case 'S': k = ConversionSpecifier::SArg; break; +    case '[': k = ConversionSpecifier::ScanListArg; break; +    case 'u': k = ConversionSpecifier::uArg; break; +    case 'x': k = ConversionSpecifier::xArg; break; +    case 'o': k = ConversionSpecifier::oArg; break; +    case 's': k = ConversionSpecifier::sArg; break; +    case 'p': k = ConversionSpecifier::pArg; break; +    // Apple extensions +      // Apple-specific +    case 'D': +      if (Target.getTriple().isOSDarwin()) +        k = ConversionSpecifier::DArg; +      break; +    case 'O': +      if (Target.getTriple().isOSDarwin()) +        k = ConversionSpecifier::OArg; +      break; +    case 'U': +      if (Target.getTriple().isOSDarwin()) +        k = ConversionSpecifier::UArg; +      break; +  } +  ScanfConversionSpecifier CS(conversionPosition, k); +  if (k == ScanfConversionSpecifier::ScanListArg) { +    if (ParseScanList(H, CS, I, E)) +      return true; +  } +  FS.setConversionSpecifier(CS); +  if (CS.consumesDataArgument() && !FS.getSuppressAssignment() +      && !FS.usesPositionalArg()) +    FS.setArgIndex(argIndex++); + +  // FIXME: '%' and '*' doesn't make sense.  Issue a warning. +  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. + +  if (k == ScanfConversionSpecifier::InvalidSpecifier) { +    unsigned Len = I - Beg; +    if (ParseUTF8InvalidSpecifier(Beg, E, Len)) { +      CS.setEndScanList(Beg + Len); +      FS.setConversionSpecifier(CS); +    } +    // Assume the conversion takes one argument. +    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len); +  } +  return ScanfSpecifierResult(Start, FS); +} + +ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const { +  const ScanfConversionSpecifier &CS = getConversionSpecifier(); + +  if (!CS.consumesDataArgument()) +    return ArgType::Invalid(); + +  switch(CS.getKind()) { +    // Signed int. +    case ConversionSpecifier::dArg: +    case ConversionSpecifier::DArg: +    case ConversionSpecifier::iArg: +      switch (LM.getKind()) { +        case LengthModifier::None: +          return ArgType::PtrTo(Ctx.IntTy); +        case LengthModifier::AsChar: +          return ArgType::PtrTo(ArgType::AnyCharTy); +        case LengthModifier::AsShort: +          return ArgType::PtrTo(Ctx.ShortTy); +        case LengthModifier::AsLong: +          return ArgType::PtrTo(Ctx.LongTy); +        case LengthModifier::AsLongLong: +        case LengthModifier::AsQuad: +          return ArgType::PtrTo(Ctx.LongLongTy); +        case LengthModifier::AsInt64: +          return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); +        case LengthModifier::AsIntMax: +          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); +        case LengthModifier::AsSizeT: +          return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t")); +        case LengthModifier::AsPtrDiff: +          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); +        case LengthModifier::AsLongDouble: +          // GNU extension. +          return ArgType::PtrTo(Ctx.LongLongTy); +        case LengthModifier::AsAllocate: +        case LengthModifier::AsMAllocate: +        case LengthModifier::AsInt32: +        case LengthModifier::AsInt3264: +        case LengthModifier::AsWide: +        case LengthModifier::AsShortLong: +          return ArgType::Invalid(); +      } +      llvm_unreachable("Unsupported LengthModifier Type"); + +    // Unsigned int. +    case ConversionSpecifier::oArg: +    case ConversionSpecifier::OArg: +    case ConversionSpecifier::uArg: +    case ConversionSpecifier::UArg: +    case ConversionSpecifier::xArg: +    case ConversionSpecifier::XArg: +      switch (LM.getKind()) { +        case LengthModifier::None: +          return ArgType::PtrTo(Ctx.UnsignedIntTy); +        case LengthModifier::AsChar: +          return ArgType::PtrTo(Ctx.UnsignedCharTy); +        case LengthModifier::AsShort: +          return ArgType::PtrTo(Ctx.UnsignedShortTy); +        case LengthModifier::AsLong: +          return ArgType::PtrTo(Ctx.UnsignedLongTy); +        case LengthModifier::AsLongLong: +        case LengthModifier::AsQuad: +          return ArgType::PtrTo(Ctx.UnsignedLongLongTy); +        case LengthModifier::AsInt64: +          return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64")); +        case LengthModifier::AsIntMax: +          return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t")); +        case LengthModifier::AsSizeT: +          return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t")); +        case LengthModifier::AsPtrDiff: +          return ArgType::PtrTo( +              ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t")); +        case LengthModifier::AsLongDouble: +          // GNU extension. +          return ArgType::PtrTo(Ctx.UnsignedLongLongTy); +        case LengthModifier::AsAllocate: +        case LengthModifier::AsMAllocate: +        case LengthModifier::AsInt32: +        case LengthModifier::AsInt3264: +        case LengthModifier::AsWide: +        case LengthModifier::AsShortLong: +          return ArgType::Invalid(); +      } +      llvm_unreachable("Unsupported LengthModifier Type"); + +    // Float. +    case ConversionSpecifier::aArg: +    case ConversionSpecifier::AArg: +    case ConversionSpecifier::eArg: +    case ConversionSpecifier::EArg: +    case ConversionSpecifier::fArg: +    case ConversionSpecifier::FArg: +    case ConversionSpecifier::gArg: +    case ConversionSpecifier::GArg: +      switch (LM.getKind()) { +        case LengthModifier::None: +          return ArgType::PtrTo(Ctx.FloatTy); +        case LengthModifier::AsLong: +          return ArgType::PtrTo(Ctx.DoubleTy); +        case LengthModifier::AsLongDouble: +          return ArgType::PtrTo(Ctx.LongDoubleTy); +        default: +          return ArgType::Invalid(); +      } + +    // Char, string and scanlist. +    case ConversionSpecifier::cArg: +    case ConversionSpecifier::sArg: +    case ConversionSpecifier::ScanListArg: +      switch (LM.getKind()) { +        case LengthModifier::None: +          return ArgType::PtrTo(ArgType::AnyCharTy); +        case LengthModifier::AsLong: +        case LengthModifier::AsWide: +          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); +        case LengthModifier::AsAllocate: +        case LengthModifier::AsMAllocate: +          return ArgType::PtrTo(ArgType::CStrTy); +        case LengthModifier::AsShort: +          if (Ctx.getTargetInfo().getTriple().isOSMSVCRT()) +            return ArgType::PtrTo(ArgType::AnyCharTy); +          LLVM_FALLTHROUGH; +        default: +          return ArgType::Invalid(); +      } +    case ConversionSpecifier::CArg: +    case ConversionSpecifier::SArg: +      // FIXME: Mac OS X specific? +      switch (LM.getKind()) { +        case LengthModifier::None: +        case LengthModifier::AsWide: +          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); +        case LengthModifier::AsAllocate: +        case LengthModifier::AsMAllocate: +          return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *")); +        case LengthModifier::AsShort: +          if (Ctx.getTargetInfo().getTriple().isOSMSVCRT()) +            return ArgType::PtrTo(ArgType::AnyCharTy); +          LLVM_FALLTHROUGH; +        default: +          return ArgType::Invalid(); +      } + +    // Pointer. +    case ConversionSpecifier::pArg: +      return ArgType::PtrTo(ArgType::CPointerTy); + +    // Write-back. +    case ConversionSpecifier::nArg: +      switch (LM.getKind()) { +        case LengthModifier::None: +          return ArgType::PtrTo(Ctx.IntTy); +        case LengthModifier::AsChar: +          return ArgType::PtrTo(Ctx.SignedCharTy); +        case LengthModifier::AsShort: +          return ArgType::PtrTo(Ctx.ShortTy); +        case LengthModifier::AsLong: +          return ArgType::PtrTo(Ctx.LongTy); +        case LengthModifier::AsLongLong: +        case LengthModifier::AsQuad: +          return ArgType::PtrTo(Ctx.LongLongTy); +        case LengthModifier::AsInt64: +          return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); +        case LengthModifier::AsIntMax: +          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); +        case LengthModifier::AsSizeT: +          return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t")); +        case LengthModifier::AsPtrDiff: +          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); +        case LengthModifier::AsLongDouble: +          return ArgType(); // FIXME: Is this a known extension? +        case LengthModifier::AsAllocate: +        case LengthModifier::AsMAllocate: +        case LengthModifier::AsInt32: +        case LengthModifier::AsInt3264: +        case LengthModifier::AsWide: +        case LengthModifier::AsShortLong: +          return ArgType::Invalid(); +        } + +    default: +      break; +  } + +  return ArgType(); +} + +bool ScanfSpecifier::fixType(QualType QT, QualType RawQT, +                             const LangOptions &LangOpt, +                             ASTContext &Ctx) { + +  // %n is different from other conversion specifiers; don't try to fix it. +  if (CS.getKind() == ConversionSpecifier::nArg) +    return false; + +  if (!QT->isPointerType()) +    return false; + +  QualType PT = QT->getPointeeType(); + +  // If it's an enum, get its underlying type. +  if (const EnumType *ETy = PT->getAs<EnumType>()) { +    // Don't try to fix incomplete enums. +    if (!ETy->getDecl()->isComplete()) +      return false; +    PT = ETy->getDecl()->getIntegerType(); +  } + +  const BuiltinType *BT = PT->getAs<BuiltinType>(); +  if (!BT) +    return false; + +  // Pointer to a character. +  if (PT->isAnyCharacterType()) { +    CS.setKind(ConversionSpecifier::sArg); +    if (PT->isWideCharType()) +      LM.setKind(LengthModifier::AsWideChar); +    else +      LM.setKind(LengthModifier::None); + +    // If we know the target array length, we can use it as a field width. +    if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) { +      if (CAT->getSizeModifier() == ArrayType::Normal) +        FieldWidth = OptionalAmount(OptionalAmount::Constant, +                                    CAT->getSize().getZExtValue() - 1, +                                    "", 0, false); + +    } +    return true; +  } + +  // Figure out the length modifier. +  switch (BT->getKind()) { +    // no modifier +    case BuiltinType::UInt: +    case BuiltinType::Int: +    case BuiltinType::Float: +      LM.setKind(LengthModifier::None); +      break; + +    // hh +    case BuiltinType::Char_U: +    case BuiltinType::UChar: +    case BuiltinType::Char_S: +    case BuiltinType::SChar: +      LM.setKind(LengthModifier::AsChar); +      break; + +    // h +    case BuiltinType::Short: +    case BuiltinType::UShort: +      LM.setKind(LengthModifier::AsShort); +      break; + +    // l +    case BuiltinType::Long: +    case BuiltinType::ULong: +    case BuiltinType::Double: +      LM.setKind(LengthModifier::AsLong); +      break; + +    // ll +    case BuiltinType::LongLong: +    case BuiltinType::ULongLong: +      LM.setKind(LengthModifier::AsLongLong); +      break; + +    // L +    case BuiltinType::LongDouble: +      LM.setKind(LengthModifier::AsLongDouble); +      break; + +    // Don't know. +    default: +      return false; +  } + +  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. +  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11)) +    namedTypeToLengthModifier(PT, LM); + +  // If fixing the length modifier was enough, we are done. +  if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) { +    const analyze_scanf::ArgType &AT = getArgType(Ctx); +    if (AT.isValid() && AT.matchesType(Ctx, QT)) +      return true; +  } + +  // Figure out the conversion specifier. +  if (PT->isRealFloatingType()) +    CS.setKind(ConversionSpecifier::fArg); +  else if (PT->isSignedIntegerType()) +    CS.setKind(ConversionSpecifier::dArg); +  else if (PT->isUnsignedIntegerType()) +    CS.setKind(ConversionSpecifier::uArg); +  else +    llvm_unreachable("Unexpected type"); + +  return true; +} + +void ScanfSpecifier::toString(raw_ostream &os) const { +  os << "%"; + +  if (usesPositionalArg()) +    os << getPositionalArgIndex() << "$"; +  if (SuppressAssignment) +    os << "*"; + +  FieldWidth.toString(os); +  os << LM.toString(); +  os << CS.toString(); +} + +bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, +                                                    const char *I, +                                                    const char *E, +                                                    const LangOptions &LO, +                                                    const TargetInfo &Target) { + +  unsigned argIndex = 0; + +  // Keep looking for a format specifier until we have exhausted the string. +  while (I != E) { +    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex, +                                                          LO, Target); +    // Did a fail-stop error of any kind occur when parsing the specifier? +    // If so, don't do any more processing. +    if (FSR.shouldStop()) +      return true; +      // Did we exhaust the string or encounter an error that +      // we can recover from? +    if (!FSR.hasValue()) +      continue; +      // We have a format specifier.  Pass it to the callback. +    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), +                                I - FSR.getStart())) { +      return true; +    } +  } +  assert(I == E && "Format string not exhausted"); +  return false; +}  | 
