diff options
Diffstat (limited to 'lldb/source/DataFormatters/StringPrinter.cpp')
| -rw-r--r-- | lldb/source/DataFormatters/StringPrinter.cpp | 651 | 
1 files changed, 651 insertions, 0 deletions
diff --git a/lldb/source/DataFormatters/StringPrinter.cpp b/lldb/source/DataFormatters/StringPrinter.cpp new file mode 100644 index 0000000000000..27d649bfc370e --- /dev/null +++ b/lldb/source/DataFormatters/StringPrinter.cpp @@ -0,0 +1,651 @@ +//===-- StringPrinter.cpp ----------------------------------------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/DataFormatters/StringPrinter.h" + +#include "lldb/Core/Debugger.h" +#include "lldb/Core/ValueObject.h" +#include "lldb/Target/Language.h" +#include "lldb/Target/Process.h" +#include "lldb/Target/Target.h" +#include "lldb/Utility/Status.h" + +#include "llvm/Support/ConvertUTF.h" + +#include <ctype.h> +#include <locale> +#include <memory> + +using namespace lldb; +using namespace lldb_private; +using namespace lldb_private::formatters; + +// we define this for all values of type but only implement it for those we +// care about that's good because we get linker errors for any unsupported type +template <lldb_private::formatters::StringPrinter::StringElementType type> +static StringPrinter::StringPrinterBufferPointer<> +GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next); + +// mimic isprint() for Unicode codepoints +static bool isprint(char32_t codepoint) { +  if (codepoint <= 0x1F || codepoint == 0x7F) // C0 +  { +    return false; +  } +  if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 +  { +    return false; +  } +  if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators +  { +    return false; +  } +  if (codepoint == 0x200E || codepoint == 0x200F || +      (codepoint >= 0x202A && +       codepoint <= 0x202E)) // bidirectional text control +  { +    return false; +  } +  if (codepoint >= 0xFFF9 && +      codepoint <= 0xFFFF) // interlinears and generally specials +  { +    return false; +  } +  return true; +} + +template <> +StringPrinter::StringPrinterBufferPointer<> +GetPrintableImpl<StringPrinter::StringElementType::ASCII>(uint8_t *buffer, +                                                          uint8_t *buffer_end, +                                                          uint8_t *&next) { +  StringPrinter::StringPrinterBufferPointer<> retval = {nullptr}; + +  switch (*buffer) { +  case 0: +    retval = {"\\0", 2}; +    break; +  case '\a': +    retval = {"\\a", 2}; +    break; +  case '\b': +    retval = {"\\b", 2}; +    break; +  case '\f': +    retval = {"\\f", 2}; +    break; +  case '\n': +    retval = {"\\n", 2}; +    break; +  case '\r': +    retval = {"\\r", 2}; +    break; +  case '\t': +    retval = {"\\t", 2}; +    break; +  case '\v': +    retval = {"\\v", 2}; +    break; +  case '\"': +    retval = {"\\\"", 2}; +    break; +  case '\\': +    retval = {"\\\\", 2}; +    break; +  default: +    if (isprint(*buffer)) +      retval = {buffer, 1}; +    else { +      uint8_t *data = new uint8_t[5]; +      sprintf((char *)data, "\\x%02x", *buffer); +      retval = {data, 4, [](const uint8_t *c) { delete[] c; }}; +      break; +    } +  } + +  next = buffer + 1; +  return retval; +} + +static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1) { +  return (c0 - 192) * 64 + (c1 - 128); +} +static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1, +                                       unsigned char c2) { +  return (c0 - 224) * 4096 + (c1 - 128) * 64 + (c2 - 128); +} +static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1, +                                       unsigned char c2, unsigned char c3) { +  return (c0 - 240) * 262144 + (c2 - 128) * 4096 + (c2 - 128) * 64 + (c3 - 128); +} + +template <> +StringPrinter::StringPrinterBufferPointer<> +GetPrintableImpl<StringPrinter::StringElementType::UTF8>(uint8_t *buffer, +                                                         uint8_t *buffer_end, +                                                         uint8_t *&next) { +  StringPrinter::StringPrinterBufferPointer<> retval{nullptr}; + +  unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer); + +  if (1u + std::distance(buffer, buffer_end) < utf8_encoded_len) { +    // I don't have enough bytes - print whatever I have left +    retval = {buffer, static_cast<size_t>(1 + buffer_end - buffer)}; +    next = buffer_end + 1; +    return retval; +  } + +  char32_t codepoint = 0; +  switch (utf8_encoded_len) { +  case 1: +    // this is just an ASCII byte - ask ASCII +    return GetPrintableImpl<StringPrinter::StringElementType::ASCII>( +        buffer, buffer_end, next); +  case 2: +    codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, +                                       (unsigned char)*(buffer + 1)); +    break; +  case 3: +    codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, +                                       (unsigned char)*(buffer + 1), +                                       (unsigned char)*(buffer + 2)); +    break; +  case 4: +    codepoint = ConvertUTF8ToCodePoint( +        (unsigned char)*buffer, (unsigned char)*(buffer + 1), +        (unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3)); +    break; +  default: +    // this is probably some bogus non-character thing just print it as-is and +    // hope to sync up again soon +    retval = {buffer, 1}; +    next = buffer + 1; +    return retval; +  } + +  if (codepoint) { +    switch (codepoint) { +    case 0: +      retval = {"\\0", 2}; +      break; +    case '\a': +      retval = {"\\a", 2}; +      break; +    case '\b': +      retval = {"\\b", 2}; +      break; +    case '\f': +      retval = {"\\f", 2}; +      break; +    case '\n': +      retval = {"\\n", 2}; +      break; +    case '\r': +      retval = {"\\r", 2}; +      break; +    case '\t': +      retval = {"\\t", 2}; +      break; +    case '\v': +      retval = {"\\v", 2}; +      break; +    case '\"': +      retval = {"\\\"", 2}; +      break; +    case '\\': +      retval = {"\\\\", 2}; +      break; +    default: +      if (isprint(codepoint)) +        retval = {buffer, utf8_encoded_len}; +      else { +        uint8_t *data = new uint8_t[11]; +        sprintf((char *)data, "\\U%08x", (unsigned)codepoint); +        retval = {data, 10, [](const uint8_t *c) { delete[] c; }}; +        break; +      } +    } + +    next = buffer + utf8_encoded_len; +    return retval; +  } + +  // this should not happen - but just in case.. try to resync at some point +  retval = {buffer, 1}; +  next = buffer + 1; +  return retval; +} + +// Given a sequence of bytes, this function returns: a sequence of bytes to +// actually print out + a length the following unscanned position of the buffer +// is in next +static StringPrinter::StringPrinterBufferPointer<> +GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer, +             uint8_t *buffer_end, uint8_t *&next) { +  if (!buffer) +    return {nullptr}; + +  switch (type) { +  case StringPrinter::StringElementType::ASCII: +    return GetPrintableImpl<StringPrinter::StringElementType::ASCII>( +        buffer, buffer_end, next); +  case StringPrinter::StringElementType::UTF8: +    return GetPrintableImpl<StringPrinter::StringElementType::UTF8>( +        buffer, buffer_end, next); +  default: +    return {nullptr}; +  } +} + +StringPrinter::EscapingHelper +StringPrinter::GetDefaultEscapingHelper(GetPrintableElementType elem_type) { +  switch (elem_type) { +  case GetPrintableElementType::UTF8: +    return [](uint8_t *buffer, uint8_t *buffer_end, +              uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer<> { +      return GetPrintable(StringPrinter::StringElementType::UTF8, buffer, +                          buffer_end, next); +    }; +  case GetPrintableElementType::ASCII: +    return [](uint8_t *buffer, uint8_t *buffer_end, +              uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer<> { +      return GetPrintable(StringPrinter::StringElementType::ASCII, buffer, +                          buffer_end, next); +    }; +  } +  llvm_unreachable("bad element type"); +} + +// use this call if you already have an LLDB-side buffer for the data +template <typename SourceDataType> +static bool DumpUTFBufferToStream( +    llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, +                                              const SourceDataType *, +                                              llvm::UTF8 **, llvm::UTF8 *, +                                              llvm::ConversionFlags), +    const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) { +  Stream &stream(*dump_options.GetStream()); +  if (dump_options.GetPrefixToken() != nullptr) +    stream.Printf("%s", dump_options.GetPrefixToken()); +  if (dump_options.GetQuote() != 0) +    stream.Printf("%c", dump_options.GetQuote()); +  auto data(dump_options.GetData()); +  auto source_size(dump_options.GetSourceSize()); +  if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) { +    const int bufferSPSize = data.GetByteSize(); +    if (dump_options.GetSourceSize() == 0) { +      const int origin_encoding = 8 * sizeof(SourceDataType); +      source_size = bufferSPSize / (origin_encoding / 4); +    } + +    const SourceDataType *data_ptr = +        (const SourceDataType *)data.GetDataStart(); +    const SourceDataType *data_end_ptr = data_ptr + source_size; + +    const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator(); + +    if (zero_is_terminator) { +      while (data_ptr < data_end_ptr) { +        if (!*data_ptr) { +          data_end_ptr = data_ptr; +          break; +        } +        data_ptr++; +      } + +      data_ptr = (const SourceDataType *)data.GetDataStart(); +    } + +    lldb::DataBufferSP utf8_data_buffer_sp; +    llvm::UTF8 *utf8_data_ptr = nullptr; +    llvm::UTF8 *utf8_data_end_ptr = nullptr; + +    if (ConvertFunction) { +      utf8_data_buffer_sp = +          std::make_shared<DataBufferHeap>(4 * bufferSPSize, 0); +      utf8_data_ptr = (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); +      utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); +      ConvertFunction(&data_ptr, data_end_ptr, &utf8_data_ptr, +                      utf8_data_end_ptr, llvm::lenientConversion); +      if (!zero_is_terminator) +        utf8_data_end_ptr = utf8_data_ptr; +      // needed because the ConvertFunction will change the value of the +      // data_ptr. +      utf8_data_ptr = +          (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); +    } else { +      // just copy the pointers - the cast is necessary to make the compiler +      // happy but this should only happen if we are reading UTF8 data +      utf8_data_ptr = const_cast<llvm::UTF8 *>( +          reinterpret_cast<const llvm::UTF8 *>(data_ptr)); +      utf8_data_end_ptr = const_cast<llvm::UTF8 *>( +          reinterpret_cast<const llvm::UTF8 *>(data_end_ptr)); +    } + +    const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); +    lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; +    if (escape_non_printables) { +      if (Language *language = Language::FindPlugin(dump_options.GetLanguage())) +        escaping_callback = language->GetStringPrinterEscapingHelper( +            lldb_private::formatters::StringPrinter::GetPrintableElementType:: +                UTF8); +      else +        escaping_callback = +            lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper( +                lldb_private::formatters::StringPrinter:: +                    GetPrintableElementType::UTF8); +    } + +    // since we tend to accept partial data (and even partially malformed data) +    // we might end up with no NULL terminator before the end_ptr hence we need +    // to take a slower route and ensure we stay within boundaries +    for (; utf8_data_ptr < utf8_data_end_ptr;) { +      if (zero_is_terminator && !*utf8_data_ptr) +        break; + +      if (escape_non_printables) { +        uint8_t *next_data = nullptr; +        auto printable = +            escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); +        auto printable_bytes = printable.GetBytes(); +        auto printable_size = printable.GetSize(); +        if (!printable_bytes || !next_data) { +          // GetPrintable() failed on us - print one byte in a desperate resync +          // attempt +          printable_bytes = utf8_data_ptr; +          printable_size = 1; +          next_data = utf8_data_ptr + 1; +        } +        for (unsigned c = 0; c < printable_size; c++) +          stream.Printf("%c", *(printable_bytes + c)); +        utf8_data_ptr = (uint8_t *)next_data; +      } else { +        stream.Printf("%c", *utf8_data_ptr); +        utf8_data_ptr++; +      } +    } +  } +  if (dump_options.GetQuote() != 0) +    stream.Printf("%c", dump_options.GetQuote()); +  if (dump_options.GetSuffixToken() != nullptr) +    stream.Printf("%s", dump_options.GetSuffixToken()); +  if (dump_options.GetIsTruncated()) +    stream.Printf("..."); +  return true; +} + +lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions:: +    ReadStringAndDumpToStreamOptions(ValueObject &valobj) +    : ReadStringAndDumpToStreamOptions() { +  SetEscapeNonPrintables( +      valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); +} + +lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: +    ReadBufferAndDumpToStreamOptions(ValueObject &valobj) +    : ReadBufferAndDumpToStreamOptions() { +  SetEscapeNonPrintables( +      valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); +} + +lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: +    ReadBufferAndDumpToStreamOptions( +        const ReadStringAndDumpToStreamOptions &options) +    : ReadBufferAndDumpToStreamOptions() { +  SetStream(options.GetStream()); +  SetPrefixToken(options.GetPrefixToken()); +  SetSuffixToken(options.GetSuffixToken()); +  SetQuote(options.GetQuote()); +  SetEscapeNonPrintables(options.GetEscapeNonPrintables()); +  SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); +  SetLanguage(options.GetLanguage()); +} + +namespace lldb_private { + +namespace formatters { + +template <> +bool StringPrinter::ReadStringAndDumpToStream< +    StringPrinter::StringElementType::ASCII>( +    const ReadStringAndDumpToStreamOptions &options) { +  assert(options.GetStream() && "need a Stream to print the string to"); +  Status my_error; + +  ProcessSP process_sp(options.GetProcessSP()); + +  if (process_sp.get() == nullptr || options.GetLocation() == 0) +    return false; + +  size_t size; +  const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); +  bool is_truncated = false; + +  if (options.GetSourceSize() == 0) +    size = max_size; +  else if (!options.GetIgnoreMaxLength()) { +    size = options.GetSourceSize(); +    if (size > max_size) { +      size = max_size; +      is_truncated = true; +    } +  } else +    size = options.GetSourceSize(); + +  lldb::DataBufferSP buffer_sp(new DataBufferHeap(size, 0)); + +  process_sp->ReadCStringFromMemory( +      options.GetLocation(), (char *)buffer_sp->GetBytes(), size, my_error); + +  if (my_error.Fail()) +    return false; + +  const char *prefix_token = options.GetPrefixToken(); +  char quote = options.GetQuote(); + +  if (prefix_token != nullptr) +    options.GetStream()->Printf("%s%c", prefix_token, quote); +  else if (quote != 0) +    options.GetStream()->Printf("%c", quote); + +  uint8_t *data_end = buffer_sp->GetBytes() + buffer_sp->GetByteSize(); + +  const bool escape_non_printables = options.GetEscapeNonPrintables(); +  lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; +  if (escape_non_printables) { +    if (Language *language = Language::FindPlugin(options.GetLanguage())) +      escaping_callback = language->GetStringPrinterEscapingHelper( +          lldb_private::formatters::StringPrinter::GetPrintableElementType:: +              ASCII); +    else +      escaping_callback = +          lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper( +              lldb_private::formatters::StringPrinter::GetPrintableElementType:: +                  ASCII); +  } + +  // since we tend to accept partial data (and even partially malformed data) +  // we might end up with no NULL terminator before the end_ptr hence we need +  // to take a slower route and ensure we stay within boundaries +  for (uint8_t *data = buffer_sp->GetBytes(); *data && (data < data_end);) { +    if (escape_non_printables) { +      uint8_t *next_data = nullptr; +      auto printable = escaping_callback(data, data_end, next_data); +      auto printable_bytes = printable.GetBytes(); +      auto printable_size = printable.GetSize(); +      if (!printable_bytes || !next_data) { +        // GetPrintable() failed on us - print one byte in a desperate resync +        // attempt +        printable_bytes = data; +        printable_size = 1; +        next_data = data + 1; +      } +      for (unsigned c = 0; c < printable_size; c++) +        options.GetStream()->Printf("%c", *(printable_bytes + c)); +      data = (uint8_t *)next_data; +    } else { +      options.GetStream()->Printf("%c", *data); +      data++; +    } +  } + +  const char *suffix_token = options.GetSuffixToken(); + +  if (suffix_token != nullptr) +    options.GetStream()->Printf("%c%s", quote, suffix_token); +  else if (quote != 0) +    options.GetStream()->Printf("%c", quote); + +  if (is_truncated) +    options.GetStream()->Printf("..."); + +  return true; +} + +template <typename SourceDataType> +static bool ReadUTFBufferAndDumpToStream( +    const StringPrinter::ReadStringAndDumpToStreamOptions &options, +    llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, +                                              const SourceDataType *, +                                              llvm::UTF8 **, llvm::UTF8 *, +                                              llvm::ConversionFlags)) { +  assert(options.GetStream() && "need a Stream to print the string to"); + +  if (options.GetLocation() == 0 || +      options.GetLocation() == LLDB_INVALID_ADDRESS) +    return false; + +  lldb::ProcessSP process_sp(options.GetProcessSP()); + +  if (!process_sp) +    return false; + +  const int type_width = sizeof(SourceDataType); +  const int origin_encoding = 8 * type_width; +  if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) +    return false; +  // if not UTF8, I need a conversion function to return proper UTF8 +  if (origin_encoding != 8 && !ConvertFunction) +    return false; + +  if (!options.GetStream()) +    return false; + +  uint32_t sourceSize = options.GetSourceSize(); +  bool needs_zero_terminator = options.GetNeedsZeroTermination(); + +  bool is_truncated = false; +  const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); + +  if (!sourceSize) { +    sourceSize = max_size; +    needs_zero_terminator = true; +  } else if (!options.GetIgnoreMaxLength()) { +    if (sourceSize > max_size) { +      sourceSize = max_size; +      is_truncated = true; +    } +  } + +  const int bufferSPSize = sourceSize * type_width; + +  lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize, 0)); + +  if (!buffer_sp->GetBytes()) +    return false; + +  Status error; +  char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); + +  if (needs_zero_terminator) +    process_sp->ReadStringFromMemory(options.GetLocation(), buffer, +                                     bufferSPSize, error, type_width); +  else +    process_sp->ReadMemoryFromInferior(options.GetLocation(), +                                       (char *)buffer_sp->GetBytes(), +                                       bufferSPSize, error); + +  if (error.Fail()) { +    options.GetStream()->Printf("unable to read data"); +    return true; +  } + +  DataExtractor data(buffer_sp, process_sp->GetByteOrder(), +                     process_sp->GetAddressByteSize()); + +  StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options); +  dump_options.SetData(data); +  dump_options.SetSourceSize(sourceSize); +  dump_options.SetIsTruncated(is_truncated); + +  return DumpUTFBufferToStream(ConvertFunction, dump_options); +} + +template <> +bool StringPrinter::ReadStringAndDumpToStream< +    StringPrinter::StringElementType::UTF8>( +    const ReadStringAndDumpToStreamOptions &options) { +  return ReadUTFBufferAndDumpToStream<llvm::UTF8>(options, nullptr); +} + +template <> +bool StringPrinter::ReadStringAndDumpToStream< +    StringPrinter::StringElementType::UTF16>( +    const ReadStringAndDumpToStreamOptions &options) { +  return ReadUTFBufferAndDumpToStream<llvm::UTF16>(options, +                                                   llvm::ConvertUTF16toUTF8); +} + +template <> +bool StringPrinter::ReadStringAndDumpToStream< +    StringPrinter::StringElementType::UTF32>( +    const ReadStringAndDumpToStreamOptions &options) { +  return ReadUTFBufferAndDumpToStream<llvm::UTF32>(options, +                                                   llvm::ConvertUTF32toUTF8); +} + +template <> +bool StringPrinter::ReadBufferAndDumpToStream< +    StringPrinter::StringElementType::UTF8>( +    const ReadBufferAndDumpToStreamOptions &options) { +  assert(options.GetStream() && "need a Stream to print the string to"); + +  return DumpUTFBufferToStream<llvm::UTF8>(nullptr, options); +} + +template <> +bool StringPrinter::ReadBufferAndDumpToStream< +    StringPrinter::StringElementType::ASCII>( +    const ReadBufferAndDumpToStreamOptions &options) { +  // treat ASCII the same as UTF8 +  // FIXME: can we optimize ASCII some more? +  return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); +} + +template <> +bool StringPrinter::ReadBufferAndDumpToStream< +    StringPrinter::StringElementType::UTF16>( +    const ReadBufferAndDumpToStreamOptions &options) { +  assert(options.GetStream() && "need a Stream to print the string to"); + +  return DumpUTFBufferToStream(llvm::ConvertUTF16toUTF8, options); +} + +template <> +bool StringPrinter::ReadBufferAndDumpToStream< +    StringPrinter::StringElementType::UTF32>( +    const ReadBufferAndDumpToStreamOptions &options) { +  assert(options.GetStream() && "need a Stream to print the string to"); + +  return DumpUTFBufferToStream(llvm::ConvertUTF32toUTF8, options); +} + +} // namespace formatters + +} // namespace lldb_private  | 
