diff options
Diffstat (limited to 'lib/Support/raw_ostream.cpp')
| -rw-r--r-- | lib/Support/raw_ostream.cpp | 94 | 
1 files changed, 82 insertions, 12 deletions
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 038ad00bd6085..21dde7ff914aa 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -60,6 +60,7 @@  #endif  #ifdef _WIN32 +#include "llvm/Support/ConvertUTF.h"  #include "Windows/WindowsSupport.h"  #endif @@ -567,6 +568,12 @@ raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)    if (FD <= STDERR_FILENO)      ShouldClose = false; +#ifdef _WIN32 +  // Check if this is a console device. This is not equivalent to isatty. +  IsWindowsConsole = +      ::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR; +#endif +    // Get the starting position.    off_t loc = ::lseek(FD, 0, SEEK_CUR);  #ifdef _WIN32 @@ -609,25 +616,77 @@ raw_fd_ostream::~raw_fd_ostream() {                         /*GenCrashDiag=*/false);  } +#if defined(_WIN32) +// The most reliable way to print unicode in a Windows console is with +// WriteConsoleW. To use that, first transcode from UTF-8 to UTF-16. This +// assumes that LLVM programs always print valid UTF-8 to the console. The data +// might not be UTF-8 for two major reasons: +// 1. The program is printing binary (-filetype=obj -o -), in which case it +// would have been gibberish anyway. +// 2. The program is printing text in a semi-ascii compatible codepage like +// shift-jis or cp1252. +// +// Most LLVM programs don't produce non-ascii text unless they are quoting +// user source input. A well-behaved LLVM program should either validate that +// the input is UTF-8 or transcode from the local codepage to UTF-8 before +// quoting it. If they don't, this may mess up the encoding, but this is still +// probably the best compromise we can make. +static bool write_console_impl(int FD, StringRef Data) { +  SmallVector<wchar_t, 256> WideText; + +  // Fall back to ::write if it wasn't valid UTF-8. +  if (auto EC = sys::windows::UTF8ToUTF16(Data, WideText)) +    return false; + +  // On Windows 7 and earlier, WriteConsoleW has a low maximum amount of data +  // that can be written to the console at a time. +  size_t MaxWriteSize = WideText.size(); +  if (!RunningWindows8OrGreater()) +    MaxWriteSize = 32767; + +  size_t WCharsWritten = 0; +  do { +    size_t WCharsToWrite = +        std::min(MaxWriteSize, WideText.size() - WCharsWritten); +    DWORD ActuallyWritten; +    bool Success = +        ::WriteConsoleW((HANDLE)::_get_osfhandle(FD), &WideText[WCharsWritten], +                        WCharsToWrite, &ActuallyWritten, +                        /*Reserved=*/nullptr); + +    // The most likely reason for WriteConsoleW to fail is that FD no longer +    // points to a console. Fall back to ::write. If this isn't the first loop +    // iteration, something is truly wrong. +    if (!Success) +      return false; + +    WCharsWritten += ActuallyWritten; +  } while (WCharsWritten != WideText.size()); +  return true; +} +#endif +  void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {    assert(FD >= 0 && "File already closed.");    pos += Size; -  // The maximum write size is limited to SSIZE_MAX because a write -  // greater than SSIZE_MAX is implementation-defined in POSIX. -  // Since SSIZE_MAX is not portable, we use SIZE_MAX >> 1 instead. -  size_t MaxWriteSize = SIZE_MAX >> 1; +#if defined(_WIN32) +  // If this is a Windows console device, try re-encoding from UTF-8 to UTF-16 +  // and using WriteConsoleW. If that fails, fall back to plain write(). +  if (IsWindowsConsole) +    if (write_console_impl(FD, StringRef(Ptr, Size))) +      return; +#endif + +  // The maximum write size is limited to INT32_MAX. A write +  // greater than SSIZE_MAX is implementation-defined in POSIX, +  // and Windows _write requires 32 bit input. +  size_t MaxWriteSize = INT32_MAX;  #if defined(__linux__)    // It is observed that Linux returns EINVAL for a very large write (>2G).    // Make it a reasonably small value.    MaxWriteSize = 1024 * 1024 * 1024; -#elif defined(_WIN32) -  // Writing a large size of output to Windows console returns ENOMEM. It seems -  // that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and -  // the latter has a size limit (66000 bytes or less, depending on heap usage). -  if (::_isatty(FD) && !RunningWindows8OrGreater()) -    MaxWriteSize = 32767;  #endif    do { @@ -696,8 +755,17 @@ void raw_fd_ostream::pwrite_impl(const char *Ptr, size_t Size,  }  size_t raw_fd_ostream::preferred_buffer_size() const { -#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix) -  // Windows and Minix have no st_blksize. +#if defined(_WIN32) +  // Disable buffering for console devices. Console output is re-encoded from +  // UTF-8 to UTF-16 on Windows, and buffering it would require us to split the +  // buffer on a valid UTF-8 codepoint boundary. Terminal buffering is disabled +  // below on most other OSs, so do the same thing on Windows and avoid that +  // complexity. +  if (IsWindowsConsole) +    return 0; +  return raw_ostream::preferred_buffer_size(); +#elif !defined(__minix) +  // Minix has no st_blksize.    assert(FD >= 0 && "File not yet open!");    struct stat statbuf;    if (fstat(FD, &statbuf) != 0) @@ -846,3 +914,5 @@ void raw_null_ostream::pwrite_impl(const char *Ptr, size_t Size,                                     uint64_t Offset) {}  void raw_pwrite_stream::anchor() {} + +void buffer_ostream::anchor() {}  | 
