summaryrefslogtreecommitdiff
path: root/lib/Support/raw_ostream.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Support/raw_ostream.cpp')
-rw-r--r--lib/Support/raw_ostream.cpp94
1 files changed, 82 insertions, 12 deletions
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 038ad00bd6085..21dde7ff914aa 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -60,6 +60,7 @@
#endif
#ifdef _WIN32
+#include "llvm/Support/ConvertUTF.h"
#include "Windows/WindowsSupport.h"
#endif
@@ -567,6 +568,12 @@ raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
if (FD <= STDERR_FILENO)
ShouldClose = false;
+#ifdef _WIN32
+ // Check if this is a console device. This is not equivalent to isatty.
+ IsWindowsConsole =
+ ::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR;
+#endif
+
// Get the starting position.
off_t loc = ::lseek(FD, 0, SEEK_CUR);
#ifdef _WIN32
@@ -609,25 +616,77 @@ raw_fd_ostream::~raw_fd_ostream() {
/*GenCrashDiag=*/false);
}
+#if defined(_WIN32)
+// The most reliable way to print unicode in a Windows console is with
+// WriteConsoleW. To use that, first transcode from UTF-8 to UTF-16. This
+// assumes that LLVM programs always print valid UTF-8 to the console. The data
+// might not be UTF-8 for two major reasons:
+// 1. The program is printing binary (-filetype=obj -o -), in which case it
+// would have been gibberish anyway.
+// 2. The program is printing text in a semi-ascii compatible codepage like
+// shift-jis or cp1252.
+//
+// Most LLVM programs don't produce non-ascii text unless they are quoting
+// user source input. A well-behaved LLVM program should either validate that
+// the input is UTF-8 or transcode from the local codepage to UTF-8 before
+// quoting it. If they don't, this may mess up the encoding, but this is still
+// probably the best compromise we can make.
+static bool write_console_impl(int FD, StringRef Data) {
+ SmallVector<wchar_t, 256> WideText;
+
+ // Fall back to ::write if it wasn't valid UTF-8.
+ if (auto EC = sys::windows::UTF8ToUTF16(Data, WideText))
+ return false;
+
+ // On Windows 7 and earlier, WriteConsoleW has a low maximum amount of data
+ // that can be written to the console at a time.
+ size_t MaxWriteSize = WideText.size();
+ if (!RunningWindows8OrGreater())
+ MaxWriteSize = 32767;
+
+ size_t WCharsWritten = 0;
+ do {
+ size_t WCharsToWrite =
+ std::min(MaxWriteSize, WideText.size() - WCharsWritten);
+ DWORD ActuallyWritten;
+ bool Success =
+ ::WriteConsoleW((HANDLE)::_get_osfhandle(FD), &WideText[WCharsWritten],
+ WCharsToWrite, &ActuallyWritten,
+ /*Reserved=*/nullptr);
+
+ // The most likely reason for WriteConsoleW to fail is that FD no longer
+ // points to a console. Fall back to ::write. If this isn't the first loop
+ // iteration, something is truly wrong.
+ if (!Success)
+ return false;
+
+ WCharsWritten += ActuallyWritten;
+ } while (WCharsWritten != WideText.size());
+ return true;
+}
+#endif
+
void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
assert(FD >= 0 && "File already closed.");
pos += Size;
- // The maximum write size is limited to SSIZE_MAX because a write
- // greater than SSIZE_MAX is implementation-defined in POSIX.
- // Since SSIZE_MAX is not portable, we use SIZE_MAX >> 1 instead.
- size_t MaxWriteSize = SIZE_MAX >> 1;
+#if defined(_WIN32)
+ // If this is a Windows console device, try re-encoding from UTF-8 to UTF-16
+ // and using WriteConsoleW. If that fails, fall back to plain write().
+ if (IsWindowsConsole)
+ if (write_console_impl(FD, StringRef(Ptr, Size)))
+ return;
+#endif
+
+ // The maximum write size is limited to INT32_MAX. A write
+ // greater than SSIZE_MAX is implementation-defined in POSIX,
+ // and Windows _write requires 32 bit input.
+ size_t MaxWriteSize = INT32_MAX;
#if defined(__linux__)
// It is observed that Linux returns EINVAL for a very large write (>2G).
// Make it a reasonably small value.
MaxWriteSize = 1024 * 1024 * 1024;
-#elif defined(_WIN32)
- // Writing a large size of output to Windows console returns ENOMEM. It seems
- // that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and
- // the latter has a size limit (66000 bytes or less, depending on heap usage).
- if (::_isatty(FD) && !RunningWindows8OrGreater())
- MaxWriteSize = 32767;
#endif
do {
@@ -696,8 +755,17 @@ void raw_fd_ostream::pwrite_impl(const char *Ptr, size_t Size,
}
size_t raw_fd_ostream::preferred_buffer_size() const {
-#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix)
- // Windows and Minix have no st_blksize.
+#if defined(_WIN32)
+ // Disable buffering for console devices. Console output is re-encoded from
+ // UTF-8 to UTF-16 on Windows, and buffering it would require us to split the
+ // buffer on a valid UTF-8 codepoint boundary. Terminal buffering is disabled
+ // below on most other OSs, so do the same thing on Windows and avoid that
+ // complexity.
+ if (IsWindowsConsole)
+ return 0;
+ return raw_ostream::preferred_buffer_size();
+#elif !defined(__minix)
+ // Minix has no st_blksize.
assert(FD >= 0 && "File not yet open!");
struct stat statbuf;
if (fstat(FD, &statbuf) != 0)
@@ -846,3 +914,5 @@ void raw_null_ostream::pwrite_impl(const char *Ptr, size_t Size,
uint64_t Offset) {}
void raw_pwrite_stream::anchor() {}
+
+void buffer_ostream::anchor() {}