summaryrefslogtreecommitdiff
path: root/tools/clang-format/ClangFormat.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2019-08-20 20:50:49 +0000
committerDimitry Andric <dim@FreeBSD.org>2019-08-20 20:50:49 +0000
commit2298981669bf3bd63335a4be179bc0f96823a8f4 (patch)
tree1cbe2eb27f030d2d70b80ee5ca3c86bee7326a9f /tools/clang-format/ClangFormat.cpp
parent9a83721404652cea39e9f02ae3e3b5c964602a5c (diff)
Diffstat (limited to 'tools/clang-format/ClangFormat.cpp')
-rw-r--r--tools/clang-format/ClangFormat.cpp39
1 files changed, 34 insertions, 5 deletions
diff --git a/tools/clang-format/ClangFormat.cpp b/tools/clang-format/ClangFormat.cpp
index 49162d0d910e..60a7feb7119b 100644
--- a/tools/clang-format/ClangFormat.cpp
+++ b/tools/clang-format/ClangFormat.cpp
@@ -1,9 +1,8 @@
//===-- clang-format/ClangFormat.cpp - Clang format tool ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -258,6 +257,36 @@ static bool format(StringRef FileName) {
std::unique_ptr<llvm::MemoryBuffer> Code = std::move(CodeOrErr.get());
if (Code->getBufferSize() == 0)
return false; // Empty files are formatted correctly.
+
+ // Check to see if the buffer has a UTF Byte Order Mark (BOM).
+ // We only support UTF-8 with and without a BOM right now. See
+ // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding
+ // for more information.
+ StringRef BufStr = Code->getBuffer();
+ const char *InvalidBOM = llvm::StringSwitch<const char *>(BufStr)
+ .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
+ "UTF-32 (BE)")
+ .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
+ "UTF-32 (LE)")
+ .StartsWith("\xFE\xFF", "UTF-16 (BE)")
+ .StartsWith("\xFF\xFE", "UTF-16 (LE)")
+ .StartsWith("\x2B\x2F\x76", "UTF-7")
+ .StartsWith("\xF7\x64\x4C", "UTF-1")
+ .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
+ .StartsWith("\x0E\xFE\xFF", "SCSU")
+ .StartsWith("\xFB\xEE\x28", "BOCU-1")
+ .StartsWith("\x84\x31\x95\x33", "GB-18030")
+ .Default(nullptr);
+
+ if (InvalidBOM) {
+ errs() << "error: encoding with unsupported byte order mark \""
+ << InvalidBOM << "\" detected";
+ if (FileName != "-")
+ errs() << " in file '" << FileName << "'";
+ errs() << ".\n";
+ return true;
+ }
+
std::vector<tooling::Range> Ranges;
if (fillRanges(Code.get(), Ranges))
return true;
@@ -346,7 +375,7 @@ int main(int argc, const char **argv) {
cl::SetVersionPrinter(PrintVersion);
cl::ParseCommandLineOptions(
argc, argv,
- "A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf code.\n\n"
+ "A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code.\n\n"
"If no arguments are specified, it formats the code from standard input\n"
"and writes the result to the standard output.\n"
"If <file>s are given, it reformats the files. If -i is specified\n"