diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2019-12-20 19:53:05 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2019-12-20 19:53:05 +0000 |
| commit | 0b57cec536236d46e3dba9bd041533462f33dbb7 (patch) | |
| tree | 56229dbdbbf76d18580f72f789003db17246c8d9 /contrib/llvm/lib/Support/JSON.cpp | |
| parent | 718ef55ec7785aae63f98f8ca05dc07ed399c16d (diff) | |
Notes
Diffstat (limited to 'contrib/llvm/lib/Support/JSON.cpp')
| -rw-r--r-- | contrib/llvm/lib/Support/JSON.cpp | 718 |
1 files changed, 0 insertions, 718 deletions
diff --git a/contrib/llvm/lib/Support/JSON.cpp b/contrib/llvm/lib/Support/JSON.cpp deleted file mode 100644 index 95e5ed654277..000000000000 --- a/contrib/llvm/lib/Support/JSON.cpp +++ /dev/null @@ -1,718 +0,0 @@ -//=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===---------------------------------------------------------------------===// - -#include "llvm/Support/JSON.h" -#include "llvm/Support/ConvertUTF.h" -#include "llvm/Support/Format.h" -#include <cctype> - -namespace llvm { -namespace json { - -Value &Object::operator[](const ObjectKey &K) { - return try_emplace(K, nullptr).first->getSecond(); -} -Value &Object::operator[](ObjectKey &&K) { - return try_emplace(std::move(K), nullptr).first->getSecond(); -} -Value *Object::get(StringRef K) { - auto I = find(K); - if (I == end()) - return nullptr; - return &I->second; -} -const Value *Object::get(StringRef K) const { - auto I = find(K); - if (I == end()) - return nullptr; - return &I->second; -} -llvm::Optional<std::nullptr_t> Object::getNull(StringRef K) const { - if (auto *V = get(K)) - return V->getAsNull(); - return llvm::None; -} -llvm::Optional<bool> Object::getBoolean(StringRef K) const { - if (auto *V = get(K)) - return V->getAsBoolean(); - return llvm::None; -} -llvm::Optional<double> Object::getNumber(StringRef K) const { - if (auto *V = get(K)) - return V->getAsNumber(); - return llvm::None; -} -llvm::Optional<int64_t> Object::getInteger(StringRef K) const { - if (auto *V = get(K)) - return V->getAsInteger(); - return llvm::None; -} -llvm::Optional<llvm::StringRef> Object::getString(StringRef K) const { - if (auto *V = get(K)) - return V->getAsString(); - return llvm::None; -} -const json::Object *Object::getObject(StringRef K) const { - if (auto *V = get(K)) - return V->getAsObject(); - return nullptr; -} -json::Object *Object::getObject(StringRef K) { - if (auto *V = get(K)) - return V->getAsObject(); - return nullptr; -} -const json::Array *Object::getArray(StringRef K) const { - if (auto *V = get(K)) - return V->getAsArray(); - return nullptr; -} -json::Array *Object::getArray(StringRef K) { - if (auto *V = get(K)) - return V->getAsArray(); - return nullptr; -} -bool operator==(const Object &LHS, const Object &RHS) { - if (LHS.size() != RHS.size()) - return false; - for (const auto &L : LHS) { - auto R = RHS.find(L.first); - if (R == RHS.end() || L.second != R->second) - return false; - } - return true; -} - -Array::Array(std::initializer_list<Value> Elements) { - V.reserve(Elements.size()); - for (const Value &V : Elements) { - emplace_back(nullptr); - back().moveFrom(std::move(V)); - } -} - -Value::Value(std::initializer_list<Value> Elements) - : Value(json::Array(Elements)) {} - -void Value::copyFrom(const Value &M) { - Type = M.Type; - switch (Type) { - case T_Null: - case T_Boolean: - case T_Double: - case T_Integer: - memcpy(Union.buffer, M.Union.buffer, sizeof(Union.buffer)); - break; - case T_StringRef: - create<StringRef>(M.as<StringRef>()); - break; - case T_String: - create<std::string>(M.as<std::string>()); - break; - case T_Object: - create<json::Object>(M.as<json::Object>()); - break; - case T_Array: - create<json::Array>(M.as<json::Array>()); - break; - } -} - -void Value::moveFrom(const Value &&M) { - Type = M.Type; - switch (Type) { - case T_Null: - case T_Boolean: - case T_Double: - case T_Integer: - memcpy(Union.buffer, M.Union.buffer, sizeof(Union.buffer)); - break; - case T_StringRef: - create<StringRef>(M.as<StringRef>()); - break; - case T_String: - create<std::string>(std::move(M.as<std::string>())); - M.Type = T_Null; - break; - case T_Object: - create<json::Object>(std::move(M.as<json::Object>())); - M.Type = T_Null; - break; - case T_Array: - create<json::Array>(std::move(M.as<json::Array>())); - M.Type = T_Null; - break; - } -} - -void Value::destroy() { - switch (Type) { - case T_Null: - case T_Boolean: - case T_Double: - case T_Integer: - break; - case T_StringRef: - as<StringRef>().~StringRef(); - break; - case T_String: - as<std::string>().~basic_string(); - break; - case T_Object: - as<json::Object>().~Object(); - break; - case T_Array: - as<json::Array>().~Array(); - break; - } -} - -bool operator==(const Value &L, const Value &R) { - if (L.kind() != R.kind()) - return false; - switch (L.kind()) { - case Value::Null: - return *L.getAsNull() == *R.getAsNull(); - case Value::Boolean: - return *L.getAsBoolean() == *R.getAsBoolean(); - case Value::Number: - // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323 - // The same integer must convert to the same double, per the standard. - // However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32. - // So we avoid floating point promotion for exact comparisons. - if (L.Type == Value::T_Integer || R.Type == Value::T_Integer) - return L.getAsInteger() == R.getAsInteger(); - return *L.getAsNumber() == *R.getAsNumber(); - case Value::String: - return *L.getAsString() == *R.getAsString(); - case Value::Array: - return *L.getAsArray() == *R.getAsArray(); - case Value::Object: - return *L.getAsObject() == *R.getAsObject(); - } - llvm_unreachable("Unknown value kind"); -} - -namespace { -// Simple recursive-descent JSON parser. -class Parser { -public: - Parser(StringRef JSON) - : Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {} - - bool checkUTF8() { - size_t ErrOffset; - if (isUTF8(StringRef(Start, End - Start), &ErrOffset)) - return true; - P = Start + ErrOffset; // For line/column calculation. - return parseError("Invalid UTF-8 sequence"); - } - - bool parseValue(Value &Out); - - bool assertEnd() { - eatWhitespace(); - if (P == End) - return true; - return parseError("Text after end of document"); - } - - Error takeError() { - assert(Err); - return std::move(*Err); - } - -private: - void eatWhitespace() { - while (P != End && (*P == ' ' || *P == '\r' || *P == '\n' || *P == '\t')) - ++P; - } - - // On invalid syntax, parseX() functions return false and set Err. - bool parseNumber(char First, Value &Out); - bool parseString(std::string &Out); - bool parseUnicode(std::string &Out); - bool parseError(const char *Msg); // always returns false - - char next() { return P == End ? 0 : *P++; } - char peek() { return P == End ? 0 : *P; } - static bool isNumber(char C) { - return C == '0' || C == '1' || C == '2' || C == '3' || C == '4' || - C == '5' || C == '6' || C == '7' || C == '8' || C == '9' || - C == 'e' || C == 'E' || C == '+' || C == '-' || C == '.'; - } - - Optional<Error> Err; - const char *Start, *P, *End; -}; - -bool Parser::parseValue(Value &Out) { - eatWhitespace(); - if (P == End) - return parseError("Unexpected EOF"); - switch (char C = next()) { - // Bare null/true/false are easy - first char identifies them. - case 'n': - Out = nullptr; - return (next() == 'u' && next() == 'l' && next() == 'l') || - parseError("Invalid JSON value (null?)"); - case 't': - Out = true; - return (next() == 'r' && next() == 'u' && next() == 'e') || - parseError("Invalid JSON value (true?)"); - case 'f': - Out = false; - return (next() == 'a' && next() == 'l' && next() == 's' && next() == 'e') || - parseError("Invalid JSON value (false?)"); - case '"': { - std::string S; - if (parseString(S)) { - Out = std::move(S); - return true; - } - return false; - } - case '[': { - Out = Array{}; - Array &A = *Out.getAsArray(); - eatWhitespace(); - if (peek() == ']') { - ++P; - return true; - } - for (;;) { - A.emplace_back(nullptr); - if (!parseValue(A.back())) - return false; - eatWhitespace(); - switch (next()) { - case ',': - eatWhitespace(); - continue; - case ']': - return true; - default: - return parseError("Expected , or ] after array element"); - } - } - } - case '{': { - Out = Object{}; - Object &O = *Out.getAsObject(); - eatWhitespace(); - if (peek() == '}') { - ++P; - return true; - } - for (;;) { - if (next() != '"') - return parseError("Expected object key"); - std::string K; - if (!parseString(K)) - return false; - eatWhitespace(); - if (next() != ':') - return parseError("Expected : after object key"); - eatWhitespace(); - if (!parseValue(O[std::move(K)])) - return false; - eatWhitespace(); - switch (next()) { - case ',': - eatWhitespace(); - continue; - case '}': - return true; - default: - return parseError("Expected , or } after object property"); - } - } - } - default: - if (isNumber(C)) - return parseNumber(C, Out); - return parseError("Invalid JSON value"); - } -} - -bool Parser::parseNumber(char First, Value &Out) { - // Read the number into a string. (Must be null-terminated for strto*). - SmallString<24> S; - S.push_back(First); - while (isNumber(peek())) - S.push_back(next()); - char *End; - // Try first to parse as integer, and if so preserve full 64 bits. - // strtoll returns long long >= 64 bits, so check it's in range too. - auto I = std::strtoll(S.c_str(), &End, 10); - if (End == S.end() && I >= std::numeric_limits<int64_t>::min() && - I <= std::numeric_limits<int64_t>::max()) { - Out = int64_t(I); - return true; - } - // If it's not an integer - Out = std::strtod(S.c_str(), &End); - return End == S.end() || parseError("Invalid JSON value (number?)"); -} - -bool Parser::parseString(std::string &Out) { - // leading quote was already consumed. - for (char C = next(); C != '"'; C = next()) { - if (LLVM_UNLIKELY(P == End)) - return parseError("Unterminated string"); - if (LLVM_UNLIKELY((C & 0x1f) == C)) - return parseError("Control character in string"); - if (LLVM_LIKELY(C != '\\')) { - Out.push_back(C); - continue; - } - // Handle escape sequence. - switch (C = next()) { - case '"': - case '\\': - case '/': - Out.push_back(C); - break; - case 'b': - Out.push_back('\b'); - break; - case 'f': - Out.push_back('\f'); - break; - case 'n': - Out.push_back('\n'); - break; - case 'r': - Out.push_back('\r'); - break; - case 't': - Out.push_back('\t'); - break; - case 'u': - if (!parseUnicode(Out)) - return false; - break; - default: - return parseError("Invalid escape sequence"); - } - } - return true; -} - -static void encodeUtf8(uint32_t Rune, std::string &Out) { - if (Rune < 0x80) { - Out.push_back(Rune & 0x7F); - } else if (Rune < 0x800) { - uint8_t FirstByte = 0xC0 | ((Rune & 0x7C0) >> 6); - uint8_t SecondByte = 0x80 | (Rune & 0x3F); - Out.push_back(FirstByte); - Out.push_back(SecondByte); - } else if (Rune < 0x10000) { - uint8_t FirstByte = 0xE0 | ((Rune & 0xF000) >> 12); - uint8_t SecondByte = 0x80 | ((Rune & 0xFC0) >> 6); - uint8_t ThirdByte = 0x80 | (Rune & 0x3F); - Out.push_back(FirstByte); - Out.push_back(SecondByte); - Out.push_back(ThirdByte); - } else if (Rune < 0x110000) { - uint8_t FirstByte = 0xF0 | ((Rune & 0x1F0000) >> 18); - uint8_t SecondByte = 0x80 | ((Rune & 0x3F000) >> 12); - uint8_t ThirdByte = 0x80 | ((Rune & 0xFC0) >> 6); - uint8_t FourthByte = 0x80 | (Rune & 0x3F); - Out.push_back(FirstByte); - Out.push_back(SecondByte); - Out.push_back(ThirdByte); - Out.push_back(FourthByte); - } else { - llvm_unreachable("Invalid codepoint"); - } -} - -// Parse a UTF-16 \uNNNN escape sequence. "\u" has already been consumed. -// May parse several sequential escapes to ensure proper surrogate handling. -// We do not use ConvertUTF.h, it can't accept and replace unpaired surrogates. -// These are invalid Unicode but valid JSON (RFC 8259, section 8.2). -bool Parser::parseUnicode(std::string &Out) { - // Invalid UTF is not a JSON error (RFC 8529ยง8.2). It gets replaced by U+FFFD. - auto Invalid = [&] { Out.append(/* UTF-8 */ {'\xef', '\xbf', '\xbd'}); }; - // Decodes 4 hex digits from the stream into Out, returns false on error. - auto Parse4Hex = [this](uint16_t &Out) -> bool { - Out = 0; - char Bytes[] = {next(), next(), next(), next()}; - for (unsigned char C : Bytes) { - if (!std::isxdigit(C)) - return parseError("Invalid \\u escape sequence"); - Out <<= 4; - Out |= (C > '9') ? (C & ~0x20) - 'A' + 10 : (C - '0'); - } - return true; - }; - uint16_t First; // UTF-16 code unit from the first \u escape. - if (!Parse4Hex(First)) - return false; - - // We loop to allow proper surrogate-pair error handling. - while (true) { - // Case 1: the UTF-16 code unit is already a codepoint in the BMP. - if (LLVM_LIKELY(First < 0xD800 || First >= 0xE000)) { - encodeUtf8(First, Out); - return true; - } - - // Case 2: it's an (unpaired) trailing surrogate. - if (LLVM_UNLIKELY(First >= 0xDC00)) { - Invalid(); - return true; - } - - // Case 3: it's a leading surrogate. We expect a trailing one next. - // Case 3a: there's no trailing \u escape. Don't advance in the stream. - if (LLVM_UNLIKELY(P + 2 > End || *P != '\\' || *(P + 1) != 'u')) { - Invalid(); // Leading surrogate was unpaired. - return true; - } - P += 2; - uint16_t Second; - if (!Parse4Hex(Second)) - return false; - // Case 3b: there was another \u escape, but it wasn't a trailing surrogate. - if (LLVM_UNLIKELY(Second < 0xDC00 || Second >= 0xE000)) { - Invalid(); // Leading surrogate was unpaired. - First = Second; // Second escape still needs to be processed. - continue; - } - // Case 3c: a valid surrogate pair encoding an astral codepoint. - encodeUtf8(0x10000 | ((First - 0xD800) << 10) | (Second - 0xDC00), Out); - return true; - } -} - -bool Parser::parseError(const char *Msg) { - int Line = 1; - const char *StartOfLine = Start; - for (const char *X = Start; X < P; ++X) { - if (*X == 0x0A) { - ++Line; - StartOfLine = X + 1; - } - } - Err.emplace( - llvm::make_unique<ParseError>(Msg, Line, P - StartOfLine, P - Start)); - return false; -} -} // namespace - -Expected<Value> parse(StringRef JSON) { - Parser P(JSON); - Value E = nullptr; - if (P.checkUTF8()) - if (P.parseValue(E)) - if (P.assertEnd()) - return std::move(E); - return P.takeError(); -} -char ParseError::ID = 0; - -static std::vector<const Object::value_type *> sortedElements(const Object &O) { - std::vector<const Object::value_type *> Elements; - for (const auto &E : O) - Elements.push_back(&E); - llvm::sort(Elements, - [](const Object::value_type *L, const Object::value_type *R) { - return L->first < R->first; - }); - return Elements; -} - -bool isUTF8(llvm::StringRef S, size_t *ErrOffset) { - // Fast-path for ASCII, which is valid UTF-8. - if (LLVM_LIKELY(isASCII(S))) - return true; - - const UTF8 *Data = reinterpret_cast<const UTF8 *>(S.data()), *Rest = Data; - if (LLVM_LIKELY(isLegalUTF8String(&Rest, Data + S.size()))) - return true; - - if (ErrOffset) - *ErrOffset = Rest - Data; - return false; -} - -std::string fixUTF8(llvm::StringRef S) { - // This isn't particularly efficient, but is only for error-recovery. - std::vector<UTF32> Codepoints(S.size()); // 1 codepoint per byte suffices. - const UTF8 *In8 = reinterpret_cast<const UTF8 *>(S.data()); - UTF32 *Out32 = Codepoints.data(); - ConvertUTF8toUTF32(&In8, In8 + S.size(), &Out32, Out32 + Codepoints.size(), - lenientConversion); - Codepoints.resize(Out32 - Codepoints.data()); - std::string Res(4 * Codepoints.size(), 0); // 4 bytes per codepoint suffice - const UTF32 *In32 = Codepoints.data(); - UTF8 *Out8 = reinterpret_cast<UTF8 *>(&Res[0]); - ConvertUTF32toUTF8(&In32, In32 + Codepoints.size(), &Out8, Out8 + Res.size(), - strictConversion); - Res.resize(reinterpret_cast<char *>(Out8) - Res.data()); - return Res; -} - -static void quote(llvm::raw_ostream &OS, llvm::StringRef S) { - OS << '\"'; - for (unsigned char C : S) { - if (C == 0x22 || C == 0x5C) - OS << '\\'; - if (C >= 0x20) { - OS << C; - continue; - } - OS << '\\'; - switch (C) { - // A few characters are common enough to make short escapes worthwhile. - case '\t': - OS << 't'; - break; - case '\n': - OS << 'n'; - break; - case '\r': - OS << 'r'; - break; - default: - OS << 'u'; - llvm::write_hex(OS, C, llvm::HexPrintStyle::Lower, 4); - break; - } - } - OS << '\"'; -} - -void llvm::json::OStream::value(const Value &V) { - switch (V.kind()) { - case Value::Null: - valueBegin(); - OS << "null"; - return; - case Value::Boolean: - valueBegin(); - OS << (*V.getAsBoolean() ? "true" : "false"); - return; - case Value::Number: - valueBegin(); - if (V.Type == Value::T_Integer) - OS << *V.getAsInteger(); - else - OS << format("%.*g", std::numeric_limits<double>::max_digits10, - *V.getAsNumber()); - return; - case Value::String: - valueBegin(); - quote(OS, *V.getAsString()); - return; - case Value::Array: - return array([&] { - for (const Value &E : *V.getAsArray()) - value(E); - }); - case Value::Object: - return object([&] { - for (const Object::value_type *E : sortedElements(*V.getAsObject())) - attribute(E->first, E->second); - }); - } -} - -void llvm::json::OStream::valueBegin() { - assert(Stack.back().Ctx != Object && "Only attributes allowed here"); - if (Stack.back().HasValue) { - assert(Stack.back().Ctx != Singleton && "Only one value allowed here"); - OS << ','; - } - if (Stack.back().Ctx == Array) - newline(); - Stack.back().HasValue = true; -} - -void llvm::json::OStream::newline() { - if (IndentSize) { - OS.write('\n'); - OS.indent(Indent); - } -} - -void llvm::json::OStream::arrayBegin() { - valueBegin(); - Stack.emplace_back(); - Stack.back().Ctx = Array; - Indent += IndentSize; - OS << '['; -} - -void llvm::json::OStream::arrayEnd() { - assert(Stack.back().Ctx == Array); - Indent -= IndentSize; - if (Stack.back().HasValue) - newline(); - OS << ']'; - Stack.pop_back(); - assert(!Stack.empty()); -} - -void llvm::json::OStream::objectBegin() { - valueBegin(); - Stack.emplace_back(); - Stack.back().Ctx = Object; - Indent += IndentSize; - OS << '{'; -} - -void llvm::json::OStream::objectEnd() { - assert(Stack.back().Ctx == Object); - Indent -= IndentSize; - if (Stack.back().HasValue) - newline(); - OS << '}'; - Stack.pop_back(); - assert(!Stack.empty()); -} - -void llvm::json::OStream::attributeBegin(llvm::StringRef Key) { - assert(Stack.back().Ctx == Object); - if (Stack.back().HasValue) - OS << ','; - newline(); - Stack.back().HasValue = true; - Stack.emplace_back(); - Stack.back().Ctx = Singleton; - if (LLVM_LIKELY(isUTF8(Key))) { - quote(OS, Key); - } else { - assert(false && "Invalid UTF-8 in attribute key"); - quote(OS, fixUTF8(Key)); - } - OS.write(':'); - if (IndentSize) - OS.write(' '); -} - -void llvm::json::OStream::attributeEnd() { - assert(Stack.back().Ctx == Singleton); - assert(Stack.back().HasValue && "Attribute must have a value"); - Stack.pop_back(); - assert(Stack.back().Ctx == Object); -} - -} // namespace json -} // namespace llvm - -void llvm::format_provider<llvm::json::Value>::format( - const llvm::json::Value &E, raw_ostream &OS, StringRef Options) { - unsigned IndentAmount = 0; - if (!Options.empty() && Options.getAsInteger(/*Radix=*/10, IndentAmount)) - llvm_unreachable("json::Value format options should be an integer"); - json::OStream(OS, IndentAmount).value(E); -} - |
