diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2015-05-27 20:26:41 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2015-05-27 20:26:41 +0000 | 
| commit | ff0cc061ecf297f1556e906d229826fd709f37d6 (patch) | |
| tree | bd13a22d9db57ccf3eddbc07b32c18109521d050 /contrib/llvm/lib/Support/ConvertUTFWrapper.cpp | |
| parent | e14ba20ace4c6ab45aca5130defd992ab7d6bf5f (diff) | |
| parent | 5a5ac124e1efaf208671f01c46edb15f29ed2a0b (diff) | |
Notes
Diffstat (limited to 'contrib/llvm/lib/Support/ConvertUTFWrapper.cpp')
| -rw-r--r-- | contrib/llvm/lib/Support/ConvertUTFWrapper.cpp | 45 | 
1 files changed, 43 insertions, 2 deletions
diff --git a/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp b/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp index e45335ddcb6c..1bbef233b82f 100644 --- a/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp +++ b/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp @@ -109,8 +109,9 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {    if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_NATIVE)      Src++; -  // Just allocate enough space up front.  We'll shrink it later. -  Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT); +  // Just allocate enough space up front.  We'll shrink it later.  Allocate +  // enough that we can fit a null terminator without reallocating. +  Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1);    UTF8 *Dst = reinterpret_cast<UTF8 *>(&Out[0]);    UTF8 *DstEnd = Dst + Out.size(); @@ -124,6 +125,46 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {    }    Out.resize(reinterpret_cast<char *>(Dst) - &Out[0]); +  Out.push_back(0); +  Out.pop_back(); +  return true; +} + +bool convertUTF8ToUTF16String(StringRef SrcUTF8, +                              SmallVectorImpl<UTF16> &DstUTF16) { +  assert(DstUTF16.empty()); + +  // Avoid OOB by returning early on empty input. +  if (SrcUTF8.empty()) { +    DstUTF16.push_back(0); +    DstUTF16.pop_back(); +    return true; +  } + +  const UTF8 *Src = reinterpret_cast<const UTF8 *>(SrcUTF8.begin()); +  const UTF8 *SrcEnd = reinterpret_cast<const UTF8 *>(SrcUTF8.end()); + +  // Allocate the same number of UTF-16 code units as UTF-8 code units. Encoding +  // as UTF-16 should always require the same amount or less code units than the +  // UTF-8 encoding.  Allocate one extra byte for the null terminator though, +  // so that someone calling DstUTF16.data() gets a null terminated string. +  // We resize down later so we don't have to worry that this over allocates. +  DstUTF16.resize(SrcUTF8.size()+1); +  UTF16 *Dst = &DstUTF16[0]; +  UTF16 *DstEnd = Dst + DstUTF16.size(); + +  ConversionResult CR = +      ConvertUTF8toUTF16(&Src, SrcEnd, &Dst, DstEnd, strictConversion); +  assert(CR != targetExhausted); + +  if (CR != conversionOK) { +    DstUTF16.clear(); +    return false; +  } + +  DstUTF16.resize(Dst - &DstUTF16[0]); +  DstUTF16.push_back(0); +  DstUTF16.pop_back();    return true;  }  | 
