author: Dimitry Andric <dim@FreeBSD.org> 2015-05-27 20:26:41 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2015-05-27 20:26:41 +0000
commit: ff0cc061ecf297f1556e906d229826fd709f37d6 (patch)
tree: bd13a22d9db57ccf3eddbc07b32c18109521d050 /contrib/llvm/lib/Support/ConvertUTFWrapper.cpp
parent: e14ba20ace4c6ab45aca5130defd992ab7d6bf5f (diff)
parent: 5a5ac124e1efaf208671f01c46edb15f29ed2a0b (diff)
1 files changed, 43 insertions, 2 deletions
diff --git a/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp b/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp
index e45335ddcb6c..1bbef233b82f 100644
--- a/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp
+++ b/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp
@@ -109,8 +109,9 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
   if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_NATIVE)
     Src++;
 
-  // Just allocate enough space up front.  We'll shrink it later.
-  Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
+  // Just allocate enough space up front.  We'll shrink it later.  Allocate
+  // enough that we can fit a null terminator without reallocating.
+  Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1);
   UTF8 *Dst = reinterpret_cast<UTF8 *>(&Out[0]);
   UTF8 *DstEnd = Dst + Out.size();
 
@@ -124,6 +125,46 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
   }
 
   Out.resize(reinterpret_cast<char *>(Dst) - &Out[0]);
+  Out.push_back(0);
+  Out.pop_back();
+  return true;
+}
+
+bool convertUTF8ToUTF16String(StringRef SrcUTF8,
+                              SmallVectorImpl<UTF16> &DstUTF16) {
+  assert(DstUTF16.empty());
+
+  // Avoid OOB by returning early on empty input.
+  if (SrcUTF8.empty()) {
+    DstUTF16.push_back(0);
+    DstUTF16.pop_back();
+    return true;
+  }
+
+  const UTF8 *Src = reinterpret_cast<const UTF8 *>(SrcUTF8.begin());
+  const UTF8 *SrcEnd = reinterpret_cast<const UTF8 *>(SrcUTF8.end());
+
+  // Allocate the same number of UTF-16 code units as UTF-8 code units. Encoding
+  // as UTF-16 should always require the same amount or less code units than the
+  // UTF-8 encoding.  Allocate one extra byte for the null terminator though,
+  // so that someone calling DstUTF16.data() gets a null terminated string.
+  // We resize down later so we don't have to worry that this over allocates.
+  DstUTF16.resize(SrcUTF8.size()+1);
+  UTF16 *Dst = &DstUTF16[0];
+  UTF16 *DstEnd = Dst + DstUTF16.size();
+
+  ConversionResult CR =
+      ConvertUTF8toUTF16(&Src, SrcEnd, &Dst, DstEnd, strictConversion);
+  assert(CR != targetExhausted);
+
+  if (CR != conversionOK) {
+    DstUTF16.clear();
+    return false;
+  }
+
+  DstUTF16.resize(Dst - &DstUTF16[0]);
+  DstUTF16.push_back(0);
+  DstUTF16.pop_back();
   return true;
 }
author	Dimitry Andric <dim@FreeBSD.org>	2015-05-27 20:26:41 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2015-05-27 20:26:41 +0000
commit	ff0cc061ecf297f1556e906d229826fd709f37d6 (patch)
tree	bd13a22d9db57ccf3eddbc07b32c18109521d050 /contrib/llvm/lib/Support/ConvertUTFWrapper.cpp
parent	e14ba20ace4c6ab45aca5130defd992ab7d6bf5f (diff)
parent	5a5ac124e1efaf208671f01c46edb15f29ed2a0b (diff)