diff options
Diffstat (limited to 'charconv.c')
-rw-r--r-- | charconv.c | 217 |
1 files changed, 217 insertions, 0 deletions
diff --git a/charconv.c b/charconv.c new file mode 100644 index 000000000000..5f97509ab4e3 --- /dev/null +++ b/charconv.c @@ -0,0 +1,217 @@ +/* -*- Mode: c; tab-width: 8; indent-tabs-mode: 1; c-basic-offset: 8; -*- */ +/* + * Copyright (c) 1993, 1994, 1995, 1996, 1997 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the Computer Systems + * Engineering Group at Lawrence Berkeley Laboratory. + * 4. Neither the name of the University nor of the Laboratory may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef _WIN32 +#include <stdio.h> +#include <errno.h> + +#include <pcap/pcap.h> /* Needed for PCAP_ERRBUF_SIZE */ + +#include "charconv.h" + +wchar_t * +cp_to_utf_16le(UINT codepage, const char *cp_string, DWORD flags) +{ + int utf16le_len; + wchar_t *utf16le_string; + + /* + * Map from the specified code page to UTF-16LE. + * First, find out how big a buffer we'll need. + */ + utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1, + NULL, 0); + if (utf16le_len == 0) { + /* + * Error. Fail with EINVAL. + */ + errno = EINVAL; + return (NULL); + } + + /* + * Now attempt to allocate a buffer for that. + */ + utf16le_string = malloc(utf16le_len * sizeof (wchar_t)); + if (utf16le_string == NULL) { + /* + * Not enough memory; assume errno has been + * set, and fail. + */ + return (NULL); + } + + /* + * Now convert. + */ + utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1, + utf16le_string, utf16le_len); + if (utf16le_len == 0) { + /* + * Error. Fail with EINVAL. + * XXX - should this ever happen, given that + * we already ran the string through + * MultiByteToWideChar() to find out how big + * a buffer we needed? + */ + free(utf16le_string); + errno = EINVAL; + return (NULL); + } + return (utf16le_string); +} + +char * +utf_16le_to_cp(UINT codepage, const wchar_t *utf16le_string) +{ + int cp_len; + char *cp_string; + + /* + * Map from UTF-16LE to the specified code page. + * First, find out how big a buffer we'll need. + * We convert composite characters to precomposed characters, + * as that's what Windows expects. + */ + cp_len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK, + utf16le_string, -1, NULL, 0, NULL, NULL); + if (cp_len == 0) { + /* + * Error. Fail with EINVAL. + */ + errno = EINVAL; + return (NULL); + } + + /* + * Now attempt to allocate a buffer for that. + */ + cp_string = malloc(cp_len * sizeof (char)); + if (cp_string == NULL) { + /* + * Not enough memory; assume errno has been + * set, and fail. + */ + return (NULL); + } + + /* + * Now convert. + */ + cp_len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK, + utf16le_string, -1, cp_string, cp_len, NULL, NULL); + if (cp_len == 0) { + /* + * Error. Fail with EINVAL. + * XXX - should this ever happen, given that + * we already ran the string through + * WideCharToMultiByte() to find out how big + * a buffer we needed? + */ + free(cp_string); + errno = EINVAL; + return (NULL); + } + return (cp_string); +} + +/* + * Convert an error message string from UTF-8 to the local code page, as + * best we can. + * + * The buffer is assumed to be PCAP_ERRBUF_SIZE bytes long; we truncate + * if it doesn't fit. + */ +void +utf_8_to_acp_truncated(char *errbuf) +{ + wchar_t *utf_16_errbuf; + int retval; + DWORD err; + + /* + * Do this by converting to UTF-16LE and then to the local + * code page. That means we get to use Microsoft's + * conversion routines, rather than having to understand + * all the code pages ourselves, *and* that this routine + * can convert in place. + */ + + /* + * Map from UTF-8 to UTF-16LE. + * First, find out how big a buffer we'll need. + * Convert any invalid characters to REPLACEMENT CHARACTER. + */ + utf_16_errbuf = cp_to_utf_16le(CP_UTF8, errbuf, 0); + if (utf_16_errbuf == NULL) { + /* + * Error. Give up. + */ + snprintf(errbuf, PCAP_ERRBUF_SIZE, + "Can't convert error string to the local code page"); + return; + } + + /* + * Now, convert that to the local code page. + * Use the current thread's code page. For unconvertable + * characters, let it pick the "best fit" character. + * + * XXX - we'd like some way to do what utf_16le_to_utf_8_truncated() + * does if the buffer isn't big enough, but we don't want to have + * to handle all local code pages ourselves; doing so requires + * knowledge of all those code pages, including knowledge of how + * characters are formed in thoe code pages so that we can avoid + * cutting a multi-byte character into pieces. + * + * Converting to an un-truncated string using Windows APIs, and + * then copying to the buffer, still requires knowledge of how + * characters are formed in the target code page. + */ + retval = WideCharToMultiByte(CP_THREAD_ACP, 0, utf_16_errbuf, -1, + errbuf, PCAP_ERRBUF_SIZE, NULL, NULL); + if (retval == 0) { + err = GetLastError(); + free(utf_16_errbuf); + if (err == ERROR_INSUFFICIENT_BUFFER) + snprintf(errbuf, PCAP_ERRBUF_SIZE, + "The error string, in the local code page, didn't fit in the buffer"); + else + snprintf(errbuf, PCAP_ERRBUF_SIZE, + "Can't convert error string to the local code page"); + return; + } + free(utf_16_errbuf); +} +#endif |