diff options
Diffstat (limited to 'subversion/include/svn_utf.h')
-rw-r--r-- | subversion/include/svn_utf.h | 252 |
1 files changed, 252 insertions, 0 deletions
diff --git a/subversion/include/svn_utf.h b/subversion/include/svn_utf.h
new file mode 100644
index 000000000000..4a2c137b8e64
--- /dev/null
+++ b/subversion/include/svn_utf.h
@@ -0,0 +1,252 @@
+/**
+ * @copyright
+ * ====================================================================
+ *    Licensed to the Apache Software Foundation (ASF) under one
+ *    or more contributor license agreements.  See the NOTICE file
+ *    distributed with this work for additional information
+ *    regarding copyright ownership.  The ASF licenses this file
+ *    to you under the Apache License, Version 2.0 (the
+ *    "License"); you may not use this file except in compliance
+ *    with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing,
+ *    software distributed under the License is distributed on an
+ *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *    KIND, either express or implied.  See the License for the
+ *    specific language governing permissions and limitations
+ *    under the License.
+ * ====================================================================
+ * @endcopyright
+ *
+ * @file svn_utf.h
+ * @brief UTF-8 conversion routines
+ *
+ * Whenever a conversion routine cannot convert to or from UTF-8, the
+ * error returned has code @c APR_EINVAL.
+ */
+
+
+
+#ifndef SVN_UTF_H
+#define SVN_UTF_H
+
+#include <apr_pools.h>
+#include <apr_xlate.h>          /* for APR_*_CHARSET */
+
+#include "svn_types.h"
+#include "svn_string.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET
+#define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET
+
+/**
+ * Initialize the UTF-8 encoding/decoding routines.
+ * Allocate cached translation handles in a subpool of @a pool.
+ *
+ * If @a assume_native_utf8 is TRUE, the native character set is
+ * assumed to be UTF-8, i.e. conversion is a no-op.  This is useful
+ * in contexts where the native character set is ASCII but UTF-8
+ * should be used regardless (e.g. for mod_dav_svn, which runs within
+ * httpd and always uses the "C" locale).
+ *
+ * @note It is optional to call this function, but if it is used, no other
+ * svn function may be in use in other threads during the call of this
+ * function or when @a pool is cleared or destroyed.
+ * Initializing the UTF-8 routines will improve performance.
+ *
+ * @since New in 1.8.
+ */
+void
+svn_utf_initialize2(svn_boolean_t assume_native_utf8,
+                    apr_pool_t *pool);
+
+/**
+ * Like svn_utf_initialize2() but without the ability to force the
+ * native encoding to UTF-8.
+ *
+ * @deprecated Provided for backward compatibility with the 1.7 API.
+ */
+SVN_DEPRECATED
+void
+svn_utf_initialize(apr_pool_t *pool);
+
+/** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src;
+ * allocate @a *dest in @a pool.
+ */
+svn_error_t *
+svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
+                          const svn_stringbuf_t *src,
+                          apr_pool_t *pool);
+
+
+/** Set @a *dest to a utf8-encoded string from native string @a src; allocate
+ * @a *dest in @a pool.
+ */
+svn_error_t *
+svn_utf_string_to_utf8(const svn_string_t **dest,
+                       const svn_string_t *src,
+                       apr_pool_t *pool);
+
+
+/** Set @a *dest to a utf8-encoded C string from native C string @a src;
+ * allocate @a *dest in @a pool.
+ */
+svn_error_t *
+svn_utf_cstring_to_utf8(const char **dest,
+                        const char *src,
+                        apr_pool_t *pool);
+
+
+/** Set @a *dest to a utf8-encoded C string from @a frompage-encoded C
+ * string @a src; allocate @a *dest in @a pool.
+ *
+ * @since New in 1.4.
+ */
+svn_error_t *
+svn_utf_cstring_to_utf8_ex2(const char **dest,
+                            const char *src,
+                            const char *frompage,
+                            apr_pool_t *pool);
+
+
+/** Like svn_utf_cstring_to_utf8_ex2() but with an extra @a convset_key
+ * argument, which is ignored.
+ *
+ * @deprecated Provided for backward compatibility with the 1.3 API.
+ */
+SVN_DEPRECATED
+svn_error_t *
+svn_utf_cstring_to_utf8_ex(const char **dest,
+                           const char *src,
+                           const char *frompage,
+                           const char *convset_key,
+                           apr_pool_t *pool);
+
+
+/** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src;
+ * allocate @a *dest in @a pool.
+ */
+svn_error_t *
+svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
+                            const svn_stringbuf_t *src,
+                            apr_pool_t *pool);
+
+
+/** Set @a *dest to a natively-encoded string from utf8 string @a src;
+ * allocate @a *dest in @a pool.
+ */
+svn_error_t *
+svn_utf_string_from_utf8(const svn_string_t **dest,
+                         const svn_string_t *src,
+                         apr_pool_t *pool);
+
+
+/** Set @a *dest to a natively-encoded C string from utf8 C string @a src;
+ * allocate @a *dest in @a pool.
+ */
+svn_error_t *
+svn_utf_cstring_from_utf8(const char **dest,
+                          const char *src,
+                          apr_pool_t *pool);
+
+
+/** Set @a *dest to a @a topage-encoded C string from utf8-encoded C string
+ * @a src; allocate @a *dest in @a pool.
+ *
+ * @since New in 1.4.
+ */
+svn_error_t *
+svn_utf_cstring_from_utf8_ex2(const char **dest,
+                              const char *src,
+                              const char *topage,
+                              apr_pool_t *pool);
+
+
+/** Like svn_utf_cstring_from_utf8_ex2() but with an extra @a convset_key
+ * argument, which is ignored.
+ *
+ * @deprecated Provided for backward compatibility with the 1.3 API.
+ */
+SVN_DEPRECATED
+svn_error_t *
+svn_utf_cstring_from_utf8_ex(const char **dest,
+                             const char *src,
+                             const char *topage,
+                             const char *convset_key,
+                             apr_pool_t *pool);
+
+
+/** Return a fuzzily native-encoded C string from utf8 C string @a src,
+ * allocated in @a pool.  A fuzzy recoding leaves all 7-bit ASCII
+ * characters the same, and substitutes "?\\XXX" for others, where XXX
+ * is the unsigned decimal code for that character.
+ *
+ * This function cannot fail; it is guaranteed to return something.
+ * First it will recode as described above and then attempt to convert
+ * the (new) 7-bit UTF-8 string to native encoding.  If that fails, it
+ * will return the raw fuzzily recoded string, which may or may not be
+ * meaningful in the client's locale, but is (presumably) better than
+ * nothing.
+ *
+ * ### Notes:
+ *
+ * Improvement is possible, even imminent.  The original problem was
+ * that if you converted a UTF-8 string (say, a log message) into a
+ * locale that couldn't represent all the characters, you'd just get a
+ * static placeholder saying "[unconvertible log message]".  Then
+ * Justin Erenkrantz pointed out how on platforms that didn't support
+ * conversion at all, "svn log" would still fail completely when it
+ * encountered unconvertible data.
+ *
+ * Now for both cases, the caller can at least fall back on this
+ * function, which converts the message as best it can, substituting
+ * "?\\XXX" escape codes for the non-ASCII characters.
+ *
+ * Ultimately, some callers may prefer the iconv "//TRANSLIT" option,
+ * so when we can detect that at configure time, things will change.
+ * Also, this should (?) be moved to apr/apu eventually.
+ *
+ * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for
+ * details.
+ */
+const char *
+svn_utf_cstring_from_utf8_fuzzy(const char *src,
+                                apr_pool_t *pool);
+
+
+/** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src;
+ * allocate @a *dest in @a pool.
+ */
+svn_error_t *
+svn_utf_cstring_from_utf8_stringbuf(const char **dest,
+                                    const svn_stringbuf_t *src,
+                                    apr_pool_t *pool);
+
+
+/** Set @a *dest to a natively-encoded C string from utf8 string @a src;
+ * allocate @a *dest in @a pool.
+ */
+svn_error_t *
+svn_utf_cstring_from_utf8_string(const char **dest,
+                                 const svn_string_t *src,
+                                 apr_pool_t *pool);
+
+/** Return the display width of UTF-8-encoded C string @a cstr.
+ * If the string is not printable or is not valid UTF-8, return -1.
+ *
+ * @since New in 1.8.
+ */
+int
+svn_utf_cstring_utf8_width(const char *cstr);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* SVN_UTF_H */ |