diff options
Diffstat (limited to 'subversion/libsvn_subr/path.c')
-rw-r--r-- | subversion/libsvn_subr/path.c | 1315 |
1 files changed, 1315 insertions, 0 deletions
diff --git a/subversion/libsvn_subr/path.c b/subversion/libsvn_subr/path.c new file mode 100644 index 0000000000000..84368f3bb77b0 --- /dev/null +++ b/subversion/libsvn_subr/path.c @@ -0,0 +1,1315 @@ +/* + * paths.c: a path manipulation library using svn_stringbuf_t + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + + + +#include <string.h> +#include <assert.h> + +#include <apr_file_info.h> +#include <apr_lib.h> +#include <apr_uri.h> + +#include "svn_string.h" +#include "svn_dirent_uri.h" +#include "svn_path.h" +#include "svn_private_config.h" /* for SVN_PATH_LOCAL_SEPARATOR */ +#include "svn_utf.h" +#include "svn_io.h" /* for svn_io_stat() */ +#include "svn_ctype.h" + +#include "dirent_uri.h" + + +/* The canonical empty path. Can this be changed? Well, change the empty + test below and the path library will work, not so sure about the fs/wc + libraries. */ +#define SVN_EMPTY_PATH "" + +/* TRUE if s is the canonical empty path, FALSE otherwise */ +#define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0') + +/* TRUE if s,n is the platform's empty path ("."), FALSE otherwise. Can + this be changed? Well, the path library will work, not so sure about + the OS! */ +#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.') + + + + +#ifndef NDEBUG +/* This function is an approximation of svn_path_is_canonical. + * It is supposed to be used in functions that do not have access + * to a pool, but still want to assert that a path is canonical. + * + * PATH with length LEN is assumed to be canonical if it isn't + * the platform's empty path (see definition of SVN_PATH_IS_PLATFORM_EMPTY), + * and does not contain "/./", and any one of the following + * conditions is also met: + * + * 1. PATH has zero length + * 2. PATH is the root directory (what exactly a root directory is + * depends on the platform) + * 3. PATH is not a root directory and does not end with '/' + * + * If possible, please use svn_path_is_canonical instead. + */ +static svn_boolean_t +is_canonical(const char *path, + apr_size_t len) +{ + return (! SVN_PATH_IS_PLATFORM_EMPTY(path, len) + && strstr(path, "/./") == NULL + && (len == 0 + || (len == 1 && path[0] == '/') + || (path[len-1] != '/') +#if defined(WIN32) || defined(__CYGWIN__) + || svn_dirent_is_root(path, len) +#endif + )); +} +#endif + + +/* functionality of svn_path_is_canonical but without the deprecation */ +static svn_boolean_t +svn_path_is_canonical_internal(const char *path, apr_pool_t *pool) +{ + return svn_uri_is_canonical(path, pool) || + svn_dirent_is_canonical(path, pool) || + svn_relpath_is_canonical(path); +} + +svn_boolean_t +svn_path_is_canonical(const char *path, apr_pool_t *pool) +{ + return svn_path_is_canonical_internal(path, pool); +} + +/* functionality of svn_path_join but without the deprecation */ +static char * +svn_path_join_internal(const char *base, + const char *component, + apr_pool_t *pool) +{ + apr_size_t blen = strlen(base); + apr_size_t clen = strlen(component); + char *path; + + assert(svn_path_is_canonical_internal(base, pool)); + assert(svn_path_is_canonical_internal(component, pool)); + + /* If the component is absolute, then return it. */ + if (*component == '/') + return apr_pmemdup(pool, component, clen + 1); + + /* If either is empty return the other */ + if (SVN_PATH_IS_EMPTY(base)) + return apr_pmemdup(pool, component, clen + 1); + if (SVN_PATH_IS_EMPTY(component)) + return apr_pmemdup(pool, base, blen + 1); + + if (blen == 1 && base[0] == '/') + blen = 0; /* Ignore base, just return separator + component */ + + /* Construct the new, combined path. */ + path = apr_palloc(pool, blen + 1 + clen + 1); + memcpy(path, base, blen); + path[blen] = '/'; + memcpy(path + blen + 1, component, clen + 1); + + return path; +} + +char *svn_path_join(const char *base, + const char *component, + apr_pool_t *pool) +{ + return svn_path_join_internal(base, component, pool); +} + +char *svn_path_join_many(apr_pool_t *pool, const char *base, ...) +{ +#define MAX_SAVED_LENGTHS 10 + apr_size_t saved_lengths[MAX_SAVED_LENGTHS]; + apr_size_t total_len; + int nargs; + va_list va; + const char *s; + apr_size_t len; + char *path; + char *p; + svn_boolean_t base_is_empty = FALSE, base_is_root = FALSE; + int base_arg = 0; + + total_len = strlen(base); + + assert(svn_path_is_canonical_internal(base, pool)); + + if (total_len == 1 && *base == '/') + base_is_root = TRUE; + else if (SVN_PATH_IS_EMPTY(base)) + { + total_len = sizeof(SVN_EMPTY_PATH) - 1; + base_is_empty = TRUE; + } + + saved_lengths[0] = total_len; + + /* Compute the length of the resulting string. */ + + nargs = 0; + va_start(va, base); + while ((s = va_arg(va, const char *)) != NULL) + { + len = strlen(s); + + assert(svn_path_is_canonical_internal(s, pool)); + + if (SVN_PATH_IS_EMPTY(s)) + continue; + + if (nargs++ < MAX_SAVED_LENGTHS) + saved_lengths[nargs] = len; + + if (*s == '/') + { + /* an absolute path. skip all components to this point and reset + the total length. */ + total_len = len; + base_arg = nargs; + base_is_root = len == 1; + base_is_empty = FALSE; + } + else if (nargs == base_arg + || (nargs == base_arg + 1 && base_is_root) + || base_is_empty) + { + /* if we have skipped everything up to this arg, then the base + and all prior components are empty. just set the length to + this component; do not add a separator. If the base is empty + we can now ignore it. */ + if (base_is_empty) + { + base_is_empty = FALSE; + total_len = 0; + } + total_len += len; + } + else + { + total_len += 1 + len; + } + } + va_end(va); + + /* base == "/" and no further components. just return that. */ + if (base_is_root && total_len == 1) + return apr_pmemdup(pool, "/", 2); + + /* we got the total size. allocate it, with room for a NULL character. */ + path = p = apr_palloc(pool, total_len + 1); + + /* if we aren't supposed to skip forward to an absolute component, and if + this is not an empty base that we are skipping, then copy the base + into the output. */ + if (base_arg == 0 && ! (SVN_PATH_IS_EMPTY(base) && ! base_is_empty)) + { + if (SVN_PATH_IS_EMPTY(base)) + memcpy(p, SVN_EMPTY_PATH, len = saved_lengths[0]); + else + memcpy(p, base, len = saved_lengths[0]); + p += len; + } + + nargs = 0; + va_start(va, base); + while ((s = va_arg(va, const char *)) != NULL) + { + if (SVN_PATH_IS_EMPTY(s)) + continue; + + if (++nargs < base_arg) + continue; + + if (nargs < MAX_SAVED_LENGTHS) + len = saved_lengths[nargs]; + else + len = strlen(s); + + /* insert a separator if we aren't copying in the first component + (which can happen when base_arg is set). also, don't put in a slash + if the prior character is a slash (occurs when prior component + is "/"). */ + if (p != path && p[-1] != '/') + *p++ = '/'; + + /* copy the new component and advance the pointer */ + memcpy(p, s, len); + p += len; + } + va_end(va); + + *p = '\0'; + assert((apr_size_t)(p - path) == total_len); + + return path; +} + + + +apr_size_t +svn_path_component_count(const char *path) +{ + apr_size_t count = 0; + + assert(is_canonical(path, strlen(path))); + + while (*path) + { + const char *start; + + while (*path == '/') + ++path; + + start = path; + + while (*path && *path != '/') + ++path; + + if (path != start) + ++count; + } + + return count; +} + + +/* Return the length of substring necessary to encompass the entire + * previous path segment in PATH, which should be a LEN byte string. + * + * A trailing slash will not be included in the returned length except + * in the case in which PATH is absolute and there are no more + * previous segments. + */ +static apr_size_t +previous_segment(const char *path, + apr_size_t len) +{ + if (len == 0) + return 0; + + while (len > 0 && path[--len] != '/') + ; + + if (len == 0 && path[0] == '/') + return 1; + else + return len; +} + + +void +svn_path_add_component(svn_stringbuf_t *path, + const char *component) +{ + apr_size_t len = strlen(component); + + assert(is_canonical(path->data, path->len)); + assert(is_canonical(component, strlen(component))); + + /* Append a dir separator, but only if this path is neither empty + nor consists of a single dir separator already. */ + if ((! SVN_PATH_IS_EMPTY(path->data)) + && (! ((path->len == 1) && (*(path->data) == '/')))) + { + char dirsep = '/'; + svn_stringbuf_appendbytes(path, &dirsep, sizeof(dirsep)); + } + + svn_stringbuf_appendbytes(path, component, len); +} + + +void +svn_path_remove_component(svn_stringbuf_t *path) +{ + assert(is_canonical(path->data, path->len)); + + path->len = previous_segment(path->data, path->len); + path->data[path->len] = '\0'; +} + + +void +svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n) +{ + while (n > 0) + { + svn_path_remove_component(path); + n--; + } +} + + +char * +svn_path_dirname(const char *path, apr_pool_t *pool) +{ + apr_size_t len = strlen(path); + + assert(svn_path_is_canonical_internal(path, pool)); + + return apr_pstrmemdup(pool, path, previous_segment(path, len)); +} + + +char * +svn_path_basename(const char *path, apr_pool_t *pool) +{ + apr_size_t len = strlen(path); + apr_size_t start; + + assert(svn_path_is_canonical_internal(path, pool)); + + if (len == 1 && path[0] == '/') + start = 0; + else + { + start = len; + while (start > 0 && path[start - 1] != '/') + --start; + } + + return apr_pstrmemdup(pool, path + start, len - start); +} + +int +svn_path_is_empty(const char *path) +{ + assert(is_canonical(path, strlen(path))); + + if (SVN_PATH_IS_EMPTY(path)) + return 1; + + return 0; +} + +int +svn_path_compare_paths(const char *path1, + const char *path2) +{ + apr_size_t path1_len = strlen(path1); + apr_size_t path2_len = strlen(path2); + apr_size_t min_len = ((path1_len < path2_len) ? path1_len : path2_len); + apr_size_t i = 0; + + assert(is_canonical(path1, path1_len)); + assert(is_canonical(path2, path2_len)); + + /* Skip past common prefix. */ + while (i < min_len && path1[i] == path2[i]) + ++i; + + /* Are the paths exactly the same? */ + if ((path1_len == path2_len) && (i >= min_len)) + return 0; + + /* Children of paths are greater than their parents, but less than + greater siblings of their parents. */ + if ((path1[i] == '/') && (path2[i] == 0)) + return 1; + if ((path2[i] == '/') && (path1[i] == 0)) + return -1; + if (path1[i] == '/') + return -1; + if (path2[i] == '/') + return 1; + + /* Common prefix was skipped above, next character is compared to + determine order. We need to use an unsigned comparison, though, + so a "next character" of NULL (0x00) sorts numerically + smallest. */ + return (unsigned char)(path1[i]) < (unsigned char)(path2[i]) ? -1 : 1; +} + +/* Return the string length of the longest common ancestor of PATH1 and PATH2. + * + * This function handles everything except the URL-handling logic + * of svn_path_get_longest_ancestor, and assumes that PATH1 and + * PATH2 are *not* URLs. + * + * If the two paths do not share a common ancestor, return 0. + * + * New strings are allocated in POOL. + */ +static apr_size_t +get_path_ancestor_length(const char *path1, + const char *path2, + apr_pool_t *pool) +{ + apr_size_t path1_len, path2_len; + apr_size_t i = 0; + apr_size_t last_dirsep = 0; + + path1_len = strlen(path1); + path2_len = strlen(path2); + + if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2)) + return 0; + + while (path1[i] == path2[i]) + { + /* Keep track of the last directory separator we hit. */ + if (path1[i] == '/') + last_dirsep = i; + + i++; + + /* If we get to the end of either path, break out. */ + if ((i == path1_len) || (i == path2_len)) + break; + } + + /* two special cases: + 1. '/' is the longest common ancestor of '/' and '/foo' + 2. '/' is the longest common ancestor of '/rif' and '/raf' */ + if (i == 1 && path1[0] == '/' && path2[0] == '/') + return 1; + + /* last_dirsep is now the offset of the last directory separator we + crossed before reaching a non-matching byte. i is the offset of + that non-matching byte. */ + if (((i == path1_len) && (path2[i] == '/')) + || ((i == path2_len) && (path1[i] == '/')) + || ((i == path1_len) && (i == path2_len))) + return i; + else + if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/') + return 1; + return last_dirsep; +} + + +char * +svn_path_get_longest_ancestor(const char *path1, + const char *path2, + apr_pool_t *pool) +{ + svn_boolean_t path1_is_url = svn_path_is_url(path1); + svn_boolean_t path2_is_url = svn_path_is_url(path2); + + /* Are we messing with URLs? If we have a mix of URLs and non-URLs, + there's nothing common between them. */ + if (path1_is_url && path2_is_url) + { + return svn_uri_get_longest_ancestor(path1, path2, pool); + } + else if ((! path1_is_url) && (! path2_is_url)) + { + return apr_pstrndup(pool, path1, + get_path_ancestor_length(path1, path2, pool)); + } + else + { + /* A URL and a non-URL => no common prefix */ + return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH)); + } +} + +const char * +svn_path_is_child(const char *path1, + const char *path2, + apr_pool_t *pool) +{ + apr_size_t i; + + /* assert (is_canonical (path1, strlen (path1))); ### Expensive strlen */ + /* assert (is_canonical (path2, strlen (path2))); ### Expensive strlen */ + + /* Allow "" and "foo" to be parent/child */ + if (SVN_PATH_IS_EMPTY(path1)) /* "" is the parent */ + { + if (SVN_PATH_IS_EMPTY(path2) /* "" not a child */ + || path2[0] == '/') /* "/foo" not a child */ + return NULL; + else + /* everything else is child */ + return pool ? apr_pstrdup(pool, path2) : path2; + } + + /* Reach the end of at least one of the paths. How should we handle + things like path1:"foo///bar" and path2:"foo/bar/baz"? It doesn't + appear to arise in the current Subversion code, it's not clear to me + if they should be parent/child or not. */ + for (i = 0; path1[i] && path2[i]; i++) + if (path1[i] != path2[i]) + return NULL; + + /* There are two cases that are parent/child + ... path1[i] == '\0' + .../foo path2[i] == '/' + or + / path1[i] == '\0' + /foo path2[i] != '/' + */ + if (path1[i] == '\0' && path2[i]) + { + if (path2[i] == '/') + return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1; + else if (i == 1 && path1[0] == '/') + return pool ? apr_pstrdup(pool, path2 + 1) : path2 + 1; + } + + /* Otherwise, path2 isn't a child. */ + return NULL; +} + + +svn_boolean_t +svn_path_is_ancestor(const char *path1, const char *path2) +{ + apr_size_t path1_len = strlen(path1); + + /* If path1 is empty and path2 is not absoulte, then path1 is an ancestor. */ + if (SVN_PATH_IS_EMPTY(path1)) + return *path2 != '/'; + + /* If path1 is a prefix of path2, then: + - If path1 ends in a path separator, + - If the paths are of the same length + OR + - path2 starts a new path component after the common prefix, + then path1 is an ancestor. */ + if (strncmp(path1, path2, path1_len) == 0) + return path1[path1_len - 1] == '/' + || (path2[path1_len] == '/' || path2[path1_len] == '\0'); + + return FALSE; +} + + +apr_array_header_t * +svn_path_decompose(const char *path, + apr_pool_t *pool) +{ + apr_size_t i, oldi; + + apr_array_header_t *components = + apr_array_make(pool, 1, sizeof(const char *)); + + assert(svn_path_is_canonical_internal(path, pool)); + + if (SVN_PATH_IS_EMPTY(path)) + return components; /* ### Should we return a "" component? */ + + /* If PATH is absolute, store the '/' as the first component. */ + i = oldi = 0; + if (path[i] == '/') + { + char dirsep = '/'; + + APR_ARRAY_PUSH(components, const char *) + = apr_pstrmemdup(pool, &dirsep, sizeof(dirsep)); + + i++; + oldi++; + if (path[i] == '\0') /* path is a single '/' */ + return components; + } + + do + { + if ((path[i] == '/') || (path[i] == '\0')) + { + if (SVN_PATH_IS_PLATFORM_EMPTY(path + oldi, i - oldi)) + APR_ARRAY_PUSH(components, const char *) = SVN_EMPTY_PATH; + else + APR_ARRAY_PUSH(components, const char *) + = apr_pstrmemdup(pool, path + oldi, i - oldi); + + i++; + oldi = i; /* skipping past the dirsep */ + continue; + } + i++; + } + while (path[i-1]); + + return components; +} + + +const char * +svn_path_compose(const apr_array_header_t *components, + apr_pool_t *pool) +{ + apr_size_t *lengths = apr_palloc(pool, components->nelts*sizeof(*lengths)); + apr_size_t max_length = components->nelts; + char *path; + char *p; + int i; + + /* Get the length of each component so a total length can be + calculated. */ + for (i = 0; i < components->nelts; ++i) + { + apr_size_t l = strlen(APR_ARRAY_IDX(components, i, const char *)); + lengths[i] = l; + max_length += l; + } + + path = apr_palloc(pool, max_length + 1); + p = path; + + for (i = 0; i < components->nelts; ++i) + { + /* Append a '/' to the path. Handle the case with an absolute + path where a '/' appears in the first component. Only append + a '/' if the component is the second component that does not + follow a "/" first component; or it is the third or later + component. */ + if (i > 1 || + (i == 1 && strcmp("/", APR_ARRAY_IDX(components, + 0, + const char *)) != 0)) + { + *p++ = '/'; + } + + memcpy(p, APR_ARRAY_IDX(components, i, const char *), lengths[i]); + p += lengths[i]; + } + + *p = '\0'; + + return path; +} + + +svn_boolean_t +svn_path_is_single_path_component(const char *name) +{ + assert(is_canonical(name, strlen(name))); + + /* Can't be empty or `..' */ + if (SVN_PATH_IS_EMPTY(name) + || (name[0] == '.' && name[1] == '.' && name[2] == '\0')) + return FALSE; + + /* Slashes are bad, m'kay... */ + if (strchr(name, '/') != NULL) + return FALSE; + + /* It is valid. */ + return TRUE; +} + + +svn_boolean_t +svn_path_is_dotpath_present(const char *path) +{ + size_t len; + + /* The empty string does not have a dotpath */ + if (path[0] == '\0') + return FALSE; + + /* Handle "." or a leading "./" */ + if (path[0] == '.' && (path[1] == '\0' || path[1] == '/')) + return TRUE; + + /* Paths of length 1 (at this point) have no dotpath present. */ + if (path[1] == '\0') + return FALSE; + + /* If any segment is "/./", then a dotpath is present. */ + if (strstr(path, "/./") != NULL) + return TRUE; + + /* Does the path end in "/." ? */ + len = strlen(path); + return path[len - 2] == '/' && path[len - 1] == '.'; +} + +svn_boolean_t +svn_path_is_backpath_present(const char *path) +{ + size_t len; + + /* 0 and 1-length paths do not have a backpath */ + if (path[0] == '\0' || path[1] == '\0') + return FALSE; + + /* Handle ".." or a leading "../" */ + if (path[0] == '.' && path[1] == '.' && (path[2] == '\0' || path[2] == '/')) + return TRUE; + + /* Paths of length 2 (at this point) have no backpath present. */ + if (path[2] == '\0') + return FALSE; + + /* If any segment is "..", then a backpath is present. */ + if (strstr(path, "/../") != NULL) + return TRUE; + + /* Does the path end in "/.." ? */ + len = strlen(path); + return path[len - 3] == '/' && path[len - 2] == '.' && path[len - 1] == '.'; +} + + +/*** URI Stuff ***/ + +/* Examine PATH as a potential URI, and return a substring of PATH + that immediately follows the (scheme):// portion of the URI, or + NULL if PATH doesn't appear to be a valid URI. The returned value + is not alloced -- it shares memory with PATH. */ +static const char * +skip_uri_scheme(const char *path) +{ + apr_size_t j; + + /* A scheme is terminated by a : and cannot contain any /'s. */ + for (j = 0; path[j] && path[j] != ':'; ++j) + if (path[j] == '/') + return NULL; + + if (j > 0 && path[j] == ':' && path[j+1] == '/' && path[j+2] == '/') + return path + j + 3; + + return NULL; +} + + +svn_boolean_t +svn_path_is_url(const char *path) +{ + /* ### This function is reaaaaaaaaaaaaaally stupid right now. + We're just going to look for: + + (scheme)://(optional_stuff) + + Where (scheme) has no ':' or '/' characters. + + Someday it might be nice to have an actual URI parser here. + */ + return skip_uri_scheme(path) != NULL; +} + + + +/* Here is the BNF for path components in a URI. "pchar" is a + character in a path component. + + pchar = unreserved | escaped | + ":" | "@" | "&" | "=" | "+" | "$" | "," + unreserved = alphanum | mark + mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" + + Note that "escaped" doesn't really apply to what users can put in + their paths, so that really means the set of characters is: + + alphanum | mark | ":" | "@" | "&" | "=" | "+" | "$" | "," +*/ +const char svn_uri__char_validity[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, + + /* 64 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, + + /* 128 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + /* 192 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + + +svn_boolean_t +svn_path_is_uri_safe(const char *path) +{ + apr_size_t i; + + /* Skip the URI scheme. */ + path = skip_uri_scheme(path); + + /* No scheme? Get outta here. */ + if (! path) + return FALSE; + + /* Skip to the first slash that's after the URI scheme. */ + path = strchr(path, '/'); + + /* If there's no first slash, then there's only a host portion; + therefore there couldn't be any uri-unsafe characters after the + host... so return true. */ + if (path == NULL) + return TRUE; + + for (i = 0; path[i]; i++) + { + /* Allow '%XX' (where each X is a hex digit) */ + if (path[i] == '%') + { + if (svn_ctype_isxdigit(path[i + 1]) && + svn_ctype_isxdigit(path[i + 2])) + { + i += 2; + continue; + } + return FALSE; + } + else if (! svn_uri__char_validity[((unsigned char)path[i])]) + { + return FALSE; + } + } + + return TRUE; +} + + +/* URI-encode each character c in PATH for which TABLE[c] is 0. + If no encoding was needed, return PATH, else return a new string allocated + in POOL. */ +static const char * +uri_escape(const char *path, const char table[], apr_pool_t *pool) +{ + svn_stringbuf_t *retstr; + apr_size_t i, copied = 0; + int c; + + retstr = svn_stringbuf_create_ensure(strlen(path), pool); + for (i = 0; path[i]; i++) + { + c = (unsigned char)path[i]; + if (table[c]) + continue; + + /* If we got here, we're looking at a character that isn't + supported by the (or at least, our) URI encoding scheme. We + need to escape this character. */ + + /* First things first, copy all the good stuff that we haven't + yet copied into our output buffer. */ + if (i - copied) + svn_stringbuf_appendbytes(retstr, path + copied, + i - copied); + + /* Now, write in our escaped character, consisting of the + '%' and two digits. We cast the C to unsigned char here because + the 'X' format character will be tempted to treat it as an unsigned + int...which causes problem when messing with 0x80-0xFF chars. + We also need space for a null as apr_snprintf will write one. */ + svn_stringbuf_ensure(retstr, retstr->len + 4); + apr_snprintf(retstr->data + retstr->len, 4, "%%%02X", (unsigned char)c); + retstr->len += 3; + + /* Finally, update our copy counter. */ + copied = i + 1; + } + + /* If we didn't encode anything, we don't need to duplicate the string. */ + if (retstr->len == 0) + return path; + + /* Anything left to copy? */ + if (i - copied) + svn_stringbuf_appendbytes(retstr, path + copied, i - copied); + + /* retstr is null-terminated either by apr_snprintf or the svn_stringbuf + functions. */ + + return retstr->data; +} + + +const char * +svn_path_uri_encode(const char *path, apr_pool_t *pool) +{ + const char *ret; + + ret = uri_escape(path, svn_uri__char_validity, pool); + + /* Our interface guarantees a copy. */ + if (ret == path) + return apr_pstrdup(pool, path); + else + return ret; +} + +static const char iri_escape_chars[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + /* 128 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +const char * +svn_path_uri_from_iri(const char *iri, apr_pool_t *pool) +{ + return uri_escape(iri, iri_escape_chars, pool); +} + +static const char uri_autoescape_chars[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, + + /* 64 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, + + /* 128 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + /* 192 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +const char * +svn_path_uri_autoescape(const char *uri, apr_pool_t *pool) +{ + return uri_escape(uri, uri_autoescape_chars, pool); +} + +const char * +svn_path_uri_decode(const char *path, apr_pool_t *pool) +{ + svn_stringbuf_t *retstr; + apr_size_t i; + svn_boolean_t query_start = FALSE; + + /* avoid repeated realloc */ + retstr = svn_stringbuf_create_ensure(strlen(path) + 1, pool); + + retstr->len = 0; + for (i = 0; path[i]; i++) + { + char c = path[i]; + + if (c == '?') + { + /* Mark the start of the query string, if it exists. */ + query_start = TRUE; + } + else if (c == '+' && query_start) + { + /* Only do this if we are into the query string. + * RFC 2396, section 3.3 */ + c = ' '; + } + else if (c == '%' && svn_ctype_isxdigit(path[i + 1]) + && svn_ctype_isxdigit(path[i+2])) + { + char digitz[3]; + digitz[0] = path[++i]; + digitz[1] = path[++i]; + digitz[2] = '\0'; + c = (char)(strtol(digitz, NULL, 16)); + } + + retstr->data[retstr->len++] = c; + } + + /* Null-terminate this bad-boy. */ + retstr->data[retstr->len] = 0; + + return retstr->data; +} + + +const char * +svn_path_url_add_component2(const char *url, + const char *component, + apr_pool_t *pool) +{ + /* = svn_path_uri_encode() but without always copying */ + component = uri_escape(component, svn_uri__char_validity, pool); + + return svn_path_join_internal(url, component, pool); +} + +svn_error_t * +svn_path_get_absolute(const char **pabsolute, + const char *relative, + apr_pool_t *pool) +{ + if (svn_path_is_url(relative)) + { + *pabsolute = apr_pstrdup(pool, relative); + return SVN_NO_ERROR; + } + + return svn_dirent_get_absolute(pabsolute, relative, pool); +} + + +#if !defined(WIN32) && !defined(DARWIN) +/** Get APR's internal path encoding. */ +static svn_error_t * +get_path_encoding(svn_boolean_t *path_is_utf8, apr_pool_t *pool) +{ + apr_status_t apr_err; + int encoding_style; + + apr_err = apr_filepath_encoding(&encoding_style, pool); + if (apr_err) + return svn_error_wrap_apr(apr_err, + _("Can't determine the native path encoding")); + + /* ### What to do about APR_FILEPATH_ENCODING_UNKNOWN? + Well, for now we'll just punt to the svn_utf_ functions; + those will at least do the ASCII-subset check. */ + *path_is_utf8 = (encoding_style == APR_FILEPATH_ENCODING_UTF8); + return SVN_NO_ERROR; +} +#endif + + +svn_error_t * +svn_path_cstring_from_utf8(const char **path_apr, + const char *path_utf8, + apr_pool_t *pool) +{ +#if !defined(WIN32) && !defined(DARWIN) + svn_boolean_t path_is_utf8; + SVN_ERR(get_path_encoding(&path_is_utf8, pool)); + if (path_is_utf8) +#endif + { + *path_apr = apr_pstrdup(pool, path_utf8); + return SVN_NO_ERROR; + } +#if !defined(WIN32) && !defined(DARWIN) + else + return svn_utf_cstring_from_utf8(path_apr, path_utf8, pool); +#endif +} + + +svn_error_t * +svn_path_cstring_to_utf8(const char **path_utf8, + const char *path_apr, + apr_pool_t *pool) +{ +#if !defined(WIN32) && !defined(DARWIN) + svn_boolean_t path_is_utf8; + SVN_ERR(get_path_encoding(&path_is_utf8, pool)); + if (path_is_utf8) +#endif + { + *path_utf8 = apr_pstrdup(pool, path_apr); + return SVN_NO_ERROR; + } +#if !defined(WIN32) && !defined(DARWIN) + else + return svn_utf_cstring_to_utf8(path_utf8, path_apr, pool); +#endif +} + + +/* Return a copy of PATH, allocated from POOL, for which control + characters have been escaped using the form \NNN (where NNN is the + octal representation of the byte's ordinal value). */ +const char * +svn_path_illegal_path_escape(const char *path, apr_pool_t *pool) +{ + svn_stringbuf_t *retstr; + apr_size_t i, copied = 0; + int c; + + /* At least one control character: + strlen - 1 (control) + \ + N + N + N + null . */ + retstr = svn_stringbuf_create_ensure(strlen(path) + 4, pool); + for (i = 0; path[i]; i++) + { + c = (unsigned char)path[i]; + if (! svn_ctype_iscntrl(c)) + continue; + + /* If we got here, we're looking at a character that isn't + supported by the (or at least, our) URI encoding scheme. We + need to escape this character. */ + + /* First things first, copy all the good stuff that we haven't + yet copied into our output buffer. */ + if (i - copied) + svn_stringbuf_appendbytes(retstr, path + copied, + i - copied); + + /* Make sure buffer is big enough for '\' 'N' 'N' 'N' (and NUL) */ + svn_stringbuf_ensure(retstr, retstr->len + 5); + /*### The backslash separator doesn't work too great with Windows, + but it's what we'll use for consistency with invalid utf8 + formatting (until someone has a better idea) */ + apr_snprintf(retstr->data + retstr->len, 5, "\\%03o", (unsigned char)c); + retstr->len += 4; + + /* Finally, update our copy counter. */ + copied = i + 1; + } + + /* If we didn't encode anything, we don't need to duplicate the string. */ + if (retstr->len == 0) + return path; + + /* Anything left to copy? */ + if (i - copied) + svn_stringbuf_appendbytes(retstr, path + copied, i - copied); + + /* retstr is null-terminated either by apr_snprintf or the svn_stringbuf + functions. */ + + return retstr->data; +} + +svn_error_t * +svn_path_check_valid(const char *path, apr_pool_t *pool) +{ + const char *c; + + for (c = path; *c; c++) + { + if (svn_ctype_iscntrl(*c)) + { + return svn_error_createf + (SVN_ERR_FS_PATH_SYNTAX, NULL, + _("Invalid control character '0x%02x' in path '%s'"), + (unsigned char)*c, + svn_path_illegal_path_escape(svn_dirent_local_style(path, pool), + pool)); + } + } + + return SVN_NO_ERROR; +} + +void +svn_path_splitext(const char **path_root, + const char **path_ext, + const char *path, + apr_pool_t *pool) +{ + const char *last_dot, *last_slash; + + /* Easy out -- why do all the work when there's no way to report it? */ + if (! (path_root || path_ext)) + return; + + /* Do we even have a period in this thing? And if so, is there + anything after it? We look for the "rightmost" period in the + string. */ + last_dot = strrchr(path, '.'); + if (last_dot && (last_dot + 1 != '\0')) + { + /* If we have a period, we need to make sure it occurs in the + final path component -- that there's no path separator + between the last period and the end of the PATH -- otherwise, + it doesn't count. Also, we want to make sure that our period + isn't the first character of the last component. */ + last_slash = strrchr(path, '/'); + if ((last_slash && (last_dot > (last_slash + 1))) + || ((! last_slash) && (last_dot > path))) + { + if (path_root) + *path_root = apr_pstrmemdup(pool, path, + (last_dot - path + 1) * sizeof(*path)); + if (path_ext) + *path_ext = apr_pstrdup(pool, last_dot + 1); + return; + } + } + /* If we get here, we never found a suitable separator character, so + there's no split. */ + if (path_root) + *path_root = apr_pstrdup(pool, path); + if (path_ext) + *path_ext = ""; +} + + +/* Repository relative URLs (^/). */ + +svn_boolean_t +svn_path_is_repos_relative_url(const char *path) +{ + return (0 == strncmp("^/", path, 2)); +} + +svn_error_t * +svn_path_resolve_repos_relative_url(const char **absolute_url, + const char *relative_url, + const char *repos_root_url, + apr_pool_t *pool) +{ + if (! svn_path_is_repos_relative_url(relative_url)) + return svn_error_createf(SVN_ERR_BAD_URL, NULL, + _("Improper relative URL '%s'"), + relative_url); + + /* No assumptions are made about the canonicalization of the inut + * arguments, it is presumed that the output will be canonicalized after + * this function, which will remove any duplicate path separator. + */ + *absolute_url = apr_pstrcat(pool, repos_root_url, relative_url + 1, + (char *)NULL); + + return SVN_NO_ERROR; +} + |