summaryrefslogtreecommitdiff
path: root/subversion/libsvn_subr/path.c
diff options
context:
space:
mode:
Diffstat (limited to 'subversion/libsvn_subr/path.c')
-rw-r--r--subversion/libsvn_subr/path.c1315
1 files changed, 1315 insertions, 0 deletions
diff --git a/subversion/libsvn_subr/path.c b/subversion/libsvn_subr/path.c
new file mode 100644
index 0000000000000..84368f3bb77b0
--- /dev/null
+++ b/subversion/libsvn_subr/path.c
@@ -0,0 +1,1315 @@
+/*
+ * paths.c: a path manipulation library using svn_stringbuf_t
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+
+
+#include <string.h>
+#include <assert.h>
+
+#include <apr_file_info.h>
+#include <apr_lib.h>
+#include <apr_uri.h>
+
+#include "svn_string.h"
+#include "svn_dirent_uri.h"
+#include "svn_path.h"
+#include "svn_private_config.h" /* for SVN_PATH_LOCAL_SEPARATOR */
+#include "svn_utf.h"
+#include "svn_io.h" /* for svn_io_stat() */
+#include "svn_ctype.h"
+
+#include "dirent_uri.h"
+
+
+/* The canonical empty path. Can this be changed? Well, change the empty
+ test below and the path library will work, not so sure about the fs/wc
+ libraries. */
+#define SVN_EMPTY_PATH ""
+
+/* TRUE if s is the canonical empty path, FALSE otherwise */
+#define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0')
+
+/* TRUE if s,n is the platform's empty path ("."), FALSE otherwise. Can
+ this be changed? Well, the path library will work, not so sure about
+ the OS! */
+#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.')
+
+
+
+
+#ifndef NDEBUG
+/* This function is an approximation of svn_path_is_canonical.
+ * It is supposed to be used in functions that do not have access
+ * to a pool, but still want to assert that a path is canonical.
+ *
+ * PATH with length LEN is assumed to be canonical if it isn't
+ * the platform's empty path (see definition of SVN_PATH_IS_PLATFORM_EMPTY),
+ * and does not contain "/./", and any one of the following
+ * conditions is also met:
+ *
+ * 1. PATH has zero length
+ * 2. PATH is the root directory (what exactly a root directory is
+ * depends on the platform)
+ * 3. PATH is not a root directory and does not end with '/'
+ *
+ * If possible, please use svn_path_is_canonical instead.
+ */
+static svn_boolean_t
+is_canonical(const char *path,
+ apr_size_t len)
+{
+ return (! SVN_PATH_IS_PLATFORM_EMPTY(path, len)
+ && strstr(path, "/./") == NULL
+ && (len == 0
+ || (len == 1 && path[0] == '/')
+ || (path[len-1] != '/')
+#if defined(WIN32) || defined(__CYGWIN__)
+ || svn_dirent_is_root(path, len)
+#endif
+ ));
+}
+#endif
+
+
+/* functionality of svn_path_is_canonical but without the deprecation */
+static svn_boolean_t
+svn_path_is_canonical_internal(const char *path, apr_pool_t *pool)
+{
+ return svn_uri_is_canonical(path, pool) ||
+ svn_dirent_is_canonical(path, pool) ||
+ svn_relpath_is_canonical(path);
+}
+
+svn_boolean_t
+svn_path_is_canonical(const char *path, apr_pool_t *pool)
+{
+ return svn_path_is_canonical_internal(path, pool);
+}
+
+/* functionality of svn_path_join but without the deprecation */
+static char *
+svn_path_join_internal(const char *base,
+ const char *component,
+ apr_pool_t *pool)
+{
+ apr_size_t blen = strlen(base);
+ apr_size_t clen = strlen(component);
+ char *path;
+
+ assert(svn_path_is_canonical_internal(base, pool));
+ assert(svn_path_is_canonical_internal(component, pool));
+
+ /* If the component is absolute, then return it. */
+ if (*component == '/')
+ return apr_pmemdup(pool, component, clen + 1);
+
+ /* If either is empty return the other */
+ if (SVN_PATH_IS_EMPTY(base))
+ return apr_pmemdup(pool, component, clen + 1);
+ if (SVN_PATH_IS_EMPTY(component))
+ return apr_pmemdup(pool, base, blen + 1);
+
+ if (blen == 1 && base[0] == '/')
+ blen = 0; /* Ignore base, just return separator + component */
+
+ /* Construct the new, combined path. */
+ path = apr_palloc(pool, blen + 1 + clen + 1);
+ memcpy(path, base, blen);
+ path[blen] = '/';
+ memcpy(path + blen + 1, component, clen + 1);
+
+ return path;
+}
+
+char *svn_path_join(const char *base,
+ const char *component,
+ apr_pool_t *pool)
+{
+ return svn_path_join_internal(base, component, pool);
+}
+
+char *svn_path_join_many(apr_pool_t *pool, const char *base, ...)
+{
+#define MAX_SAVED_LENGTHS 10
+ apr_size_t saved_lengths[MAX_SAVED_LENGTHS];
+ apr_size_t total_len;
+ int nargs;
+ va_list va;
+ const char *s;
+ apr_size_t len;
+ char *path;
+ char *p;
+ svn_boolean_t base_is_empty = FALSE, base_is_root = FALSE;
+ int base_arg = 0;
+
+ total_len = strlen(base);
+
+ assert(svn_path_is_canonical_internal(base, pool));
+
+ if (total_len == 1 && *base == '/')
+ base_is_root = TRUE;
+ else if (SVN_PATH_IS_EMPTY(base))
+ {
+ total_len = sizeof(SVN_EMPTY_PATH) - 1;
+ base_is_empty = TRUE;
+ }
+
+ saved_lengths[0] = total_len;
+
+ /* Compute the length of the resulting string. */
+
+ nargs = 0;
+ va_start(va, base);
+ while ((s = va_arg(va, const char *)) != NULL)
+ {
+ len = strlen(s);
+
+ assert(svn_path_is_canonical_internal(s, pool));
+
+ if (SVN_PATH_IS_EMPTY(s))
+ continue;
+
+ if (nargs++ < MAX_SAVED_LENGTHS)
+ saved_lengths[nargs] = len;
+
+ if (*s == '/')
+ {
+ /* an absolute path. skip all components to this point and reset
+ the total length. */
+ total_len = len;
+ base_arg = nargs;
+ base_is_root = len == 1;
+ base_is_empty = FALSE;
+ }
+ else if (nargs == base_arg
+ || (nargs == base_arg + 1 && base_is_root)
+ || base_is_empty)
+ {
+ /* if we have skipped everything up to this arg, then the base
+ and all prior components are empty. just set the length to
+ this component; do not add a separator. If the base is empty
+ we can now ignore it. */
+ if (base_is_empty)
+ {
+ base_is_empty = FALSE;
+ total_len = 0;
+ }
+ total_len += len;
+ }
+ else
+ {
+ total_len += 1 + len;
+ }
+ }
+ va_end(va);
+
+ /* base == "/" and no further components. just return that. */
+ if (base_is_root && total_len == 1)
+ return apr_pmemdup(pool, "/", 2);
+
+ /* we got the total size. allocate it, with room for a NULL character. */
+ path = p = apr_palloc(pool, total_len + 1);
+
+ /* if we aren't supposed to skip forward to an absolute component, and if
+ this is not an empty base that we are skipping, then copy the base
+ into the output. */
+ if (base_arg == 0 && ! (SVN_PATH_IS_EMPTY(base) && ! base_is_empty))
+ {
+ if (SVN_PATH_IS_EMPTY(base))
+ memcpy(p, SVN_EMPTY_PATH, len = saved_lengths[0]);
+ else
+ memcpy(p, base, len = saved_lengths[0]);
+ p += len;
+ }
+
+ nargs = 0;
+ va_start(va, base);
+ while ((s = va_arg(va, const char *)) != NULL)
+ {
+ if (SVN_PATH_IS_EMPTY(s))
+ continue;
+
+ if (++nargs < base_arg)
+ continue;
+
+ if (nargs < MAX_SAVED_LENGTHS)
+ len = saved_lengths[nargs];
+ else
+ len = strlen(s);
+
+ /* insert a separator if we aren't copying in the first component
+ (which can happen when base_arg is set). also, don't put in a slash
+ if the prior character is a slash (occurs when prior component
+ is "/"). */
+ if (p != path && p[-1] != '/')
+ *p++ = '/';
+
+ /* copy the new component and advance the pointer */
+ memcpy(p, s, len);
+ p += len;
+ }
+ va_end(va);
+
+ *p = '\0';
+ assert((apr_size_t)(p - path) == total_len);
+
+ return path;
+}
+
+
+
+apr_size_t
+svn_path_component_count(const char *path)
+{
+ apr_size_t count = 0;
+
+ assert(is_canonical(path, strlen(path)));
+
+ while (*path)
+ {
+ const char *start;
+
+ while (*path == '/')
+ ++path;
+
+ start = path;
+
+ while (*path && *path != '/')
+ ++path;
+
+ if (path != start)
+ ++count;
+ }
+
+ return count;
+}
+
+
+/* Return the length of substring necessary to encompass the entire
+ * previous path segment in PATH, which should be a LEN byte string.
+ *
+ * A trailing slash will not be included in the returned length except
+ * in the case in which PATH is absolute and there are no more
+ * previous segments.
+ */
+static apr_size_t
+previous_segment(const char *path,
+ apr_size_t len)
+{
+ if (len == 0)
+ return 0;
+
+ while (len > 0 && path[--len] != '/')
+ ;
+
+ if (len == 0 && path[0] == '/')
+ return 1;
+ else
+ return len;
+}
+
+
+void
+svn_path_add_component(svn_stringbuf_t *path,
+ const char *component)
+{
+ apr_size_t len = strlen(component);
+
+ assert(is_canonical(path->data, path->len));
+ assert(is_canonical(component, strlen(component)));
+
+ /* Append a dir separator, but only if this path is neither empty
+ nor consists of a single dir separator already. */
+ if ((! SVN_PATH_IS_EMPTY(path->data))
+ && (! ((path->len == 1) && (*(path->data) == '/'))))
+ {
+ char dirsep = '/';
+ svn_stringbuf_appendbytes(path, &dirsep, sizeof(dirsep));
+ }
+
+ svn_stringbuf_appendbytes(path, component, len);
+}
+
+
+void
+svn_path_remove_component(svn_stringbuf_t *path)
+{
+ assert(is_canonical(path->data, path->len));
+
+ path->len = previous_segment(path->data, path->len);
+ path->data[path->len] = '\0';
+}
+
+
+void
+svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n)
+{
+ while (n > 0)
+ {
+ svn_path_remove_component(path);
+ n--;
+ }
+}
+
+
+char *
+svn_path_dirname(const char *path, apr_pool_t *pool)
+{
+ apr_size_t len = strlen(path);
+
+ assert(svn_path_is_canonical_internal(path, pool));
+
+ return apr_pstrmemdup(pool, path, previous_segment(path, len));
+}
+
+
+char *
+svn_path_basename(const char *path, apr_pool_t *pool)
+{
+ apr_size_t len = strlen(path);
+ apr_size_t start;
+
+ assert(svn_path_is_canonical_internal(path, pool));
+
+ if (len == 1 && path[0] == '/')
+ start = 0;
+ else
+ {
+ start = len;
+ while (start > 0 && path[start - 1] != '/')
+ --start;
+ }
+
+ return apr_pstrmemdup(pool, path + start, len - start);
+}
+
+int
+svn_path_is_empty(const char *path)
+{
+ assert(is_canonical(path, strlen(path)));
+
+ if (SVN_PATH_IS_EMPTY(path))
+ return 1;
+
+ return 0;
+}
+
+int
+svn_path_compare_paths(const char *path1,
+ const char *path2)
+{
+ apr_size_t path1_len = strlen(path1);
+ apr_size_t path2_len = strlen(path2);
+ apr_size_t min_len = ((path1_len < path2_len) ? path1_len : path2_len);
+ apr_size_t i = 0;
+
+ assert(is_canonical(path1, path1_len));
+ assert(is_canonical(path2, path2_len));
+
+ /* Skip past common prefix. */
+ while (i < min_len && path1[i] == path2[i])
+ ++i;
+
+ /* Are the paths exactly the same? */
+ if ((path1_len == path2_len) && (i >= min_len))
+ return 0;
+
+ /* Children of paths are greater than their parents, but less than
+ greater siblings of their parents. */
+ if ((path1[i] == '/') && (path2[i] == 0))
+ return 1;
+ if ((path2[i] == '/') && (path1[i] == 0))
+ return -1;
+ if (path1[i] == '/')
+ return -1;
+ if (path2[i] == '/')
+ return 1;
+
+ /* Common prefix was skipped above, next character is compared to
+ determine order. We need to use an unsigned comparison, though,
+ so a "next character" of NULL (0x00) sorts numerically
+ smallest. */
+ return (unsigned char)(path1[i]) < (unsigned char)(path2[i]) ? -1 : 1;
+}
+
+/* Return the string length of the longest common ancestor of PATH1 and PATH2.
+ *
+ * This function handles everything except the URL-handling logic
+ * of svn_path_get_longest_ancestor, and assumes that PATH1 and
+ * PATH2 are *not* URLs.
+ *
+ * If the two paths do not share a common ancestor, return 0.
+ *
+ * New strings are allocated in POOL.
+ */
+static apr_size_t
+get_path_ancestor_length(const char *path1,
+ const char *path2,
+ apr_pool_t *pool)
+{
+ apr_size_t path1_len, path2_len;
+ apr_size_t i = 0;
+ apr_size_t last_dirsep = 0;
+
+ path1_len = strlen(path1);
+ path2_len = strlen(path2);
+
+ if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
+ return 0;
+
+ while (path1[i] == path2[i])
+ {
+ /* Keep track of the last directory separator we hit. */
+ if (path1[i] == '/')
+ last_dirsep = i;
+
+ i++;
+
+ /* If we get to the end of either path, break out. */
+ if ((i == path1_len) || (i == path2_len))
+ break;
+ }
+
+ /* two special cases:
+ 1. '/' is the longest common ancestor of '/' and '/foo'
+ 2. '/' is the longest common ancestor of '/rif' and '/raf' */
+ if (i == 1 && path1[0] == '/' && path2[0] == '/')
+ return 1;
+
+ /* last_dirsep is now the offset of the last directory separator we
+ crossed before reaching a non-matching byte. i is the offset of
+ that non-matching byte. */
+ if (((i == path1_len) && (path2[i] == '/'))
+ || ((i == path2_len) && (path1[i] == '/'))
+ || ((i == path1_len) && (i == path2_len)))
+ return i;
+ else
+ if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/')
+ return 1;
+ return last_dirsep;
+}
+
+
+char *
+svn_path_get_longest_ancestor(const char *path1,
+ const char *path2,
+ apr_pool_t *pool)
+{
+ svn_boolean_t path1_is_url = svn_path_is_url(path1);
+ svn_boolean_t path2_is_url = svn_path_is_url(path2);
+
+ /* Are we messing with URLs? If we have a mix of URLs and non-URLs,
+ there's nothing common between them. */
+ if (path1_is_url && path2_is_url)
+ {
+ return svn_uri_get_longest_ancestor(path1, path2, pool);
+ }
+ else if ((! path1_is_url) && (! path2_is_url))
+ {
+ return apr_pstrndup(pool, path1,
+ get_path_ancestor_length(path1, path2, pool));
+ }
+ else
+ {
+ /* A URL and a non-URL => no common prefix */
+ return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
+ }
+}
+
+const char *
+svn_path_is_child(const char *path1,
+ const char *path2,
+ apr_pool_t *pool)
+{
+ apr_size_t i;
+
+ /* assert (is_canonical (path1, strlen (path1))); ### Expensive strlen */
+ /* assert (is_canonical (path2, strlen (path2))); ### Expensive strlen */
+
+ /* Allow "" and "foo" to be parent/child */
+ if (SVN_PATH_IS_EMPTY(path1)) /* "" is the parent */
+ {
+ if (SVN_PATH_IS_EMPTY(path2) /* "" not a child */
+ || path2[0] == '/') /* "/foo" not a child */
+ return NULL;
+ else
+ /* everything else is child */
+ return pool ? apr_pstrdup(pool, path2) : path2;
+ }
+
+ /* Reach the end of at least one of the paths. How should we handle
+ things like path1:"foo///bar" and path2:"foo/bar/baz"? It doesn't
+ appear to arise in the current Subversion code, it's not clear to me
+ if they should be parent/child or not. */
+ for (i = 0; path1[i] && path2[i]; i++)
+ if (path1[i] != path2[i])
+ return NULL;
+
+ /* There are two cases that are parent/child
+ ... path1[i] == '\0'
+ .../foo path2[i] == '/'
+ or
+ / path1[i] == '\0'
+ /foo path2[i] != '/'
+ */
+ if (path1[i] == '\0' && path2[i])
+ {
+ if (path2[i] == '/')
+ return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1;
+ else if (i == 1 && path1[0] == '/')
+ return pool ? apr_pstrdup(pool, path2 + 1) : path2 + 1;
+ }
+
+ /* Otherwise, path2 isn't a child. */
+ return NULL;
+}
+
+
+svn_boolean_t
+svn_path_is_ancestor(const char *path1, const char *path2)
+{
+ apr_size_t path1_len = strlen(path1);
+
+ /* If path1 is empty and path2 is not absoulte, then path1 is an ancestor. */
+ if (SVN_PATH_IS_EMPTY(path1))
+ return *path2 != '/';
+
+ /* If path1 is a prefix of path2, then:
+ - If path1 ends in a path separator,
+ - If the paths are of the same length
+ OR
+ - path2 starts a new path component after the common prefix,
+ then path1 is an ancestor. */
+ if (strncmp(path1, path2, path1_len) == 0)
+ return path1[path1_len - 1] == '/'
+ || (path2[path1_len] == '/' || path2[path1_len] == '\0');
+
+ return FALSE;
+}
+
+
+apr_array_header_t *
+svn_path_decompose(const char *path,
+ apr_pool_t *pool)
+{
+ apr_size_t i, oldi;
+
+ apr_array_header_t *components =
+ apr_array_make(pool, 1, sizeof(const char *));
+
+ assert(svn_path_is_canonical_internal(path, pool));
+
+ if (SVN_PATH_IS_EMPTY(path))
+ return components; /* ### Should we return a "" component? */
+
+ /* If PATH is absolute, store the '/' as the first component. */
+ i = oldi = 0;
+ if (path[i] == '/')
+ {
+ char dirsep = '/';
+
+ APR_ARRAY_PUSH(components, const char *)
+ = apr_pstrmemdup(pool, &dirsep, sizeof(dirsep));
+
+ i++;
+ oldi++;
+ if (path[i] == '\0') /* path is a single '/' */
+ return components;
+ }
+
+ do
+ {
+ if ((path[i] == '/') || (path[i] == '\0'))
+ {
+ if (SVN_PATH_IS_PLATFORM_EMPTY(path + oldi, i - oldi))
+ APR_ARRAY_PUSH(components, const char *) = SVN_EMPTY_PATH;
+ else
+ APR_ARRAY_PUSH(components, const char *)
+ = apr_pstrmemdup(pool, path + oldi, i - oldi);
+
+ i++;
+ oldi = i; /* skipping past the dirsep */
+ continue;
+ }
+ i++;
+ }
+ while (path[i-1]);
+
+ return components;
+}
+
+
+const char *
+svn_path_compose(const apr_array_header_t *components,
+ apr_pool_t *pool)
+{
+ apr_size_t *lengths = apr_palloc(pool, components->nelts*sizeof(*lengths));
+ apr_size_t max_length = components->nelts;
+ char *path;
+ char *p;
+ int i;
+
+ /* Get the length of each component so a total length can be
+ calculated. */
+ for (i = 0; i < components->nelts; ++i)
+ {
+ apr_size_t l = strlen(APR_ARRAY_IDX(components, i, const char *));
+ lengths[i] = l;
+ max_length += l;
+ }
+
+ path = apr_palloc(pool, max_length + 1);
+ p = path;
+
+ for (i = 0; i < components->nelts; ++i)
+ {
+ /* Append a '/' to the path. Handle the case with an absolute
+ path where a '/' appears in the first component. Only append
+ a '/' if the component is the second component that does not
+ follow a "/" first component; or it is the third or later
+ component. */
+ if (i > 1 ||
+ (i == 1 && strcmp("/", APR_ARRAY_IDX(components,
+ 0,
+ const char *)) != 0))
+ {
+ *p++ = '/';
+ }
+
+ memcpy(p, APR_ARRAY_IDX(components, i, const char *), lengths[i]);
+ p += lengths[i];
+ }
+
+ *p = '\0';
+
+ return path;
+}
+
+
+svn_boolean_t
+svn_path_is_single_path_component(const char *name)
+{
+ assert(is_canonical(name, strlen(name)));
+
+ /* Can't be empty or `..' */
+ if (SVN_PATH_IS_EMPTY(name)
+ || (name[0] == '.' && name[1] == '.' && name[2] == '\0'))
+ return FALSE;
+
+ /* Slashes are bad, m'kay... */
+ if (strchr(name, '/') != NULL)
+ return FALSE;
+
+ /* It is valid. */
+ return TRUE;
+}
+
+
+svn_boolean_t
+svn_path_is_dotpath_present(const char *path)
+{
+ size_t len;
+
+ /* The empty string does not have a dotpath */
+ if (path[0] == '\0')
+ return FALSE;
+
+ /* Handle "." or a leading "./" */
+ if (path[0] == '.' && (path[1] == '\0' || path[1] == '/'))
+ return TRUE;
+
+ /* Paths of length 1 (at this point) have no dotpath present. */
+ if (path[1] == '\0')
+ return FALSE;
+
+ /* If any segment is "/./", then a dotpath is present. */
+ if (strstr(path, "/./") != NULL)
+ return TRUE;
+
+ /* Does the path end in "/." ? */
+ len = strlen(path);
+ return path[len - 2] == '/' && path[len - 1] == '.';
+}
+
+svn_boolean_t
+svn_path_is_backpath_present(const char *path)
+{
+ size_t len;
+
+ /* 0 and 1-length paths do not have a backpath */
+ if (path[0] == '\0' || path[1] == '\0')
+ return FALSE;
+
+ /* Handle ".." or a leading "../" */
+ if (path[0] == '.' && path[1] == '.' && (path[2] == '\0' || path[2] == '/'))
+ return TRUE;
+
+ /* Paths of length 2 (at this point) have no backpath present. */
+ if (path[2] == '\0')
+ return FALSE;
+
+ /* If any segment is "..", then a backpath is present. */
+ if (strstr(path, "/../") != NULL)
+ return TRUE;
+
+ /* Does the path end in "/.." ? */
+ len = strlen(path);
+ return path[len - 3] == '/' && path[len - 2] == '.' && path[len - 1] == '.';
+}
+
+
+/*** URI Stuff ***/
+
+/* Examine PATH as a potential URI, and return a substring of PATH
+ that immediately follows the (scheme):// portion of the URI, or
+ NULL if PATH doesn't appear to be a valid URI. The returned value
+ is not alloced -- it shares memory with PATH. */
+static const char *
+skip_uri_scheme(const char *path)
+{
+ apr_size_t j;
+
+ /* A scheme is terminated by a : and cannot contain any /'s. */
+ for (j = 0; path[j] && path[j] != ':'; ++j)
+ if (path[j] == '/')
+ return NULL;
+
+ if (j > 0 && path[j] == ':' && path[j+1] == '/' && path[j+2] == '/')
+ return path + j + 3;
+
+ return NULL;
+}
+
+
+svn_boolean_t
+svn_path_is_url(const char *path)
+{
+ /* ### This function is reaaaaaaaaaaaaaally stupid right now.
+ We're just going to look for:
+
+ (scheme)://(optional_stuff)
+
+ Where (scheme) has no ':' or '/' characters.
+
+ Someday it might be nice to have an actual URI parser here.
+ */
+ return skip_uri_scheme(path) != NULL;
+}
+
+
+
+/* Here is the BNF for path components in a URI. "pchar" is a
+ character in a path component.
+
+ pchar = unreserved | escaped |
+ ":" | "@" | "&" | "=" | "+" | "$" | ","
+ unreserved = alphanum | mark
+ mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
+
+ Note that "escaped" doesn't really apply to what users can put in
+ their paths, so that really means the set of characters is:
+
+ alphanum | mark | ":" | "@" | "&" | "=" | "+" | "$" | ","
+*/
+const char svn_uri__char_validity[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,
+
+ /* 64 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
+
+ /* 128 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ /* 192 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+
+svn_boolean_t
+svn_path_is_uri_safe(const char *path)
+{
+ apr_size_t i;
+
+ /* Skip the URI scheme. */
+ path = skip_uri_scheme(path);
+
+ /* No scheme? Get outta here. */
+ if (! path)
+ return FALSE;
+
+ /* Skip to the first slash that's after the URI scheme. */
+ path = strchr(path, '/');
+
+ /* If there's no first slash, then there's only a host portion;
+ therefore there couldn't be any uri-unsafe characters after the
+ host... so return true. */
+ if (path == NULL)
+ return TRUE;
+
+ for (i = 0; path[i]; i++)
+ {
+ /* Allow '%XX' (where each X is a hex digit) */
+ if (path[i] == '%')
+ {
+ if (svn_ctype_isxdigit(path[i + 1]) &&
+ svn_ctype_isxdigit(path[i + 2]))
+ {
+ i += 2;
+ continue;
+ }
+ return FALSE;
+ }
+ else if (! svn_uri__char_validity[((unsigned char)path[i])])
+ {
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+
+/* URI-encode each character c in PATH for which TABLE[c] is 0.
+ If no encoding was needed, return PATH, else return a new string allocated
+ in POOL. */
+static const char *
+uri_escape(const char *path, const char table[], apr_pool_t *pool)
+{
+ svn_stringbuf_t *retstr;
+ apr_size_t i, copied = 0;
+ int c;
+
+ retstr = svn_stringbuf_create_ensure(strlen(path), pool);
+ for (i = 0; path[i]; i++)
+ {
+ c = (unsigned char)path[i];
+ if (table[c])
+ continue;
+
+ /* If we got here, we're looking at a character that isn't
+ supported by the (or at least, our) URI encoding scheme. We
+ need to escape this character. */
+
+ /* First things first, copy all the good stuff that we haven't
+ yet copied into our output buffer. */
+ if (i - copied)
+ svn_stringbuf_appendbytes(retstr, path + copied,
+ i - copied);
+
+ /* Now, write in our escaped character, consisting of the
+ '%' and two digits. We cast the C to unsigned char here because
+ the 'X' format character will be tempted to treat it as an unsigned
+ int...which causes problem when messing with 0x80-0xFF chars.
+ We also need space for a null as apr_snprintf will write one. */
+ svn_stringbuf_ensure(retstr, retstr->len + 4);
+ apr_snprintf(retstr->data + retstr->len, 4, "%%%02X", (unsigned char)c);
+ retstr->len += 3;
+
+ /* Finally, update our copy counter. */
+ copied = i + 1;
+ }
+
+ /* If we didn't encode anything, we don't need to duplicate the string. */
+ if (retstr->len == 0)
+ return path;
+
+ /* Anything left to copy? */
+ if (i - copied)
+ svn_stringbuf_appendbytes(retstr, path + copied, i - copied);
+
+ /* retstr is null-terminated either by apr_snprintf or the svn_stringbuf
+ functions. */
+
+ return retstr->data;
+}
+
+
+const char *
+svn_path_uri_encode(const char *path, apr_pool_t *pool)
+{
+ const char *ret;
+
+ ret = uri_escape(path, svn_uri__char_validity, pool);
+
+ /* Our interface guarantees a copy. */
+ if (ret == path)
+ return apr_pstrdup(pool, path);
+ else
+ return ret;
+}
+
+static const char iri_escape_chars[256] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ /* 128 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+const char *
+svn_path_uri_from_iri(const char *iri, apr_pool_t *pool)
+{
+ return uri_escape(iri, iri_escape_chars, pool);
+}
+
+static const char uri_autoescape_chars[256] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
+
+ /* 64 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
+
+ /* 128 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ /* 192 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+};
+
+const char *
+svn_path_uri_autoescape(const char *uri, apr_pool_t *pool)
+{
+ return uri_escape(uri, uri_autoescape_chars, pool);
+}
+
+const char *
+svn_path_uri_decode(const char *path, apr_pool_t *pool)
+{
+ svn_stringbuf_t *retstr;
+ apr_size_t i;
+ svn_boolean_t query_start = FALSE;
+
+ /* avoid repeated realloc */
+ retstr = svn_stringbuf_create_ensure(strlen(path) + 1, pool);
+
+ retstr->len = 0;
+ for (i = 0; path[i]; i++)
+ {
+ char c = path[i];
+
+ if (c == '?')
+ {
+ /* Mark the start of the query string, if it exists. */
+ query_start = TRUE;
+ }
+ else if (c == '+' && query_start)
+ {
+ /* Only do this if we are into the query string.
+ * RFC 2396, section 3.3 */
+ c = ' ';
+ }
+ else if (c == '%' && svn_ctype_isxdigit(path[i + 1])
+ && svn_ctype_isxdigit(path[i+2]))
+ {
+ char digitz[3];
+ digitz[0] = path[++i];
+ digitz[1] = path[++i];
+ digitz[2] = '\0';
+ c = (char)(strtol(digitz, NULL, 16));
+ }
+
+ retstr->data[retstr->len++] = c;
+ }
+
+ /* Null-terminate this bad-boy. */
+ retstr->data[retstr->len] = 0;
+
+ return retstr->data;
+}
+
+
+const char *
+svn_path_url_add_component2(const char *url,
+ const char *component,
+ apr_pool_t *pool)
+{
+ /* = svn_path_uri_encode() but without always copying */
+ component = uri_escape(component, svn_uri__char_validity, pool);
+
+ return svn_path_join_internal(url, component, pool);
+}
+
+svn_error_t *
+svn_path_get_absolute(const char **pabsolute,
+ const char *relative,
+ apr_pool_t *pool)
+{
+ if (svn_path_is_url(relative))
+ {
+ *pabsolute = apr_pstrdup(pool, relative);
+ return SVN_NO_ERROR;
+ }
+
+ return svn_dirent_get_absolute(pabsolute, relative, pool);
+}
+
+
+#if !defined(WIN32) && !defined(DARWIN)
+/** Get APR's internal path encoding. */
+static svn_error_t *
+get_path_encoding(svn_boolean_t *path_is_utf8, apr_pool_t *pool)
+{
+ apr_status_t apr_err;
+ int encoding_style;
+
+ apr_err = apr_filepath_encoding(&encoding_style, pool);
+ if (apr_err)
+ return svn_error_wrap_apr(apr_err,
+ _("Can't determine the native path encoding"));
+
+ /* ### What to do about APR_FILEPATH_ENCODING_UNKNOWN?
+ Well, for now we'll just punt to the svn_utf_ functions;
+ those will at least do the ASCII-subset check. */
+ *path_is_utf8 = (encoding_style == APR_FILEPATH_ENCODING_UTF8);
+ return SVN_NO_ERROR;
+}
+#endif
+
+
+svn_error_t *
+svn_path_cstring_from_utf8(const char **path_apr,
+ const char *path_utf8,
+ apr_pool_t *pool)
+{
+#if !defined(WIN32) && !defined(DARWIN)
+ svn_boolean_t path_is_utf8;
+ SVN_ERR(get_path_encoding(&path_is_utf8, pool));
+ if (path_is_utf8)
+#endif
+ {
+ *path_apr = apr_pstrdup(pool, path_utf8);
+ return SVN_NO_ERROR;
+ }
+#if !defined(WIN32) && !defined(DARWIN)
+ else
+ return svn_utf_cstring_from_utf8(path_apr, path_utf8, pool);
+#endif
+}
+
+
+svn_error_t *
+svn_path_cstring_to_utf8(const char **path_utf8,
+ const char *path_apr,
+ apr_pool_t *pool)
+{
+#if !defined(WIN32) && !defined(DARWIN)
+ svn_boolean_t path_is_utf8;
+ SVN_ERR(get_path_encoding(&path_is_utf8, pool));
+ if (path_is_utf8)
+#endif
+ {
+ *path_utf8 = apr_pstrdup(pool, path_apr);
+ return SVN_NO_ERROR;
+ }
+#if !defined(WIN32) && !defined(DARWIN)
+ else
+ return svn_utf_cstring_to_utf8(path_utf8, path_apr, pool);
+#endif
+}
+
+
+/* Return a copy of PATH, allocated from POOL, for which control
+ characters have been escaped using the form \NNN (where NNN is the
+ octal representation of the byte's ordinal value). */
+const char *
+svn_path_illegal_path_escape(const char *path, apr_pool_t *pool)
+{
+ svn_stringbuf_t *retstr;
+ apr_size_t i, copied = 0;
+ int c;
+
+ /* At least one control character:
+ strlen - 1 (control) + \ + N + N + N + null . */
+ retstr = svn_stringbuf_create_ensure(strlen(path) + 4, pool);
+ for (i = 0; path[i]; i++)
+ {
+ c = (unsigned char)path[i];
+ if (! svn_ctype_iscntrl(c))
+ continue;
+
+ /* If we got here, we're looking at a character that isn't
+ supported by the (or at least, our) URI encoding scheme. We
+ need to escape this character. */
+
+ /* First things first, copy all the good stuff that we haven't
+ yet copied into our output buffer. */
+ if (i - copied)
+ svn_stringbuf_appendbytes(retstr, path + copied,
+ i - copied);
+
+ /* Make sure buffer is big enough for '\' 'N' 'N' 'N' (and NUL) */
+ svn_stringbuf_ensure(retstr, retstr->len + 5);
+ /*### The backslash separator doesn't work too great with Windows,
+ but it's what we'll use for consistency with invalid utf8
+ formatting (until someone has a better idea) */
+ apr_snprintf(retstr->data + retstr->len, 5, "\\%03o", (unsigned char)c);
+ retstr->len += 4;
+
+ /* Finally, update our copy counter. */
+ copied = i + 1;
+ }
+
+ /* If we didn't encode anything, we don't need to duplicate the string. */
+ if (retstr->len == 0)
+ return path;
+
+ /* Anything left to copy? */
+ if (i - copied)
+ svn_stringbuf_appendbytes(retstr, path + copied, i - copied);
+
+ /* retstr is null-terminated either by apr_snprintf or the svn_stringbuf
+ functions. */
+
+ return retstr->data;
+}
+
+svn_error_t *
+svn_path_check_valid(const char *path, apr_pool_t *pool)
+{
+ const char *c;
+
+ for (c = path; *c; c++)
+ {
+ if (svn_ctype_iscntrl(*c))
+ {
+ return svn_error_createf
+ (SVN_ERR_FS_PATH_SYNTAX, NULL,
+ _("Invalid control character '0x%02x' in path '%s'"),
+ (unsigned char)*c,
+ svn_path_illegal_path_escape(svn_dirent_local_style(path, pool),
+ pool));
+ }
+ }
+
+ return SVN_NO_ERROR;
+}
+
+void
+svn_path_splitext(const char **path_root,
+ const char **path_ext,
+ const char *path,
+ apr_pool_t *pool)
+{
+ const char *last_dot, *last_slash;
+
+ /* Easy out -- why do all the work when there's no way to report it? */
+ if (! (path_root || path_ext))
+ return;
+
+ /* Do we even have a period in this thing? And if so, is there
+ anything after it? We look for the "rightmost" period in the
+ string. */
+ last_dot = strrchr(path, '.');
+ if (last_dot && (last_dot + 1 != '\0'))
+ {
+ /* If we have a period, we need to make sure it occurs in the
+ final path component -- that there's no path separator
+ between the last period and the end of the PATH -- otherwise,
+ it doesn't count. Also, we want to make sure that our period
+ isn't the first character of the last component. */
+ last_slash = strrchr(path, '/');
+ if ((last_slash && (last_dot > (last_slash + 1)))
+ || ((! last_slash) && (last_dot > path)))
+ {
+ if (path_root)
+ *path_root = apr_pstrmemdup(pool, path,
+ (last_dot - path + 1) * sizeof(*path));
+ if (path_ext)
+ *path_ext = apr_pstrdup(pool, last_dot + 1);
+ return;
+ }
+ }
+ /* If we get here, we never found a suitable separator character, so
+ there's no split. */
+ if (path_root)
+ *path_root = apr_pstrdup(pool, path);
+ if (path_ext)
+ *path_ext = "";
+}
+
+
+/* Repository relative URLs (^/). */
+
+svn_boolean_t
+svn_path_is_repos_relative_url(const char *path)
+{
+ return (0 == strncmp("^/", path, 2));
+}
+
+svn_error_t *
+svn_path_resolve_repos_relative_url(const char **absolute_url,
+ const char *relative_url,
+ const char *repos_root_url,
+ apr_pool_t *pool)
+{
+ if (! svn_path_is_repos_relative_url(relative_url))
+ return svn_error_createf(SVN_ERR_BAD_URL, NULL,
+ _("Improper relative URL '%s'"),
+ relative_url);
+
+ /* No assumptions are made about the canonicalization of the inut
+ * arguments, it is presumed that the output will be canonicalized after
+ * this function, which will remove any duplicate path separator.
+ */
+ *absolute_url = apr_pstrcat(pool, repos_root_url, relative_url + 1,
+ (char *)NULL);
+
+ return SVN_NO_ERROR;
+}
+