diff options
| author | Peter Wemm <peter@FreeBSD.org> | 2015-10-12 08:54:49 +0000 | 
|---|---|---|
| committer | Peter Wemm <peter@FreeBSD.org> | 2015-10-12 08:54:49 +0000 | 
| commit | dc5d469d6574e9fb03bdd793658bb371315b306a (patch) | |
| tree | 013c2e6845398e5a9ca4901dcc077769c7520e1d /subversion/libsvn_subr/string.c | |
| parent | 58218291fa73a17020ef0447398e9e8a78f9e8c7 (diff) | |
Notes
Diffstat (limited to 'subversion/libsvn_subr/string.c')
| -rw-r--r-- | subversion/libsvn_subr/string.c | 355 | 
1 files changed, 283 insertions, 72 deletions
diff --git a/subversion/libsvn_subr/string.c b/subversion/libsvn_subr/string.c index c3d7feccebf8..29176d6d5dc5 100644 --- a/subversion/libsvn_subr/string.c +++ b/subversion/libsvn_subr/string.c @@ -26,6 +26,7 @@  #include <apr.h> +#include <assert.h>  #include <string.h>      /* for memcpy(), memcmp(), strlen() */  #include <apr_fnmatch.h> @@ -53,9 +54,9 @@ membuf_create(void **data, apr_size_t *size,    /* apr_palloc will allocate multiples of 8.     * Thus, we would waste some of that memory if we stuck to the     * smaller size. Note that this is safe even if apr_palloc would -   * use some other aligment or none at all. */ +   * use some other alignment or none at all. */    minimum_size = APR_ALIGN_DEFAULT(minimum_size); -  *data = (!minimum_size ? NULL : apr_palloc(pool, minimum_size)); +  *data = apr_palloc(pool, minimum_size);    *size = minimum_size;  } @@ -78,14 +79,10 @@ membuf_ensure(void **data, apr_size_t *size,        apr_size_t new_size = *size;        if (new_size == 0) -        /* APR will increase odd allocation sizes to the next -         * multiple for 8, for instance. Take advantage of that -         * knowledge and allow for the extra size to be used. */          new_size = minimum_size;        else          while (new_size < minimum_size)            { -            /* new_size is aligned; doubling it should keep it aligned */              const apr_size_t prev_size = new_size;              new_size *= 2; @@ -121,7 +118,10 @@ svn_membuf__resize(svn_membuf_t *membuf, apr_size_t size)    const apr_size_t old_size = membuf->size;    membuf_ensure(&membuf->data, &membuf->size, size, membuf->pool); -  if (membuf->data && old_data && old_data != membuf->data) + +  /* If we re-allocated MEMBUF->DATA, it cannot be NULL. +   * Statically initialized membuffers (OLD_DATA) may be NULL, though. */ +  if (old_data && old_data != membuf->data)      memcpy(membuf->data, old_data, old_size);  } @@ -151,7 +151,7 @@ string_compare(const char *str1,    if (len1 != len2)      return FALSE; -  /* now the strings must have identical lenghths */ +  /* now the strings must have identical lengths */    if ((memcmp(str1, str2, len1)) == 0)      return TRUE; @@ -240,7 +240,9 @@ svn_string_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool)    new_string->data = data;    new_string->len = size; -  memcpy(data, bytes, size); +  /* If SIZE is 0, NULL is valid for BYTES. */ +  if (size) +    memcpy(data, bytes, size);    /* Null termination is the convention -- even if we suspect the data       to be binary, it's not up to us to decide, it's the caller's @@ -299,8 +301,9 @@ svn_string_isempty(const svn_string_t *str)  svn_string_t *  svn_string_dup(const svn_string_t *original_string, apr_pool_t *pool)  { -  return (svn_string_ncreate(original_string->data, -                             original_string->len, pool)); +  return (original_string ? svn_string_ncreate(original_string->data, +                                               original_string->len, pool) +                          : NULL);  } @@ -393,7 +396,10 @@ svn_stringbuf_t *  svn_stringbuf_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool)  {    svn_stringbuf_t *strbuf = svn_stringbuf_create_ensure(size, pool); -  memcpy(strbuf->data, bytes, size); + +  /* If SIZE is 0, NULL is valid for BYTES. */ +  if (size) +    memcpy(strbuf->data, bytes, size);    /* Null termination is the convention -- even if we suspect the data       to be binary, it's not up to us to decide, it's the caller's @@ -418,6 +424,17 @@ svn_stringbuf_create_from_string(const svn_string_t *str, apr_pool_t *pool)    return svn_stringbuf_ncreate(str->data, str->len, pool);  } +svn_stringbuf_t * +svn_stringbuf_create_wrap(char *str, apr_pool_t *pool) +{ +  svn_stringbuf_t *result = apr_palloc(pool, sizeof(*result)); +  result->pool = pool; +  result->data = str; +  result->len = strlen(str); +  result->blocksize = result->len + 1; + +  return result; +}  svn_stringbuf_t *  svn_stringbuf_createv(apr_pool_t *pool, const char *fmt, va_list ap) @@ -579,6 +596,10 @@ svn_stringbuf_appendbytes(svn_stringbuf_t *str, const char *bytes,    apr_size_t total_len;    void *start_address; +  if (!count) +    /* Allow BYTES to be NULL by avoiding passing it to memcpy. */ +    return; +    total_len = str->len + count;  /* total size needed */    /* svn_stringbuf_ensure adds 1 for null terminator. */ @@ -595,6 +616,21 @@ svn_stringbuf_appendbytes(svn_stringbuf_t *str, const char *bytes,                                    to null-terminate. */  } +void +svn_stringbuf_appendfill(svn_stringbuf_t *str, +                         char byte, +                         apr_size_t count) +{ +  apr_size_t new_len = str->len + count; +  svn_stringbuf_ensure(str, new_len); + +  memset(str->data + str->len, byte, count); + +  /* update buffer length and always NUL-terminate it */ +  str->len = new_len; +  str->data[new_len] = '\0'; +} +  void  svn_stringbuf_appendstr(svn_stringbuf_t *targetstr, @@ -616,23 +652,22 @@ svn_stringbuf_insert(svn_stringbuf_t *str,                       const char *bytes,                       apr_size_t count)  { +  /* For COUNT==0, we allow BYTES to be NULL. It's a no-op in that case. */ +  if (count == 0) +    return; + +  /* special case: BYTES overlaps with this string -> copy the source */    if (bytes + count > str->data && bytes < str->data + str->blocksize) -    { -      /* special case: BYTES overlaps with this string -> copy the source */ -      const char *temp = apr_pmemdup(str->pool, bytes, count); -      svn_stringbuf_insert(str, pos, temp, count); -    } -  else -    { -      if (pos > str->len) -        pos = str->len; +    bytes = apr_pmemdup(str->pool, bytes, count); -      svn_stringbuf_ensure(str, str->len + count); -      memmove(str->data + pos + count, str->data + pos, str->len - pos + 1); -      memcpy(str->data + pos, bytes, count); +  if (pos > str->len) +    pos = str->len; -      str->len += count; -    } +  svn_stringbuf_ensure(str, str->len + count); +  memmove(str->data + pos + count, str->data + pos, str->len - pos + 1); +  memcpy(str->data + pos, bytes, count); + +  str->len += count;  }  void @@ -656,32 +691,35 @@ svn_stringbuf_replace(svn_stringbuf_t *str,                        const char *bytes,                        apr_size_t new_count)  { -  if (bytes + new_count > str->data && bytes < str->data + str->blocksize) +  /* For COUNT==0, we allow BYTES to be NULL. +   * In that case, this is just a substring removal. */ +  if (new_count == 0)      { -      /* special case: BYTES overlaps with this string -> copy the source */ -      const char *temp = apr_pmemdup(str->pool, bytes, new_count); -      svn_stringbuf_replace(str, pos, old_count, temp, new_count); +      svn_stringbuf_remove(str, pos, old_count); +      return;      } -  else -    { -      if (pos > str->len) -        pos = str->len; -      if (pos + old_count > str->len) -        old_count = str->len - pos; -      if (old_count < new_count) -        { -          apr_size_t delta = new_count - old_count; -          svn_stringbuf_ensure(str, str->len + delta); -        } +  /* special case: BYTES overlaps with this string -> copy the source */ +  if (bytes + new_count > str->data && bytes < str->data + str->blocksize) +    bytes = apr_pmemdup(str->pool, bytes, new_count); -      if (old_count != new_count) -        memmove(str->data + pos + new_count, str->data + pos + old_count, -                str->len - pos - old_count + 1); +  if (pos > str->len) +    pos = str->len; +  if (pos + old_count > str->len) +    old_count = str->len - pos; -      memcpy(str->data + pos, bytes, new_count); -      str->len += new_count - old_count; +  if (old_count < new_count) +    { +      apr_size_t delta = new_count - old_count; +      svn_stringbuf_ensure(str, str->len + delta);      } + +  if (old_count != new_count) +    memmove(str->data + pos + new_count, str->data + pos + old_count, +            str->len - pos - old_count + 1); + +  memcpy(str->data + pos, bytes, new_count); +  str->len += new_count - old_count;  } @@ -832,7 +870,7 @@ char *  svn_cstring_tokenize(const char *sep, char **str)  {      char *token; -    const char * next; +    char *next;      char csep;      /* check parameters */ @@ -862,8 +900,8 @@ svn_cstring_tokenize(const char *sep, char **str)        }      else        { -        *(char *)next = '\0'; -        *str = (char *)next + 1; +        *next = '\0'; +        *str = next + 1;        }      return token; @@ -1014,17 +1052,33 @@ svn_cstring_atoi(int *n, const char *str)    return SVN_NO_ERROR;  } - -apr_status_t -svn__strtoff(apr_off_t *offset, const char *buf, char **end, int base) +unsigned long +svn__strtoul(const char* buffer, const char** end)  { -#if !APR_VERSION_AT_LEAST(1,0,0) -  errno = 0; -  *offset = strtol(buf, end, base); -  return APR_FROM_OS_ERROR(errno); -#else -  return apr_strtoff(offset, buf, end, base); -#endif +  unsigned long result = 0; + +  /* this loop will execute in just 2 CPU cycles, confirmed by measurement: +     7 macro-ops (max 4 / cycle => 2 cycles) +       1 load (max 1 / cycle) +       1 jumps (compare + conditional jump == 1 macro op; max 1 / cycle) +       2 arithmetic ops (subtract, increment; max 3 / cycle) +       2 scale-and-add AGU ops (max 3 / cycle) +       1 compiler-generated move operation +     dependency chain: temp = result * 4 + result; result = temp * 2 + c +                       (2 ops with latency 1 => 2 cycles) +   */ +  while (1) +    { +      unsigned long c = (unsigned char)*buffer - (unsigned char)'0'; +      if (c > 9) +        break; + +      result = result * 10 + c; +      ++buffer; +    } + +  *end = buffer; +  return result;  }  /* "Precalculated" itoa values for 2 places (including leading zeros). @@ -1123,11 +1177,11 @@ svn__i64toa(char * dest, apr_int64_t number)      return svn__ui64toa(dest, (apr_uint64_t)number);    *dest = '-'; -  return svn__ui64toa(dest + 1, (apr_uint64_t)(0-number)) + 1; +  return svn__ui64toa(dest + 1, 0 - (apr_uint64_t)number) + 1;  }  static void -ui64toa_sep(apr_uint64_t number, char seperator, char *buffer) +ui64toa_sep(apr_uint64_t number, char separator, char *buffer)  {    apr_size_t length = svn__ui64toa(buffer, number);    apr_size_t i; @@ -1135,7 +1189,7 @@ ui64toa_sep(apr_uint64_t number, char seperator, char *buffer)    for (i = length; i > 3; i -= 3)      {        memmove(&buffer[i - 2], &buffer[i - 3], length - i + 3); -      buffer[i-3] = seperator; +      buffer[i-3] = separator;        length++;      } @@ -1143,30 +1197,110 @@ ui64toa_sep(apr_uint64_t number, char seperator, char *buffer)  }  char * -svn__ui64toa_sep(apr_uint64_t number, char seperator, apr_pool_t *pool) +svn__ui64toa_sep(apr_uint64_t number, char separator, apr_pool_t *pool)  {    char buffer[2 * SVN_INT64_BUFFER_SIZE]; -  ui64toa_sep(number, seperator, buffer); +  ui64toa_sep(number, separator, buffer);    return apr_pstrdup(pool, buffer);  }  char * -svn__i64toa_sep(apr_int64_t number, char seperator, apr_pool_t *pool) +svn__i64toa_sep(apr_int64_t number, char separator, apr_pool_t *pool)  {    char buffer[2 * SVN_INT64_BUFFER_SIZE];    if (number < 0)      {        buffer[0] = '-'; -      ui64toa_sep((apr_uint64_t)(-number), seperator, &buffer[1]); +      ui64toa_sep((apr_uint64_t)(-number), separator, &buffer[1]);      }    else -    ui64toa_sep((apr_uint64_t)(number), seperator, buffer); +    ui64toa_sep((apr_uint64_t)(number), separator, buffer);    return apr_pstrdup(pool, buffer);  } -unsigned int +apr_size_t +svn__ui64tobase36(char *dest, apr_uint64_t value) +{ +  char *dest_start = dest; +  if (value < 10) +    { +      /* pretty frequent and trivial case. Make it fast. */ +      *(dest++) = (char)(value) + '0'; +    } +  else +    { +      char buffer[SVN_INT64_BUFFER_SIZE]; +      char *p = buffer; + +      /* write result as little-endian to buffer */ +      while (value > 0) +        { +          char c = (char)(value % 36); +          value /= 36; + +          *p = (c <= 9) ? (c + '0') : (c - 10 + 'a'); +          ++p; +        } + +      /* copy as big-endian to DEST */ +      while (p > buffer) +        *(dest++) = *(--p); +    } + +  *dest = '\0'; +  return dest - dest_start; +} + +apr_uint64_t +svn__base36toui64(const char **next, const char *source) +{ +  apr_uint64_t result = 0; +  apr_uint64_t factor = 1; +  int i  = 0; +  char digits[SVN_INT64_BUFFER_SIZE]; + +  /* convert digits to numerical values and count the number of places. +   * Also, prevent buffer overflow. */ +  while (i < sizeof(digits)) +    { +      char c = *source; +      if (c < 'a') +        { +          /* includes detection of NUL terminator */ +          if (c < '0' || c > '9') +            break; + +          c -= '0'; +        } +      else +        { +          if (c < 'a' || c > 'z') +            break; + +          c -= 'a' - 10; +        } + +      digits[i++] = c; +      source++; +    } + +  /* fold digits into the result */ +  while (i > 0) +    { +      result += factor * (apr_uint64_t)digits[--i]; +      factor *= 36; +    } + +  if (next) +    *next = source; + +  return result; +} + + +apr_size_t  svn_cstring__similarity(const char *stra, const char *strb,                          svn_membuf_t *buffer, apr_size_t *rlcs)  { @@ -1178,7 +1312,7 @@ svn_cstring__similarity(const char *stra, const char *strb,    return svn_string__similarity(&stringa, &stringb, buffer, rlcs);  } -unsigned int +apr_size_t  svn_string__similarity(const svn_string_t *stringa,                         const svn_string_t *stringb,                         svn_membuf_t *buffer, apr_size_t *rlcs) @@ -1242,7 +1376,7 @@ svn_string__similarity(const svn_string_t *stringa,        /* Calculate LCS length of the remainder */        for (pstr = stra; pstr < enda; ++pstr)          { -          int i; +          apr_size_t i;            for (i = 1; i <= slots; ++i)              {                if (*pstr == strb[i-1]) @@ -1267,7 +1401,84 @@ svn_string__similarity(const svn_string_t *stringa,    /* Return similarity ratio rounded to 4 significant digits */    if (total) -    return(unsigned int)((2000 * lcs + total/2) / total); +    return ((2 * SVN_STRING__SIM_RANGE_MAX * lcs + total/2) / total);    else -    return 1000; +    return SVN_STRING__SIM_RANGE_MAX; +} + +apr_size_t +svn_cstring__match_length(const char *a, +                          const char *b, +                          apr_size_t max_len) +{ +  apr_size_t pos = 0; + +#if SVN_UNALIGNED_ACCESS_IS_OK + +  /* Chunky processing is so much faster ... +   * +   * We can't make this work on architectures that require aligned access +   * because A and B will probably have different alignment. So, skipping +   * the first few chars until alignment is reached is not an option. +   */ +  for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t)) +    if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos)) +      break; + +#endif + +  for (; pos < max_len; ++pos) +    if (a[pos] != b[pos]) +      break; + +  return pos; +} + +apr_size_t +svn_cstring__reverse_match_length(const char *a, +                                  const char *b, +                                  apr_size_t max_len) +{ +  apr_size_t pos = 0; + +#if SVN_UNALIGNED_ACCESS_IS_OK + +  /* Chunky processing is so much faster ... +   * +   * We can't make this work on architectures that require aligned access +   * because A and B will probably have different alignment. So, skipping +   * the first few chars until alignment is reached is not an option. +   */ +  for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t)) +    if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos)) +      break; + +  pos -= sizeof(apr_size_t); + +#endif + +  /* If we find a mismatch at -pos, pos-1 characters matched. +   */ +  while (++pos <= max_len) +    if (a[0-pos] != b[0-pos]) +      return pos - 1; + +  /* No mismatch found -> at least MAX_LEN matching chars. +   */ +  return max_len; +} + +const char * +svn_cstring_skip_prefix(const char *str, const char *prefix) +{ +  apr_size_t len = strlen(prefix); + +  if (strncmp(str, prefix, len) == 0) +    { +      return str + len; +    } +  else +    { +      return NULL; +    }  }  | 
