summaryrefslogtreecommitdiff
path: root/usr.bin/bsdiff
diff options
context:
space:
mode:
authorXin LI <delphij@FreeBSD.org>2016-07-25 03:58:19 +0000
committerXin LI <delphij@FreeBSD.org>2016-07-25 03:58:19 +0000
commitc08cbc64dca550d895175d5cf9dc60d00a041415 (patch)
treeb41ceae9f201eea572196b0d4659585ba03f9530 /usr.bin/bsdiff
parentafffab7e8b9f35c164a3da232389dcfece558895 (diff)
parent7db3f078630cfaac36b16a61362821443c0f7229 (diff)
downloadsrc-test-c08cbc64dca550d895175d5cf9dc60d00a041415.tar.gz
src-test-c08cbc64dca550d895175d5cf9dc60d00a041415.zip
Change bsdiff to use divsufsort suffix sort library instead of qsufsort,
which is more efficient. Note that for now we do not create a separate library for libdivsufsort because it's not used anywhere else. Obtained from: Chromium MFC after: 2 months
Notes
Notes: svn path=/head/; revision=303285
Diffstat (limited to 'usr.bin/bsdiff')
-rw-r--r--usr.bin/bsdiff/bsdiff/Makefile13
-rw-r--r--usr.bin/bsdiff/bsdiff/bsdiff.c112
-rw-r--r--usr.bin/bsdiff/bsdiff/config.h83
-rw-r--r--usr.bin/bsdiff/bsdiff/divsufsort64.h180
4 files changed, 282 insertions, 106 deletions
diff --git a/usr.bin/bsdiff/bsdiff/Makefile b/usr.bin/bsdiff/bsdiff/Makefile
index 7569b4f10a355..44c78a1f8d1b9 100644
--- a/usr.bin/bsdiff/bsdiff/Makefile
+++ b/usr.bin/bsdiff/bsdiff/Makefile
@@ -1,6 +1,17 @@
# $FreeBSD$
-PROG= bsdiff
+PROG= bsdiff
+
+# libdivsufsort configured with:
+# cmake -DCMAKE_BUILD_TYPE="Release" -DBUILD_DIVSUFSORT64=ON
+.PATH: ${.CURDIR}/../../../contrib/libdivsufsort/lib
+CFLAGS+= -DHAVE_CONFIG_H=1 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
+CFLAGS+= -D_LARGE_FILES -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS
+CFLAGS+= -D__STDC_LIMIT_MACROS -DBUILD_DIVSUFSORT64
+CFLAGS+= -I${.CURDIR}/../../../contrib/libdivsufsort/include -I${.CURDIR}
+SRCS= divsufsort.c sssort.c trsort.c utils.c
+
+SRCS+= bsdiff.c
LIBADD= bz2
diff --git a/usr.bin/bsdiff/bsdiff/bsdiff.c b/usr.bin/bsdiff/bsdiff/bsdiff.c
index e0b3ee7d25534..4646e4ec40226 100644
--- a/usr.bin/bsdiff/bsdiff/bsdiff.c
+++ b/usr.bin/bsdiff/bsdiff/bsdiff.c
@@ -44,106 +44,11 @@ __FBSDID("$FreeBSD$");
#define O_BINARY 0
#endif
-#define MIN(x,y) (((x)<(y)) ? (x) : (y))
-
-static void split(off_t *I,off_t *V,off_t start,off_t len,off_t h)
-{
- off_t i,j,k,x,tmp,jj,kk;
-
- if(len<16) {
- for(k=start;k<start+len;k+=j) {
- j=1;x=V[I[k]+h];
- for(i=1;k+i<start+len;i++) {
- if(V[I[k+i]+h]<x) {
- x=V[I[k+i]+h];
- j=0;
- }
- if(V[I[k+i]+h]==x) {
- tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
- j++;
- }
- }
- for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
- if(j==1) I[k]=-1;
- }
- return;
- }
+#include "divsufsort64.h"
+#define saidx_t saidx64_t
+#define divsufsort divsufsort64
- x=V[I[start+len/2]+h];
- jj=0;kk=0;
- for(i=start;i<start+len;i++) {
- if(V[I[i]+h]<x) jj++;
- if(V[I[i]+h]==x) kk++;
- }
- jj+=start;kk+=jj;
-
- i=start;j=0;k=0;
- while(i<jj) {
- if(V[I[i]+h]<x) {
- i++;
- } else if(V[I[i]+h]==x) {
- tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
- j++;
- } else {
- tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
- k++;
- }
- }
-
- while(jj+j<kk) {
- if(V[I[jj+j]+h]==x) {
- j++;
- } else {
- tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
- k++;
- }
- }
-
- if(jj>start) split(I,V,start,jj-start,h);
-
- for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
- if(jj==kk-1) I[jj]=-1;
-
- if(start+len>kk) split(I,V,kk,start+len-kk,h);
-}
-
-static void qsufsort(off_t *I,off_t *V,u_char *old,off_t oldsize)
-{
- off_t buckets[256];
- off_t i,h,len;
-
- for(i=0;i<256;i++) buckets[i]=0;
- for(i=0;i<oldsize;i++) buckets[old[i]]++;
- for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
- for(i=255;i>0;i--) buckets[i]=buckets[i-1];
- buckets[0]=0;
-
- for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
- I[0]=oldsize;
- for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
- V[oldsize]=0;
- for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
- I[0]=-1;
-
- for(h=1;I[0]!=-(oldsize+1);h+=h) {
- len=0;
- for(i=0;i<oldsize+1;) {
- if(I[i]<0) {
- len-=I[i];
- i-=I[i];
- } else {
- if(len) I[i-len]=-len;
- len=V[I[i]]+1-i;
- split(I,V,i,len,h);
- i+=len;
- len=0;
- }
- }
- if(len) I[i-len]=-len;
- }
-
- for(i=0;i<oldsize+1;i++) I[V[i]]=i;
-}
+#define MIN(x,y) (((x)<(y)) ? (x) : (y))
static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize)
{
@@ -212,7 +117,7 @@ int main(int argc,char *argv[])
int fd;
u_char *old,*new;
off_t oldsize,newsize;
- off_t *I,*V;
+ saidx_t *I;
off_t scan,pos,len;
off_t lastscan,lastpos,lastoffset;
off_t oldscore,scsc;
@@ -248,12 +153,9 @@ int main(int argc,char *argv[])
(read(fd,old,oldsize)!=oldsize) ||
(close(fd)==-1)) err(1,"%s",argv[1]);
- if(((I=malloc((oldsize+1)*sizeof(off_t)))==NULL) ||
- ((V=malloc((oldsize+1)*sizeof(off_t)))==NULL)) err(1,NULL);
-
- qsufsort(I,V,old,oldsize);
+ if(((I=malloc((oldsize+1)*sizeof(saidx_t)))==NULL)) err(1,NULL);
- free(V);
+ if(divsufsort(old, I, oldsize)) err(1, "divsufsort");
/* Allocate newsize+1 bytes instead of newsize bytes to ensure
that we never try to malloc(0) and get a NULL pointer */
diff --git a/usr.bin/bsdiff/bsdiff/config.h b/usr.bin/bsdiff/bsdiff/config.h
new file mode 100644
index 0000000000000..46912c2984ed5
--- /dev/null
+++ b/usr.bin/bsdiff/bsdiff/config.h
@@ -0,0 +1,83 @@
+/*
+ * config.h for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _CONFIG_H
+#define _CONFIG_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/** Define to the version of this package. **/
+#define PROJECT_VERSION_FULL "2.0.1-14-g5f60d6f"
+
+/** Define to 1 if you have the header files. **/
+#define HAVE_INTTYPES_H 1
+#define HAVE_STDDEF_H 1
+#define HAVE_STDINT_H 1
+#define HAVE_STDLIB_H 1
+#define HAVE_STRING_H 1
+#define HAVE_STRINGS_H 1
+#define HAVE_MEMORY_H 1
+#define HAVE_SYS_TYPES_H 1
+
+/** for WinIO **/
+/* #undef HAVE_IO_H */
+/* #undef HAVE_FCNTL_H */
+/* #undef HAVE__SETMODE */
+/* #undef HAVE_SETMODE */
+/* #undef HAVE__FILENO */
+/* #undef HAVE_FOPEN_S */
+/* #undef HAVE__O_BINARY */
+#ifndef HAVE__SETMODE
+# if HAVE_SETMODE
+# define _setmode setmode
+# define HAVE__SETMODE 1
+# endif
+# if HAVE__SETMODE && !HAVE__O_BINARY
+# define _O_BINARY 0
+# define HAVE__O_BINARY 1
+# endif
+#endif
+
+/** for inline **/
+#ifndef INLINE
+# define INLINE inline
+#endif
+
+/** for VC++ warning **/
+#ifdef _MSC_VER
+#pragma warning(disable: 4127)
+#endif
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _CONFIG_H */
diff --git a/usr.bin/bsdiff/bsdiff/divsufsort64.h b/usr.bin/bsdiff/bsdiff/divsufsort64.h
new file mode 100644
index 0000000000000..2f1c375ba45e4
--- /dev/null
+++ b/usr.bin/bsdiff/bsdiff/divsufsort64.h
@@ -0,0 +1,180 @@
+/*
+ * divsufsort64.h for libdivsufsort64
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DIVSUFSORT64_H
+#define _DIVSUFSORT64_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#include <inttypes.h>
+
+#ifndef DIVSUFSORT_API
+# ifdef DIVSUFSORT_BUILD_DLL
+# define DIVSUFSORT_API
+# else
+# define DIVSUFSORT_API
+# endif
+#endif
+
+/*- Datatypes -*/
+#ifndef SAUCHAR_T
+#define SAUCHAR_T
+typedef uint8_t sauchar_t;
+#endif /* SAUCHAR_T */
+#ifndef SAINT_T
+#define SAINT_T
+typedef int32_t saint_t;
+#endif /* SAINT_T */
+#ifndef SAIDX64_T
+#define SAIDX64_T
+typedef int64_t saidx64_t;
+#endif /* SAIDX64_T */
+#ifndef PRIdSAINT_T
+#define PRIdSAINT_T PRId32
+#endif /* PRIdSAINT_T */
+#ifndef PRIdSAIDX64_T
+#define PRIdSAIDX64_T PRId64
+#endif /* PRIdSAIDX64_T */
+
+
+/*- Prototypes -*/
+
+/**
+ * Constructs the suffix array of a given string.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The output array of suffixes.
+ * @param n The length of the given string.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+divsufsort64(const sauchar_t *T, saidx64_t *SA, saidx64_t n);
+
+/**
+ * Constructs the burrows-wheeler transformed string of a given string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @return The primary index if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saidx64_t
+divbwt64(const sauchar_t *T, sauchar_t *U, saidx64_t *A, saidx64_t n);
+
+/**
+ * Returns the version of the divsufsort library.
+ * @return The version number string.
+ */
+DIVSUFSORT_API
+const char *
+divsufsort64_version(void);
+
+
+/**
+ * Constructs the burrows-wheeler transformed string of a given string and suffix array.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param SA[0..n-1] The suffix array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The output primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+bw_transform64(const sauchar_t *T, sauchar_t *U,
+ saidx64_t *SA /* can NULL */,
+ saidx64_t n, saidx64_t *idx);
+
+/**
+ * Inverse BW-transforms a given BWTed string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+inverse_bw_transform64(const sauchar_t *T, sauchar_t *U,
+ saidx64_t *A /* can NULL */,
+ saidx64_t n, saidx64_t idx);
+
+/**
+ * Checks the correctness of a given suffix array.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The input suffix array.
+ * @param n The length of the given string.
+ * @param verbose The verbose mode.
+ * @return 0 if no error occurred.
+ */
+DIVSUFSORT_API
+saint_t
+sufcheck64(const sauchar_t *T, const saidx64_t *SA, saidx64_t n, saint_t verbose);
+
+/**
+ * Search for the pattern P in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param P[0..Psize-1] The input pattern string.
+ * @param Psize The length of the given pattern string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param idx The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ */
+DIVSUFSORT_API
+saidx64_t
+sa_search64(const sauchar_t *T, saidx64_t Tsize,
+ const sauchar_t *P, saidx64_t Psize,
+ const saidx64_t *SA, saidx64_t SAsize,
+ saidx64_t *left);
+
+/**
+ * Search for the character c in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param c The input character.
+ * @param idx The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ */
+DIVSUFSORT_API
+saidx64_t
+sa_simplesearch64(const sauchar_t *T, saidx64_t Tsize,
+ const saidx64_t *SA, saidx64_t SAsize,
+ saint_t c, saidx64_t *left);
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _DIVSUFSORT64_H */