From 86a52f9335d0d6a09b053de20b22800732870122 Mon Sep 17 00:00:00 2001 From: Peter Wemm Date: Sun, 9 Aug 2015 05:28:08 +0000 Subject: Import sqlite3-3.8.11.1 (3081101) --- sqlite3.c | 67782 +++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 37386 insertions(+), 30396 deletions(-) (limited to 'sqlite3.c') diff --git a/sqlite3.c b/sqlite3.c index a8922e482839..1344938873b4 100644 --- a/sqlite3.c +++ b/sqlite3.c @@ -1,6 +1,6 @@ /****************************************************************************** ** This file is an amalgamation of many separate C source files from SQLite -** version 3.8.9. By combining all the individual C code files into this +** version 3.8.11.1. By combining all the individual C code files into this ** single large file, the entire code can be compiled as a single translation ** unit. This allows many compilers to do optimizations that would not be ** possible if the files were compiled separately. Performance improvements @@ -70,6 +70,7 @@ #pragma warning(disable : 4055) #pragma warning(disable : 4100) #pragma warning(disable : 4127) +#pragma warning(disable : 4130) #pragma warning(disable : 4152) #pragma warning(disable : 4189) #pragma warning(disable : 4206) @@ -157,6 +158,13 @@ # define _LARGEFILE_SOURCE 1 #endif +/* What version of GCC is being used. 0 means GCC is not being used */ +#ifdef __GNUC__ +# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__) +#else +# define GCC_VERSION 0 +#endif + /* Needed for various definitions... */ #if defined(__GNUC__) && !defined(_GNU_SOURCE) # define _GNU_SOURCE @@ -229,7 +237,7 @@ ** ** The official C-language API documentation for SQLite is derived ** from comments in this file. This file is the authoritative source -** on how SQLite interfaces are suppose to operate. +** on how SQLite interfaces are supposed to operate. ** ** The name of this file under configuration management is "sqlite.h.in". ** The makefile makes some minor changes to this file (such as inserting @@ -317,9 +325,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. */ -#define SQLITE_VERSION "3.8.9" -#define SQLITE_VERSION_NUMBER 3008009 -#define SQLITE_SOURCE_ID "2015-04-08 12:16:33 8a8ffc862e96f57aa698f93de10dee28e69f6e09" +#define SQLITE_VERSION "3.8.11.1" +#define SQLITE_VERSION_NUMBER 3008011 +#define SQLITE_SOURCE_ID "2015-07-29 20:00:57 cf538e2783e468bbc25e7cb2a9ee64d3e0e80b2f" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -476,6 +484,7 @@ typedef sqlite_uint64 sqlite3_uint64; /* ** CAPI3REF: Closing A Database Connection +** DESTRUCTOR: sqlite3 ** ** ^The sqlite3_close() and sqlite3_close_v2() routines are destructors ** for the [sqlite3] object. @@ -527,6 +536,7 @@ typedef int (*sqlite3_callback)(void*,int,char**, char**); /* ** CAPI3REF: One-Step Query Execution Interface +** METHOD: sqlite3 ** ** The sqlite3_exec() interface is a convenience wrapper around ** [sqlite3_prepare_v2()], [sqlite3_step()], and [sqlite3_finalize()], @@ -1167,6 +1177,14 @@ struct sqlite3_io_methods { ** circumstances in order to fix a problem with priority inversion. ** Applications should not use this file-control. ** +**
  • [[SQLITE_FCNTL_ZIPVFS]] +** The [SQLITE_FCNTL_ZIPVFS] opcode is implemented by zipvfs only. All other +** VFS should return SQLITE_NOTFOUND for this opcode. +** +**
  • [[SQLITE_FCNTL_RBU]] +** The [SQLITE_FCNTL_RBU] opcode is implemented by the special VFS used by +** the RBU extension only. All other VFS should return SQLITE_NOTFOUND for +** this opcode. ** */ #define SQLITE_FCNTL_LOCKSTATE 1 @@ -1192,6 +1210,8 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_COMMIT_PHASETWO 22 #define SQLITE_FCNTL_WIN32_SET_HANDLE 23 #define SQLITE_FCNTL_WAL_BLOCK 24 +#define SQLITE_FCNTL_ZIPVFS 25 +#define SQLITE_FCNTL_RBU 26 /* deprecated names */ #define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE @@ -1584,6 +1604,7 @@ SQLITE_API int SQLITE_CDECL sqlite3_config(int, ...); /* ** CAPI3REF: Configure database connections +** METHOD: sqlite3 ** ** The sqlite3_db_config() interface is used to make configuration ** changes to a [database connection]. The interface is similar to @@ -2081,6 +2102,7 @@ struct sqlite3_mem_methods { /* ** CAPI3REF: Enable Or Disable Extended Result Codes +** METHOD: sqlite3 ** ** ^The sqlite3_extended_result_codes() routine enables or disables the ** [extended result codes] feature of SQLite. ^The extended result @@ -2090,6 +2112,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_extended_result_codes(sqlite3*, int onoff) /* ** CAPI3REF: Last Insert Rowid +** METHOD: sqlite3 ** ** ^Each entry in most SQLite tables (except for [WITHOUT ROWID] tables) ** has a unique 64-bit signed @@ -2141,6 +2164,7 @@ SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_last_insert_rowid(sqlite3*); /* ** CAPI3REF: Count The Number Of Rows Modified +** METHOD: sqlite3 ** ** ^This function returns the number of rows modified, inserted or ** deleted by the most recently completed INSERT, UPDATE or DELETE @@ -2193,6 +2217,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_changes(sqlite3*); /* ** CAPI3REF: Total Number Of Rows Modified +** METHOD: sqlite3 ** ** ^This function returns the total number of rows inserted, modified or ** deleted by all [INSERT], [UPDATE] or [DELETE] statements completed @@ -2216,6 +2241,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_total_changes(sqlite3*); /* ** CAPI3REF: Interrupt A Long-Running Query +** METHOD: sqlite3 ** ** ^This function causes any pending database operation to abort and ** return at its earliest opportunity. This routine is typically @@ -2292,6 +2318,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_complete16(const void *sql); /* ** CAPI3REF: Register A Callback To Handle SQLITE_BUSY Errors ** KEYWORDS: {busy-handler callback} {busy handler} +** METHOD: sqlite3 ** ** ^The sqlite3_busy_handler(D,X,P) routine sets a callback function X ** that might be invoked with argument P whenever @@ -2351,6 +2378,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_busy_handler(sqlite3*, int(*)(void*,int), /* ** CAPI3REF: Set A Busy Timeout +** METHOD: sqlite3 ** ** ^This routine sets a [sqlite3_busy_handler | busy handler] that sleeps ** for a specified amount of time when a table is locked. ^The handler @@ -2373,6 +2401,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_busy_timeout(sqlite3*, int ms); /* ** CAPI3REF: Convenience Routines For Running Queries +** METHOD: sqlite3 ** ** This is a legacy interface that is preserved for backwards compatibility. ** Use of this interface is not recommended. @@ -2708,6 +2737,7 @@ SQLITE_API void SQLITE_STDCALL sqlite3_randomness(int N, void *P); /* ** CAPI3REF: Compile-Time Authorization Callbacks +** METHOD: sqlite3 ** ** ^This routine registers an authorizer callback with a particular ** [database connection], supplied in the first argument. @@ -2864,6 +2894,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_set_authorizer( /* ** CAPI3REF: Tracing And Profiling Functions +** METHOD: sqlite3 ** ** These routines register callback functions that can be used for ** tracing and profiling the execution of SQL statements. @@ -2896,6 +2927,7 @@ SQLITE_API SQLITE_EXPERIMENTAL void *SQLITE_STDCALL sqlite3_profile(sqlite3*, /* ** CAPI3REF: Query Progress Callbacks +** METHOD: sqlite3 ** ** ^The sqlite3_progress_handler(D,N,X,P) interface causes the callback ** function X to be invoked periodically during long running calls to @@ -2929,6 +2961,7 @@ SQLITE_API void SQLITE_STDCALL sqlite3_progress_handler(sqlite3*, int, int(*)(vo /* ** CAPI3REF: Opening A New Database Connection +** CONSTRUCTOR: sqlite3 ** ** ^These routines open an SQLite database file as specified by the ** filename argument. ^The filename argument is interpreted as UTF-8 for @@ -3214,6 +3247,7 @@ SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_uri_int64(const char*, const cha /* ** CAPI3REF: Error Codes And Messages +** METHOD: sqlite3 ** ** ^If the most recent sqlite3_* API call associated with ** [database connection] D failed, then the sqlite3_errcode(D) interface @@ -3259,33 +3293,34 @@ SQLITE_API const void *SQLITE_STDCALL sqlite3_errmsg16(sqlite3*); SQLITE_API const char *SQLITE_STDCALL sqlite3_errstr(int); /* -** CAPI3REF: SQL Statement Object +** CAPI3REF: Prepared Statement Object ** KEYWORDS: {prepared statement} {prepared statements} ** -** An instance of this object represents a single SQL statement. -** This object is variously known as a "prepared statement" or a -** "compiled SQL statement" or simply as a "statement". +** An instance of this object represents a single SQL statement that +** has been compiled into binary form and is ready to be evaluated. ** -** The life of a statement object goes something like this: +** Think of each SQL statement as a separate computer program. The +** original SQL text is source code. A prepared statement object +** is the compiled object code. All SQL must be converted into a +** prepared statement before it can be run. +** +** The life-cycle of a prepared statement object usually goes like this: ** **
      -**
    1. Create the object using [sqlite3_prepare_v2()] or a related -** function. -**
    2. Bind values to [host parameters] using the sqlite3_bind_*() +**
    3. Create the prepared statement object using [sqlite3_prepare_v2()]. +**
    4. Bind values to [parameters] using the sqlite3_bind_*() ** interfaces. **
    5. Run the SQL by calling [sqlite3_step()] one or more times. -**
    6. Reset the statement using [sqlite3_reset()] then go back +**
    7. Reset the prepared statement using [sqlite3_reset()] then go back ** to step 2. Do this zero or more times. **
    8. Destroy the object using [sqlite3_finalize()]. **
    -** -** Refer to documentation on individual methods above for additional -** information. */ typedef struct sqlite3_stmt sqlite3_stmt; /* ** CAPI3REF: Run-time Limits +** METHOD: sqlite3 ** ** ^(This interface allows the size of various constructs to be limited ** on a connection by connection basis. The first parameter is the @@ -3397,6 +3432,8 @@ SQLITE_API int SQLITE_STDCALL sqlite3_limit(sqlite3*, int id, int newVal); /* ** CAPI3REF: Compiling An SQL Statement ** KEYWORDS: {SQL statement compiler} +** METHOD: sqlite3 +** CONSTRUCTOR: sqlite3_stmt ** ** To execute an SQL query, it must first be compiled into a byte-code ** program using one of these routines. @@ -3504,6 +3541,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_prepare16_v2( /* ** CAPI3REF: Retrieving Statement SQL +** METHOD: sqlite3_stmt ** ** ^This interface can be used to retrieve a saved copy of the original ** SQL text used to create a [prepared statement] if that statement was @@ -3513,6 +3551,7 @@ SQLITE_API const char *SQLITE_STDCALL sqlite3_sql(sqlite3_stmt *pStmt); /* ** CAPI3REF: Determine If An SQL Statement Writes The Database +** METHOD: sqlite3_stmt ** ** ^The sqlite3_stmt_readonly(X) interface returns true (non-zero) if ** and only if the [prepared statement] X makes no direct changes to @@ -3544,6 +3583,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_stmt_readonly(sqlite3_stmt *pStmt); /* ** CAPI3REF: Determine If A Prepared Statement Has Been Reset +** METHOD: sqlite3_stmt ** ** ^The sqlite3_stmt_busy(S) interface returns true (non-zero) if the ** [prepared statement] S has been stepped at least once using @@ -3574,7 +3614,9 @@ SQLITE_API int SQLITE_STDCALL sqlite3_stmt_busy(sqlite3_stmt*); ** Some interfaces require a protected sqlite3_value. Other interfaces ** will accept either a protected or an unprotected sqlite3_value. ** Every interface that accepts sqlite3_value arguments specifies -** whether or not it requires a protected sqlite3_value. +** whether or not it requires a protected sqlite3_value. The +** [sqlite3_value_dup()] interface can be used to construct a new +** protected sqlite3_value from an unprotected sqlite3_value. ** ** The terms "protected" and "unprotected" refer to whether or not ** a mutex is held. An internal mutex is held for a protected @@ -3618,6 +3660,7 @@ typedef struct sqlite3_context sqlite3_context; ** CAPI3REF: Binding Values To Prepared Statements ** KEYWORDS: {host parameter} {host parameters} {host parameter name} ** KEYWORDS: {SQL parameter} {SQL parameters} {parameter binding} +** METHOD: sqlite3_stmt ** ** ^(In the SQL statement text input to [sqlite3_prepare_v2()] and its variants, ** literals may be replaced by a [parameter] that matches one of following @@ -3733,9 +3776,11 @@ SQLITE_API int SQLITE_STDCALL sqlite3_bind_text64(sqlite3_stmt*, int, const char void(*)(void*), unsigned char encoding); SQLITE_API int SQLITE_STDCALL sqlite3_bind_value(sqlite3_stmt*, int, const sqlite3_value*); SQLITE_API int SQLITE_STDCALL sqlite3_bind_zeroblob(sqlite3_stmt*, int, int n); +SQLITE_API int SQLITE_STDCALL sqlite3_bind_zeroblob64(sqlite3_stmt*, int, sqlite3_uint64); /* ** CAPI3REF: Number Of SQL Parameters +** METHOD: sqlite3_stmt ** ** ^This routine can be used to find the number of [SQL parameters] ** in a [prepared statement]. SQL parameters are tokens of the @@ -3756,6 +3801,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_bind_parameter_count(sqlite3_stmt*); /* ** CAPI3REF: Name Of A Host Parameter +** METHOD: sqlite3_stmt ** ** ^The sqlite3_bind_parameter_name(P,N) interface returns ** the name of the N-th [SQL parameter] in the [prepared statement] P. @@ -3783,6 +3829,7 @@ SQLITE_API const char *SQLITE_STDCALL sqlite3_bind_parameter_name(sqlite3_stmt*, /* ** CAPI3REF: Index Of A Parameter With A Given Name +** METHOD: sqlite3_stmt ** ** ^Return the index of an SQL parameter given its name. ^The ** index value returned is suitable for use as the second @@ -3799,6 +3846,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_bind_parameter_index(sqlite3_stmt*, const /* ** CAPI3REF: Reset All Bindings On A Prepared Statement +** METHOD: sqlite3_stmt ** ** ^Contrary to the intuition of many, [sqlite3_reset()] does not reset ** the [sqlite3_bind_blob | bindings] on a [prepared statement]. @@ -3808,6 +3856,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_clear_bindings(sqlite3_stmt*); /* ** CAPI3REF: Number Of Columns In A Result Set +** METHOD: sqlite3_stmt ** ** ^Return the number of columns in the result set returned by the ** [prepared statement]. ^This routine returns 0 if pStmt is an SQL @@ -3819,6 +3868,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_column_count(sqlite3_stmt *pStmt); /* ** CAPI3REF: Column Names In A Result Set +** METHOD: sqlite3_stmt ** ** ^These routines return the name assigned to a particular column ** in the result set of a [SELECT] statement. ^The sqlite3_column_name() @@ -3848,6 +3898,7 @@ SQLITE_API const void *SQLITE_STDCALL sqlite3_column_name16(sqlite3_stmt*, int N /* ** CAPI3REF: Source Of Data In A Query Result +** METHOD: sqlite3_stmt ** ** ^These routines provide a means to determine the database, table, and ** table column that is the origin of a particular result column in @@ -3900,6 +3951,7 @@ SQLITE_API const void *SQLITE_STDCALL sqlite3_column_origin_name16(sqlite3_stmt* /* ** CAPI3REF: Declared Datatype Of A Query Result +** METHOD: sqlite3_stmt ** ** ^(The first parameter is a [prepared statement]. ** If this statement is a [SELECT] statement and the Nth column of the @@ -3932,6 +3984,7 @@ SQLITE_API const void *SQLITE_STDCALL sqlite3_column_decltype16(sqlite3_stmt*,in /* ** CAPI3REF: Evaluate An SQL Statement +** METHOD: sqlite3_stmt ** ** After a [prepared statement] has been prepared using either ** [sqlite3_prepare_v2()] or [sqlite3_prepare16_v2()] or one of the legacy @@ -4011,6 +4064,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_step(sqlite3_stmt*); /* ** CAPI3REF: Number of columns in a result set +** METHOD: sqlite3_stmt ** ** ^The sqlite3_data_count(P) interface returns the number of columns in the ** current row of the result set of [prepared statement] P. @@ -4064,8 +4118,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_data_count(sqlite3_stmt *pStmt); /* ** CAPI3REF: Result Values From A Query ** KEYWORDS: {column access functions} -** -** These routines form the "result set" interface. +** METHOD: sqlite3_stmt ** ** ^These routines return information about a single column of the current ** result row of a query. ^In every case the first argument is a pointer @@ -4126,13 +4179,14 @@ SQLITE_API int SQLITE_STDCALL sqlite3_data_count(sqlite3_stmt *pStmt); ** even empty strings, are always zero-terminated. ^The return ** value from sqlite3_column_blob() for a zero-length BLOB is a NULL pointer. ** -** ^The object returned by [sqlite3_column_value()] is an -** [unprotected sqlite3_value] object. An unprotected sqlite3_value object -** may only be used with [sqlite3_bind_value()] and [sqlite3_result_value()]. +** Warning: ^The object returned by [sqlite3_column_value()] is an +** [unprotected sqlite3_value] object. In a multithreaded environment, +** an unprotected sqlite3_value object may only be used safely with +** [sqlite3_bind_value()] and [sqlite3_result_value()]. ** If the [unprotected sqlite3_value] object returned by ** [sqlite3_column_value()] is used in any other way, including calls ** to routines like [sqlite3_value_int()], [sqlite3_value_text()], -** or [sqlite3_value_bytes()], then the behavior is undefined. +** or [sqlite3_value_bytes()], the behavior is not threadsafe. ** ** These routines attempt to convert the value where appropriate. ^For ** example, if the internal representation is FLOAT and a text result @@ -4163,12 +4217,6 @@ SQLITE_API int SQLITE_STDCALL sqlite3_data_count(sqlite3_stmt *pStmt); ** ** )^ ** -** The table above makes reference to standard C library functions atoi() -** and atof(). SQLite does not really use these functions. It has its -** own equivalent internal routines. The atoi() and atof() names are -** used in the table for brevity and because they are familiar to most -** C programmers. -** ** Note that when type conversions occur, pointers returned by prior ** calls to sqlite3_column_blob(), sqlite3_column_text(), and/or ** sqlite3_column_text16() may be invalidated. @@ -4193,7 +4241,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_data_count(sqlite3_stmt *pStmt); ** of conversion are done in place when it is possible, but sometimes they ** are not possible and in those cases prior pointers are invalidated. ** -** The safest and easiest to remember policy is to invoke these routines +** The safest policy is to invoke these routines ** in one of the following ways: ** ** ** ** The first two constants cause sqlite3_mutex_alloc() to create @@ -19532,6 +19746,9 @@ static sqlite3_mutex *pthreadMutexAlloc(int iType){ SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER }; sqlite3_mutex *p; @@ -19771,6 +19988,7 @@ SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ ************************************************************************* ** This file contains the C functions that implement mutexes for Win32. */ +/* #include "sqliteInt.h" */ #if SQLITE_OS_WIN /* @@ -19809,16 +20027,6 @@ SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ # error "The MEMORY_DEBUG macro is obsolete. Use SQLITE_DEBUG instead." #endif -#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) -# ifndef SQLITE_DEBUG_OS_TRACE -# define SQLITE_DEBUG_OS_TRACE 0 -# endif - int sqlite3OSTrace = SQLITE_DEBUG_OS_TRACE; -# define OSTRACE(X) if( sqlite3OSTrace ) sqlite3DebugPrintf X -#else -# define OSTRACE(X) -#endif - /* ** Macros for performance tracing. Normally turned off. Only works ** on i486 hardware. @@ -20148,6 +20356,9 @@ static sqlite3_mutex winMutex_staticMutexes[] = { SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER }; @@ -20219,6 +20430,9 @@ static int winMutexEnd(void){ **
  • SQLITE_MUTEX_STATIC_APP1 **
  • SQLITE_MUTEX_STATIC_APP2 **
  • SQLITE_MUTEX_STATIC_APP3 +**
  • SQLITE_MUTEX_STATIC_VFS1 +**
  • SQLITE_MUTEX_STATIC_VFS2 +**
  • SQLITE_MUTEX_STATIC_VFS3 ** ** ** The first two constants cause sqlite3_mutex_alloc() to create @@ -20450,6 +20664,7 @@ SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ ** ** Memory allocation functions used throughout sqlite. */ +/* #include "sqliteInt.h" */ /* #include */ /* @@ -20630,10 +20845,9 @@ SQLITE_PRIVATE int sqlite3MallocInit(void){ sqlite3GlobalConfig.nScratch = 0; } if( sqlite3GlobalConfig.pPage==0 || sqlite3GlobalConfig.szPage<512 - || sqlite3GlobalConfig.nPage<1 ){ + || sqlite3GlobalConfig.nPage<=0 ){ sqlite3GlobalConfig.pPage = 0; sqlite3GlobalConfig.szPage = 0; - sqlite3GlobalConfig.nPage = 0; } rc = sqlite3GlobalConfig.m.xInit(sqlite3GlobalConfig.m.pAppData); if( rc!=SQLITE_OK ) memset(&mem0, 0, sizeof(mem0)); @@ -20663,10 +20877,8 @@ SQLITE_PRIVATE void sqlite3MallocEnd(void){ ** Return the amount of memory currently checked out. */ SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_memory_used(void){ - int n, mx; - sqlite3_int64 res; - sqlite3_status(SQLITE_STATUS_MEMORY_USED, &n, &mx, 0); - res = (sqlite3_int64)n; /* Work around bug in Borland C. Ticket #3216 */ + sqlite3_int64 res, mx; + sqlite3_status64(SQLITE_STATUS_MEMORY_USED, &res, &mx, 0); return res; } @@ -20676,11 +20888,9 @@ SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_memory_used(void){ ** or since the most recent reset. */ SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_memory_highwater(int resetFlag){ - int n, mx; - sqlite3_int64 res; - sqlite3_status(SQLITE_STATUS_MEMORY_USED, &n, &mx, resetFlag); - res = (sqlite3_int64)mx; /* Work around bug in Borland C. Ticket #3216 */ - return res; + sqlite3_int64 res, mx; + sqlite3_status64(SQLITE_STATUS_MEMORY_USED, &res, &mx, resetFlag); + return mx; } /* @@ -21212,19 +21422,11 @@ SQLITE_PRIVATE char *sqlite3DbStrNDup(sqlite3 *db, const char *z, u64 n){ } /* -** Create a string from the zFromat argument and the va_list that follows. -** Store the string in memory obtained from sqliteMalloc() and make *pz -** point to that string. +** Free any prior content in *pz and replace it with a copy of zNew. */ -SQLITE_PRIVATE void sqlite3SetString(char **pz, sqlite3 *db, const char *zFormat, ...){ - va_list ap; - char *z; - - va_start(ap, zFormat); - z = sqlite3VMPrintf(db, zFormat, ap); - va_end(ap); +SQLITE_PRIVATE void sqlite3SetString(char **pz, sqlite3 *db, const char *zNew){ sqlite3DbFree(db, *pz); - *pz = z; + *pz = sqlite3DbStrDup(db, zNew); } /* @@ -21245,17 +21447,16 @@ static SQLITE_NOINLINE int apiOomError(sqlite3 *db){ ** function. However, if a malloc() failure has occurred since the previous ** invocation SQLITE_NOMEM is returned instead. ** -** If the first argument, db, is not NULL and a malloc() error has occurred, -** then the connection error-code (the value returned by sqlite3_errcode()) -** is set to SQLITE_NOMEM. +** If an OOM as occurred, then the connection error-code (the value +** returned by sqlite3_errcode()) is set to SQLITE_NOMEM. */ SQLITE_PRIVATE int sqlite3ApiExit(sqlite3* db, int rc){ - /* If the db handle is not NULL, then we must hold the connection handle - ** mutex here. Otherwise the read (and possible write) of db->mallocFailed + /* If the db handle must hold the connection handle mutex here. + ** Otherwise the read (and possible write) of db->mallocFailed ** is unsafe, as is the call to sqlite3Error(). */ - assert( !db || sqlite3_mutex_held(db->mutex) ); - if( db==0 ) return rc & 0xff; + assert( db!=0 ); + assert( sqlite3_mutex_held(db->mutex) ); if( db->mallocFailed || rc==SQLITE_IOERR_NOMEM ){ return apiOomError(db); } @@ -21266,18 +21467,16 @@ SQLITE_PRIVATE int sqlite3ApiExit(sqlite3* db, int rc){ /************** Begin file printf.c ******************************************/ /* ** The "printf" code that follows dates from the 1980's. It is in -** the public domain. The original comments are included here for -** completeness. They are very out-of-date but might be useful as -** an historical reference. Most of the "enhancements" have been backed -** out so that the functionality is now the same as standard printf(). +** the public domain. ** ************************************************************************** ** ** This file contains code for a set of "printf"-like routines. These ** routines format strings much like the printf() from the standard C ** library, though the implementation here has enhancements to support -** SQLlite. +** SQLite. */ +/* #include "sqliteInt.h" */ /* ** Conversion types fall into various categories as defined by the @@ -21403,6 +21602,7 @@ static char et_getdigit(LONGDOUBLE_TYPE *val, int *cnt){ ** Set the StrAccum object to an error mode. */ static void setStrAccumError(StrAccum *p, u8 eError){ + assert( eError==STRACCUM_NOMEM || eError==STRACCUM_TOOBIG ); p->accError = eError; p->nAlloc = 0; } @@ -21517,7 +21717,6 @@ SQLITE_PRIVATE void sqlite3VXPrintf( } }while( !done && (c=(*++fmt))!=0 ); /* Get the field width */ - width = 0; if( c=='*' ){ if( bArgList ){ width = (int)getIntArg(pArgList); @@ -21541,7 +21740,6 @@ SQLITE_PRIVATE void sqlite3VXPrintf( /* Get the precision */ if( c=='.' ){ - precision = 0; c = *++fmt; if( c=='*' ){ if( bArgList ){ @@ -22020,7 +22218,7 @@ static int sqlite3StrAccumEnlarge(StrAccum *p, int N){ testcase(p->accError==STRACCUM_NOMEM); return 0; } - if( !p->useMalloc ){ + if( p->mxAlloc==0 ){ N = p->nAlloc - p->nChar - 1; setStrAccumError(p, STRACCUM_TOOBIG); return N; @@ -22040,10 +22238,10 @@ static int sqlite3StrAccumEnlarge(StrAccum *p, int N){ }else{ p->nAlloc = (int)szNew; } - if( p->useMalloc==1 ){ + if( p->db ){ zNew = sqlite3DbRealloc(p->db, zOld, p->nAlloc); }else{ - zNew = sqlite3_realloc(zOld, p->nAlloc); + zNew = sqlite3_realloc64(zOld, p->nAlloc); } if( zNew ){ assert( p->zText!=0 || p->nChar==0 ); @@ -22091,7 +22289,7 @@ static void SQLITE_NOINLINE enlargeAndAppend(StrAccum *p, const char *z, int N){ ** size of the memory allocation for StrAccum if necessary. */ SQLITE_PRIVATE void sqlite3StrAccumAppend(StrAccum *p, const char *z, int N){ - assert( z!=0 ); + assert( z!=0 || N==0 ); assert( p->zText!=0 || p->nChar==0 || p->accError ); assert( N>=0 ); assert( p->accError==0 || p->nAlloc==0 ); @@ -22120,12 +22318,8 @@ SQLITE_PRIVATE void sqlite3StrAccumAppendAll(StrAccum *p, const char *z){ SQLITE_PRIVATE char *sqlite3StrAccumFinish(StrAccum *p){ if( p->zText ){ p->zText[p->nChar] = 0; - if( p->useMalloc && p->zText==p->zBase ){ - if( p->useMalloc==1 ){ - p->zText = sqlite3DbMallocRaw(p->db, p->nChar+1 ); - }else{ - p->zText = sqlite3_malloc(p->nChar+1); - } + if( p->mxAlloc>0 && p->zText==p->zBase ){ + p->zText = sqlite3DbMallocRaw(p->db, p->nChar+1 ); if( p->zText ){ memcpy(p->zText, p->zBase, p->nChar+1); }else{ @@ -22141,25 +22335,31 @@ SQLITE_PRIVATE char *sqlite3StrAccumFinish(StrAccum *p){ */ SQLITE_PRIVATE void sqlite3StrAccumReset(StrAccum *p){ if( p->zText!=p->zBase ){ - if( p->useMalloc==1 ){ - sqlite3DbFree(p->db, p->zText); - }else{ - sqlite3_free(p->zText); - } + sqlite3DbFree(p->db, p->zText); } p->zText = 0; } /* -** Initialize a string accumulator +** Initialize a string accumulator. +** +** p: The accumulator to be initialized. +** db: Pointer to a database connection. May be NULL. Lookaside +** memory is used if not NULL. db->mallocFailed is set appropriately +** when not NULL. +** zBase: An initial buffer. May be NULL in which case the initial buffer +** is malloced. +** n: Size of zBase in bytes. If total space requirements never exceed +** n then no memory allocations ever occur. +** mx: Maximum number of bytes to accumulate. If mx==0 then no memory +** allocations will ever occur. */ -SQLITE_PRIVATE void sqlite3StrAccumInit(StrAccum *p, char *zBase, int n, int mx){ +SQLITE_PRIVATE void sqlite3StrAccumInit(StrAccum *p, sqlite3 *db, char *zBase, int n, int mx){ p->zText = p->zBase = zBase; - p->db = 0; + p->db = db; p->nChar = 0; p->nAlloc = n; p->mxAlloc = mx; - p->useMalloc = 1; p->accError = 0; } @@ -22172,9 +22372,8 @@ SQLITE_PRIVATE char *sqlite3VMPrintf(sqlite3 *db, const char *zFormat, va_list a char zBase[SQLITE_PRINT_BUF_SIZE]; StrAccum acc; assert( db!=0 ); - sqlite3StrAccumInit(&acc, zBase, sizeof(zBase), + sqlite3StrAccumInit(&acc, db, zBase, sizeof(zBase), db->aLimit[SQLITE_LIMIT_LENGTH]); - acc.db = db; sqlite3VXPrintf(&acc, SQLITE_PRINTF_INTERNAL, zFormat, ap); z = sqlite3StrAccumFinish(&acc); if( acc.accError==STRACCUM_NOMEM ){ @@ -22196,24 +22395,6 @@ SQLITE_PRIVATE char *sqlite3MPrintf(sqlite3 *db, const char *zFormat, ...){ return z; } -/* -** Like sqlite3MPrintf(), but call sqlite3DbFree() on zStr after formatting -** the string and before returning. This routine is intended to be used -** to modify an existing string. For example: -** -** x = sqlite3MPrintf(db, x, "prefix %s suffix", x); -** -*/ -SQLITE_PRIVATE char *sqlite3MAppendf(sqlite3 *db, char *zStr, const char *zFormat, ...){ - va_list ap; - char *z; - va_start(ap, zFormat); - z = sqlite3VMPrintf(db, zFormat, ap); - va_end(ap); - sqlite3DbFree(db, zStr); - return z; -} - /* ** Print into memory obtained from sqlite3_malloc(). Omit the internal ** %-conversion extensions. @@ -22232,8 +22413,7 @@ SQLITE_API char *SQLITE_STDCALL sqlite3_vmprintf(const char *zFormat, va_list ap #ifndef SQLITE_OMIT_AUTOINIT if( sqlite3_initialize() ) return 0; #endif - sqlite3StrAccumInit(&acc, zBase, sizeof(zBase), SQLITE_MAX_LENGTH); - acc.useMalloc = 2; + sqlite3StrAccumInit(&acc, 0, zBase, sizeof(zBase), SQLITE_MAX_LENGTH); sqlite3VXPrintf(&acc, 0, zFormat, ap); z = sqlite3StrAccumFinish(&acc); return z; @@ -22278,8 +22458,7 @@ SQLITE_API char *SQLITE_STDCALL sqlite3_vsnprintf(int n, char *zBuf, const char return zBuf; } #endif - sqlite3StrAccumInit(&acc, zBuf, n, 0); - acc.useMalloc = 0; + sqlite3StrAccumInit(&acc, 0, zBuf, n, 0); sqlite3VXPrintf(&acc, 0, zFormat, ap); return sqlite3StrAccumFinish(&acc); } @@ -22300,13 +22479,17 @@ SQLITE_API char *SQLITE_CDECL sqlite3_snprintf(int n, char *zBuf, const char *zF ** sqlite3_log() must render into a static buffer. It cannot dynamically ** allocate memory because it might be called while the memory allocator ** mutex is held. +** +** sqlite3VXPrintf() might ask for *temporary* memory allocations for +** certain format characters (%q) or for very large precisions or widths. +** Care must be taken that any sqlite3_log() calls that occur while the +** memory mutex is held do not use these mechanisms. */ static void renderLogMsg(int iErrCode, const char *zFormat, va_list ap){ StrAccum acc; /* String accumulator */ char zMsg[SQLITE_PRINT_BUF_SIZE*3]; /* Complete log message */ - sqlite3StrAccumInit(&acc, zMsg, sizeof(zMsg), 0); - acc.useMalloc = 0; + sqlite3StrAccumInit(&acc, 0, zMsg, sizeof(zMsg), 0); sqlite3VXPrintf(&acc, 0, zFormat, ap); sqlite3GlobalConfig.xLog(sqlite3GlobalConfig.pLogArg, iErrCode, sqlite3StrAccumFinish(&acc)); @@ -22324,7 +22507,7 @@ SQLITE_API void SQLITE_CDECL sqlite3_log(int iErrCode, const char *zFormat, ...) } } -#if defined(SQLITE_DEBUG) +#if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) /* ** A version of printf() that understands %lld. Used for debugging. ** The printf() built into some versions of windows does not understand %lld @@ -22334,8 +22517,7 @@ SQLITE_PRIVATE void sqlite3DebugPrintf(const char *zFormat, ...){ va_list ap; StrAccum acc; char zBuf[500]; - sqlite3StrAccumInit(&acc, zBuf, sizeof(zBuf), 0); - acc.useMalloc = 0; + sqlite3StrAccumInit(&acc, 0, zBuf, sizeof(zBuf), 0); va_start(ap,zFormat); sqlite3VXPrintf(&acc, 0, zFormat, ap); va_end(ap); @@ -22345,24 +22527,48 @@ SQLITE_PRIVATE void sqlite3DebugPrintf(const char *zFormat, ...){ } #endif -#ifdef SQLITE_DEBUG -/************************************************************************* -** Routines for implementing the "TreeView" display of hierarchical -** data structures for debugging. + +/* +** variable-argument wrapper around sqlite3VXPrintf(). +*/ +SQLITE_PRIVATE void sqlite3XPrintf(StrAccum *p, u32 bFlags, const char *zFormat, ...){ + va_list ap; + va_start(ap,zFormat); + sqlite3VXPrintf(p, bFlags, zFormat, ap); + va_end(ap); +} + +/************** End of printf.c **********************************************/ +/************** Begin file treeview.c ****************************************/ +/* +** 2015-06-08 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* ** -** The main entry points (coded elsewhere) are: -** sqlite3TreeViewExpr(0, pExpr, 0); -** sqlite3TreeViewExprList(0, pList, 0, 0); -** sqlite3TreeViewSelect(0, pSelect, 0); -** Insert calls to those routines while debugging in order to display -** a diagram of Expr, ExprList, and Select objects. +** This file contains C code to implement the TreeView debugging routines. +** These routines print a parse tree to standard output for debugging and +** analysis. ** +** The interfaces in this file is only available when compiling +** with SQLITE_DEBUG. */ -/* Add a new subitem to the tree. The moreToFollow flag indicates that this -** is not the last item in the tree. */ -SQLITE_PRIVATE TreeView *sqlite3TreeViewPush(TreeView *p, u8 moreToFollow){ +/* #include "sqliteInt.h" */ +#ifdef SQLITE_DEBUG + +/* +** Add a new subitem to the tree. The moreToFollow flag indicates that this +** is not the last item in the tree. +*/ +static TreeView *sqlite3TreeViewPush(TreeView *p, u8 moreToFollow){ if( p==0 ){ - p = sqlite3_malloc( sizeof(*p) ); + p = sqlite3_malloc64( sizeof(*p) ); if( p==0 ) return 0; memset(p, 0, sizeof(*p)); }else{ @@ -22372,21 +22578,26 @@ SQLITE_PRIVATE TreeView *sqlite3TreeViewPush(TreeView *p, u8 moreToFollow){ if( p->iLevelbLine) ) p->bLine[p->iLevel] = moreToFollow; return p; } -/* Finished with one layer of the tree */ -SQLITE_PRIVATE void sqlite3TreeViewPop(TreeView *p){ + +/* +** Finished with one layer of the tree +*/ +static void sqlite3TreeViewPop(TreeView *p){ if( p==0 ) return; p->iLevel--; if( p->iLevel<0 ) sqlite3_free(p); } -/* Generate a single line of output for the tree, with a prefix that contains -** all the appropriate tree lines */ -SQLITE_PRIVATE void sqlite3TreeViewLine(TreeView *p, const char *zFormat, ...){ + +/* +** Generate a single line of output for the tree, with a prefix that contains +** all the appropriate tree lines +*/ +static void sqlite3TreeViewLine(TreeView *p, const char *zFormat, ...){ va_list ap; int i; StrAccum acc; char zBuf[500]; - sqlite3StrAccumInit(&acc, zBuf, sizeof(zBuf), 0); - acc.useMalloc = 0; + sqlite3StrAccumInit(&acc, 0, zBuf, sizeof(zBuf), 0); if( p ){ for(i=0; iiLevel && ibLine)-1; i++){ sqlite3StrAccumAppend(&acc, p->bLine[i] ? "| " : " ", 4); @@ -22401,24 +22612,367 @@ SQLITE_PRIVATE void sqlite3TreeViewLine(TreeView *p, const char *zFormat, ...){ fprintf(stdout,"%s", zBuf); fflush(stdout); } -/* Shorthand for starting a new tree item that consists of a single label */ -SQLITE_PRIVATE void sqlite3TreeViewItem(TreeView *p, const char *zLabel, u8 moreToFollow){ - p = sqlite3TreeViewPush(p, moreToFollow); + +/* +** Shorthand for starting a new tree item that consists of a single label +*/ +static void sqlite3TreeViewItem(TreeView *p, const char *zLabel,u8 moreFollows){ + p = sqlite3TreeViewPush(p, moreFollows); sqlite3TreeViewLine(p, "%s", zLabel); } -#endif /* SQLITE_DEBUG */ + /* -** variable-argument wrapper around sqlite3VXPrintf(). +** Generate a human-readable description of a the Select object. */ -SQLITE_PRIVATE void sqlite3XPrintf(StrAccum *p, u32 bFlags, const char *zFormat, ...){ - va_list ap; - va_start(ap,zFormat); - sqlite3VXPrintf(p, bFlags, zFormat, ap); - va_end(ap); +SQLITE_PRIVATE void sqlite3TreeViewSelect(TreeView *pView, const Select *p, u8 moreToFollow){ + int n = 0; + pView = sqlite3TreeViewPush(pView, moreToFollow); + sqlite3TreeViewLine(pView, "SELECT%s%s (0x%p) selFlags=0x%x", + ((p->selFlags & SF_Distinct) ? " DISTINCT" : ""), + ((p->selFlags & SF_Aggregate) ? " agg_flag" : ""), p, p->selFlags + ); + if( p->pSrc && p->pSrc->nSrc ) n++; + if( p->pWhere ) n++; + if( p->pGroupBy ) n++; + if( p->pHaving ) n++; + if( p->pOrderBy ) n++; + if( p->pLimit ) n++; + if( p->pOffset ) n++; + if( p->pPrior ) n++; + sqlite3TreeViewExprList(pView, p->pEList, (n--)>0, "result-set"); + if( p->pSrc && p->pSrc->nSrc ){ + int i; + pView = sqlite3TreeViewPush(pView, (n--)>0); + sqlite3TreeViewLine(pView, "FROM"); + for(i=0; ipSrc->nSrc; i++){ + struct SrcList_item *pItem = &p->pSrc->a[i]; + StrAccum x; + char zLine[100]; + sqlite3StrAccumInit(&x, 0, zLine, sizeof(zLine), 0); + sqlite3XPrintf(&x, 0, "{%d,*}", pItem->iCursor); + if( pItem->zDatabase ){ + sqlite3XPrintf(&x, 0, " %s.%s", pItem->zDatabase, pItem->zName); + }else if( pItem->zName ){ + sqlite3XPrintf(&x, 0, " %s", pItem->zName); + } + if( pItem->pTab ){ + sqlite3XPrintf(&x, 0, " tabname=%Q", pItem->pTab->zName); + } + if( pItem->zAlias ){ + sqlite3XPrintf(&x, 0, " (AS %s)", pItem->zAlias); + } + if( pItem->jointype & JT_LEFT ){ + sqlite3XPrintf(&x, 0, " LEFT-JOIN"); + } + sqlite3StrAccumFinish(&x); + sqlite3TreeViewItem(pView, zLine, ipSrc->nSrc-1); + if( pItem->pSelect ){ + sqlite3TreeViewSelect(pView, pItem->pSelect, 0); + } + sqlite3TreeViewPop(pView); + } + sqlite3TreeViewPop(pView); + } + if( p->pWhere ){ + sqlite3TreeViewItem(pView, "WHERE", (n--)>0); + sqlite3TreeViewExpr(pView, p->pWhere, 0); + sqlite3TreeViewPop(pView); + } + if( p->pGroupBy ){ + sqlite3TreeViewExprList(pView, p->pGroupBy, (n--)>0, "GROUPBY"); + } + if( p->pHaving ){ + sqlite3TreeViewItem(pView, "HAVING", (n--)>0); + sqlite3TreeViewExpr(pView, p->pHaving, 0); + sqlite3TreeViewPop(pView); + } + if( p->pOrderBy ){ + sqlite3TreeViewExprList(pView, p->pOrderBy, (n--)>0, "ORDERBY"); + } + if( p->pLimit ){ + sqlite3TreeViewItem(pView, "LIMIT", (n--)>0); + sqlite3TreeViewExpr(pView, p->pLimit, 0); + sqlite3TreeViewPop(pView); + } + if( p->pOffset ){ + sqlite3TreeViewItem(pView, "OFFSET", (n--)>0); + sqlite3TreeViewExpr(pView, p->pOffset, 0); + sqlite3TreeViewPop(pView); + } + if( p->pPrior ){ + const char *zOp = "UNION"; + switch( p->op ){ + case TK_ALL: zOp = "UNION ALL"; break; + case TK_INTERSECT: zOp = "INTERSECT"; break; + case TK_EXCEPT: zOp = "EXCEPT"; break; + } + sqlite3TreeViewItem(pView, zOp, (n--)>0); + sqlite3TreeViewSelect(pView, p->pPrior, 0); + sqlite3TreeViewPop(pView); + } + sqlite3TreeViewPop(pView); } -/************** End of printf.c **********************************************/ +/* +** Generate a human-readable explanation of an expression tree. +*/ +SQLITE_PRIVATE void sqlite3TreeViewExpr(TreeView *pView, const Expr *pExpr, u8 moreToFollow){ + const char *zBinOp = 0; /* Binary operator */ + const char *zUniOp = 0; /* Unary operator */ + char zFlgs[30]; + pView = sqlite3TreeViewPush(pView, moreToFollow); + if( pExpr==0 ){ + sqlite3TreeViewLine(pView, "nil"); + sqlite3TreeViewPop(pView); + return; + } + if( pExpr->flags ){ + sqlite3_snprintf(sizeof(zFlgs),zFlgs," flags=0x%x",pExpr->flags); + }else{ + zFlgs[0] = 0; + } + switch( pExpr->op ){ + case TK_AGG_COLUMN: { + sqlite3TreeViewLine(pView, "AGG{%d:%d}%s", + pExpr->iTable, pExpr->iColumn, zFlgs); + break; + } + case TK_COLUMN: { + if( pExpr->iTable<0 ){ + /* This only happens when coding check constraints */ + sqlite3TreeViewLine(pView, "COLUMN(%d)%s", pExpr->iColumn, zFlgs); + }else{ + sqlite3TreeViewLine(pView, "{%d:%d}%s", + pExpr->iTable, pExpr->iColumn, zFlgs); + } + break; + } + case TK_INTEGER: { + if( pExpr->flags & EP_IntValue ){ + sqlite3TreeViewLine(pView, "%d", pExpr->u.iValue); + }else{ + sqlite3TreeViewLine(pView, "%s", pExpr->u.zToken); + } + break; + } +#ifndef SQLITE_OMIT_FLOATING_POINT + case TK_FLOAT: { + sqlite3TreeViewLine(pView,"%s", pExpr->u.zToken); + break; + } +#endif + case TK_STRING: { + sqlite3TreeViewLine(pView,"%Q", pExpr->u.zToken); + break; + } + case TK_NULL: { + sqlite3TreeViewLine(pView,"NULL"); + break; + } +#ifndef SQLITE_OMIT_BLOB_LITERAL + case TK_BLOB: { + sqlite3TreeViewLine(pView,"%s", pExpr->u.zToken); + break; + } +#endif + case TK_VARIABLE: { + sqlite3TreeViewLine(pView,"VARIABLE(%s,%d)", + pExpr->u.zToken, pExpr->iColumn); + break; + } + case TK_REGISTER: { + sqlite3TreeViewLine(pView,"REGISTER(%d)", pExpr->iTable); + break; + } + case TK_AS: { + sqlite3TreeViewLine(pView,"AS %Q", pExpr->u.zToken); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); + break; + } + case TK_ID: { + sqlite3TreeViewLine(pView,"ID \"%w\"", pExpr->u.zToken); + break; + } +#ifndef SQLITE_OMIT_CAST + case TK_CAST: { + /* Expressions of the form: CAST(pLeft AS token) */ + sqlite3TreeViewLine(pView,"CAST %Q", pExpr->u.zToken); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); + break; + } +#endif /* SQLITE_OMIT_CAST */ + case TK_LT: zBinOp = "LT"; break; + case TK_LE: zBinOp = "LE"; break; + case TK_GT: zBinOp = "GT"; break; + case TK_GE: zBinOp = "GE"; break; + case TK_NE: zBinOp = "NE"; break; + case TK_EQ: zBinOp = "EQ"; break; + case TK_IS: zBinOp = "IS"; break; + case TK_ISNOT: zBinOp = "ISNOT"; break; + case TK_AND: zBinOp = "AND"; break; + case TK_OR: zBinOp = "OR"; break; + case TK_PLUS: zBinOp = "ADD"; break; + case TK_STAR: zBinOp = "MUL"; break; + case TK_MINUS: zBinOp = "SUB"; break; + case TK_REM: zBinOp = "REM"; break; + case TK_BITAND: zBinOp = "BITAND"; break; + case TK_BITOR: zBinOp = "BITOR"; break; + case TK_SLASH: zBinOp = "DIV"; break; + case TK_LSHIFT: zBinOp = "LSHIFT"; break; + case TK_RSHIFT: zBinOp = "RSHIFT"; break; + case TK_CONCAT: zBinOp = "CONCAT"; break; + case TK_DOT: zBinOp = "DOT"; break; + + case TK_UMINUS: zUniOp = "UMINUS"; break; + case TK_UPLUS: zUniOp = "UPLUS"; break; + case TK_BITNOT: zUniOp = "BITNOT"; break; + case TK_NOT: zUniOp = "NOT"; break; + case TK_ISNULL: zUniOp = "ISNULL"; break; + case TK_NOTNULL: zUniOp = "NOTNULL"; break; + + case TK_COLLATE: { + sqlite3TreeViewLine(pView, "COLLATE %Q", pExpr->u.zToken); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); + break; + } + + case TK_AGG_FUNCTION: + case TK_FUNCTION: { + ExprList *pFarg; /* List of function arguments */ + if( ExprHasProperty(pExpr, EP_TokenOnly) ){ + pFarg = 0; + }else{ + pFarg = pExpr->x.pList; + } + if( pExpr->op==TK_AGG_FUNCTION ){ + sqlite3TreeViewLine(pView, "AGG_FUNCTION%d %Q", + pExpr->op2, pExpr->u.zToken); + }else{ + sqlite3TreeViewLine(pView, "FUNCTION %Q", pExpr->u.zToken); + } + if( pFarg ){ + sqlite3TreeViewExprList(pView, pFarg, 0, 0); + } + break; + } +#ifndef SQLITE_OMIT_SUBQUERY + case TK_EXISTS: { + sqlite3TreeViewLine(pView, "EXISTS-expr"); + sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0); + break; + } + case TK_SELECT: { + sqlite3TreeViewLine(pView, "SELECT-expr"); + sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0); + break; + } + case TK_IN: { + sqlite3TreeViewLine(pView, "IN"); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 1); + if( ExprHasProperty(pExpr, EP_xIsSelect) ){ + sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0); + }else{ + sqlite3TreeViewExprList(pView, pExpr->x.pList, 0, 0); + } + break; + } +#endif /* SQLITE_OMIT_SUBQUERY */ + + /* + ** x BETWEEN y AND z + ** + ** This is equivalent to + ** + ** x>=y AND x<=z + ** + ** X is stored in pExpr->pLeft. + ** Y is stored in pExpr->pList->a[0].pExpr. + ** Z is stored in pExpr->pList->a[1].pExpr. + */ + case TK_BETWEEN: { + Expr *pX = pExpr->pLeft; + Expr *pY = pExpr->x.pList->a[0].pExpr; + Expr *pZ = pExpr->x.pList->a[1].pExpr; + sqlite3TreeViewLine(pView, "BETWEEN"); + sqlite3TreeViewExpr(pView, pX, 1); + sqlite3TreeViewExpr(pView, pY, 1); + sqlite3TreeViewExpr(pView, pZ, 0); + break; + } + case TK_TRIGGER: { + /* If the opcode is TK_TRIGGER, then the expression is a reference + ** to a column in the new.* or old.* pseudo-tables available to + ** trigger programs. In this case Expr.iTable is set to 1 for the + ** new.* pseudo-table, or 0 for the old.* pseudo-table. Expr.iColumn + ** is set to the column of the pseudo-table to read, or to -1 to + ** read the rowid field. + */ + sqlite3TreeViewLine(pView, "%s(%d)", + pExpr->iTable ? "NEW" : "OLD", pExpr->iColumn); + break; + } + case TK_CASE: { + sqlite3TreeViewLine(pView, "CASE"); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 1); + sqlite3TreeViewExprList(pView, pExpr->x.pList, 0, 0); + break; + } +#ifndef SQLITE_OMIT_TRIGGER + case TK_RAISE: { + const char *zType = "unk"; + switch( pExpr->affinity ){ + case OE_Rollback: zType = "rollback"; break; + case OE_Abort: zType = "abort"; break; + case OE_Fail: zType = "fail"; break; + case OE_Ignore: zType = "ignore"; break; + } + sqlite3TreeViewLine(pView, "RAISE %s(%Q)", zType, pExpr->u.zToken); + break; + } +#endif + default: { + sqlite3TreeViewLine(pView, "op=%d", pExpr->op); + break; + } + } + if( zBinOp ){ + sqlite3TreeViewLine(pView, "%s%s", zBinOp, zFlgs); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 1); + sqlite3TreeViewExpr(pView, pExpr->pRight, 0); + }else if( zUniOp ){ + sqlite3TreeViewLine(pView, "%s%s", zUniOp, zFlgs); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); + } + sqlite3TreeViewPop(pView); +} + +/* +** Generate a human-readable explanation of an expression list. +*/ +SQLITE_PRIVATE void sqlite3TreeViewExprList( + TreeView *pView, + const ExprList *pList, + u8 moreToFollow, + const char *zLabel +){ + int i; + pView = sqlite3TreeViewPush(pView, moreToFollow); + if( zLabel==0 || zLabel[0]==0 ) zLabel = "LIST"; + if( pList==0 ){ + sqlite3TreeViewLine(pView, "%s (empty)", zLabel); + }else{ + sqlite3TreeViewLine(pView, "%s", zLabel); + for(i=0; inExpr; i++){ + sqlite3TreeViewExpr(pView, pList->a[i].pExpr, inExpr-1); + } + } + sqlite3TreeViewPop(pView); +} + +#endif /* SQLITE_DEBUG */ + +/************** End of treeview.c ********************************************/ /************** Begin file random.c ******************************************/ /* ** 2001 September 15 @@ -22437,6 +22991,7 @@ SQLITE_PRIVATE void sqlite3XPrintf(StrAccum *p, u32 bFlags, const char *zFormat, ** Random numbers are used by some of the database backends in order ** to generate random integer keys for tables or random filenames. */ +/* #include "sqliteInt.h" */ /* All threads share a single random number generator. @@ -22583,7 +23138,9 @@ SQLITE_PRIVATE void sqlite3PrngRestoreState(void){ ** of multiple cores can do so, while also allowing applications to stay ** single-threaded if desired. */ +/* #include "sqliteInt.h" */ #if SQLITE_OS_WIN +/* # include "os_win.h" */ #endif #if SQLITE_MAX_WORKER_THREADS>0 @@ -22857,7 +23414,9 @@ SQLITE_PRIVATE int sqlite3ThreadJoin(SQLiteThread *p, void **ppOut){ ** 0xfe 0xff big-endian utf-16 follows ** */ +/* #include "sqliteInt.h" */ /* #include */ +/* #include "vdbeInt.h" */ #ifndef SQLITE_AMALGAMATION /* @@ -23370,6 +23929,7 @@ SQLITE_PRIVATE void sqlite3UtfSelfTest(void){ ** strings, and stuff like that. ** */ +/* #include "sqliteInt.h" */ /* #include */ #if HAVE_ISNAN || SQLITE_HAVE_ISNAN # include @@ -23459,10 +24019,8 @@ SQLITE_PRIVATE int sqlite3IsNaN(double x){ ** than 1GiB) the value returned might be less than the true string length. */ SQLITE_PRIVATE int sqlite3Strlen30(const char *z){ - const char *z2 = z; if( z==0 ) return 0; - while( *z2 ){ z2++; } - return 0x3fffffff & (int)(z2 - z); + return 0x3fffffff & (int)strlen(z); } /* @@ -24009,6 +24567,7 @@ SQLITE_PRIVATE int sqlite3GetInt32(const char *zNum, int *pValue){ } } #endif + while( zNum[0]=='0' ) zNum++; for(i=0; i<11 && (c = zNum[i] - '0')>=0 && c<=9; i++){ v = v*10 + c; } @@ -24433,14 +24992,38 @@ SQLITE_PRIVATE int sqlite3VarintLen(u64 v){ ** Read or write a four-byte big-endian integer value. */ SQLITE_PRIVATE u32 sqlite3Get4byte(const u8 *p){ +#if SQLITE_BYTEORDER==4321 + u32 x; + memcpy(&x,p,4); + return x; +#elif SQLITE_BYTEORDER==1234 && defined(__GNUC__) && GCC_VERSION>=4003000 + u32 x; + memcpy(&x,p,4); + return __builtin_bswap32(x); +#elif SQLITE_BYTEORDER==1234 && defined(_MSC_VER) && _MSC_VER>=1300 + u32 x; + memcpy(&x,p,4); + return _byteswap_ulong(x); +#else testcase( p[0]&0x80 ); return ((unsigned)p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3]; +#endif } SQLITE_PRIVATE void sqlite3Put4byte(unsigned char *p, u32 v){ +#if SQLITE_BYTEORDER==4321 + memcpy(p,&v,4); +#elif SQLITE_BYTEORDER==1234 && defined(__GNUC__) && GCC_VERSION>=4003000 + u32 x = __builtin_bswap32(v); + memcpy(p,&x,4); +#elif SQLITE_BYTEORDER==1234 && defined(_MSC_VER) && _MSC_VER>=1300 + u32 x = _byteswap_ulong(v); + memcpy(p,&x,4); +#else p[0] = (u8)(v>>24); p[1] = (u8)(v>>16); p[2] = (u8)(v>>8); p[3] = (u8)v; +#endif } @@ -24743,6 +25326,7 @@ SQLITE_PRIVATE u64 sqlite3LogEstToInt(LogEst x){ ** This is the implementation of generic hash-tables ** used in SQLite. */ +/* #include "sqliteInt.h" */ /* #include */ /* Turn bulk memory into a hash table object by initializing the @@ -25008,42 +25592,42 @@ SQLITE_PRIVATE void *sqlite3HashInsert(Hash *pH, const char *pKey, void *data){ #endif SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ static const char *const azName[] = { "?", - /* 1 */ "Function" OpHelp("r[P3]=func(r[P2@P5])"), - /* 2 */ "Savepoint" OpHelp(""), - /* 3 */ "AutoCommit" OpHelp(""), - /* 4 */ "Transaction" OpHelp(""), - /* 5 */ "SorterNext" OpHelp(""), - /* 6 */ "PrevIfOpen" OpHelp(""), - /* 7 */ "NextIfOpen" OpHelp(""), - /* 8 */ "Prev" OpHelp(""), - /* 9 */ "Next" OpHelp(""), - /* 10 */ "AggStep" OpHelp("accum=r[P3] step(r[P2@P5])"), - /* 11 */ "Checkpoint" OpHelp(""), - /* 12 */ "JournalMode" OpHelp(""), - /* 13 */ "Vacuum" OpHelp(""), - /* 14 */ "VFilter" OpHelp("iplan=r[P3] zplan='P4'"), - /* 15 */ "VUpdate" OpHelp("data=r[P3@P2]"), - /* 16 */ "Goto" OpHelp(""), - /* 17 */ "Gosub" OpHelp(""), - /* 18 */ "Return" OpHelp(""), + /* 1 */ "Savepoint" OpHelp(""), + /* 2 */ "AutoCommit" OpHelp(""), + /* 3 */ "Transaction" OpHelp(""), + /* 4 */ "SorterNext" OpHelp(""), + /* 5 */ "PrevIfOpen" OpHelp(""), + /* 6 */ "NextIfOpen" OpHelp(""), + /* 7 */ "Prev" OpHelp(""), + /* 8 */ "Next" OpHelp(""), + /* 9 */ "Checkpoint" OpHelp(""), + /* 10 */ "JournalMode" OpHelp(""), + /* 11 */ "Vacuum" OpHelp(""), + /* 12 */ "VFilter" OpHelp("iplan=r[P3] zplan='P4'"), + /* 13 */ "VUpdate" OpHelp("data=r[P3@P2]"), + /* 14 */ "Goto" OpHelp(""), + /* 15 */ "Gosub" OpHelp(""), + /* 16 */ "Return" OpHelp(""), + /* 17 */ "InitCoroutine" OpHelp(""), + /* 18 */ "EndCoroutine" OpHelp(""), /* 19 */ "Not" OpHelp("r[P2]= !r[P1]"), - /* 20 */ "InitCoroutine" OpHelp(""), - /* 21 */ "EndCoroutine" OpHelp(""), - /* 22 */ "Yield" OpHelp(""), - /* 23 */ "HaltIfNull" OpHelp("if r[P3]=null halt"), - /* 24 */ "Halt" OpHelp(""), - /* 25 */ "Integer" OpHelp("r[P2]=P1"), - /* 26 */ "Int64" OpHelp("r[P2]=P4"), - /* 27 */ "String" OpHelp("r[P2]='P4' (len=P1)"), - /* 28 */ "Null" OpHelp("r[P2..P3]=NULL"), - /* 29 */ "SoftNull" OpHelp("r[P1]=NULL"), - /* 30 */ "Blob" OpHelp("r[P2]=P4 (len=P1)"), - /* 31 */ "Variable" OpHelp("r[P2]=parameter(P1,P4)"), - /* 32 */ "Move" OpHelp("r[P2@P3]=r[P1@P3]"), - /* 33 */ "Copy" OpHelp("r[P2@P3+1]=r[P1@P3+1]"), - /* 34 */ "SCopy" OpHelp("r[P2]=r[P1]"), - /* 35 */ "ResultRow" OpHelp("output=r[P1@P2]"), - /* 36 */ "CollSeq" OpHelp(""), + /* 20 */ "Yield" OpHelp(""), + /* 21 */ "HaltIfNull" OpHelp("if r[P3]=null halt"), + /* 22 */ "Halt" OpHelp(""), + /* 23 */ "Integer" OpHelp("r[P2]=P1"), + /* 24 */ "Int64" OpHelp("r[P2]=P4"), + /* 25 */ "String" OpHelp("r[P2]='P4' (len=P1)"), + /* 26 */ "Null" OpHelp("r[P2..P3]=NULL"), + /* 27 */ "SoftNull" OpHelp("r[P1]=NULL"), + /* 28 */ "Blob" OpHelp("r[P2]=P4 (len=P1)"), + /* 29 */ "Variable" OpHelp("r[P2]=parameter(P1,P4)"), + /* 30 */ "Move" OpHelp("r[P2@P3]=r[P1@P3]"), + /* 31 */ "Copy" OpHelp("r[P2@P3+1]=r[P1@P3+1]"), + /* 32 */ "SCopy" OpHelp("r[P2]=r[P1]"), + /* 33 */ "ResultRow" OpHelp("output=r[P1@P2]"), + /* 34 */ "CollSeq" OpHelp(""), + /* 35 */ "Function0" OpHelp("r[P3]=func(r[P2@P5])"), + /* 36 */ "Function" OpHelp("r[P3]=func(r[P2@P5])"), /* 37 */ "AddImm" OpHelp("r[P1]=r[P1]+P2"), /* 38 */ "MustBeInt" OpHelp(""), /* 39 */ "RealAffinity" OpHelp(""), @@ -25069,20 +25653,20 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ /* 59 */ "SequenceTest" OpHelp("if( cursor[P1].ctr++ ) pc = P2"), /* 60 */ "OpenPseudo" OpHelp("P3 columns in r[P2]"), /* 61 */ "Close" OpHelp(""), - /* 62 */ "SeekLT" OpHelp("key=r[P3@P4]"), - /* 63 */ "SeekLE" OpHelp("key=r[P3@P4]"), - /* 64 */ "SeekGE" OpHelp("key=r[P3@P4]"), - /* 65 */ "SeekGT" OpHelp("key=r[P3@P4]"), - /* 66 */ "Seek" OpHelp("intkey=r[P2]"), - /* 67 */ "NoConflict" OpHelp("key=r[P3@P4]"), - /* 68 */ "NotFound" OpHelp("key=r[P3@P4]"), - /* 69 */ "Found" OpHelp("key=r[P3@P4]"), - /* 70 */ "NotExists" OpHelp("intkey=r[P3]"), + /* 62 */ "ColumnsUsed" OpHelp(""), + /* 63 */ "SeekLT" OpHelp("key=r[P3@P4]"), + /* 64 */ "SeekLE" OpHelp("key=r[P3@P4]"), + /* 65 */ "SeekGE" OpHelp("key=r[P3@P4]"), + /* 66 */ "SeekGT" OpHelp("key=r[P3@P4]"), + /* 67 */ "Seek" OpHelp("intkey=r[P2]"), + /* 68 */ "NoConflict" OpHelp("key=r[P3@P4]"), + /* 69 */ "NotFound" OpHelp("key=r[P3@P4]"), + /* 70 */ "Found" OpHelp("key=r[P3@P4]"), /* 71 */ "Or" OpHelp("r[P3]=(r[P1] || r[P2])"), /* 72 */ "And" OpHelp("r[P3]=(r[P1] && r[P2])"), - /* 73 */ "Sequence" OpHelp("r[P2]=cursor[P1].ctr++"), - /* 74 */ "NewRowid" OpHelp("r[P2]=rowid"), - /* 75 */ "Insert" OpHelp("intkey=r[P3] data=r[P2]"), + /* 73 */ "NotExists" OpHelp("intkey=r[P3]"), + /* 74 */ "Sequence" OpHelp("r[P2]=cursor[P1].ctr++"), + /* 75 */ "NewRowid" OpHelp("r[P2]=rowid"), /* 76 */ "IsNull" OpHelp("if r[P1]==NULL goto P2"), /* 77 */ "NotNull" OpHelp("if r[P1]!=NULL goto P2"), /* 78 */ "Ne" OpHelp("if r[P1]!=r[P3] goto P2"), @@ -25091,7 +25675,7 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ /* 81 */ "Le" OpHelp("if r[P1]<=r[P3] goto P2"), /* 82 */ "Lt" OpHelp("if r[P1]=r[P3] goto P2"), - /* 84 */ "InsertInt" OpHelp("intkey=P3 data=r[P2]"), + /* 84 */ "Insert" OpHelp("intkey=r[P3] data=r[P2]"), /* 85 */ "BitAnd" OpHelp("r[P3]=r[P1]&r[P2]"), /* 86 */ "BitOr" OpHelp("r[P3]=r[P1]|r[P2]"), /* 87 */ "ShiftLeft" OpHelp("r[P3]=r[P2]<0 goto P2"), - /* 138 */ "IfNeg" OpHelp("r[P1]+=P3, if r[P1]<0 goto P2"), - /* 139 */ "IfNotZero" OpHelp("if r[P1]!=0 then r[P1]+=P3, goto P2"), - /* 140 */ "DecrJumpZero" OpHelp("if (--r[P1])==0 goto P2"), - /* 141 */ "JumpZeroIncr" OpHelp("if (r[P1]++)==0 ) goto P2"), - /* 142 */ "AggFinal" OpHelp("accum=r[P1] N=P2"), - /* 143 */ "IncrVacuum" OpHelp(""), - /* 144 */ "Expire" OpHelp(""), - /* 145 */ "TableLock" OpHelp("iDb=P1 root=P2 write=P3"), - /* 146 */ "VBegin" OpHelp(""), - /* 147 */ "VCreate" OpHelp(""), - /* 148 */ "VDestroy" OpHelp(""), - /* 149 */ "VOpen" OpHelp(""), - /* 150 */ "VColumn" OpHelp("r[P3]=vcolumn(P2)"), - /* 151 */ "VNext" OpHelp(""), - /* 152 */ "VRename" OpHelp(""), - /* 153 */ "Pagecount" OpHelp(""), - /* 154 */ "MaxPgcnt" OpHelp(""), - /* 155 */ "Init" OpHelp("Start at P2"), - /* 156 */ "Noop" OpHelp(""), - /* 157 */ "Explain" OpHelp(""), + /* 134 */ "Param" OpHelp(""), + /* 135 */ "FkCounter" OpHelp("fkctr[P1]+=P2"), + /* 136 */ "FkIfZero" OpHelp("if fkctr[P1]==0 goto P2"), + /* 137 */ "MemMax" OpHelp("r[P1]=max(r[P1],r[P2])"), + /* 138 */ "IfPos" OpHelp("if r[P1]>0 goto P2"), + /* 139 */ "IfNeg" OpHelp("r[P1]+=P3, if r[P1]<0 goto P2"), + /* 140 */ "IfNotZero" OpHelp("if r[P1]!=0 then r[P1]+=P3, goto P2"), + /* 141 */ "DecrJumpZero" OpHelp("if (--r[P1])==0 goto P2"), + /* 142 */ "JumpZeroIncr" OpHelp("if (r[P1]++)==0 ) goto P2"), + /* 143 */ "AggStep0" OpHelp("accum=r[P3] step(r[P2@P5])"), + /* 144 */ "AggStep" OpHelp("accum=r[P3] step(r[P2@P5])"), + /* 145 */ "AggFinal" OpHelp("accum=r[P1] N=P2"), + /* 146 */ "IncrVacuum" OpHelp(""), + /* 147 */ "Expire" OpHelp(""), + /* 148 */ "TableLock" OpHelp("iDb=P1 root=P2 write=P3"), + /* 149 */ "VBegin" OpHelp(""), + /* 150 */ "VCreate" OpHelp(""), + /* 151 */ "VDestroy" OpHelp(""), + /* 152 */ "VOpen" OpHelp(""), + /* 153 */ "VColumn" OpHelp("r[P3]=vcolumn(P2)"), + /* 154 */ "VNext" OpHelp(""), + /* 155 */ "VRename" OpHelp(""), + /* 156 */ "Pagecount" OpHelp(""), + /* 157 */ "MaxPgcnt" OpHelp(""), + /* 158 */ "Init" OpHelp("Start at P2"), + /* 159 */ "Noop" OpHelp(""), + /* 160 */ "Explain" OpHelp(""), }; return azName[i]; } @@ -25217,6 +25804,7 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ ** * Definitions of sqlite3_vfs objects for all locking methods ** plus implementations of sqlite3_os_init() and sqlite3_os_end(). */ +/* #include "sqliteInt.h" */ #if SQLITE_OS_UNIX /* This file is used on unix only */ /* @@ -25264,6 +25852,17 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ # include #endif /* SQLITE_ENABLE_LOCKING_STYLE */ +#if defined(__APPLE__) && ((__MAC_OS_X_VERSION_MIN_REQUIRED > 1050) || \ + (__IPHONE_OS_VERSION_MIN_REQUIRED > 2000)) +# if (!defined(TARGET_OS_EMBEDDED) || (TARGET_OS_EMBEDDED==0)) \ + && (!defined(TARGET_IPHONE_SIMULATOR) || (TARGET_IPHONE_SIMULATOR==0)) +# define HAVE_GETHOSTUUID 1 +# else +# warning "gethostuuid() is disabled." +# endif +#endif + + #if OS_VXWORKS /* # include */ # include @@ -25459,16 +26058,6 @@ static pid_t randomnessPid = 0; # error "The MEMORY_DEBUG macro is obsolete. Use SQLITE_DEBUG instead." #endif -#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) -# ifndef SQLITE_DEBUG_OS_TRACE -# define SQLITE_DEBUG_OS_TRACE 0 -# endif - int sqlite3OSTrace = SQLITE_DEBUG_OS_TRACE; -# define OSTRACE(X) if( sqlite3OSTrace ) sqlite3DebugPrintf X -#else -# define OSTRACE(X) -#endif - /* ** Macros for performance tracing. Normally turned off. Only works ** on i486 hardware. @@ -25999,19 +26588,19 @@ static int robust_open(const char *z, int f, mode_t m){ ** unixEnterLeave() */ static void unixEnterMutex(void){ - sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); + sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } static void unixLeaveMutex(void){ - sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); + sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } #ifdef SQLITE_DEBUG static int unixMutexHeld(void) { - return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); + return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } #endif -#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) +#ifdef SQLITE_HAVE_OS_TRACE /* ** Helper function for printing out trace information from debugging ** binaries. This returns the string representation of the supplied @@ -26274,7 +26863,7 @@ static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){ assert( zAbsoluteName[0]=='/' ); n = (int)strlen(zAbsoluteName); - pNew = sqlite3_malloc( sizeof(*pNew) + (n+1) ); + pNew = sqlite3_malloc64( sizeof(*pNew) + (n+1) ); if( pNew==0 ) return 0; pNew->zCanonicalName = (char*)&pNew[1]; memcpy(pNew->zCanonicalName, zAbsoluteName, n+1); @@ -26678,7 +27267,7 @@ static int findInodeInfo( pInode = pInode->pNext; } if( pInode==0 ){ - pInode = sqlite3_malloc( sizeof(*pInode) ); + pInode = sqlite3_malloc64( sizeof(*pInode) ); if( pInode==0 ){ return SQLITE_NOMEM; } @@ -29199,7 +29788,7 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ return SQLITE_OK; } case SQLITE_FCNTL_TEMPFILENAME: { - char *zTFile = sqlite3_malloc( pFile->pVfs->mxPathname ); + char *zTFile = sqlite3_malloc64( pFile->pVfs->mxPathname ); if( zTFile ){ unixGetTempname(pFile->pVfs->mxPathname, zTFile); *(char**)pArg = zTFile; @@ -29640,7 +30229,7 @@ static int unixOpenSharedMemory(unixFile *pDbFd){ int nShmFilename; /* Size of the SHM filename in bytes */ /* Allocate space for the new unixShm object. */ - p = sqlite3_malloc( sizeof(*p) ); + p = sqlite3_malloc64( sizeof(*p) ); if( p==0 ) return SQLITE_NOMEM; memset(p, 0, sizeof(*p)); assert( pDbFd->pShm==0 ); @@ -29671,7 +30260,7 @@ static int unixOpenSharedMemory(unixFile *pDbFd){ #else nShmFilename = 6 + (int)strlen(zBasePath); #endif - pShmNode = sqlite3_malloc( sizeof(*pShmNode) + nShmFilename ); + pShmNode = sqlite3_malloc64( sizeof(*pShmNode) + nShmFilename ); if( pShmNode==0 ){ rc = SQLITE_NOMEM; goto shm_open_err; @@ -29881,7 +30470,7 @@ static int unixShmMap( goto shmpage_out; } }else{ - pMem = sqlite3_malloc(szRegion); + pMem = sqlite3_malloc64(szRegion); if( pMem==0 ){ rc = SQLITE_NOMEM; goto shmpage_out; @@ -30718,7 +31307,7 @@ static int fillInUnixFile( ** the afpLockingContext. */ afpLockingContext *pCtx; - pNew->lockingContext = pCtx = sqlite3_malloc( sizeof(*pCtx) ); + pNew->lockingContext = pCtx = sqlite3_malloc64( sizeof(*pCtx) ); if( pCtx==0 ){ rc = SQLITE_NOMEM; }else{ @@ -30748,7 +31337,7 @@ static int fillInUnixFile( int nFilename; assert( zFilename!=0 ); nFilename = (int)strlen(zFilename) + 6; - zLockFile = (char *)sqlite3_malloc(nFilename); + zLockFile = (char *)sqlite3_malloc64(nFilename); if( zLockFile==0 ){ rc = SQLITE_NOMEM; }else{ @@ -31125,7 +31714,7 @@ static int unixOpen( if( pUnused ){ fd = pUnused->fd; }else{ - pUnused = sqlite3_malloc(sizeof(*pUnused)); + pUnused = sqlite3_malloc64(sizeof(*pUnused)); if( !pUnused ){ return SQLITE_NOMEM; } @@ -31505,7 +32094,7 @@ static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){ */ memset(zBuf, 0, nBuf); randomnessPid = osGetpid(0); -#if !defined(SQLITE_TEST) +#if !defined(SQLITE_TEST) && !defined(SQLITE_OMIT_RANDOMNESS) { int fd, got; fd = robust_open("/dev/urandom", O_RDONLY, 0); @@ -31917,7 +32506,7 @@ static int proxyCreateUnixFile( if( pUnused ){ fd = pUnused->fd; }else{ - pUnused = sqlite3_malloc(sizeof(*pUnused)); + pUnused = sqlite3_malloc64(sizeof(*pUnused)); if( !pUnused ){ return SQLITE_NOMEM; } @@ -31950,7 +32539,7 @@ static int proxyCreateUnixFile( } } - pNew = (unixFile *)sqlite3_malloc(sizeof(*pNew)); + pNew = (unixFile *)sqlite3_malloc64(sizeof(*pNew)); if( pNew==NULL ){ rc = SQLITE_NOMEM; goto end_create_proxy; @@ -31983,8 +32572,10 @@ SQLITE_API int sqlite3_hostid_num = 0; #define PROXY_HOSTIDLEN 16 /* conch file host id length */ +#ifdef HAVE_GETHOSTUUID /* Not always defined in the headers as it ought to be */ extern int gethostuuid(uuid_t id, const struct timespec *wait); +#endif /* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN ** bytes of writable memory. @@ -31992,8 +32583,7 @@ extern int gethostuuid(uuid_t id, const struct timespec *wait); static int proxyGetHostID(unsigned char *pHostID, int *pError){ assert(PROXY_HOSTIDLEN == sizeof(uuid_t)); memset(pHostID, 0, PROXY_HOSTIDLEN); -# if defined(__APPLE__) && ((__MAC_OS_X_VERSION_MIN_REQUIRED > 1050) || \ - (__IPHONE_OS_VERSION_MIN_REQUIRED > 2000)) +#ifdef HAVE_GETHOSTUUID { struct timespec timeout = {1, 0}; /* 1 sec timeout */ if( gethostuuid(pHostID, &timeout) ){ @@ -32411,7 +33001,7 @@ static int proxyReleaseConch(unixFile *pFile){ /* ** Given the name of a database file, compute the name of its conch file. -** Store the conch filename in memory obtained from sqlite3_malloc(). +** Store the conch filename in memory obtained from sqlite3_malloc64(). ** Make *pConchPath point to the new name. Return SQLITE_OK on success ** or SQLITE_NOMEM if unable to obtain memory. ** @@ -32427,7 +33017,7 @@ static int proxyCreateConchPathname(char *dbPath, char **pConchPath){ /* Allocate space for the conch filename and initialize the name to ** the name of the original database file. */ - *pConchPath = conchPath = (char *)sqlite3_malloc(len + 8); + *pConchPath = conchPath = (char *)sqlite3_malloc64(len + 8); if( conchPath==0 ){ return SQLITE_NOMEM; } @@ -32543,7 +33133,7 @@ static int proxyTransformUnixFile(unixFile *pFile, const char *path) { OSTRACE(("TRANSPROXY %d for %s pid=%d\n", pFile->h, (lockPath ? lockPath : ":auto:"), osGetpid(0))); - pCtx = sqlite3_malloc( sizeof(*pCtx) ); + pCtx = sqlite3_malloc64( sizeof(*pCtx) ); if( pCtx==0 ){ return SQLITE_NOMEM; } @@ -32949,6 +33539,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_os_end(void){ ** ** This file contains code that is specific to Windows. */ +/* #include "sqliteInt.h" */ #if SQLITE_OS_WIN /* This file is used for Windows only */ /* @@ -32987,16 +33578,6 @@ SQLITE_API int SQLITE_STDCALL sqlite3_os_end(void){ # error "The MEMORY_DEBUG macro is obsolete. Use SQLITE_DEBUG instead." #endif -#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) -# ifndef SQLITE_DEBUG_OS_TRACE -# define SQLITE_DEBUG_OS_TRACE 0 -# endif - int sqlite3OSTrace = SQLITE_DEBUG_OS_TRACE; -# define OSTRACE(X) if( sqlite3OSTrace ) sqlite3DebugPrintf X -#else -# define OSTRACE(X) -#endif - /* ** Macros for performance tracing. Normally turned off. Only works ** on i486 hardware. @@ -33167,6 +33748,7 @@ SQLITE_API int sqlite3_open_file_count = 0; /* ** Include the header file for the Windows VFS. */ +/* #include "os_win.h" */ /* ** Compiling and using WAL mode requires several APIs that are only @@ -35900,7 +36482,7 @@ static int winSync(sqlite3_file *id, int flags){ BOOL rc; #endif #if !defined(NDEBUG) || !defined(SQLITE_NO_SYNC) || \ - (defined(SQLITE_TEST) && defined(SQLITE_DEBUG)) + defined(SQLITE_HAVE_OS_TRACE) /* ** Used when SQLITE_NO_SYNC is not defined and by the assert() and/or ** OSTRACE() macros. @@ -36158,6 +36740,12 @@ static int winLock(sqlite3_file *id, int locktype){ return SQLITE_OK; } + /* Do not allow any kind of write-lock on a read-only database + */ + if( (pFile->ctrlFlags & WINFILE_RDONLY)!=0 && locktype>=RESERVED_LOCK ){ + return SQLITE_IOERR_LOCK; + } + /* Make sure the locking sequence is correct */ assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK ); @@ -36527,14 +37115,14 @@ static SYSTEM_INFO winSysInfo; ** winShmLeaveMutex() */ static void winShmEnterMutex(void){ - sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); + sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } static void winShmLeaveMutex(void){ - sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); + sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } #ifndef NDEBUG static int winShmMutexHeld(void) { - return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); + return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } #endif @@ -36577,7 +37165,7 @@ struct winShmNode { int nRef; /* Number of winShm objects pointing to this */ winShm *pFirst; /* All winShm objects pointing to this */ winShmNode *pNext; /* Next in list of all winShmNode objects */ -#ifdef SQLITE_DEBUG +#if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) u8 nextShmId; /* Next available winShm.id value */ #endif }; @@ -36608,7 +37196,7 @@ struct winShm { u8 hasMutex; /* True if holding the winShmNode mutex */ u16 sharedMask; /* Mask of shared locks held */ u16 exclMask; /* Mask of exclusive locks held */ -#ifdef SQLITE_DEBUG +#if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) u8 id; /* Id of this connection with its winShmNode */ #endif }; @@ -36799,7 +37387,7 @@ static int winOpenSharedMemory(winFile *pDbFd){ /* Make the new connection a child of the winShmNode */ p->pShmNode = pShmNode; -#ifdef SQLITE_DEBUG +#if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) p->id = pShmNode->nextShmId++; #endif pShmNode->nRef++; @@ -37068,7 +37656,7 @@ static int winShmMap( } /* Map the requested memory region into this processes address space. */ - apNew = (struct ShmRegion *)sqlite3_realloc( + apNew = (struct ShmRegion *)sqlite3_realloc64( pShmNode->aRegion, (iRegion+1)*sizeof(apNew[0]) ); if( !apNew ){ @@ -38515,7 +39103,7 @@ static void winDlClose(sqlite3_vfs *pVfs, void *pHandle){ static int winRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf){ int n = 0; UNUSED_PARAMETER(pVfs); -#if defined(SQLITE_TEST) +#if defined(SQLITE_TEST) || defined(SQLITE_OMIT_RANDOMNESS) n = nBuf; memset(zBuf, 0, nBuf); #else @@ -38549,23 +39137,23 @@ static int winRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf){ memcpy(&zBuf[n], &i, sizeof(i)); n += sizeof(i); } -#endif #if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && SQLITE_WIN32_USE_UUID if( sizeof(UUID)<=nBuf-n ){ UUID id; memset(&id, 0, sizeof(UUID)); osUuidCreate(&id); - memcpy(zBuf, &id, sizeof(UUID)); + memcpy(&zBuf[n], &id, sizeof(UUID)); n += sizeof(UUID); } if( sizeof(UUID)<=nBuf-n ){ UUID id; memset(&id, 0, sizeof(UUID)); osUuidCreateSequential(&id); - memcpy(zBuf, &id, sizeof(UUID)); + memcpy(&zBuf[n], &id, sizeof(UUID)); n += sizeof(UUID); } #endif +#endif /* defined(SQLITE_TEST) || defined(SQLITE_ZERO_PRNG_SEED) */ return n; } @@ -38814,6 +39402,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_os_end(void){ ** start of a transaction, and is thus usually less than a few thousand, ** but can be as large as 2 billion for a really big database. */ +/* #include "sqliteInt.h" */ /* Size of the Bitvec structure in bytes. */ #define BITVEC_SZ 512 @@ -38905,10 +39494,10 @@ SQLITE_PRIVATE Bitvec *sqlite3BitvecCreate(u32 iSize){ ** If p is NULL (if the bitmap has not been created) or if ** i is out of range, then return false. */ -SQLITE_PRIVATE int sqlite3BitvecTest(Bitvec *p, u32 i){ - if( p==0 ) return 0; - if( i>p->iSize || i==0 ) return 0; +SQLITE_PRIVATE int sqlite3BitvecTestNotNull(Bitvec *p, u32 i){ + assert( p!=0 ); i--; + if( i>=p->iSize ) return 0; while( p->iDivisor ){ u32 bin = i/p->iDivisor; i = i%p->iDivisor; @@ -38928,6 +39517,9 @@ SQLITE_PRIVATE int sqlite3BitvecTest(Bitvec *p, u32 i){ return 0; } } +SQLITE_PRIVATE int sqlite3BitvecTest(Bitvec *p, u32 i){ + return p!=0 && sqlite3BitvecTestNotNull(p,i); +} /* ** Set the i-th bit. Return 0 on success and an error code if @@ -39120,7 +39712,7 @@ SQLITE_PRIVATE int sqlite3BitvecBuiltinTest(int sz, int *aOp){ ** bits to act as the reference */ pBitvec = sqlite3BitvecCreate( sz ); pV = sqlite3MallocZero( (sz+7)/8 + 1 ); - pTmpSpace = sqlite3_malloc(BITVEC_SZ); + pTmpSpace = sqlite3_malloc64(BITVEC_SZ); if( pBitvec==0 || pV==0 || pTmpSpace==0 ) goto bitvec_end; /* NULL pBitvec tests */ @@ -39200,6 +39792,7 @@ bitvec_end: ************************************************************************* ** This file implements that page cache. */ +/* #include "sqliteInt.h" */ /* ** A complete page cache is an instance of this structure. @@ -39216,7 +39809,6 @@ struct PCache { int (*xStress)(void*,PgHdr*); /* Call to try make a page clean */ void *pStress; /* Argument to xStress */ sqlite3_pcache *pCache; /* Pluggable cache module */ - PgHdr *pPage1; /* Reference to page 1 */ }; /********************************** Linked List Management ********************/ @@ -39294,9 +39886,6 @@ static void pcacheManageDirtyList(PgHdr *pPage, u8 addRemove){ */ static void pcacheUnpin(PgHdr *p){ if( p->pCache->bPurgeable ){ - if( p->pgno==1 ){ - p->pCache->pPage1 = 0; - } sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 0); } } @@ -39389,7 +39978,6 @@ SQLITE_PRIVATE int sqlite3PcacheSetPageSize(PCache *pCache, int szPage){ sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache); } pCache->pCache = pNew; - pCache->pPage1 = 0; pCache->szPage = szPage; } return SQLITE_OK; @@ -39514,13 +40102,14 @@ static SQLITE_NOINLINE PgHdr *pcacheFetchFinishWithInit( assert( pPage!=0 ); pPgHdr = (PgHdr*)pPage->pExtra; assert( pPgHdr->pPage==0 ); - memset(pPgHdr, 0, sizeof(PgHdr)); + memset(pPgHdr, 0, sizeof(PgHdr)); pPgHdr->pPage = pPage; pPgHdr->pData = pPage->pBuf; pPgHdr->pExtra = (void *)&pPgHdr[1]; memset(pPgHdr->pExtra, 0, pCache->szExtra); pPgHdr->pCache = pCache; pPgHdr->pgno = pgno; + pPgHdr->flags = PGHDR_CLEAN; return sqlite3PcacheFetchFinish(pCache,pgno,pPage); } @@ -39537,7 +40126,7 @@ SQLITE_PRIVATE PgHdr *sqlite3PcacheFetchFinish( ){ PgHdr *pPgHdr; - if( pPage==0 ) return 0; + assert( pPage!=0 ); pPgHdr = (PgHdr *)pPage->pExtra; if( !pPgHdr->pPage ){ @@ -39547,9 +40136,6 @@ SQLITE_PRIVATE PgHdr *sqlite3PcacheFetchFinish( pCache->nRef++; } pPgHdr->nRef++; - if( pgno==1 ){ - pCache->pPage1 = pPgHdr; - } return pPgHdr; } @@ -39562,7 +40148,7 @@ SQLITE_PRIVATE void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){ p->nRef--; if( p->nRef==0 ){ p->pCache->nRef--; - if( (p->flags&PGHDR_DIRTY)==0 ){ + if( p->flags&PGHDR_CLEAN ){ pcacheUnpin(p); }else if( p->pDirtyPrev!=0 ){ /* Move the page to the head of the dirty list. */ @@ -39590,9 +40176,6 @@ SQLITE_PRIVATE void sqlite3PcacheDrop(PgHdr *p){ pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE); } p->pCache->nRef--; - if( p->pgno==1 ){ - p->pCache->pPage1 = 0; - } sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 1); } @@ -39601,11 +40184,14 @@ SQLITE_PRIVATE void sqlite3PcacheDrop(PgHdr *p){ ** make it so. */ SQLITE_PRIVATE void sqlite3PcacheMakeDirty(PgHdr *p){ - p->flags &= ~PGHDR_DONT_WRITE; assert( p->nRef>0 ); - if( 0==(p->flags & PGHDR_DIRTY) ){ - p->flags |= PGHDR_DIRTY; - pcacheManageDirtyList(p, PCACHE_DIRTYLIST_ADD); + if( p->flags & (PGHDR_CLEAN|PGHDR_DONT_WRITE) ){ + p->flags &= ~PGHDR_DONT_WRITE; + if( p->flags & PGHDR_CLEAN ){ + p->flags ^= (PGHDR_DIRTY|PGHDR_CLEAN); + assert( (p->flags & (PGHDR_DIRTY|PGHDR_CLEAN))==PGHDR_DIRTY ); + pcacheManageDirtyList(p, PCACHE_DIRTYLIST_ADD); + } } } @@ -39615,8 +40201,10 @@ SQLITE_PRIVATE void sqlite3PcacheMakeDirty(PgHdr *p){ */ SQLITE_PRIVATE void sqlite3PcacheMakeClean(PgHdr *p){ if( (p->flags & PGHDR_DIRTY) ){ + assert( (p->flags & PGHDR_CLEAN)==0 ); pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE); - p->flags &= ~(PGHDR_DIRTY|PGHDR_NEED_SYNC); + p->flags &= ~(PGHDR_DIRTY|PGHDR_NEED_SYNC|PGHDR_WRITEABLE); + p->flags |= PGHDR_CLEAN; if( p->nRef==0 ){ pcacheUnpin(p); } @@ -39683,9 +40271,14 @@ SQLITE_PRIVATE void sqlite3PcacheTruncate(PCache *pCache, Pgno pgno){ sqlite3PcacheMakeClean(p); } } - if( pgno==0 && pCache->pPage1 ){ - memset(pCache->pPage1->pData, 0, pCache->szPage); - pgno = 1; + if( pgno==0 && pCache->nRef ){ + sqlite3_pcache_page *pPage1; + pPage1 = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache,1,0); + if( ALWAYS(pPage1) ){ /* Page 1 is always available in cache, because + ** pCache->nRef>0 */ + memset(pPage1->pBuf, 0, pCache->szPage); + pgno = 1; + } } sqlite3GlobalConfig.pcache2.xTruncate(pCache->pCache, pgno+1); } @@ -39876,8 +40469,72 @@ SQLITE_PRIVATE void sqlite3PcacheIterateDirty(PCache *pCache, void (*xIter)(PgHd ** of the SQLITE_CONFIG_PAGECACHE and sqlite3_release_memory() features. ** If the default page cache implementation is overridden, then neither of ** these two features are available. +** +** A Page cache line looks like this: +** +** ------------------------------------------------------------- +** | database page content | PgHdr1 | MemPage | PgHdr | +** ------------------------------------------------------------- +** +** The database page content is up front (so that buffer overreads tend to +** flow harmlessly into the PgHdr1, MemPage, and PgHdr extensions). MemPage +** is the extension added by the btree.c module containing information such +** as the database page number and how that database page is used. PgHdr +** is added by the pcache.c layer and contains information used to keep track +** of which pages are "dirty". PgHdr1 is an extension added by this +** module (pcache1.c). The PgHdr1 header is a subclass of sqlite3_pcache_page. +** PgHdr1 contains information needed to look up a page by its page number. +** The superclass sqlite3_pcache_page.pBuf points to the start of the +** database page content and sqlite3_pcache_page.pExtra points to PgHdr. +** +** The size of the extension (MemPage+PgHdr+PgHdr1) can be determined at +** runtime using sqlite3_config(SQLITE_CONFIG_PCACHE_HDRSZ, &size). The +** sizes of the extensions sum to 272 bytes on x64 for 3.8.10, but this +** size can vary according to architecture, compile-time options, and +** SQLite library version number. +** +** If SQLITE_PCACHE_SEPARATE_HEADER is defined, then the extension is obtained +** using a separate memory allocation from the database page content. This +** seeks to overcome the "clownshoe" problem (also called "internal +** fragmentation" in academic literature) of allocating a few bytes more +** than a power of two with the memory allocator rounding up to the next +** power of two, and leaving the rounded-up space unused. +** +** This module tracks pointers to PgHdr1 objects. Only pcache.c communicates +** with this module. Information is passed back and forth as PgHdr1 pointers. +** +** The pcache.c and pager.c modules deal pointers to PgHdr objects. +** The btree.c module deals with pointers to MemPage objects. +** +** SOURCE OF PAGE CACHE MEMORY: +** +** Memory for a page might come from any of three sources: +** +** (1) The general-purpose memory allocator - sqlite3Malloc() +** (2) Global page-cache memory provided using sqlite3_config() with +** SQLITE_CONFIG_PAGECACHE. +** (3) PCache-local bulk allocation. +** +** The third case is a chunk of heap memory (defaulting to 100 pages worth) +** that is allocated when the page cache is created. The size of the local +** bulk allocation can be adjusted using +** +** sqlite3_config(SQLITE_CONFIG_PCACHE, 0, 0, N). +** +** If N is positive, then N pages worth of memory are allocated using a single +** sqlite3Malloc() call and that memory is used for the first N pages allocated. +** Or if N is negative, then -1024*N bytes of memory are allocated and used +** for as many pages as can be accomodated. +** +** Only one of (2) or (3) can be used. Once the memory available to (2) or +** (3) is exhausted, subsequent allocations fail over to the general-purpose +** memory allocator (1). +** +** Earlier versions of SQLite used only methods (1) and (2). But experiments +** show that method (3) with N==100 provides about a 5% performance boost for +** common workloads. */ - +/* #include "sqliteInt.h" */ typedef struct PCache1 PCache1; typedef struct PgHdr1 PgHdr1; @@ -39930,8 +40587,9 @@ struct PCache1 { ** The PGroup mutex must be held when accessing nMax. */ PGroup *pGroup; /* PGroup this cache belongs to */ - int szPage; /* Size of allocated pages in bytes */ - int szExtra; /* Size of extra space in bytes */ + int szPage; /* Size of database content section */ + int szExtra; /* sizeof(MemPage)+sizeof(PgHdr) */ + int szAlloc; /* Total size of one pcache line */ int bPurgeable; /* True if cache is purgeable */ unsigned int nMin; /* Minimum number of pages reserved */ unsigned int nMax; /* Configured "cache_size" value */ @@ -39945,6 +40603,8 @@ struct PCache1 { unsigned int nPage; /* Total number of pages in apHash */ unsigned int nHash; /* Number of slots in apHash[] */ PgHdr1 **apHash; /* Hash table for fast lookup by key */ + PgHdr1 *pFree; /* List of unused pcache-local pages */ + void *pBulk; /* Bulk memory used by pcache-local */ }; /* @@ -39957,6 +40617,7 @@ struct PgHdr1 { sqlite3_pcache_page page; unsigned int iKey; /* Key value (page number) */ u8 isPinned; /* Page in use, not on the LRU list */ + u8 isBulkLocal; /* This page from bulk local storage */ PgHdr1 *pNext; /* Next in hash table chain */ PCache1 *pCache; /* Cache that currently owns this page */ PgHdr1 *pLruNext; /* Next in LRU list of unpinned pages */ @@ -39964,8 +40625,8 @@ struct PgHdr1 { }; /* -** Free slots in the allocator used to divide up the buffer provided using -** the SQLITE_CONFIG_PAGECACHE mechanism. +** Free slots in the allocator used to divide up the global page cache +** buffer provided using the SQLITE_CONFIG_PAGECACHE mechanism. */ struct PgFreeslot { PgFreeslot *pNext; /* Next free slot */ @@ -39983,10 +40644,12 @@ static SQLITE_WSD struct PCacheGlobal { ** The nFreeSlot and pFree values do require mutex protection. */ int isInit; /* True if initialized */ + int separateCache; /* Use a new PGroup for each PCache */ + int nInitPage; /* Initial bulk allocation size */ int szSlot; /* Size of each free slot */ int nSlot; /* The number of pcache slots */ int nReserve; /* Try to keep nFreeSlot above this */ - void *pStart, *pEnd; /* Bounds of pagecache malloc range */ + void *pStart, *pEnd; /* Bounds of global page cache memory */ /* Above requires no mutex. Use mutex below for variable that follow. */ sqlite3_mutex *mutex; /* Mutex for accessing the following: */ PgFreeslot *pFree; /* Free page blocks */ @@ -40008,8 +40671,15 @@ static SQLITE_WSD struct PCacheGlobal { /* ** Macros to enter and leave the PCache LRU mutex. */ -#define pcache1EnterMutex(X) sqlite3_mutex_enter((X)->mutex) -#define pcache1LeaveMutex(X) sqlite3_mutex_leave((X)->mutex) +#if !defined(SQLITE_ENABLE_MEMORY_MANAGEMENT) || SQLITE_THREADSAFE==0 +# define pcache1EnterMutex(X) assert((X)->mutex==0) +# define pcache1LeaveMutex(X) assert((X)->mutex==0) +# define PCACHE1_MIGHT_USE_GROUP_MUTEX 0 +#else +# define pcache1EnterMutex(X) sqlite3_mutex_enter((X)->mutex) +# define pcache1LeaveMutex(X) sqlite3_mutex_leave((X)->mutex) +# define PCACHE1_MIGHT_USE_GROUP_MUTEX 1 +#endif /******************************************************************************/ /******** Page Allocation/SQLITE_CONFIG_PCACHE Related Functions **************/ @@ -40026,6 +40696,7 @@ static SQLITE_WSD struct PCacheGlobal { SQLITE_PRIVATE void sqlite3PCacheBufferSetup(void *pBuf, int sz, int n){ if( pcache1.isInit ){ PgFreeslot *p; + if( pBuf==0 ) sz = n = 0; sz = ROUNDDOWN8(sz); pcache1.szSlot = sz; pcache1.nSlot = pcache1.nFreeSlot = n; @@ -40043,6 +40714,43 @@ SQLITE_PRIVATE void sqlite3PCacheBufferSetup(void *pBuf, int sz, int n){ } } +/* +** Try to initialize the pCache->pFree and pCache->pBulk fields. Return +** true if pCache->pFree ends up containing one or more free pages. +*/ +static int pcache1InitBulk(PCache1 *pCache){ + i64 szBulk; + char *zBulk; + if( pcache1.nInitPage==0 ) return 0; + /* Do not bother with a bulk allocation if the cache size very small */ + if( pCache->nMax<3 ) return 0; + sqlite3BeginBenignMalloc(); + if( pcache1.nInitPage>0 ){ + szBulk = pCache->szAlloc * (i64)pcache1.nInitPage; + }else{ + szBulk = -1024 * (i64)pcache1.nInitPage; + } + if( szBulk > pCache->szAlloc*(i64)pCache->nMax ){ + szBulk = pCache->szAlloc*pCache->nMax; + } + zBulk = pCache->pBulk = sqlite3Malloc( szBulk ); + sqlite3EndBenignMalloc(); + if( zBulk ){ + int nBulk = sqlite3MallocSize(zBulk)/pCache->szAlloc; + int i; + for(i=0; iszPage]; + pX->page.pBuf = zBulk; + pX->page.pExtra = &pX[1]; + pX->isBulkLocal = 1; + pX->pNext = pCache->pFree; + pCache->pFree = pX; + zBulk += pCache->szAlloc; + } + } + return pCache->pFree!=0; +} + /* ** Malloc function used within this file to allocate space from the buffer ** configured using sqlite3_config(SQLITE_CONFIG_PAGECACHE) option. If no @@ -40090,9 +40798,9 @@ static void *pcache1Alloc(int nByte){ /* ** Free an allocated buffer obtained from pcache1Alloc(). */ -static int pcache1Free(void *p){ +static void pcache1Free(void *p){ int nFreed = 0; - if( p==0 ) return 0; + if( p==0 ) return; if( p>=pcache1.pStart && ppGroup->mutex) ); - pcache1LeaveMutex(pCache->pGroup); + if( pCache->pFree || (pCache->nPage==0 && pcache1InitBulk(pCache)) ){ + p = pCache->pFree; + pCache->pFree = p->pNext; + p->pNext = 0; + }else{ +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + /* The group mutex must be released before pcache1Alloc() is called. This + ** is because it might call sqlite3_release_memory(), which assumes that + ** this mutex is not held. */ + assert( pcache1.separateCache==0 ); + assert( pCache->pGroup==&pcache1.grp ); + pcache1LeaveMutex(pCache->pGroup); +#endif #ifdef SQLITE_PCACHE_SEPARATE_HEADER - pPg = pcache1Alloc(pCache->szPage); - p = sqlite3Malloc(sizeof(PgHdr1) + pCache->szExtra); - if( !pPg || !p ){ - pcache1Free(pPg); - sqlite3_free(p); - pPg = 0; - } + pPg = pcache1Alloc(pCache->szPage); + p = sqlite3Malloc(sizeof(PgHdr1) + pCache->szExtra); + if( !pPg || !p ){ + pcache1Free(pPg); + sqlite3_free(p); + pPg = 0; + } #else - pPg = pcache1Alloc(ROUND8(sizeof(PgHdr1)) + pCache->szPage + pCache->szExtra); - p = (PgHdr1 *)&((u8 *)pPg)[pCache->szPage]; + pPg = pcache1Alloc(pCache->szAlloc); + p = (PgHdr1 *)&((u8 *)pPg)[pCache->szPage]; #endif - pcache1EnterMutex(pCache->pGroup); - - if( pPg ){ +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + pcache1EnterMutex(pCache->pGroup); +#endif + if( pPg==0 ) return 0; p->page.pBuf = pPg; p->page.pExtra = &p[1]; - if( pCache->bPurgeable ){ - pCache->pGroup->nCurrentPage++; - } - return p; + p->isBulkLocal = 0; } - return 0; + if( pCache->bPurgeable ){ + pCache->pGroup->nCurrentPage++; + } + return p; } /* ** Free a page object allocated by pcache1AllocPage(). -** -** The pointer is allowed to be NULL, which is prudent. But it turns out -** that the current implementation happens to never call this routine -** with a NULL pointer, so we mark the NULL test with ALWAYS(). */ static void pcache1FreePage(PgHdr1 *p){ - if( ALWAYS(p) ){ - PCache1 *pCache = p->pCache; - assert( sqlite3_mutex_held(p->pCache->pGroup->mutex) ); + PCache1 *pCache; + assert( p!=0 ); + pCache = p->pCache; + assert( sqlite3_mutex_held(p->pCache->pGroup->mutex) ); + if( p->isBulkLocal ){ + p->pNext = pCache->pFree; + pCache->pFree = p; + }else{ pcache1Free(p->page.pBuf); #ifdef SQLITE_PCACHE_SEPARATE_HEADER sqlite3_free(p); #endif - if( pCache->bPurgeable ){ - pCache->pGroup->nCurrentPage--; - } + } + if( pCache->bPurgeable ){ + pCache->pGroup->nCurrentPage--; } } @@ -40285,41 +41003,41 @@ static void pcache1ResizeHash(PCache1 *p){ ** ** The PGroup mutex must be held when this function is called. */ -static void pcache1PinPage(PgHdr1 *pPage){ +static PgHdr1 *pcache1PinPage(PgHdr1 *pPage){ PCache1 *pCache; - PGroup *pGroup; assert( pPage!=0 ); assert( pPage->isPinned==0 ); pCache = pPage->pCache; - pGroup = pCache->pGroup; - assert( pPage->pLruNext || pPage==pGroup->pLruTail ); - assert( pPage->pLruPrev || pPage==pGroup->pLruHead ); - assert( sqlite3_mutex_held(pGroup->mutex) ); + assert( pPage->pLruNext || pPage==pCache->pGroup->pLruTail ); + assert( pPage->pLruPrev || pPage==pCache->pGroup->pLruHead ); + assert( sqlite3_mutex_held(pCache->pGroup->mutex) ); if( pPage->pLruPrev ){ pPage->pLruPrev->pLruNext = pPage->pLruNext; }else{ - pGroup->pLruHead = pPage->pLruNext; + pCache->pGroup->pLruHead = pPage->pLruNext; } if( pPage->pLruNext ){ pPage->pLruNext->pLruPrev = pPage->pLruPrev; }else{ - pGroup->pLruTail = pPage->pLruPrev; + pCache->pGroup->pLruTail = pPage->pLruPrev; } pPage->pLruNext = 0; pPage->pLruPrev = 0; pPage->isPinned = 1; pCache->nRecyclable--; + return pPage; } /* ** Remove the page supplied as an argument from the hash table ** (PCache1.apHash structure) that it is currently stored in. +** Also free the page if freePage is true. ** ** The PGroup mutex must be held when this function is called. */ -static void pcache1RemoveFromHash(PgHdr1 *pPage){ +static void pcache1RemoveFromHash(PgHdr1 *pPage, int freeFlag){ unsigned int h; PCache1 *pCache = pPage->pCache; PgHdr1 **pp; @@ -40330,21 +41048,26 @@ static void pcache1RemoveFromHash(PgHdr1 *pPage){ *pp = (*pp)->pNext; pCache->nPage--; + if( freeFlag ) pcache1FreePage(pPage); } /* ** If there are currently more than nMaxPage pages allocated, try ** to recycle pages to reduce the number allocated to nMaxPage. */ -static void pcache1EnforceMaxPage(PGroup *pGroup){ +static void pcache1EnforceMaxPage(PCache1 *pCache){ + PGroup *pGroup = pCache->pGroup; assert( sqlite3_mutex_held(pGroup->mutex) ); while( pGroup->nCurrentPage>pGroup->nMaxPage && pGroup->pLruTail ){ PgHdr1 *p = pGroup->pLruTail; assert( p->pCache->pGroup==pGroup ); assert( p->isPinned==0 ); pcache1PinPage(p); - pcache1RemoveFromHash(p); - pcache1FreePage(p); + pcache1RemoveFromHash(p, 1); + } + if( pCache->nPage==0 && pCache->pBulk ){ + sqlite3_free(pCache->pBulk); + pCache->pBulk = pCache->pFree = 0; } } @@ -40390,10 +41113,45 @@ static int pcache1Init(void *NotUsed){ UNUSED_PARAMETER(NotUsed); assert( pcache1.isInit==0 ); memset(&pcache1, 0, sizeof(pcache1)); + + + /* + ** The pcache1.separateCache variable is true if each PCache has its own + ** private PGroup (mode-1). pcache1.separateCache is false if the single + ** PGroup in pcache1.grp is used for all page caches (mode-2). + ** + ** * Always use a unified cache (mode-2) if ENABLE_MEMORY_MANAGEMENT + ** + ** * Use a unified cache in single-threaded applications that have + ** configured a start-time buffer for use as page-cache memory using + ** sqlite3_config(SQLITE_CONFIG_PAGECACHE, pBuf, sz, N) with non-NULL + ** pBuf argument. + ** + ** * Otherwise use separate caches (mode-1) + */ +#if defined(SQLITE_ENABLE_MEMORY_MANAGEMENT) + pcache1.separateCache = 0; +#elif SQLITE_THREADSAFE + pcache1.separateCache = sqlite3GlobalConfig.pPage==0 + || sqlite3GlobalConfig.bCoreMutex>0; +#else + pcache1.separateCache = sqlite3GlobalConfig.pPage==0; +#endif + +#if SQLITE_THREADSAFE if( sqlite3GlobalConfig.bCoreMutex ){ pcache1.grp.mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU); pcache1.mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_PMEM); } +#endif + if( pcache1.separateCache + && sqlite3GlobalConfig.nPage!=0 + && sqlite3GlobalConfig.pPage==0 + ){ + pcache1.nInitPage = sqlite3GlobalConfig.nPage; + }else{ + pcache1.nInitPage = 0; + } pcache1.grp.mxPinned = 10; pcache1.isInit = 1; return SQLITE_OK; @@ -40423,31 +41181,13 @@ static sqlite3_pcache *pcache1Create(int szPage, int szExtra, int bPurgeable){ PGroup *pGroup; /* The group the new page cache will belong to */ int sz; /* Bytes of memory required to allocate the new cache */ - /* - ** The separateCache variable is true if each PCache has its own private - ** PGroup. In other words, separateCache is true for mode (1) where no - ** mutexing is required. - ** - ** * Always use a unified cache (mode-2) if ENABLE_MEMORY_MANAGEMENT - ** - ** * Always use a unified cache in single-threaded applications - ** - ** * Otherwise (if multi-threaded and ENABLE_MEMORY_MANAGEMENT is off) - ** use separate caches (mode-1) - */ -#if defined(SQLITE_ENABLE_MEMORY_MANAGEMENT) || SQLITE_THREADSAFE==0 - const int separateCache = 0; -#else - int separateCache = sqlite3GlobalConfig.bCoreMutex>0; -#endif - assert( (szPage & (szPage-1))==0 && szPage>=512 && szPage<=65536 ); assert( szExtra < 300 ); - sz = sizeof(PCache1) + sizeof(PGroup)*separateCache; + sz = sizeof(PCache1) + sizeof(PGroup)*pcache1.separateCache; pCache = (PCache1 *)sqlite3MallocZero(sz); if( pCache ){ - if( separateCache ){ + if( pcache1.separateCache ){ pGroup = (PGroup*)&pCache[1]; pGroup->mxPinned = 10; }else{ @@ -40456,6 +41196,7 @@ static sqlite3_pcache *pcache1Create(int szPage, int szExtra, int bPurgeable){ pCache->pGroup = pGroup; pCache->szPage = szPage; pCache->szExtra = szExtra; + pCache->szAlloc = szPage + szExtra + ROUND8(sizeof(PgHdr1)); pCache->bPurgeable = (bPurgeable ? 1 : 0); pcache1EnterMutex(pGroup); pcache1ResizeHash(pCache); @@ -40487,7 +41228,7 @@ static void pcache1Cachesize(sqlite3_pcache *p, int nMax){ pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage; pCache->nMax = nMax; pCache->n90pct = pCache->nMax*9/10; - pcache1EnforceMaxPage(pGroup); + pcache1EnforceMaxPage(pCache); pcache1LeaveMutex(pGroup); } } @@ -40505,7 +41246,7 @@ static void pcache1Shrink(sqlite3_pcache *p){ pcache1EnterMutex(pGroup); savedMaxPage = pGroup->nMaxPage; pGroup->nMaxPage = 0; - pcache1EnforceMaxPage(pGroup); + pcache1EnforceMaxPage(pCache); pGroup->nMaxPage = savedMaxPage; pcache1LeaveMutex(pGroup); } @@ -40558,26 +41299,17 @@ static SQLITE_NOINLINE PgHdr1 *pcache1FetchStage2( assert( pCache->nHash>0 && pCache->apHash ); /* Step 4. Try to recycle a page. */ - if( pCache->bPurgeable && pGroup->pLruTail && ( - (pCache->nPage+1>=pCache->nMax) - || pGroup->nCurrentPage>=pGroup->nMaxPage - || pcache1UnderMemoryPressure(pCache) - )){ + if( pCache->bPurgeable + && pGroup->pLruTail + && ((pCache->nPage+1>=pCache->nMax) || pcache1UnderMemoryPressure(pCache)) + ){ PCache1 *pOther; pPage = pGroup->pLruTail; assert( pPage->isPinned==0 ); - pcache1RemoveFromHash(pPage); + pcache1RemoveFromHash(pPage, 0); pcache1PinPage(pPage); pOther = pPage->pCache; - - /* We want to verify that szPage and szExtra are the same for pOther - ** and pCache. Assert that we can verify this by comparing sums. */ - assert( (pCache->szPage & (pCache->szPage-1))==0 && pCache->szPage>=512 ); - assert( pCache->szExtra<512 ); - assert( (pOther->szPage & (pOther->szPage-1))==0 && pOther->szPage>=512 ); - assert( pOther->szExtra<512 ); - - if( pOther->szPage+pOther->szExtra != pCache->szPage+pCache->szExtra ){ + if( pOther->szAlloc != pCache->szAlloc ){ pcache1FreePage(pPage); pPage = 0; }else{ @@ -40589,9 +41321,9 @@ static SQLITE_NOINLINE PgHdr1 *pcache1FetchStage2( ** attempt to allocate a new one. */ if( !pPage ){ - if( createFlag==1 ) sqlite3BeginBenignMalloc(); + if( createFlag==1 ){ sqlite3BeginBenignMalloc(); } pPage = pcache1AllocPage(pCache); - if( createFlag==1 ) sqlite3EndBenignMalloc(); + if( createFlag==1 ){ sqlite3EndBenignMalloc(); } } if( pPage ){ @@ -40665,8 +41397,13 @@ static SQLITE_NOINLINE PgHdr1 *pcache1FetchStage2( ** proceed to step 5. ** ** 5. Otherwise, allocate and return a new page buffer. +** +** There are two versions of this routine. pcache1FetchWithMutex() is +** the general case. pcache1FetchNoMutex() is a faster implementation for +** the common case where pGroup->mutex is NULL. The pcache1Fetch() wrapper +** invokes the appropriate routine. */ -static sqlite3_pcache_page *pcache1Fetch( +static PgHdr1 *pcache1FetchNoMutex( sqlite3_pcache *p, unsigned int iKey, int createFlag @@ -40674,28 +41411,63 @@ static sqlite3_pcache_page *pcache1Fetch( PCache1 *pCache = (PCache1 *)p; PgHdr1 *pPage = 0; - assert( offsetof(PgHdr1,page)==0 ); - assert( pCache->bPurgeable || createFlag!=1 ); - assert( pCache->bPurgeable || pCache->nMin==0 ); - assert( pCache->bPurgeable==0 || pCache->nMin==10 ); - assert( pCache->nMin==0 || pCache->bPurgeable ); - assert( pCache->nHash>0 ); - pcache1EnterMutex(pCache->pGroup); - /* Step 1: Search the hash table for an existing entry. */ pPage = pCache->apHash[iKey % pCache->nHash]; while( pPage && pPage->iKey!=iKey ){ pPage = pPage->pNext; } /* Step 2: Abort if no existing page is found and createFlag is 0 */ if( pPage ){ - if( !pPage->isPinned ) pcache1PinPage(pPage); + if( !pPage->isPinned ){ + return pcache1PinPage(pPage); + }else{ + return pPage; + } }else if( createFlag ){ /* Steps 3, 4, and 5 implemented by this subroutine */ - pPage = pcache1FetchStage2(pCache, iKey, createFlag); + return pcache1FetchStage2(pCache, iKey, createFlag); + }else{ + return 0; } +} +#if PCACHE1_MIGHT_USE_GROUP_MUTEX +static PgHdr1 *pcache1FetchWithMutex( + sqlite3_pcache *p, + unsigned int iKey, + int createFlag +){ + PCache1 *pCache = (PCache1 *)p; + PgHdr1 *pPage; + + pcache1EnterMutex(pCache->pGroup); + pPage = pcache1FetchNoMutex(p, iKey, createFlag); assert( pPage==0 || pCache->iMaxKey>=iKey ); pcache1LeaveMutex(pCache->pGroup); - return (sqlite3_pcache_page*)pPage; + return pPage; +} +#endif +static sqlite3_pcache_page *pcache1Fetch( + sqlite3_pcache *p, + unsigned int iKey, + int createFlag +){ +#if PCACHE1_MIGHT_USE_GROUP_MUTEX || defined(SQLITE_DEBUG) + PCache1 *pCache = (PCache1 *)p; +#endif + + assert( offsetof(PgHdr1,page)==0 ); + assert( pCache->bPurgeable || createFlag!=1 ); + assert( pCache->bPurgeable || pCache->nMin==0 ); + assert( pCache->bPurgeable==0 || pCache->nMin==10 ); + assert( pCache->nMin==0 || pCache->bPurgeable ); + assert( pCache->nHash>0 ); +#if PCACHE1_MIGHT_USE_GROUP_MUTEX + if( pCache->pGroup->mutex ){ + return (sqlite3_pcache_page*)pcache1FetchWithMutex(p, iKey, createFlag); + }else +#endif + { + return (sqlite3_pcache_page*)pcache1FetchNoMutex(p, iKey, createFlag); + } } @@ -40724,8 +41496,7 @@ static void pcache1Unpin( assert( pPage->isPinned==1 ); if( reuseUnlikely || pGroup->nCurrentPage>pGroup->nMaxPage ){ - pcache1RemoveFromHash(pPage); - pcache1FreePage(pPage); + pcache1RemoveFromHash(pPage, 1); }else{ /* Add the page to the PGroup LRU list. */ if( pGroup->pLruHead ){ @@ -40812,8 +41583,9 @@ static void pcache1Destroy(sqlite3_pcache *p){ assert( pGroup->nMinPage >= pCache->nMin ); pGroup->nMinPage -= pCache->nMin; pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage; - pcache1EnforceMaxPage(pGroup); + pcache1EnforceMaxPage(pCache); pcache1LeaveMutex(pGroup); + sqlite3_free(pCache->pBulk); sqlite3_free(pCache->apHash); sqlite3_free(pCache); } @@ -40869,7 +41641,7 @@ SQLITE_PRIVATE int sqlite3PcacheReleaseMemory(int nReq){ int nFree = 0; assert( sqlite3_mutex_notheld(pcache1.grp.mutex) ); assert( sqlite3_mutex_notheld(pcache1.mutex) ); - if( pcache1.pStart==0 ){ + if( sqlite3GlobalConfig.nPage==0 ){ PgHdr1 *p; pcache1EnterMutex(&pcache1.grp); while( (nReq<0 || nFreeisPinned==0 ); pcache1PinPage(p); - pcache1RemoveFromHash(p); - pcache1FreePage(p); + pcache1RemoveFromHash(p, 1); } pcache1LeaveMutex(&pcache1.grp); } @@ -40976,6 +41747,7 @@ SQLITE_PRIVATE void sqlite3PcacheStats( ** There is an added cost of O(N) when switching between TEST and ** SMALLEST primitives. */ +/* #include "sqliteInt.h" */ /* @@ -41445,6 +42217,7 @@ SQLITE_PRIVATE int sqlite3RowSetTest(RowSet *pRowSet, int iBatch, sqlite3_int64 ** another is writing. */ #ifndef SQLITE_OMIT_DISKIO +/* #include "sqliteInt.h" */ /************** Include wal.h in the middle of pager.c ***********************/ /************** Begin file wal.h *********************************************/ /* @@ -41466,6 +42239,7 @@ SQLITE_PRIVATE int sqlite3RowSetTest(RowSet *pRowSet, int iBatch, sqlite3_int64 #ifndef _WAL_H_ #define _WAL_H_ +/* #include "sqliteInt.h" */ /* Additional values that can be added to the sync_flags argument of ** sqlite3WalFrames(): @@ -42021,9 +42795,9 @@ struct PagerSavepoint { /* ** Bits of the Pager.doNotSpill flag. See further description below. */ -#define SPILLFLAG_OFF 0x01 /* Never spill cache. Set via pragma */ -#define SPILLFLAG_ROLLBACK 0x02 /* Current rolling back, so do not spill */ -#define SPILLFLAG_NOSYNC 0x04 /* Spill is ok, but do not sync */ +#define SPILLFLAG_OFF 0x01 /* Never spill cache. Set via pragma */ +#define SPILLFLAG_ROLLBACK 0x02 /* Current rolling back, so do not spill */ +#define SPILLFLAG_NOSYNC 0x04 /* Spill is ok, but do not sync */ /* ** An open page cache is an instance of struct Pager. A description of @@ -42105,11 +42879,11 @@ struct PagerSavepoint { ** while it is being traversed by code in pager_playback(). The SPILLFLAG_OFF ** case is a user preference. ** -** If the SPILLFLAG_NOSYNC bit is set, writing to the database from pagerStress() -** is permitted, but syncing the journal file is not. This flag is set -** by sqlite3PagerWrite() when the file-system sector-size is larger than -** the database page-size in order to prevent a journal sync from happening -** in between the journalling of two pages on the same sector. +** If the SPILLFLAG_NOSYNC bit is set, writing to the database from +** pagerStress() is permitted, but syncing the journal file is not. +** This flag is set by sqlite3PagerWrite() when the file-system sector-size +** is larger than the database page-size in order to prevent a journal sync +** from happening in between the journalling of two pages on the same sector. ** ** subjInMemory ** @@ -42212,7 +42986,7 @@ struct Pager { u8 doNotSpill; /* Do not spill the cache when non-zero */ u8 subjInMemory; /* True to use in-memory sub-journals */ u8 bUseFetch; /* True to use xFetch() */ - u8 hasBeenUsed; /* True if any content previously read from this pager*/ + u8 hasBeenUsed; /* True if any content previously read */ Pgno dbSize; /* Number of pages in the database */ Pgno dbOrigSize; /* dbSize before the current transaction */ Pgno dbFileSize; /* Number of pages in the database file */ @@ -42373,7 +43147,7 @@ static const unsigned char aJournalMagic[] = { ** ** if( pPager->jfd->pMethods ){ ... */ -#define isOpen(pFd) ((pFd)->pMethods) +#define isOpen(pFd) ((pFd)->pMethods!=0) /* ** Return true if this pager uses a write-ahead log instead of the usual @@ -42596,19 +43370,21 @@ static int subjRequiresPage(PgHdr *pPg){ int i; for(i=0; inSavepoint; i++){ p = &pPager->aSavepoint[i]; - if( p->nOrig>=pgno && 0==sqlite3BitvecTest(p->pInSavepoint, pgno) ){ + if( p->nOrig>=pgno && 0==sqlite3BitvecTestNotNull(p->pInSavepoint, pgno) ){ return 1; } } return 0; } +#ifdef SQLITE_DEBUG /* ** Return true if the page is already in the journal file. */ static int pageInJournal(Pager *pPager, PgHdr *pPg){ return sqlite3BitvecTest(pPager->pInJournal, pPg->pgno); } +#endif /* ** Read a 32-bit integer from the given file descriptor. Store the integer @@ -43220,7 +43996,8 @@ static int writeMasterJournal(Pager *pPager, const char *zMaster){ || (0 != (rc = sqlite3OsWrite(pPager->jfd, zMaster, nMaster, iHdrOff+4))) || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster, nMaster))) || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster+4, cksum))) - || (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8, iHdrOff+4+nMaster+8))) + || (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8, + iHdrOff+4+nMaster+8))) ){ return rc; } @@ -43780,7 +44557,7 @@ static int pager_playback_one_page( } } - /* If this page has already been played by before during the current + /* If this page has already been played back before during the current ** rollback, then don't bother to play it back again. */ if( pDone && (rc = sqlite3BitvecSet(pDone, pgno))!=SQLITE_OK ){ @@ -44609,9 +45386,7 @@ static int pagerWalFrames( ){ int rc; /* Return code */ int nList; /* Number of pages in pList */ -#if defined(SQLITE_DEBUG) || defined(SQLITE_CHECK_PAGES) PgHdr *p; /* For looping over pages */ -#endif assert( pPager->pWal ); assert( pList ); @@ -44628,7 +45403,6 @@ static int pagerWalFrames( ** any pages with page numbers greater than nTruncate into the WAL file. ** They will never be read by any client. So remove them from the pDirty ** list here. */ - PgHdr *p; PgHdr **ppNext = &pList; nList = 0; for(p=pList; (*ppNext = p)!=0; p=p->pDirty){ @@ -44648,7 +45422,6 @@ static int pagerWalFrames( pPager->pageSize, pList, nTruncate, isCommit, pPager->walSyncFlags ); if( rc==SQLITE_OK && pPager->pBackup ){ - PgHdr *p; for(p=pList; p; p=p->pDirty){ sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData); } @@ -44718,11 +45491,10 @@ static int pagerPagecount(Pager *pPager, Pgno *pnPage){ assert( pPager->eLock>=SHARED_LOCK ); nPage = sqlite3WalDbsize(pPager->pWal); - /* If the database size was not available from the WAL sub-system, - ** determine it based on the size of the database file. If the size - ** of the database file is not an integer multiple of the page-size, - ** round down to the nearest page. Except, any file larger than 0 - ** bytes in size is considered to contain at least one page. + /* If the number of pages in the database is not available from the + ** WAL sub-system, determine the page counte based on the size of + ** the database file. If the size of the database file is not an + ** integer multiple of the page-size, round up the result. */ if( nPage==0 ){ i64 n = 0; /* Size of db file in bytes */ @@ -45887,8 +46659,6 @@ static int openSubJournal(Pager *pPager){ /* ** Append a record of the current state of page pPg to the sub-journal. -** It is the callers responsibility to use subjRequiresPage() to check -** that it is really required before calling this function. ** ** If successful, set the bit corresponding to pPg->pgno in the bitvecs ** for all open savepoints before returning. @@ -45935,6 +46705,13 @@ static int subjournalPage(PgHdr *pPg){ } return rc; } +static int subjournalPageIfRequired(PgHdr *pPg){ + if( subjRequiresPage(pPg) ){ + return subjournalPage(pPg); + }else{ + return SQLITE_OK; + } +} /* ** This function is called by the pcache layer when it has reached some @@ -45992,9 +46769,7 @@ static int pagerStress(void *p, PgHdr *pPg){ pPg->pDirty = 0; if( pagerUseWal(pPager) ){ /* Write a single frame for this page to the log. */ - if( subjRequiresPage(pPg) ){ - rc = subjournalPage(pPg); - } + rc = subjournalPageIfRequired(pPg); if( rc==SQLITE_OK ){ rc = pagerWalFrames(pPager, pPg, 0, 0); } @@ -46007,39 +46782,6 @@ static int pagerStress(void *p, PgHdr *pPg){ rc = syncJournal(pPager, 1); } - /* If the page number of this page is larger than the current size of - ** the database image, it may need to be written to the sub-journal. - ** This is because the call to pager_write_pagelist() below will not - ** actually write data to the file in this case. - ** - ** Consider the following sequence of events: - ** - ** BEGIN; - ** - ** - ** SAVEPOINT sp; - ** - ** pagerStress(page X) - ** ROLLBACK TO sp; - ** - ** If (X>Y), then when pagerStress is called page X will not be written - ** out to the database file, but will be dropped from the cache. Then, - ** following the "ROLLBACK TO sp" statement, reading page X will read - ** data from the database file. This will be the copy of page X as it - ** was when the transaction started, not as it was when "SAVEPOINT sp" - ** was executed. - ** - ** The solution is to write the current data for page X into the - ** sub-journal file now (if it is not already there), so that it will - ** be restored to its current value when the "ROLLBACK TO sp" is - ** executed. - */ - if( NEVER( - rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg) - ) ){ - rc = subjournalPage(pPg); - } - /* Write the contents of the page out to the database file. */ if( rc==SQLITE_OK ){ assert( (pPg->flags&PGHDR_NEED_SYNC)==0 ); @@ -46295,7 +47037,7 @@ SQLITE_PRIVATE int sqlite3PagerOpen( act_like_temp_file: tempFile = 1; pPager->eState = PAGER_READER; /* Pretend we already have a lock */ - pPager->eLock = EXCLUSIVE_LOCK; /* Pretend we are in EXCLUSIVE locking mode */ + pPager->eLock = EXCLUSIVE_LOCK; /* Pretend we are in EXCLUSIVE mode */ pPager->noLock = 1; /* Do no locking */ readOnly = (vfsFlags&SQLITE_OPEN_READONLY); } @@ -46314,7 +47056,7 @@ act_like_temp_file: assert( nExtra<1000 ); nExtra = ROUND8(nExtra); rc = sqlite3PcacheOpen(szPageDflt, nExtra, !memDb, - !memDb?pagerStress:0, (void *)pPager, pPager->pPCache); + !memDb?pagerStress:0, (void *)pPager, pPager->pPCache); } /* If an error occurred above, free the Pager structure and close the file. @@ -46701,7 +47443,7 @@ SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager){ ** occurring on the very first access to a file, in order to save a ** single unnecessary sqlite3OsRead() call at the start-up. ** - ** Database changes is detected by looking at 15 bytes beginning + ** Database changes are detected by looking at 15 bytes beginning ** at offset 24 into the file. The first 4 of these 16 bytes are ** a 32-bit counter that is incremented with each change. The ** other bytes change randomly with each file change when @@ -46909,9 +47651,14 @@ SQLITE_PRIVATE int sqlite3PagerAcquire( if( pBase==0 ){ rc = sqlite3PcacheFetchStress(pPager->pPCache, pgno, &pBase); if( rc!=SQLITE_OK ) goto pager_acquire_err; + if( pBase==0 ){ + pPg = *ppPage = 0; + rc = SQLITE_NOMEM; + goto pager_acquire_err; + } } pPg = *ppPage = sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pBase); - if( pPg==0 ) rc = SQLITE_NOMEM; + assert( pPg!=0 ); } } @@ -46922,10 +47669,11 @@ SQLITE_PRIVATE int sqlite3PagerAcquire( pPg = 0; goto pager_acquire_err; } - assert( (*ppPage)->pgno==pgno ); - assert( (*ppPage)->pPager==pPager || (*ppPage)->pPager==0 ); + assert( pPg==(*ppPage) ); + assert( pPg->pgno==pgno ); + assert( pPg->pPager==pPager || pPg->pPager==0 ); - if( (*ppPage)->pPager && !noContent ){ + if( pPg->pPager && !noContent ){ /* In this case the pcache already contains an initialized copy of ** the page. Return without further ado. */ assert( pgno<=PAGER_MAX_PGNO && pgno!=PAGER_MJ_PGNO(pPager) ); @@ -46936,7 +47684,6 @@ SQLITE_PRIVATE int sqlite3PagerAcquire( /* The pager cache has created a new page. Its content needs to ** be initialized. */ - pPg = *ppPage; pPg->pPager = pPager; /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page @@ -47015,6 +47762,7 @@ SQLITE_PRIVATE DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){ assert( pPager->pPCache!=0 ); pPage = sqlite3PcacheFetch(pPager->pPCache, pgno, 0); assert( pPage==0 || pPager->hasBeenUsed ); + if( pPage==0 ) return 0; return sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pPage); } @@ -47218,6 +47966,59 @@ SQLITE_PRIVATE int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory return rc; } +/* +** Write page pPg onto the end of the rollback journal. +*/ +static SQLITE_NOINLINE int pagerAddPageToRollbackJournal(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + int rc; + u32 cksum; + char *pData2; + i64 iOff = pPager->journalOff; + + /* We should never write to the journal file the page that + ** contains the database locks. The following assert verifies + ** that we do not. */ + assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) ); + + assert( pPager->journalHdr<=pPager->journalOff ); + CODEC2(pPager, pPg->pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2); + cksum = pager_cksum(pPager, (u8*)pData2); + + /* Even if an IO or diskfull error occurs while journalling the + ** page in the block above, set the need-sync flag for the page. + ** Otherwise, when the transaction is rolled back, the logic in + ** playback_one_page() will think that the page needs to be restored + ** in the database file. And if an IO error occurs while doing so, + ** then corruption may follow. + */ + pPg->flags |= PGHDR_NEED_SYNC; + + rc = write32bits(pPager->jfd, iOff, pPg->pgno); + if( rc!=SQLITE_OK ) return rc; + rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, iOff+4); + if( rc!=SQLITE_OK ) return rc; + rc = write32bits(pPager->jfd, iOff+pPager->pageSize+4, cksum); + if( rc!=SQLITE_OK ) return rc; + + IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, + pPager->journalOff, pPager->pageSize)); + PAGER_INCR(sqlite3_pager_writej_count); + PAGERTRACE(("JOURNAL %d page %d needSync=%d hash(%08x)\n", + PAGERID(pPager), pPg->pgno, + ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg))); + + pPager->journalOff += 8 + pPager->pageSize; + pPager->nRec++; + assert( pPager->pInJournal!=0 ); + rc = sqlite3BitvecSet(pPager->pInJournal, pPg->pgno); + testcase( rc==SQLITE_NOMEM ); + assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); + rc |= addToSavepointBitvecs(pPager, pPg->pgno); + assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); + return rc; +} + /* ** Mark a single data page as writeable. The page is written into the ** main journal or sub-journal as required. If the page is written into @@ -47228,7 +48029,6 @@ SQLITE_PRIVATE int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory static int pager_write(PgHdr *pPg){ Pager *pPager = pPg->pPager; int rc = SQLITE_OK; - int inJournal; /* This routine is not called unless a write-transaction has already ** been started. The journal file may or may not be open at this point. @@ -47241,7 +48041,6 @@ static int pager_write(PgHdr *pPg){ assert( assert_pager_state(pPager) ); assert( pPager->errCode==0 ); assert( pPager->readOnly==0 ); - CHECK_PAGE(pPg); /* The journal file needs to be opened. Higher level routines have already @@ -47260,91 +48059,48 @@ static int pager_write(PgHdr *pPg){ assert( pPager->eState>=PAGER_WRITER_CACHEMOD ); assert( assert_pager_state(pPager) ); - /* Mark the page as dirty. If the page has already been written - ** to the journal then we can return right away. - */ + /* Mark the page that is about to be modified as dirty. */ sqlite3PcacheMakeDirty(pPg); - inJournal = pageInJournal(pPager, pPg); - if( inJournal && (pPager->nSavepoint==0 || !subjRequiresPage(pPg)) ){ - assert( !pagerUseWal(pPager) ); - }else{ - - /* The transaction journal now exists and we have a RESERVED or an - ** EXCLUSIVE lock on the main database file. Write the current page to - ** the transaction journal if it is not there already. - */ - if( !inJournal && !pagerUseWal(pPager) ){ - assert( pagerUseWal(pPager)==0 ); - if( pPg->pgno<=pPager->dbOrigSize && isOpen(pPager->jfd) ){ - u32 cksum; - char *pData2; - i64 iOff = pPager->journalOff; - - /* We should never write to the journal file the page that - ** contains the database locks. The following assert verifies - ** that we do not. */ - assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) ); - - assert( pPager->journalHdr<=pPager->journalOff ); - CODEC2(pPager, pPg->pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2); - cksum = pager_cksum(pPager, (u8*)pData2); - - /* Even if an IO or diskfull error occurs while journalling the - ** page in the block above, set the need-sync flag for the page. - ** Otherwise, when the transaction is rolled back, the logic in - ** playback_one_page() will think that the page needs to be restored - ** in the database file. And if an IO error occurs while doing so, - ** then corruption may follow. - */ - pPg->flags |= PGHDR_NEED_SYNC; - rc = write32bits(pPager->jfd, iOff, pPg->pgno); - if( rc!=SQLITE_OK ) return rc; - rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, iOff+4); - if( rc!=SQLITE_OK ) return rc; - rc = write32bits(pPager->jfd, iOff+pPager->pageSize+4, cksum); - if( rc!=SQLITE_OK ) return rc; - - IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, - pPager->journalOff, pPager->pageSize)); - PAGER_INCR(sqlite3_pager_writej_count); - PAGERTRACE(("JOURNAL %d page %d needSync=%d hash(%08x)\n", - PAGERID(pPager), pPg->pgno, - ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg))); - - pPager->journalOff += 8 + pPager->pageSize; - pPager->nRec++; - assert( pPager->pInJournal!=0 ); - rc = sqlite3BitvecSet(pPager->pInJournal, pPg->pgno); - testcase( rc==SQLITE_NOMEM ); - assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); - rc |= addToSavepointBitvecs(pPager, pPg->pgno); - if( rc!=SQLITE_OK ){ - assert( rc==SQLITE_NOMEM ); - return rc; - } - }else{ - if( pPager->eState!=PAGER_WRITER_DBMOD ){ - pPg->flags |= PGHDR_NEED_SYNC; - } - PAGERTRACE(("APPEND %d page %d needSync=%d\n", - PAGERID(pPager), pPg->pgno, - ((pPg->flags&PGHDR_NEED_SYNC)?1:0))); + /* If a rollback journal is in use, them make sure the page that is about + ** to change is in the rollback journal, or if the page is a new page off + ** then end of the file, make sure it is marked as PGHDR_NEED_SYNC. + */ + assert( (pPager->pInJournal!=0) == isOpen(pPager->jfd) ); + if( pPager->pInJournal!=0 + && sqlite3BitvecTestNotNull(pPager->pInJournal, pPg->pgno)==0 + ){ + assert( pagerUseWal(pPager)==0 ); + if( pPg->pgno<=pPager->dbOrigSize ){ + rc = pagerAddPageToRollbackJournal(pPg); + if( rc!=SQLITE_OK ){ + return rc; } - } - - /* If the statement journal is open and the page is not in it, - ** then write the current page to the statement journal. Note that - ** the statement journal format differs from the standard journal format - ** in that it omits the checksums and the header. - */ - if( pPager->nSavepoint>0 && subjRequiresPage(pPg) ){ - rc = subjournalPage(pPg); + }else{ + if( pPager->eState!=PAGER_WRITER_DBMOD ){ + pPg->flags |= PGHDR_NEED_SYNC; + } + PAGERTRACE(("APPEND %d page %d needSync=%d\n", + PAGERID(pPager), pPg->pgno, + ((pPg->flags&PGHDR_NEED_SYNC)?1:0))); } } - /* Update the database size and return. + /* The PGHDR_DIRTY bit is set above when the page was added to the dirty-list + ** and before writing the page into the rollback journal. Wait until now, + ** after the page has been successfully journalled, before setting the + ** PGHDR_WRITEABLE bit that indicates that the page can be safely modified. + */ + pPg->flags |= PGHDR_WRITEABLE; + + /* If the statement journal is open and the page is not in it, + ** then write the page into the statement journal. */ + if( pPager->nSavepoint>0 ){ + rc = subjournalPageIfRequired(pPg); + } + + /* Update the database size and return. */ if( pPager->dbSizepgno ){ pPager->dbSize = pPg->pgno; } @@ -47359,17 +48115,17 @@ static int pager_write(PgHdr *pPg){ ** a write. ** ** Usually, the sector size is less than or equal to the page size, in which -** case pages can be individually written. This routine only runs in the exceptional -** case where the page size is smaller than the sector size. +** case pages can be individually written. This routine only runs in the +** exceptional case where the page size is smaller than the sector size. */ static SQLITE_NOINLINE int pagerWriteLargeSector(PgHdr *pPg){ - int rc = SQLITE_OK; /* Return code */ - Pgno nPageCount; /* Total number of pages in database file */ - Pgno pg1; /* First page of the sector pPg is located on. */ - int nPage = 0; /* Number of pages starting at pg1 to journal */ - int ii; /* Loop counter */ - int needSync = 0; /* True if any page has PGHDR_NEED_SYNC */ - Pager *pPager = pPg->pPager; /* The pager that owns pPg */ + int rc = SQLITE_OK; /* Return code */ + Pgno nPageCount; /* Total number of pages in database file */ + Pgno pg1; /* First page of the sector pPg is located on. */ + int nPage = 0; /* Number of pages starting at pg1 to journal */ + int ii; /* Loop counter */ + int needSync = 0; /* True if any page has PGHDR_NEED_SYNC */ + Pager *pPager = pPg->pPager; /* The pager that owns pPg */ Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize); /* Set the doNotSpill NOSYNC bit to 1. This is because we cannot allow @@ -47457,11 +48213,15 @@ static SQLITE_NOINLINE int pagerWriteLargeSector(PgHdr *pPg){ ** as appropriate. Otherwise, SQLITE_OK. */ SQLITE_PRIVATE int sqlite3PagerWrite(PgHdr *pPg){ + Pager *pPager = pPg->pPager; assert( (pPg->flags & PGHDR_MMAP)==0 ); - assert( pPg->pPager->eState>=PAGER_WRITER_LOCKED ); - assert( pPg->pPager->eState!=PAGER_ERROR ); - assert( assert_pager_state(pPg->pPager) ); - if( pPg->pPager->sectorSize > (u32)pPg->pPager->pageSize ){ + assert( pPager->eState>=PAGER_WRITER_LOCKED ); + assert( pPager->eState!=PAGER_ERROR ); + assert( assert_pager_state(pPager) ); + if( (pPg->flags & PGHDR_WRITEABLE)!=0 && pPager->dbSize>=pPg->pgno ){ + if( pPager->nSavepoint ) return subjournalPageIfRequired(pPg); + return SQLITE_OK; + }else if( pPager->sectorSize > (u32)pPager->pageSize ){ return pagerWriteLargeSector(pPg); }else{ return pager_write(pPg); @@ -47475,7 +48235,7 @@ SQLITE_PRIVATE int sqlite3PagerWrite(PgHdr *pPg){ */ #ifndef NDEBUG SQLITE_PRIVATE int sqlite3PagerIswriteable(DbPage *pPg){ - return pPg->flags&PGHDR_DIRTY; + return pPg->flags & PGHDR_WRITEABLE; } #endif @@ -47499,6 +48259,7 @@ SQLITE_PRIVATE void sqlite3PagerDontWrite(PgHdr *pPg){ PAGERTRACE(("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager))); IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno)) pPg->flags |= PGHDR_DONT_WRITE; + pPg->flags &= ~PGHDR_WRITEABLE; pager_set_pagehash(pPg); } } @@ -47965,12 +48726,14 @@ SQLITE_PRIVATE u8 sqlite3PagerIsreadonly(Pager *pPager){ return pPager->readOnly; } +#ifdef SQLITE_DEBUG /* ** Return the number of references to the pager. */ SQLITE_PRIVATE int sqlite3PagerRefcount(Pager *pPager){ return sqlite3PcacheRefCount(pPager->pPCache); } +#endif /* ** Return the approximate number of bytes of memory currently @@ -48053,54 +48816,62 @@ SQLITE_PRIVATE int sqlite3PagerIsMemdb(Pager *pPager){ ** occurs while opening the sub-journal file, then an IO error code is ** returned. Otherwise, SQLITE_OK. */ -SQLITE_PRIVATE int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){ +static SQLITE_NOINLINE int pagerOpenSavepoint(Pager *pPager, int nSavepoint){ int rc = SQLITE_OK; /* Return code */ int nCurrent = pPager->nSavepoint; /* Current number of savepoints */ + int ii; /* Iterator variable */ + PagerSavepoint *aNew; /* New Pager.aSavepoint array */ assert( pPager->eState>=PAGER_WRITER_LOCKED ); assert( assert_pager_state(pPager) ); + assert( nSavepoint>nCurrent && pPager->useJournal ); - if( nSavepoint>nCurrent && pPager->useJournal ){ - int ii; /* Iterator variable */ - PagerSavepoint *aNew; /* New Pager.aSavepoint array */ + /* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM + ** if the allocation fails. Otherwise, zero the new portion in case a + ** malloc failure occurs while populating it in the for(...) loop below. + */ + aNew = (PagerSavepoint *)sqlite3Realloc( + pPager->aSavepoint, sizeof(PagerSavepoint)*nSavepoint + ); + if( !aNew ){ + return SQLITE_NOMEM; + } + memset(&aNew[nCurrent], 0, (nSavepoint-nCurrent) * sizeof(PagerSavepoint)); + pPager->aSavepoint = aNew; - /* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM - ** if the allocation fails. Otherwise, zero the new portion in case a - ** malloc failure occurs while populating it in the for(...) loop below. - */ - aNew = (PagerSavepoint *)sqlite3Realloc( - pPager->aSavepoint, sizeof(PagerSavepoint)*nSavepoint - ); - if( !aNew ){ + /* Populate the PagerSavepoint structures just allocated. */ + for(ii=nCurrent; iidbSize; + if( isOpen(pPager->jfd) && pPager->journalOff>0 ){ + aNew[ii].iOffset = pPager->journalOff; + }else{ + aNew[ii].iOffset = JOURNAL_HDR_SZ(pPager); + } + aNew[ii].iSubRec = pPager->nSubRec; + aNew[ii].pInSavepoint = sqlite3BitvecCreate(pPager->dbSize); + if( !aNew[ii].pInSavepoint ){ return SQLITE_NOMEM; } - memset(&aNew[nCurrent], 0, (nSavepoint-nCurrent) * sizeof(PagerSavepoint)); - pPager->aSavepoint = aNew; - - /* Populate the PagerSavepoint structures just allocated. */ - for(ii=nCurrent; iidbSize; - if( isOpen(pPager->jfd) && pPager->journalOff>0 ){ - aNew[ii].iOffset = pPager->journalOff; - }else{ - aNew[ii].iOffset = JOURNAL_HDR_SZ(pPager); - } - aNew[ii].iSubRec = pPager->nSubRec; - aNew[ii].pInSavepoint = sqlite3BitvecCreate(pPager->dbSize); - if( !aNew[ii].pInSavepoint ){ - return SQLITE_NOMEM; - } - if( pagerUseWal(pPager) ){ - sqlite3WalSavepoint(pPager->pWal, aNew[ii].aWalData); - } - pPager->nSavepoint = ii+1; + if( pagerUseWal(pPager) ){ + sqlite3WalSavepoint(pPager->pWal, aNew[ii].aWalData); } - assert( pPager->nSavepoint==nSavepoint ); - assertTruncateConstraint(pPager); + pPager->nSavepoint = ii+1; } - + assert( pPager->nSavepoint==nSavepoint ); + assertTruncateConstraint(pPager); return rc; } +SQLITE_PRIVATE int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){ + assert( pPager->eState>=PAGER_WRITER_LOCKED ); + assert( assert_pager_state(pPager) ); + + if( nSavepoint>pPager->nSavepoint && pPager->useJournal ){ + return pagerOpenSavepoint(pPager, nSavepoint); + }else{ + return SQLITE_OK; + } +} + /* ** This function is called to rollback or release (commit) a savepoint. @@ -48331,9 +49102,8 @@ SQLITE_PRIVATE int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, i ** one or more savepoint bitvecs. This is the reason this function ** may return SQLITE_NOMEM. */ - if( pPg->flags&PGHDR_DIRTY - && subjRequiresPage(pPg) - && SQLITE_OK!=(rc = subjournalPage(pPg)) + if( (pPg->flags & PGHDR_DIRTY)!=0 + && SQLITE_OK!=(rc = subjournalPageIfRequired(pPg)) ){ return rc; } @@ -48579,6 +49349,8 @@ SQLITE_PRIVATE int sqlite3PagerSetJournalMode(Pager *pPager, int eMode){ } assert( state==pPager->eState ); } + }else if( eMode==PAGER_JOURNALMODE_OFF ){ + sqlite3OsClose(pPager->jfd); } } @@ -49082,6 +49854,7 @@ SQLITE_PRIVATE int sqlite3PagerWalFramesize(Pager *pPager){ */ #ifndef SQLITE_OMIT_WAL +/* #include "wal.h" */ /* ** Trace output macros @@ -49361,7 +50134,7 @@ static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){ if( pWal->nWiData<=iPage ){ int nByte = sizeof(u32*)*(iPage+1); volatile u32 **apNew; - apNew = (volatile u32 **)sqlite3_realloc((void *)pWal->apWiData, nByte); + apNew = (volatile u32 **)sqlite3_realloc64((void *)pWal->apWiData, nByte); if( !apNew ){ *ppPage = 0; return SQLITE_NOMEM; @@ -49487,9 +50260,9 @@ static void walIndexWriteHdr(Wal *pWal){ pWal->hdr.isInit = 1; pWal->hdr.iVersion = WALINDEX_MAX_VERSION; walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum); - memcpy((void *)&aHdr[1], (void *)&pWal->hdr, sizeof(WalIndexHdr)); + memcpy((void*)&aHdr[1], (const void*)&pWal->hdr, sizeof(WalIndexHdr)); walShmBarrier(pWal); - memcpy((void *)&aHdr[0], (void *)&pWal->hdr, sizeof(WalIndexHdr)); + memcpy((void*)&aHdr[0], (const void*)&pWal->hdr, sizeof(WalIndexHdr)); } /* @@ -49791,13 +50564,13 @@ static void walCleanupHash(Wal *pWal){ ** via the hash table even after the cleanup. */ if( iLimit ){ - int i; /* Loop counter */ + int j; /* Loop counter */ int iKey; /* Hash key */ - for(i=1; i<=iLimit; i++){ - for(iKey=walHash(aPgno[i]); aHash[iKey]; iKey=walNextHash(iKey)){ - if( aHash[iKey]==i ) break; + for(j=1; j<=iLimit; j++){ + for(iKey=walHash(aPgno[j]); aHash[iKey]; iKey=walNextHash(iKey)){ + if( aHash[iKey]==j ) break; } - assert( aHash[iKey]==i ); + assert( aHash[iKey]==j ); } } #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */ @@ -49986,7 +50759,7 @@ static int walIndexRecover(Wal *pWal){ /* Malloc a buffer to read frames into. */ szFrame = szPage + WAL_FRAME_HDRSIZE; - aFrame = (u8 *)sqlite3_malloc(szFrame); + aFrame = (u8 *)sqlite3_malloc64(szFrame); if( !aFrame ){ rc = SQLITE_NOMEM; goto recovery_error; @@ -50299,7 +51072,7 @@ static void walMergesort( int nMerge = 0; /* Number of elements in list aMerge */ ht_slot *aMerge = 0; /* List to be merged */ int iList; /* Index into input list */ - int iSub = 0; /* Index into aSub array */ + u32 iSub = 0; /* Index into aSub array */ struct Sublist aSub[13]; /* Array of sub-lists */ memset(aSub, 0, sizeof(aSub)); @@ -50310,7 +51083,9 @@ static void walMergesort( nMerge = 1; aMerge = &aList[iList]; for(iSub=0; iList & (1<aList && p->nList<=(1<aList==&aList[iList&~((2<aList, p->nList, &aMerge, &nMerge, aBuffer); @@ -50321,7 +51096,9 @@ static void walMergesort( for(iSub++; iSubnList<=(1<aList==&aList[nList&~((2<aList, p->nList, &aMerge, &nMerge, aBuffer); @@ -50379,7 +51156,7 @@ static int walIteratorInit(Wal *pWal, WalIterator **pp){ nByte = sizeof(WalIterator) + (nSegment-1)*sizeof(struct WalSegment) + iLast*sizeof(ht_slot); - p = (WalIterator *)sqlite3_malloc(nByte); + p = (WalIterator *)sqlite3_malloc64(nByte); if( !p ){ return SQLITE_NOMEM; } @@ -50389,7 +51166,7 @@ static int walIteratorInit(Wal *pWal, WalIterator **pp){ /* Allocate temporary space used by the merge-sort routine. This block ** of memory will be freed before this function returns. */ - aTmp = (ht_slot *)sqlite3_malloc( + aTmp = (ht_slot *)sqlite3_malloc64( sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast) ); if( !aTmp ){ @@ -50569,6 +51346,14 @@ static int walCheckpoint( mxSafeFrame = pWal->hdr.mxFrame; mxPage = pWal->hdr.nPage; for(i=1; iaReadMark[i]; if( mxSafeFrame>y ){ assert( y<=pWal->hdr.mxFrame ); @@ -52224,6 +53009,7 @@ SQLITE_PRIVATE int sqlite3WalFramesize(Wal *pWal){ ** 4 Number of leaf pointers on this page ** * zero or more pages numbers of leaves */ +/* #include "sqliteInt.h" */ /* The following value is the maximum cell size assuming a maximum page @@ -52241,6 +53027,7 @@ SQLITE_PRIVATE int sqlite3WalFramesize(Wal *pWal){ /* Forward declarations */ typedef struct MemPage MemPage; typedef struct BtLock BtLock; +typedef struct CellInfo CellInfo; /* ** This is a magic string that appears at the beginning of every @@ -52304,7 +53091,10 @@ struct MemPage { u8 *aData; /* Pointer to disk image of the page data */ u8 *aDataEnd; /* One byte past the end of usable data */ u8 *aCellIdx; /* The cell index area */ + u8 *aDataOfst; /* Same as aData for leaves. aData+4 for interior */ DbPage *pDbPage; /* Pager page handle */ + u16 (*xCellSize)(MemPage*,u8*); /* cellSizePtr method */ + void (*xParseCell)(MemPage*,u8*,CellInfo*); /* btreeParseCell method */ Pgno pgno; /* Page number for this page */ }; @@ -52360,6 +53150,7 @@ struct Btree { u8 inTrans; /* TRANS_NONE, TRANS_READ or TRANS_WRITE */ u8 sharable; /* True if we can share pBt with another db */ u8 locked; /* True if db currently has pBt locked */ + u8 hasIncrblobCur; /* True if there are one or more Incrblob cursors */ int wantToLock; /* Number of nested calls to sqlite3BtreeEnter() */ int nBackup; /* Number of backup operations reading this btree */ u32 iDataVersion; /* Combines with pBt->pPager->iDataVersion */ @@ -52470,7 +53261,6 @@ struct BtShared { ** about a cell. The parseCellPtr() function fills in this structure ** based on information extract from the raw disk page. */ -typedef struct CellInfo CellInfo; struct CellInfo { i64 nKey; /* The key for INTKEY tables, or nPayload otherwise */ u8 *pPayload; /* Pointer to the start of payload */ @@ -52513,8 +53303,7 @@ struct CellInfo { struct BtCursor { Btree *pBtree; /* The Btree to which this cursor belongs */ BtShared *pBt; /* The BtShared this cursor points to */ - BtCursor *pNext, *pPrev; /* Forms a linked list of all cursors */ - struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */ + BtCursor *pNext; /* Forms a linked list of all cursors */ Pgno *aOverflow; /* Cache of overflow page locations */ CellInfo info; /* A parse of the cell we are pointing at */ i64 nKey; /* Size of pKey, or last integer key */ @@ -52524,9 +53313,16 @@ struct BtCursor { int skipNext; /* Prev() is noop if negative. Next() is noop if positive. ** Error code if eState==CURSOR_FAULT */ u8 curFlags; /* zero or more BTCF_* flags defined below */ + u8 curPagerFlags; /* Flags to send to sqlite3PagerAcquire() */ u8 eState; /* One of the CURSOR_XXX constants (see below) */ - u8 hints; /* As configured by CursorSetHints() */ - i16 iPage; /* Index of current page in apPage */ + u8 hints; /* As configured by CursorSetHints() */ + /* All fields above are zeroed when the cursor is allocated. See + ** sqlite3BtreeCursorZero(). Fields that follow must be manually + ** initialized. */ + i8 iPage; /* Index of current page in apPage */ + u8 curIntKey; /* Value of apPage[0]->intKey */ + struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */ + void *padding1; /* Make object size a multiple of 16 */ u16 aiIdx[BTCURSOR_MAX_DEPTH]; /* Current index in apPage[i] */ MemPage *apPage[BTCURSOR_MAX_DEPTH]; /* Pages from root to current page */ }; @@ -52539,6 +53335,7 @@ struct BtCursor { #define BTCF_ValidOvfl 0x04 /* True if aOverflow is valid */ #define BTCF_AtLast 0x08 /* Cursor is pointing ot the last entry */ #define BTCF_Incrblob 0x10 /* True if an incremental I/O handle */ +#define BTCF_Multiple 0x20 /* Maybe another cursor on the same btree */ /* ** Potential values for BtCursor.eState. @@ -52681,6 +53478,7 @@ struct IntegrityCk { const char *zPfx; /* Error message prefix */ int v1, v2; /* Values for up to two %d fields in zPfx */ StrAccum errMsg; /* Accumulate the error message text here */ + u32 *heap; /* Min-heap used for analyzing cell coverage */ }; /* @@ -52691,6 +53489,21 @@ struct IntegrityCk { #define get4byte sqlite3Get4byte #define put4byte sqlite3Put4byte +/* +** get2byteAligned(), unlike get2byte(), requires that its argument point to a +** two-byte aligned address. get2bytea() is only used for accessing the +** cell addresses in a btree header. +*/ +#if SQLITE_BYTEORDER==4321 +# define get2byteAligned(x) (*(u16*)(x)) +#elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4008000 +# define get2byteAligned(x) __builtin_bswap16(*(u16*)(x)) +#elif SQLITE_BYTEORDER==1234 && defined(_MSC_VER) && _MSC_VER>=1300 +# define get2byteAligned(x) _byteswap_ushort(*(u16*)(x)) +#else +# define get2byteAligned(x) ((x)[0]<<8 | (x)[1]) +#endif + /************** End of btreeInt.h ********************************************/ /************** Continuing where we left off in btmutex.c ********************/ #ifndef SQLITE_OMIT_SHARED_CACHE @@ -52994,6 +53807,7 @@ SQLITE_PRIVATE void sqlite3BtreeEnterAll(sqlite3 *db){ ** See the header comment on "btreeInt.h" for additional information. ** Including a description of file format and an overview of operation. */ +/* #include "btreeInt.h" */ /* ** The header string that appears at the beginning of every @@ -53470,13 +54284,15 @@ static void invalidateIncrblobCursors( int isClearTable /* True if all rows are being deleted */ ){ BtCursor *p; - BtShared *pBt = pBtree->pBt; + if( pBtree->hasIncrblobCur==0 ) return; assert( sqlite3BtreeHoldsMutex(pBtree) ); - for(p=pBt->pCursor; p; p=p->pNext){ - if( (p->curFlags & BTCF_Incrblob)!=0 - && (isClearTable || p->info.nKey==iRow) - ){ - p->eState = CURSOR_INVALID; + pBtree->hasIncrblobCur = 0; + for(p=pBtree->pBt->pCursor; p; p=p->pNext){ + if( (p->curFlags & BTCF_Incrblob)!=0 ){ + pBtree->hasIncrblobCur = 1; + if( isClearTable || p->info.nKey==iRow ){ + p->eState = CURSOR_INVALID; + } } } } @@ -53598,7 +54414,7 @@ static int saveCursorPosition(BtCursor *pCur){ ** table, then malloc space for and store the pCur->nKey bytes of key ** data. */ - if( 0==pCur->apPage[0]->intKey ){ + if( 0==pCur->curIntKey ){ void *pKey = sqlite3Malloc( pCur->nKey ); if( pKey ){ rc = sqlite3BtreeKey(pCur, 0, (int)pCur->nKey, pKey); @@ -53611,7 +54427,7 @@ static int saveCursorPosition(BtCursor *pCur){ rc = SQLITE_NOMEM; } } - assert( !pCur->apPage[0]->intKey || !pCur->pKey ); + assert( !pCur->curIntKey || !pCur->pKey ); if( rc==SQLITE_OK ){ btreeReleaseAllCursorPages(pCur); @@ -53633,6 +54449,15 @@ static int SQLITE_NOINLINE saveCursorsOnList(BtCursor*,Pgno,BtCursor*); ** routine is called just before cursor pExcept is used to modify the ** table, for example in BtreeDelete() or BtreeInsert(). ** +** If there are two or more cursors on the same btree, then all such +** cursors should have their BTCF_Multiple flag set. The btreeCursor() +** routine enforces that rule. This routine only needs to be called in +** the uncommon case when pExpect has the BTCF_Multiple flag set. +** +** If pExpect!=NULL and if no other cursors are found on the same root-page, +** then the BTCF_Multiple flag on pExpect is cleared, to avoid another +** pointless call to this routine. +** ** Implementation note: This routine merely checks to see if any cursors ** need to be saved. It calls out to saveCursorsOnList() in the (unusual) ** event that cursors are in need to being saved. @@ -53644,7 +54469,9 @@ static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){ for(p=pBt->pCursor; p; p=p->pNext){ if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ) break; } - return p ? saveCursorsOnList(p, iRoot, pExcept) : SQLITE_OK; + if( p ) return saveCursorsOnList(p, iRoot, pExcept); + if( pExcept ) pExcept->curFlags &= ~BTCF_Multiple; + return SQLITE_OK; } /* This helper routine to saveAllCursors does the actual work of saving @@ -53932,39 +54759,88 @@ static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){ ** the page, 1 means the second cell, and so forth) return a pointer ** to the cell content. ** +** findCellPastPtr() does the same except it skips past the initial +** 4-byte child pointer found on interior pages, if there is one. +** ** This routine works only for pages that do not contain overflow cells. */ #define findCell(P,I) \ - ((P)->aData + ((P)->maskPage & get2byte(&(P)->aCellIdx[2*(I)]))) -#define findCellv2(D,M,O,I) (D+(M&get2byte(D+(O+2*(I))))) + ((P)->aData + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)]))) +#define findCellPastPtr(P,I) \ + ((P)->aDataOfst + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)]))) /* -** This a more complex version of findCell() that works for -** pages that do contain overflow cells. +** This is common tail processing for btreeParseCellPtr() and +** btreeParseCellPtrIndex() for the case when the cell does not fit entirely +** on a single B-tree page. Make necessary adjustments to the CellInfo +** structure. */ -static u8 *findOverflowCell(MemPage *pPage, int iCell){ - int i; - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - for(i=pPage->nOverflow-1; i>=0; i--){ - int k; - k = pPage->aiOvfl[i]; - if( k<=iCell ){ - if( k==iCell ){ - return pPage->apOvfl[i]; - } - iCell--; - } +static SQLITE_NOINLINE void btreeParseCellAdjustSizeForOverflow( + MemPage *pPage, /* Page containing the cell */ + u8 *pCell, /* Pointer to the cell text. */ + CellInfo *pInfo /* Fill in this structure */ +){ + /* If the payload will not fit completely on the local page, we have + ** to decide how much to store locally and how much to spill onto + ** overflow pages. The strategy is to minimize the amount of unused + ** space on overflow pages while keeping the amount of local storage + ** in between minLocal and maxLocal. + ** + ** Warning: changing the way overflow payload is distributed in any + ** way will result in an incompatible file format. + */ + int minLocal; /* Minimum amount of payload held locally */ + int maxLocal; /* Maximum amount of payload held locally */ + int surplus; /* Overflow payload available for local storage */ + + minLocal = pPage->minLocal; + maxLocal = pPage->maxLocal; + surplus = minLocal + (pInfo->nPayload - minLocal)%(pPage->pBt->usableSize-4); + testcase( surplus==maxLocal ); + testcase( surplus==maxLocal+1 ); + if( surplus <= maxLocal ){ + pInfo->nLocal = (u16)surplus; + }else{ + pInfo->nLocal = (u16)minLocal; } - return findCell(pPage, iCell); + pInfo->iOverflow = (u16)(&pInfo->pPayload[pInfo->nLocal] - pCell); + pInfo->nSize = pInfo->iOverflow + 4; } /* -** Parse a cell content block and fill in the CellInfo structure. There -** are two versions of this function. btreeParseCell() takes a -** cell index as the second argument and btreeParseCellPtr() -** takes a pointer to the body of the cell as its second argument. +** The following routines are implementations of the MemPage.xParseCell() +** method. +** +** Parse a cell content block and fill in the CellInfo structure. +** +** btreeParseCellPtr() => table btree leaf nodes +** btreeParseCellNoPayload() => table btree internal nodes +** btreeParseCellPtrIndex() => index btree nodes +** +** There is also a wrapper function btreeParseCell() that works for +** all MemPage types and that references the cell by index rather than +** by pointer. */ +static void btreeParseCellPtrNoPayload( + MemPage *pPage, /* Page containing the cell */ + u8 *pCell, /* Pointer to the cell text. */ + CellInfo *pInfo /* Fill in this structure */ +){ + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pPage->leaf==0 ); + assert( pPage->noPayload ); + assert( pPage->childPtrSize==4 ); +#ifndef SQLITE_DEBUG + UNUSED_PARAMETER(pPage); +#endif + pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey); + pInfo->nPayload = 0; + pInfo->nLocal = 0; + pInfo->iOverflow = 0; + pInfo->pPayload = 0; + return; +} static void btreeParseCellPtr( MemPage *pPage, /* Page containing the cell */ u8 *pCell, /* Pointer to the cell text. */ @@ -53972,26 +54848,93 @@ static void btreeParseCellPtr( ){ u8 *pIter; /* For scanning through pCell */ u32 nPayload; /* Number of bytes of cell payload */ + u64 iKey; /* Extracted Key value */ assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( pPage->leaf==0 || pPage->leaf==1 ); - if( pPage->intKeyLeaf ){ - assert( pPage->childPtrSize==0 ); - pIter = pCell + getVarint32(pCell, nPayload); - pIter += getVarint(pIter, (u64*)&pInfo->nKey); - }else if( pPage->noPayload ){ - assert( pPage->childPtrSize==4 ); - pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey); - pInfo->nPayload = 0; - pInfo->nLocal = 0; + assert( pPage->intKeyLeaf || pPage->noPayload ); + assert( pPage->noPayload==0 ); + assert( pPage->intKeyLeaf ); + assert( pPage->childPtrSize==0 ); + pIter = pCell; + + /* The next block of code is equivalent to: + ** + ** pIter += getVarint32(pIter, nPayload); + ** + ** The code is inlined to avoid a function call. + */ + nPayload = *pIter; + if( nPayload>=0x80 ){ + u8 *pEnd = &pIter[8]; + nPayload &= 0x7f; + do{ + nPayload = (nPayload<<7) | (*++pIter & 0x7f); + }while( (*pIter)>=0x80 && pIternKey); + ** + ** The code is inlined to avoid a function call. + */ + iKey = *pIter; + if( iKey>=0x80 ){ + u8 *pEnd = &pIter[7]; + iKey &= 0x7f; + while(1){ + iKey = (iKey<<7) | (*++pIter & 0x7f); + if( (*pIter)<0x80 ) break; + if( pIter>=pEnd ){ + iKey = (iKey<<8) | *++pIter; + break; + } + } + } + pIter++; + + pInfo->nKey = *(i64*)&iKey; + pInfo->nPayload = nPayload; + pInfo->pPayload = pIter; + testcase( nPayload==pPage->maxLocal ); + testcase( nPayload==pPage->maxLocal+1 ); + if( nPayload<=pPage->maxLocal ){ + /* This is the (easy) common case where the entire payload fits + ** on the local page. No overflow is required. + */ + pInfo->nSize = nPayload + (u16)(pIter - pCell); + if( pInfo->nSize<4 ) pInfo->nSize = 4; + pInfo->nLocal = (u16)nPayload; pInfo->iOverflow = 0; - pInfo->pPayload = 0; - return; }else{ - pIter = pCell + pPage->childPtrSize; - pIter += getVarint32(pIter, nPayload); - pInfo->nKey = nPayload; + btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo); + } +} +static void btreeParseCellPtrIndex( + MemPage *pPage, /* Page containing the cell */ + u8 *pCell, /* Pointer to the cell text. */ + CellInfo *pInfo /* Fill in this structure */ +){ + u8 *pIter; /* For scanning through pCell */ + u32 nPayload; /* Number of bytes of cell payload */ + + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pPage->leaf==0 || pPage->leaf==1 ); + assert( pPage->intKeyLeaf==0 ); + assert( pPage->noPayload==0 ); + pIter = pCell + pPage->childPtrSize; + nPayload = *pIter; + if( nPayload>=0x80 ){ + u8 *pEnd = &pIter[8]; + nPayload &= 0x7f; + do{ + nPayload = (nPayload<<7) | (*++pIter & 0x7f); + }while( *(pIter)>=0x80 && pIternKey = nPayload; pInfo->nPayload = nPayload; pInfo->pPayload = pIter; testcase( nPayload==pPage->maxLocal ); @@ -54005,31 +54948,7 @@ static void btreeParseCellPtr( pInfo->nLocal = (u16)nPayload; pInfo->iOverflow = 0; }else{ - /* If the payload will not fit completely on the local page, we have - ** to decide how much to store locally and how much to spill onto - ** overflow pages. The strategy is to minimize the amount of unused - ** space on overflow pages while keeping the amount of local storage - ** in between minLocal and maxLocal. - ** - ** Warning: changing the way overflow payload is distributed in any - ** way will result in an incompatible file format. - */ - int minLocal; /* Minimum amount of payload held locally */ - int maxLocal; /* Maximum amount of payload held locally */ - int surplus; /* Overflow payload available for local storage */ - - minLocal = pPage->minLocal; - maxLocal = pPage->maxLocal; - surplus = minLocal + (nPayload - minLocal)%(pPage->pBt->usableSize - 4); - testcase( surplus==maxLocal ); - testcase( surplus==maxLocal+1 ); - if( surplus <= maxLocal ){ - pInfo->nLocal = (u16)surplus; - }else{ - pInfo->nLocal = (u16)minLocal; - } - pInfo->iOverflow = (u16)(&pInfo->pPayload[pInfo->nLocal] - pCell); - pInfo->nSize = pInfo->iOverflow + 4; + btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo); } } static void btreeParseCell( @@ -54037,14 +54956,20 @@ static void btreeParseCell( int iCell, /* The cell index. First cell is 0 */ CellInfo *pInfo /* Fill in this structure */ ){ - btreeParseCellPtr(pPage, findCell(pPage, iCell), pInfo); + pPage->xParseCell(pPage, findCell(pPage, iCell), pInfo); } /* +** The following routines are implementations of the MemPage.xCellSize +** method. +** ** Compute the total number of bytes that a Cell needs in the cell ** data area of the btree-page. The return number includes the cell ** data header and the local payload, but not any overflow page or ** the space used by the cell pointer. +** +** cellSizePtrNoPayload() => table internal nodes +** cellSizePtr() => all index nodes & table leaf nodes */ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ u8 *pIter = pCell + pPage->childPtrSize; /* For looping over bytes of pCell */ @@ -54057,18 +54982,13 @@ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of ** this function verifies that this invariant is not violated. */ CellInfo debuginfo; - btreeParseCellPtr(pPage, pCell, &debuginfo); + pPage->xParseCell(pPage, pCell, &debuginfo); #endif - if( pPage->noPayload ){ - pEnd = &pIter[9]; - while( (*pIter++)&0x80 && pIterchildPtrSize==4 ); - return (u16)(pIter - pCell); - } + assert( pPage->noPayload==0 ); nSize = *pIter; if( nSize>=0x80 ){ - pEnd = &pIter[9]; + pEnd = &pIter[8]; nSize &= 0x7f; do{ nSize = (nSize<<7) | (*++pIter & 0x7f); @@ -54100,12 +55020,34 @@ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ assert( nSize==debuginfo.nSize || CORRUPT_DB ); return (u16)nSize; } +static u16 cellSizePtrNoPayload(MemPage *pPage, u8 *pCell){ + u8 *pIter = pCell + 4; /* For looping over bytes of pCell */ + u8 *pEnd; /* End mark for a varint */ + +#ifdef SQLITE_DEBUG + /* The value returned by this function should always be the same as + ** the (CellInfo.nSize) value found by doing a full parse of the + ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of + ** this function verifies that this invariant is not violated. */ + CellInfo debuginfo; + pPage->xParseCell(pPage, pCell, &debuginfo); +#else + UNUSED_PARAMETER(pPage); +#endif + + assert( pPage->childPtrSize==4 ); + pEnd = pIter + 9; + while( (*pIter++)&0x80 && pIterxCellSize(pPage, findCell(pPage, iCell)); } #endif @@ -54119,7 +55061,7 @@ static void ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell, int *pRC){ CellInfo info; if( *pRC ) return; assert( pCell!=0 ); - btreeParseCellPtr(pPage, pCell, &info); + pPage->xParseCell(pPage, pCell, &info); if( info.iOverflow ){ Pgno ovfl = get4byte(&pCell[info.iOverflow]); ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, pRC); @@ -54176,26 +55118,18 @@ static int defragmentPage(MemPage *pPage){ pc = get2byte(pAddr); testcase( pc==iCellFirst ); testcase( pc==iCellLast ); -#if !defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK) /* These conditions have already been verified in btreeInitPage() - ** if SQLITE_ENABLE_OVERSIZE_CELL_CHECK is defined + ** if PRAGMA cell_size_check=ON. */ if( pciCellLast ){ return SQLITE_CORRUPT_BKPT; } -#endif assert( pc>=iCellFirst && pc<=iCellLast ); - size = cellSizePtr(pPage, &src[pc]); + size = pPage->xCellSize(pPage, &src[pc]); cbrk -= size; -#if defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK) - if( cbrkusableSize ){ return SQLITE_CORRUPT_BKPT; } -#endif assert( cbrk+size<=usableSize && cbrk>=iCellFirst ); testcase( cbrk+size==usableSize ); testcase( pc+size==usableSize ); @@ -54233,18 +55167,20 @@ static int defragmentPage(MemPage *pPage){ ** This function may detect corruption within pPg. If corruption is ** detected then *pRc is set to SQLITE_CORRUPT and NULL is returned. ** -** If a slot of at least nByte bytes is found but cannot be used because -** there are already at least 60 fragmented bytes on the page, return NULL. -** In this case, if pbDefrag parameter is not NULL, set *pbDefrag to true. +** Slots on the free list that are between 1 and 3 bytes larger than nByte +** will be ignored if adding the extra space to the fragmentation count +** causes the fragmentation count to exceed 60. */ -static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc, int *pbDefrag){ +static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){ const int hdr = pPg->hdrOffset; u8 * const aData = pPg->aData; - int iAddr; - int pc; + int iAddr = hdr + 1; + int pc = get2byte(&aData[iAddr]); + int x; int usableSize = pPg->pBt->usableSize; - for(iAddr=hdr+1; (pc = get2byte(&aData[iAddr]))>0; iAddr=pc){ + assert( pc>0 ); + do{ int size; /* Size of the free slot */ /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of ** increasing offset. */ @@ -54256,24 +55192,21 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc, int *pbDefrag){ ** freeblock form a big-endian integer which is the size of the freeblock ** in bytes, including the 4-byte header. */ size = get2byte(&aData[pc+2]); - if( size>=nByte ){ - int x = size - nByte; + if( (x = size - nByte)>=0 ){ testcase( x==4 ); testcase( x==3 ); - if( x<4 ){ + if( pc < pPg->cellOffset+2*pPg->nCell || size+pc > usableSize ){ + *pRc = SQLITE_CORRUPT_BKPT; + return 0; + }else if( x<4 ){ /* EVIDENCE-OF: R-11498-58022 In a well-formed b-tree page, the total ** number of bytes in fragments may not exceed 60. */ - if( aData[hdr+7]>=60 ){ - if( pbDefrag ) *pbDefrag = 1; - return 0; - } + if( aData[hdr+7]>57 ) return 0; + /* Remove the slot from the free-list. Update the number of ** fragmented bytes within the page. */ memcpy(&aData[iAddr], &aData[pc], 2); aData[hdr+7] += (u8)x; - }else if( size+pc > usableSize ){ - *pRc = SQLITE_CORRUPT_BKPT; - return 0; }else{ /* The slot remains on the free-list. Reduce its size to account ** for the portion used by the new allocation. */ @@ -54281,7 +55214,9 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc, int *pbDefrag){ } return &aData[pc + x]; } - } + iAddr = pc; + pc = get2byte(&aData[pc]); + }while( pc ); return 0; } @@ -54322,8 +55257,15 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ ** then the cell content offset of an empty page wants to be 65536. ** However, that integer is too large to be stored in a 2-byte unsigned ** integer, so a value of 0 is used in its place. */ - top = get2byteNotZero(&data[hdr+5]); - if( gap>top ) return SQLITE_CORRUPT_BKPT; + top = get2byte(&data[hdr+5]); + assert( top<=(int)pPage->pBt->usableSize ); /* Prevent by getAndInitPage() */ + if( gap>top ){ + if( top==0 && pPage->pBt->usableSize==65536 ){ + top = 65536; + }else{ + return SQLITE_CORRUPT_BKPT; + } + } /* If there is enough space between gap and top for one more cell pointer ** array entry offset, and if the freelist is not empty, then search the @@ -54332,15 +55274,14 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ testcase( gap+2==top ); testcase( gap+1==top ); testcase( gap==top ); - if( gap+2<=top && (data[hdr+1] || data[hdr+2]) ){ - int bDefrag = 0; - u8 *pSpace = pageFindSlot(pPage, nByte, &rc, &bDefrag); - if( rc ) return rc; - if( bDefrag ) goto defragment_page; + if( (data[hdr+2] || data[hdr+1]) && gap+2<=top ){ + u8 *pSpace = pageFindSlot(pPage, nByte, &rc); if( pSpace ){ assert( pSpace>=data && (pSpace - data)<65536 ); *pIdx = (int)(pSpace - data); return SQLITE_OK; + }else if( rc ){ + return rc; } } @@ -54349,7 +55290,6 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ */ testcase( gap+2+nByte==top ); if( gap+2+nByte>top ){ - defragment_page: assert( pPage->nCell>0 || CORRUPT_DB ); rc = defragmentPage(pPage); if( rc ) return rc; @@ -54396,7 +55336,7 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){ assert( pPage->pBt!=0 ); assert( sqlite3PagerIswriteable(pPage->pDbPage) ); - assert( iStart>=pPage->hdrOffset+6+pPage->childPtrSize ); + assert( CORRUPT_DB || iStart>=pPage->hdrOffset+6+pPage->childPtrSize ); assert( CORRUPT_DB || iEnd <= pPage->pBt->usableSize ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( iSize>=4 ); /* Minimum cell size is 4 */ @@ -54425,7 +55365,7 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){ /* At this point: ** iFreeBlk: First freeblock after iStart, or zero if none - ** iPtr: The address of a pointer iFreeBlk + ** iPtr: The address of a pointer to iFreeBlk ** ** Check to see if iFreeBlk should be coalesced onto the end of iStart. */ @@ -54433,6 +55373,7 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){ nFrag = iFreeBlk - iEnd; if( iEnd>iFreeBlk ) return SQLITE_CORRUPT_BKPT; iEnd = iFreeBlk + get2byte(&data[iFreeBlk+2]); + if( iEnd > pPage->pBt->usableSize ) return SQLITE_CORRUPT_BKPT; iSize = iEnd - iStart; iFreeBlk = get2byte(&data[iFreeBlk]); } @@ -54490,6 +55431,7 @@ static int decodeFlags(MemPage *pPage, int flagByte){ pPage->leaf = (u8)(flagByte>>3); assert( PTF_LEAF == 1<<3 ); flagByte &= ~PTF_LEAF; pPage->childPtrSize = 4-4*pPage->leaf; + pPage->xCellSize = cellSizePtr; pBt = pPage->pBt; if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){ /* EVIDENCE-OF: R-03640-13415 A value of 5 means the page is an interior @@ -54499,8 +55441,16 @@ static int decodeFlags(MemPage *pPage, int flagByte){ ** table b-tree page. */ assert( (PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF)==13 ); pPage->intKey = 1; - pPage->intKeyLeaf = pPage->leaf; - pPage->noPayload = !pPage->leaf; + if( pPage->leaf ){ + pPage->intKeyLeaf = 1; + pPage->noPayload = 0; + pPage->xParseCell = btreeParseCellPtr; + }else{ + pPage->intKeyLeaf = 0; + pPage->noPayload = 1; + pPage->xCellSize = cellSizePtrNoPayload; + pPage->xParseCell = btreeParseCellPtrNoPayload; + } pPage->maxLocal = pBt->maxLeaf; pPage->minLocal = pBt->minLeaf; }else if( flagByte==PTF_ZERODATA ){ @@ -54513,6 +55463,7 @@ static int decodeFlags(MemPage *pPage, int flagByte){ pPage->intKey = 0; pPage->intKeyLeaf = 0; pPage->noPayload = 0; + pPage->xParseCell = btreeParseCellPtrIndex; pPage->maxLocal = pBt->maxLocal; pPage->minLocal = pBt->minLocal; }else{ @@ -54536,6 +55487,7 @@ static int decodeFlags(MemPage *pPage, int flagByte){ static int btreeInitPage(MemPage *pPage){ assert( pPage->pBt!=0 ); + assert( pPage->pBt->db!=0 ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) ); assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) ); @@ -54567,6 +55519,7 @@ static int btreeInitPage(MemPage *pPage){ pPage->cellOffset = cellOffset = hdr + 8 + pPage->childPtrSize; pPage->aDataEnd = &data[usableSize]; pPage->aCellIdx = &data[cellOffset]; + pPage->aDataOfst = &data[pPage->childPtrSize]; /* EVIDENCE-OF: R-58015-48175 The two-byte integer at offset 5 designates ** the start of the cell content area. A zero value for this integer is ** interpreted as 65536. */ @@ -54594,20 +55547,19 @@ static int btreeInitPage(MemPage *pPage){ */ iCellFirst = cellOffset + 2*pPage->nCell; iCellLast = usableSize - 4; -#if defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK) - { + if( pBt->db->flags & SQLITE_CellSizeCk ){ int i; /* Index into the cell pointer array */ int sz; /* Size of a cell */ if( !pPage->leaf ) iCellLast--; for(i=0; inCell; i++){ - pc = get2byte(&data[cellOffset+i*2]); + pc = get2byteAligned(&data[cellOffset+i*2]); testcase( pc==iCellFirst ); testcase( pc==iCellLast ); if( pciCellLast ){ return SQLITE_CORRUPT_BKPT; } - sz = cellSizePtr(pPage, &data[pc]); + sz = pPage->xCellSize(pPage, &data[pc]); testcase( pc+sz==usableSize ); if( pc+sz>usableSize ){ return SQLITE_CORRUPT_BKPT; @@ -54615,7 +55567,6 @@ static int btreeInitPage(MemPage *pPage){ } if( !pPage->leaf ) iCellLast++; } -#endif /* Compute the total free space on the page ** EVIDENCE-OF: R-23588-34450 The two-byte integer at offset 1 gives the @@ -54688,6 +55639,7 @@ static void zeroPage(MemPage *pPage, int flags){ pPage->cellOffset = first; pPage->aDataEnd = &data[pBt->usableSize]; pPage->aCellIdx = &data[first]; + pPage->aDataOfst = &data[pPage->childPtrSize]; pPage->nOverflow = 0; assert( pBt->pageSize>=512 && pBt->pageSize<=65536 ); pPage->maskPage = (u16)(pBt->pageSize - 1); @@ -54706,16 +55658,16 @@ static MemPage *btreePageFromDbPage(DbPage *pDbPage, Pgno pgno, BtShared *pBt){ pPage->pDbPage = pDbPage; pPage->pBt = pBt; pPage->pgno = pgno; - pPage->hdrOffset = pPage->pgno==1 ? 100 : 0; + pPage->hdrOffset = pgno==1 ? 100 : 0; return pPage; } /* ** Get a page from the pager. Initialize the MemPage.pBt and -** MemPage.aData elements if needed. +** MemPage.aData elements if needed. See also: btreeGetUnusedPage(). ** -** If the noContent flag is set, it means that we do not care about -** the content of the page at this time. So do not go to the disk +** If the PAGER_GET_NOCONTENT flag is set, it means that we do not care +** about the content of the page at this time. So do not go to the disk ** to fetch the content. Just fill in the content with zeros for now. ** If in the future we call sqlite3PagerWrite() on this page, that ** means we have started to be concerned about content and the disk @@ -54767,35 +55719,62 @@ SQLITE_PRIVATE u32 sqlite3BtreeLastPage(Btree *p){ } /* -** Get a page from the pager and initialize it. This routine is just a -** convenience wrapper around separate calls to btreeGetPage() and -** btreeInitPage(). +** Get a page from the pager and initialize it. +** +** If pCur!=0 then the page is being fetched as part of a moveToChild() +** call. Do additional sanity checking on the page in this case. +** And if the fetch fails, this routine must decrement pCur->iPage. ** -** If an error occurs, then the value *ppPage is set to is undefined. It +** The page is fetched as read-write unless pCur is not NULL and is +** a read-only cursor. +** +** If an error occurs, then *ppPage is undefined. It ** may remain unchanged, or it may be set to an invalid value. */ static int getAndInitPage( BtShared *pBt, /* The database file */ Pgno pgno, /* Number of the page to get */ MemPage **ppPage, /* Write the page pointer here */ - int bReadonly /* PAGER_GET_READONLY or 0 */ + BtCursor *pCur, /* Cursor to receive the page, or NULL */ + int bReadOnly /* True for a read-only page */ ){ int rc; + DbPage *pDbPage; assert( sqlite3_mutex_held(pBt->mutex) ); - assert( bReadonly==PAGER_GET_READONLY || bReadonly==0 ); + assert( pCur==0 || ppPage==&pCur->apPage[pCur->iPage] ); + assert( pCur==0 || bReadOnly==pCur->curPagerFlags ); + assert( pCur==0 || pCur->iPage>0 ); if( pgno>btreePagecount(pBt) ){ rc = SQLITE_CORRUPT_BKPT; - }else{ - rc = btreeGetPage(pBt, pgno, ppPage, bReadonly); - if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){ - rc = btreeInitPage(*ppPage); - if( rc!=SQLITE_OK ){ - releasePage(*ppPage); - } + goto getAndInitPage_error; + } + rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadOnly); + if( rc ){ + goto getAndInitPage_error; + } + *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); + if( (*ppPage)->isInit==0 ){ + rc = btreeInitPage(*ppPage); + if( rc!=SQLITE_OK ){ + releasePage(*ppPage); + goto getAndInitPage_error; } } + /* If obtaining a child page for a cursor, we must verify that the page is + ** compatible with the root page. */ + if( pCur + && ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->curIntKey) + ){ + rc = SQLITE_CORRUPT_BKPT; + releasePage(*ppPage); + goto getAndInitPage_error; + } + return SQLITE_OK; + +getAndInitPage_error: + if( pCur ) pCur->iPage--; testcase( pgno==0 ); assert( pgno!=0 || rc==SQLITE_CORRUPT ); return rc; @@ -54805,18 +55784,49 @@ static int getAndInitPage( ** Release a MemPage. This should be called once for each prior ** call to btreeGetPage. */ +static void releasePageNotNull(MemPage *pPage){ + assert( pPage->aData ); + assert( pPage->pBt ); + assert( pPage->pDbPage!=0 ); + assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); + assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + sqlite3PagerUnrefNotNull(pPage->pDbPage); +} static void releasePage(MemPage *pPage){ - if( pPage ){ - assert( pPage->aData ); - assert( pPage->pBt ); - assert( pPage->pDbPage!=0 ); - assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); - assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - sqlite3PagerUnrefNotNull(pPage->pDbPage); + if( pPage ) releasePageNotNull(pPage); +} + +/* +** Get an unused page. +** +** This works just like btreeGetPage() with the addition: +** +** * If the page is already in use for some other purpose, immediately +** release it and return an SQLITE_CURRUPT error. +** * Make sure the isInit flag is clear +*/ +static int btreeGetUnusedPage( + BtShared *pBt, /* The btree */ + Pgno pgno, /* Number of the page to fetch */ + MemPage **ppPage, /* Return the page in this parameter */ + int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */ +){ + int rc = btreeGetPage(pBt, pgno, ppPage, flags); + if( rc==SQLITE_OK ){ + if( sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){ + releasePage(*ppPage); + *ppPage = 0; + return SQLITE_CORRUPT_BKPT; + } + (*ppPage)->isInit = 0; + }else{ + *ppPage = 0; } + return rc; } + /* ** During a rollback, when the pager reloads information into the cache ** so that the cache is restored to its original state at the start of @@ -55409,7 +56419,7 @@ SQLITE_PRIVATE int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve, if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE && ((pageSize-1)&pageSize)==0 ){ assert( (pageSize & 7)==0 ); - assert( !pBt->pPage1 && !pBt->pCursor ); + assert( !pBt->pCursor ); pBt->pageSize = (u32)pageSize; freeTempSpace(pBt); } @@ -55759,7 +56769,7 @@ static void unlockBtreeIfUnused(BtShared *pBt){ assert( pPage1->aData ); assert( sqlite3PagerRefcount(pBt->pPager)==1 ); pBt->pPage1 = 0; - releasePage(pPage1); + releasePageNotNull(pPage1); } } @@ -56064,15 +57074,17 @@ static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){ u8 isInitOrig = pPage->isInit; int i; int nCell; + int rc; - btreeInitPage(pPage); + rc = btreeInitPage(pPage); + if( rc ) return rc; nCell = pPage->nCell; for(i=0; ixParseCell(pPage, pCell, &info); if( info.iOverflow && pCell+info.iOverflow+3<=pPage->aData+pPage->maskPage && iFrom==get4byte(&pCell[info.iOverflow]) @@ -56371,7 +57383,7 @@ SQLITE_PRIVATE int sqlite3BtreeIncrVacuum(Btree *p){ static int autoVacuumCommit(BtShared *pBt){ int rc = SQLITE_OK; Pager *pPager = pBt->pPager; - VVA_ONLY( int nRef = sqlite3PagerRefcount(pPager) ); + VVA_ONLY( int nRef = sqlite3PagerRefcount(pPager); ) assert( sqlite3_mutex_held(pBt->mutex) ); invalidateAllOverflowCache(pBt); @@ -56813,6 +57825,7 @@ static int btreeCursor( BtCursor *pCur /* Space for new cursor */ ){ BtShared *pBt = p->pBt; /* Shared b-tree handle */ + BtCursor *pX; /* Looping over other all cursors */ assert( sqlite3BtreeHoldsMutex(p) ); assert( wrFlag==0 || wrFlag==1 ); @@ -56828,10 +57841,8 @@ static int btreeCursor( assert( p->inTrans>TRANS_NONE ); assert( wrFlag==0 || p->inTrans==TRANS_WRITE ); assert( pBt->pPage1 && pBt->pPage1->aData ); + assert( wrFlag==0 || (pBt->btsFlags & BTS_READ_ONLY)==0 ); - if( NEVER(wrFlag && (pBt->btsFlags & BTS_READ_ONLY)!=0) ){ - return SQLITE_READONLY; - } if( wrFlag ){ allocateTempSpace(pBt); if( pBt->pTmpSpace==0 ) return SQLITE_NOMEM; @@ -56850,10 +57861,16 @@ static int btreeCursor( pCur->pBt = pBt; assert( wrFlag==0 || wrFlag==BTCF_WriteFlag ); pCur->curFlags = wrFlag; - pCur->pNext = pBt->pCursor; - if( pCur->pNext ){ - pCur->pNext->pPrev = pCur; + pCur->curPagerFlags = wrFlag ? 0 : PAGER_GET_READONLY; + /* If there are two or more cursors on the same btree, then all such + ** cursors *must* have the BTCF_Multiple flag set. */ + for(pX=pBt->pCursor; pX; pX=pX->pNext){ + if( pX->pgnoRoot==(Pgno)iTable ){ + pX->curFlags |= BTCF_Multiple; + pCur->curFlags |= BTCF_Multiple; + } } + pCur->pNext = pBt->pCursor; pBt->pCursor = pCur; pCur->eState = CURSOR_INVALID; return SQLITE_OK; @@ -56866,9 +57883,13 @@ SQLITE_PRIVATE int sqlite3BtreeCursor( BtCursor *pCur /* Write new cursor here */ ){ int rc; - sqlite3BtreeEnter(p); - rc = btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur); - sqlite3BtreeLeave(p); + if( iTable<1 ){ + rc = SQLITE_CORRUPT_BKPT; + }else{ + sqlite3BtreeEnter(p); + rc = btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur); + sqlite3BtreeLeave(p); + } return rc; } @@ -56907,13 +57928,18 @@ SQLITE_PRIVATE int sqlite3BtreeCloseCursor(BtCursor *pCur){ BtShared *pBt = pCur->pBt; sqlite3BtreeEnter(pBtree); sqlite3BtreeClearCursor(pCur); - if( pCur->pPrev ){ - pCur->pPrev->pNext = pCur->pNext; - }else{ + assert( pBt->pCursor!=0 ); + if( pBt->pCursor==pCur ){ pBt->pCursor = pCur->pNext; - } - if( pCur->pNext ){ - pCur->pNext->pPrev = pCur->pPrev; + }else{ + BtCursor *pPrev = pBt->pCursor; + do{ + if( pPrev->pNext==pCur ){ + pPrev->pNext = pCur->pNext; + break; + } + pPrev = pPrev->pNext; + }while( ALWAYS(pPrev) ); } for(i=0; i<=pCur->iPage; i++){ releasePage(pCur->apPage[i]); @@ -56933,13 +57959,6 @@ SQLITE_PRIVATE int sqlite3BtreeCloseCursor(BtCursor *pCur){ ** ** BtCursor.info is a cache of the information in the current cell. ** Using this cache reduces the number of calls to btreeParseCell(). -** -** 2007-06-25: There is a bug in some versions of MSVC that cause the -** compiler to crash when getCellInfo() is implemented as a macro. -** But there is a measureable speed advantage to using the macro on gcc -** (when less compiler optimizations like -Os or -O0 are used and the -** compiler is not doing aggressive inlining.) So we use a real function -** for MSVC and a macro for everything else. Ticket #2457. */ #ifndef NDEBUG static void assertCellInfo(BtCursor *pCur){ @@ -56952,28 +57971,15 @@ SQLITE_PRIVATE int sqlite3BtreeCloseCursor(BtCursor *pCur){ #else #define assertCellInfo(x) #endif -#ifdef _MSC_VER - /* Use a real function in MSVC to work around bugs in that compiler. */ - static void getCellInfo(BtCursor *pCur){ - if( pCur->info.nSize==0 ){ - int iPage = pCur->iPage; - btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); - pCur->curFlags |= BTCF_ValidNKey; - }else{ - assertCellInfo(pCur); - } - } -#else /* if not _MSC_VER */ - /* Use a macro in all other compilers so that the function is inlined */ -#define getCellInfo(pCur) \ - if( pCur->info.nSize==0 ){ \ - int iPage = pCur->iPage; \ - btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); \ - pCur->curFlags |= BTCF_ValidNKey; \ - }else{ \ - assertCellInfo(pCur); \ +static SQLITE_NOINLINE void getCellInfo(BtCursor *pCur){ + if( pCur->info.nSize==0 ){ + int iPage = pCur->iPage; + pCur->curFlags |= BTCF_ValidNKey; + btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); + }else{ + assertCellInfo(pCur); } -#endif /* _MSC_VER */ +} #ifndef NDEBUG /* The next routine used only within assert() statements */ /* @@ -57431,13 +58437,18 @@ static const void *fetchPayload( BtCursor *pCur, /* Cursor pointing to entry to read from */ u32 *pAmt /* Write the number of available bytes here */ ){ + u32 amt; assert( pCur!=0 && pCur->iPage>=0 && pCur->apPage[pCur->iPage]); assert( pCur->eState==CURSOR_VALID ); assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); assert( cursorHoldsMutex(pCur) ); assert( pCur->aiIdx[pCur->iPage]apPage[pCur->iPage]->nCell ); assert( pCur->info.nSize>0 ); - *pAmt = pCur->info.nLocal; + assert( pCur->info.pPayload>pCur->apPage[pCur->iPage]->aData || CORRUPT_DB ); + assert( pCur->info.pPayloadapPage[pCur->iPage]->aDataEnd ||CORRUPT_DB); + amt = (int)(pCur->apPage[pCur->iPage]->aDataEnd - pCur->info.pPayload); + if( pCur->info.nLocalinfo.nLocal; + *pAmt = amt; return (void*)pCur->info.pPayload; } @@ -57474,9 +58485,6 @@ SQLITE_PRIVATE const void *sqlite3BtreeDataFetch(BtCursor *pCur, u32 *pAmt){ ** vice-versa). */ static int moveToChild(BtCursor *pCur, u32 newPgno){ - int rc; - int i = pCur->iPage; - MemPage *pNewPage; BtShared *pBt = pCur->pBt; assert( cursorHoldsMutex(pCur) ); @@ -57486,19 +58494,12 @@ static int moveToChild(BtCursor *pCur, u32 newPgno){ if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ return SQLITE_CORRUPT_BKPT; } - rc = getAndInitPage(pBt, newPgno, &pNewPage, - (pCur->curFlags & BTCF_WriteFlag)==0 ? PAGER_GET_READONLY : 0); - if( rc ) return rc; - pCur->apPage[i+1] = pNewPage; - pCur->aiIdx[i+1] = 0; - pCur->iPage++; - pCur->info.nSize = 0; pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); - if( pNewPage->nCell<1 || pNewPage->intKey!=pCur->apPage[i]->intKey ){ - return SQLITE_CORRUPT_BKPT; - } - return SQLITE_OK; + pCur->iPage++; + pCur->aiIdx[pCur->iPage] = 0; + return getAndInitPage(pBt, newPgno, &pCur->apPage[pCur->iPage], + pCur, pCur->curPagerFlags); } #if SQLITE_DEBUG @@ -57542,11 +58543,9 @@ static void moveToParent(BtCursor *pCur){ pCur->apPage[pCur->iPage]->pgno ); testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell ); - - releasePage(pCur->apPage[pCur->iPage]); - pCur->iPage--; pCur->info.nSize = 0; pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); + releasePageNotNull(pCur->apPage[pCur->iPage--]); } /* @@ -57587,18 +58586,23 @@ static int moveToRoot(BtCursor *pCur){ } if( pCur->iPage>=0 ){ - while( pCur->iPage ) releasePage(pCur->apPage[pCur->iPage--]); + while( pCur->iPage ){ + assert( pCur->apPage[pCur->iPage]!=0 ); + releasePageNotNull(pCur->apPage[pCur->iPage--]); + } }else if( pCur->pgnoRoot==0 ){ pCur->eState = CURSOR_INVALID; return SQLITE_OK; }else{ + assert( pCur->iPage==(-1) ); rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->apPage[0], - (pCur->curFlags & BTCF_WriteFlag)==0 ? PAGER_GET_READONLY : 0); + 0, pCur->curPagerFlags); if( rc!=SQLITE_OK ){ pCur->eState = CURSOR_INVALID; return rc; } pCur->iPage = 0; + pCur->curIntKey = pCur->apPage[0]->intKey; } pRoot = pCur->apPage[0]; assert( pRoot->pgno==pCur->pgnoRoot ); @@ -57801,7 +58805,7 @@ SQLITE_PRIVATE int sqlite3BtreeMovetoUnpacked( /* If the cursor is already positioned at the point we are trying ** to move to, then just return without doing any work */ if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0 - && pCur->apPage[0]->intKey + && pCur->curIntKey ){ if( pCur->info.nKey==intKey ){ *pRes = 0; @@ -57836,7 +58840,8 @@ SQLITE_PRIVATE int sqlite3BtreeMovetoUnpacked( assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); return SQLITE_OK; } - assert( pCur->apPage[0]->intKey || pIdxKey ); + assert( pCur->apPage[0]->intKey==pCur->curIntKey ); + assert( pCur->curIntKey || pIdxKey ); for(;;){ int lwr, upr, idx, c; Pgno chldPg; @@ -57859,7 +58864,7 @@ SQLITE_PRIVATE int sqlite3BtreeMovetoUnpacked( if( xRecordCompare==0 ){ for(;;){ i64 nCellKey; - pCell = findCell(pPage, idx) + pPage->childPtrSize; + pCell = findCellPastPtr(pPage, idx); if( pPage->intKeyLeaf ){ while( 0x80 <= *(pCell++) ){ if( pCell>=pPage->aDataEnd ) return SQLITE_CORRUPT_BKPT; @@ -57891,8 +58896,8 @@ SQLITE_PRIVATE int sqlite3BtreeMovetoUnpacked( } }else{ for(;;){ - int nCell; - pCell = findCell(pPage, idx) + pPage->childPtrSize; + int nCell; /* Size of the pCell cell in bytes */ + pCell = findCellPastPtr(pPage, idx); /* The maximum supported page-size is 65536 bytes. This means that ** the maximum number of record bytes stored on an index B-Tree @@ -57920,12 +58925,25 @@ SQLITE_PRIVATE int sqlite3BtreeMovetoUnpacked( /* The record flows over onto one or more overflow pages. In ** this case the whole cell needs to be parsed, a buffer allocated ** and accessPayload() used to retrieve the record into the - ** buffer before VdbeRecordCompare() can be called. */ + ** buffer before VdbeRecordCompare() can be called. + ** + ** If the record is corrupt, the xRecordCompare routine may read + ** up to two varints past the end of the buffer. An extra 18 + ** bytes of padding is allocated at the end of the buffer in + ** case this happens. */ void *pCellKey; u8 * const pCellBody = pCell - pPage->childPtrSize; - btreeParseCellPtr(pPage, pCellBody, &pCur->info); + pPage->xParseCell(pPage, pCellBody, &pCur->info); nCell = (int)pCur->info.nKey; - pCellKey = sqlite3Malloc( nCell ); + testcase( nCell<0 ); /* True if key size is 2^32 or more */ + testcase( nCell==0 ); /* Invalid key size: 0x80 0x80 0x00 */ + testcase( nCell==1 ); /* Invalid key size: 0x80 0x80 0x01 */ + testcase( nCell==2 ); /* Minimum legal index key size */ + if( nCell<2 ){ + rc = SQLITE_CORRUPT_BKPT; + goto moveto_finish; + } + pCellKey = sqlite3Malloc( nCell+18 ); if( pCellKey==0 ){ rc = SQLITE_NOMEM; goto moveto_finish; @@ -58218,8 +59236,7 @@ SQLITE_PRIVATE int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ ** sqlite3PagerUnref() on the new page when it is done. ** ** SQLITE_OK is returned on success. Any other return value indicates -** an error. *ppPage and *pPgno are undefined in the event of an error. -** Do not invoke sqlite3PagerUnref() on *ppPage if an error is returned. +** an error. *ppPage is set to NULL in the event of an error. ** ** If the "nearby" parameter is not 0, then an effort is made to ** locate a page close to the page number "nearby". This can be used in an @@ -58262,6 +59279,7 @@ static int allocateBtreePage( /* There are pages on the freelist. Reuse one of those pages. */ Pgno iTrunk; u8 searchList = 0; /* If the free-list must be searched for 'nearby' */ + u32 nSearch = 0; /* Count of the number of search attempts */ /* If eMode==BTALLOC_EXACT and a query of the pointer-map ** shows that the page 'nearby' is somewhere on the free-list, then @@ -58310,10 +59328,10 @@ static int allocateBtreePage( iTrunk = get4byte(&pPage1->aData[32]); } testcase( iTrunk==mxPage ); - if( iTrunk>mxPage ){ + if( iTrunk>mxPage || nSearch++ > n ){ rc = SQLITE_CORRUPT_BKPT; }else{ - rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0); + rc = btreeGetUnusedPage(pBt, iTrunk, &pTrunk, 0); } if( rc ){ pTrunk = 0; @@ -58378,7 +59396,7 @@ static int allocateBtreePage( goto end_allocate_page; } testcase( iNewTrunk==mxPage ); - rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0); + rc = btreeGetUnusedPage(pBt, iNewTrunk, &pNewTrunk, 0); if( rc!=SQLITE_OK ){ goto end_allocate_page; } @@ -58458,11 +59476,12 @@ static int allocateBtreePage( } put4byte(&aData[4], k-1); noContent = !btreeGetHasContent(pBt, *pPgno)? PAGER_GET_NOCONTENT : 0; - rc = btreeGetPage(pBt, *pPgno, ppPage, noContent); + rc = btreeGetUnusedPage(pBt, *pPgno, ppPage, noContent); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite((*ppPage)->pDbPage); if( rc!=SQLITE_OK ){ releasePage(*ppPage); + *ppPage = 0; } } searchList = 0; @@ -58506,7 +59525,7 @@ static int allocateBtreePage( MemPage *pPg = 0; TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage)); assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) ); - rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent); + rc = btreeGetUnusedPage(pBt, pBt->nPage, &pPg, bNoContent); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pPg->pDbPage); releasePage(pPg); @@ -58520,11 +59539,12 @@ static int allocateBtreePage( *pPgno = pBt->nPage; assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); - rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent); + rc = btreeGetUnusedPage(pBt, *pPgno, ppPage, bNoContent); if( rc ) return rc; rc = sqlite3PagerWrite((*ppPage)->pDbPage); if( rc!=SQLITE_OK ){ releasePage(*ppPage); + *ppPage = 0; } TRACE(("ALLOCATE: %d from end of file\n", *pPgno)); } @@ -58534,17 +59554,8 @@ static int allocateBtreePage( end_allocate_page: releasePage(pTrunk); releasePage(pPrevTrunk); - if( rc==SQLITE_OK ){ - if( sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){ - releasePage(*ppPage); - *ppPage = 0; - return SQLITE_CORRUPT_BKPT; - } - (*ppPage)->isInit = 0; - }else{ - *ppPage = 0; - } - assert( rc!=SQLITE_OK || sqlite3PagerIswriteable((*ppPage)->pDbPage) ); + assert( rc!=SQLITE_OK || sqlite3PagerPageRefcount((*ppPage)->pDbPage)<=1 ); + assert( rc!=SQLITE_OK || (*ppPage)->isInit==0 ); return rc; } @@ -58569,9 +59580,10 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ int nFree; /* Initial number of pages on free-list */ assert( sqlite3_mutex_held(pBt->mutex) ); - assert( iPage>1 ); + assert( CORRUPT_DB || iPage>1 ); assert( !pMemPage || pMemPage->pgno==iPage ); + if( iPage<2 ) return SQLITE_CORRUPT_BKPT; if( pMemPage ){ pPage = pMemPage; sqlite3PagerRef(pPage->pDbPage); @@ -58711,7 +59723,7 @@ static int clearCell( u32 ovflPageSize; assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - btreeParseCellPtr(pPage, pCell, &info); + pPage->xParseCell(pPage, pCell, &info); *pnSize = info.nSize; if( info.iOverflow==0 ){ return SQLITE_OK; /* No overflow pages. Return without doing anything */ @@ -58723,7 +59735,9 @@ static int clearCell( assert( pBt->usableSize > 4 ); ovflPageSize = pBt->usableSize - 4; nOvfl = (info.nPayload - info.nLocal + ovflPageSize - 1)/ovflPageSize; - assert( ovflPgno==0 || nOvfl>0 ); + assert( nOvfl>0 || + (CORRUPT_DB && (info.nPayload + ovflPageSize)0x7fffffff || pKey==0) ){ - return SQLITE_CORRUPT_BKPT; - } + assert( nKey<=0x7fffffff && pKey!=0 ); nPayload = (int)nKey; pSrc = pKey; nSrc = (int)nKey; @@ -58863,7 +59875,7 @@ static int fillInCell( #if SQLITE_DEBUG { CellInfo info; - btreeParseCellPtr(pPage, pCell, &info); + pPage->xParseCell(pPage, pCell, &info); assert( nHeader=(int)(info.pPayload - pCell) ); assert( info.nKey==nKey ); assert( *pnSize == info.nSize ); @@ -58978,7 +59990,7 @@ static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ if( *pRC ) return; assert( idx>=0 && idxnCell ); - assert( sz==cellSize(pPage, idx) ); + assert( CORRUPT_DB || sz==cellSize(pPage, idx) ); assert( sqlite3PagerIswriteable(pPage->pDbPage) ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); data = pPage->aData; @@ -59033,10 +60045,8 @@ static void insertCell( ){ int idx = 0; /* Where to write new cell content in data[] */ int j; /* Loop counter */ - int end; /* First byte past the last cell pointer in data[] */ - int ins; /* Index in data[] where new cell pointer is inserted */ - int cellOffset; /* Address of first cell pointer in data[] */ u8 *data; /* The content of the whole page */ + u8 *pIns; /* The point in pPage->aCellIdx[] where no cell inserted */ if( *pRC ) return; @@ -59051,7 +60061,7 @@ static void insertCell( ** wanted to be less than 4 but got rounded up to 4 on the leaf, then size ** might be less than 8 (leaf-size + pointer) on the interior node. Hence ** the term after the || in the following assert(). */ - assert( sz==cellSizePtr(pPage, pCell) || (sz==8 && iChild>0) ); + assert( sz==pPage->xCellSize(pPage, pCell) || (sz==8 && iChild>0) ); if( pPage->nOverflow || sz+2>pPage->nFree ){ if( pTemp ){ memcpy(pTemp, pCell, sz); @@ -59064,6 +60074,14 @@ static void insertCell( assert( j<(int)(sizeof(pPage->apOvfl)/sizeof(pPage->apOvfl[0])) ); pPage->apOvfl[j] = pCell; pPage->aiOvfl[j] = (u16)i; + + /* When multiple overflows occur, they are always sequential and in + ** sorted order. This invariants arise because multiple overflows can + ** only occur when inserting divider cells into the parent page during + ** balancing, and the dividers are adjacent and sorted. + */ + assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */ + assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */ }else{ int rc = sqlite3PagerWrite(pPage->pDbPage); if( rc!=SQLITE_OK ){ @@ -59072,24 +60090,26 @@ static void insertCell( } assert( sqlite3PagerIswriteable(pPage->pDbPage) ); data = pPage->aData; - cellOffset = pPage->cellOffset; - end = cellOffset + 2*pPage->nCell; - ins = cellOffset + 2*i; + assert( &data[pPage->cellOffset]==pPage->aCellIdx ); rc = allocateSpace(pPage, sz, &idx); if( rc ){ *pRC = rc; return; } - /* The allocateSpace() routine guarantees the following two properties - ** if it returns success */ - assert( idx >= end+2 ); + /* The allocateSpace() routine guarantees the following properties + ** if it returns successfully */ + assert( idx >= 0 ); + assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB ); assert( idx+sz <= (int)pPage->pBt->usableSize ); - pPage->nCell++; pPage->nFree -= (u16)(2 + sz); memcpy(&data[idx], pCell, sz); if( iChild ){ put4byte(&data[idx], iChild); } - memmove(&data[ins+2], &data[ins], end-ins); - put2byte(&data[ins], idx); - put2byte(&data[pPage->hdrOffset+3], pPage->nCell); + pIns = pPage->aCellIdx + i*2; + memmove(pIns+2, pIns, 2*(pPage->nCell - i)); + put2byte(pIns, idx); + pPage->nCell++; + /* increment the cell count */ + if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++; + assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell ); #ifndef SQLITE_OMIT_AUTOVACUUM if( pPage->pBt->autoVacuum ){ /* The cell may contain a pointer to an overflow page. If so, write @@ -59101,6 +60121,52 @@ static void insertCell( } } +/* +** A CellArray object contains a cache of pointers and sizes for a +** consecutive sequence of cells that might be held multiple pages. +*/ +typedef struct CellArray CellArray; +struct CellArray { + int nCell; /* Number of cells in apCell[] */ + MemPage *pRef; /* Reference page */ + u8 **apCell; /* All cells begin balanced */ + u16 *szCell; /* Local size of all cells in apCell[] */ +}; + +/* +** Make sure the cell sizes at idx, idx+1, ..., idx+N-1 have been +** computed. +*/ +static void populateCellCache(CellArray *p, int idx, int N){ + assert( idx>=0 && idx+N<=p->nCell ); + while( N>0 ){ + assert( p->apCell[idx]!=0 ); + if( p->szCell[idx]==0 ){ + p->szCell[idx] = p->pRef->xCellSize(p->pRef, p->apCell[idx]); + }else{ + assert( CORRUPT_DB || + p->szCell[idx]==p->pRef->xCellSize(p->pRef, p->apCell[idx]) ); + } + idx++; + N--; + } +} + +/* +** Return the size of the Nth element of the cell array +*/ +static SQLITE_NOINLINE u16 computeCellSize(CellArray *p, int N){ + assert( N>=0 && NnCell ); + assert( p->szCell[N]==0 ); + p->szCell[N] = p->pRef->xCellSize(p->pRef, p->apCell[N]); + return p->szCell[N]; +} +static u16 cachedCellSize(CellArray *p, int N){ + assert( N>=0 && NnCell ); + if( p->szCell[N] ) return p->szCell[N]; + return computeCellSize(p, N); +} + /* ** Array apCell[] contains pointers to nCell b-tree page cells. The ** szCell[] array contains the size in bytes of each cell. This function @@ -59114,7 +60180,7 @@ static void insertCell( ** The MemPage.nFree field is invalidated by this function. It is the ** responsibility of the caller to set it correctly. */ -static void rebuildPage( +static int rebuildPage( MemPage *pPg, /* Edit this page */ int nCell, /* Final number of cells on page */ u8 **apCell, /* Array of cells */ @@ -59139,10 +60205,12 @@ static void rebuildPage( pCell = &pTmp[pCell - aData]; } pData -= szCell[i]; - memcpy(pData, pCell, szCell[i]); put2byte(pCellptr, (pData - aData)); pCellptr += 2; - assert( szCell[i]==cellSizePtr(pPg, pCell) ); + if( pData < pCellptr ) return SQLITE_CORRUPT_BKPT; + memcpy(pData, pCell, szCell[i]); + assert( szCell[i]==pPg->xCellSize(pPg, pCell) || CORRUPT_DB ); + testcase( szCell[i]!=pPg->xCellSize(pPg,pCell) ); } /* The pPg->nFree field is now set incorrectly. The caller will fix it. */ @@ -59153,6 +60221,7 @@ static void rebuildPage( put2byte(&aData[hdr+3], pPg->nCell); put2byte(&aData[hdr+5], pData - aData); aData[hdr+7] = 0x00; + return SQLITE_OK; } /* @@ -59185,25 +60254,25 @@ static int pageInsertArray( u8 *pBegin, /* End of cell-pointer array */ u8 **ppData, /* IN/OUT: Page content -area pointer */ u8 *pCellptr, /* Pointer to cell-pointer area */ + int iFirst, /* Index of first cell to add */ int nCell, /* Number of cells to add to pPg */ - u8 **apCell, /* Array of cells */ - u16 *szCell /* Array of cell sizes */ + CellArray *pCArray /* Array of cells */ ){ int i; u8 *aData = pPg->aData; u8 *pData = *ppData; - const int bFreelist = aData[1] || aData[2]; + int iEnd = iFirst + nCell; assert( CORRUPT_DB || pPg->hdrOffset==0 ); /* Never called on page 1 */ - for(i=0; iapCell[i], sz); put2byte(pCellptr, (pSlot - aData)); pCellptr += 2; } @@ -59222,22 +60291,27 @@ static int pageInsertArray( */ static int pageFreeArray( MemPage *pPg, /* Page to edit */ + int iFirst, /* First cell to delete */ int nCell, /* Cells to delete */ - u8 **apCell, /* Array of cells */ - u16 *szCell /* Array of cell sizes */ + CellArray *pCArray /* Array of cells */ ){ u8 * const aData = pPg->aData; u8 * const pEnd = &aData[pPg->pBt->usableSize]; u8 * const pStart = &aData[pPg->hdrOffset + 8 + pPg->childPtrSize]; int nRet = 0; int i; + int iEnd = iFirst + nCell; u8 *pFree = 0; int szFree = 0; - for(i=0; iapCell[i]; if( pCell>=pStart && pCellszCell[i]; assert( sz>0 ); if( pFree!=(pCell + sz) ){ if( pFree ){ assert( pFree>aData && (pFree - aData)<65536 ); @@ -59272,13 +60346,12 @@ static int pageFreeArray( ** The pPg->nFree field is invalid when this function returns. It is the ** responsibility of the caller to set it correctly. */ -static void editPage( +static int editPage( MemPage *pPg, /* Edit this page */ int iOld, /* Index of first cell currently on page */ int iNew, /* Index of new first cell on page */ int nNew, /* Final number of cells on page */ - u8 **apCell, /* Array of cells */ - u16 *szCell /* Array of cell sizes */ + CellArray *pCArray /* Array of cells and sizes */ ){ u8 * const aData = pPg->aData; const int hdr = pPg->hdrOffset; @@ -59297,16 +60370,12 @@ static void editPage( /* Remove cells from the start and end of the page */ if( iOldaCellIdx, &pPg->aCellIdx[nShift*2], nCell*2); nCell -= nShift; } if( iNewEnd < iOldEnd ){ - nCell -= pageFreeArray( - pPg, iOldEnd-iNewEnd, &apCell[iNewEnd], &szCell[iNewEnd] - ); + nCell -= pageFreeArray(pPg, iNewEnd, iOldEnd - iNewEnd, pCArray); } pData = &aData[get2byteNotZero(&aData[hdr+5])]; @@ -59320,7 +60389,7 @@ static void editPage( memmove(&pCellptr[nAdd*2], pCellptr, nCell*2); if( pageInsertArray( pPg, pBegin, &pData, pCellptr, - nAdd, &apCell[iNew], &szCell[iNew] + iNew, nAdd, pCArray ) ) goto editpage_fail; nCell += nAdd; } @@ -59334,7 +60403,7 @@ static void editPage( nCell++; if( pageInsertArray( pPg, pBegin, &pData, pCellptr, - 1, &apCell[iCell + iNew], &szCell[iCell + iNew] + iCell+iNew, 1, pCArray ) ) goto editpage_fail; } } @@ -59343,7 +60412,7 @@ static void editPage( pCellptr = &pPg->aCellIdx[nCell*2]; if( pageInsertArray( pPg, pBegin, &pData, pCellptr, - nNew-nCell, &apCell[iNew+nCell], &szCell[iNew+nCell] + iNew+nCell, nNew-nCell, pCArray ) ) goto editpage_fail; pPg->nCell = nNew; @@ -59354,19 +60423,21 @@ static void editPage( #ifdef SQLITE_DEBUG for(i=0; iaCellIdx[i*2]); + u8 *pCell = pCArray->apCell[i+iNew]; + int iOff = get2byteAligned(&pPg->aCellIdx[i*2]); if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){ pCell = &pTmp[pCell - aData]; } - assert( 0==memcmp(pCell, &aData[iOff], szCell[i+iNew]) ); + assert( 0==memcmp(pCell, &aData[iOff], + pCArray->pRef->xCellSize(pCArray->pRef, pCArray->apCell[i+iNew])) ); } #endif - return; + return SQLITE_OK; editpage_fail: /* Unable to edit this page. Rebuild it from scratch instead. */ - rebuildPage(pPg, nNew, &apCell[iNew], &szCell[iNew]); + populateCellCache(pCArray, iNew, nNew); + return rebuildPage(pPg, nNew, &pCArray->apCell[iNew], &pCArray->szCell[iNew]); } /* @@ -59432,13 +60503,14 @@ static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){ u8 *pOut = &pSpace[4]; u8 *pCell = pPage->apOvfl[0]; - u16 szCell = cellSizePtr(pPage, pCell); + u16 szCell = pPage->xCellSize(pPage, pCell); u8 *pStop; assert( sqlite3PagerIswriteable(pNew->pDbPage) ); assert( pPage->aData[0]==(PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF) ); zeroPage(pNew, PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF); - rebuildPage(pNew, 1, &pCell, &szCell); + rc = rebuildPage(pNew, 1, &pCell, &szCell); + if( NEVER(rc) ) return rc; pNew->nFree = pBt->usableSize - pNew->cellOffset - 2 - szCell; /* If this is an auto-vacuum database, update the pointer map @@ -59511,7 +60583,7 @@ static int ptrmapCheckPages(MemPage **apPage, int nPage){ u8 *z; z = findCell(pPage, j); - btreeParseCellPtr(pPage, z, &info); + pPage->xParseCell(pPage, z, &info); if( info.iOverflow ){ Pgno ovfl = get4byte(&z[info.iOverflow]); ptrmapGet(pBt, ovfl, &e, &n); @@ -59642,7 +60714,6 @@ static int balance_nonroot( int bBulk /* True if this call is part of a bulk load */ ){ BtShared *pBt; /* The whole database */ - int nCell = 0; /* Number of cells in apCell[] */ int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */ int nNew = 0; /* Number of pages in apNew[] */ int nOld; /* Number of pages in apOld[] */ @@ -59653,7 +60724,6 @@ static int balance_nonroot( int leafData; /* True if pPage is a leaf of a LEAFDATA tree */ int usableSpace; /* Bytes in pPage beyond the header */ int pageFlags; /* Value of pPage->aData[0] */ - int subtotal; /* Subtotal of bytes in cells on one page */ int iSpace1 = 0; /* First unused byte of aSpace1[] */ int iOvflSpace = 0; /* First unused byte of aOvflSpace[] */ int szScratch; /* Size of scratch memory requested */ @@ -59661,19 +60731,20 @@ static int balance_nonroot( MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */ u8 *pRight; /* Location in parent of right-sibling pointer */ u8 *apDiv[NB-1]; /* Divider cells in pParent */ - int cntNew[NB+2]; /* Index in aCell[] of cell after i-th page */ - int cntOld[NB+2]; /* Old index in aCell[] after i-th page */ + int cntNew[NB+2]; /* Index in b.paCell[] of cell after i-th page */ + int cntOld[NB+2]; /* Old index in b.apCell[] */ int szNew[NB+2]; /* Combined size of cells placed on i-th page */ - u8 **apCell = 0; /* All cells begin balanced */ - u16 *szCell; /* Local size of all cells in apCell[] */ u8 *aSpace1; /* Space for copies of dividers cells */ Pgno pgno; /* Temp var to store a page number in */ u8 abDone[NB+2]; /* True after i'th new page is populated */ Pgno aPgno[NB+2]; /* Page numbers of new pages before shuffling */ Pgno aPgOrder[NB+2]; /* Copy of aPgno[] used for sorting pages */ u16 aPgFlags[NB+2]; /* flags field of new pages before shuffling */ + CellArray b; /* Parsed information on cells being balanced */ memset(abDone, 0, sizeof(abDone)); + b.nCell = 0; + b.apCell = 0; pBt = pParent->pBt; assert( sqlite3_mutex_held(pBt->mutex) ); assert( sqlite3PagerIswriteable(pParent->pDbPage) ); @@ -59715,7 +60786,6 @@ static int balance_nonroot( }else if( iParentIdx==i ){ nxDiv = i-2+bBulk; }else{ - assert( bBulk==0 ); nxDiv = iParentIdx-1; } i = 2-bBulk; @@ -59728,7 +60798,7 @@ static int balance_nonroot( } pgno = get4byte(pRight); while( 1 ){ - rc = getAndInitPage(pBt, pgno, &apOld[i], 0); + rc = getAndInitPage(pBt, pgno, &apOld[i], 0, 0); if( rc ){ memset(apOld, 0, (i+1)*sizeof(MemPage*)); goto balance_cleanup; @@ -59739,12 +60809,12 @@ static int balance_nonroot( if( i+nxDiv==pParent->aiOvfl[0] && pParent->nOverflow ){ apDiv[i] = pParent->apOvfl[0]; pgno = get4byte(apDiv[i]); - szNew[i] = cellSizePtr(pParent, apDiv[i]); + szNew[i] = pParent->xCellSize(pParent, apDiv[i]); pParent->nOverflow = 0; }else{ apDiv[i] = findCell(pParent, i+nxDiv-pParent->nOverflow); pgno = get4byte(apDiv[i]); - szNew[i] = cellSizePtr(pParent, apDiv[i]); + szNew[i] = pParent->xCellSize(pParent, apDiv[i]); /* Drop the cell from the parent page. apDiv[i] still points to ** the cell within the parent, even though it has been dropped. @@ -59783,130 +60853,201 @@ static int balance_nonroot( ** Allocate space for memory structures */ szScratch = - nMaxCells*sizeof(u8*) /* apCell */ - + nMaxCells*sizeof(u16) /* szCell */ + nMaxCells*sizeof(u8*) /* b.apCell */ + + nMaxCells*sizeof(u16) /* b.szCell */ + pBt->pageSize; /* aSpace1 */ /* EVIDENCE-OF: R-28375-38319 SQLite will never request a scratch buffer ** that is more than 6 times the database page size. */ assert( szScratch<=6*(int)pBt->pageSize ); - apCell = sqlite3ScratchMalloc( szScratch ); - if( apCell==0 ){ + b.apCell = sqlite3ScratchMalloc( szScratch ); + if( b.apCell==0 ){ rc = SQLITE_NOMEM; goto balance_cleanup; } - szCell = (u16*)&apCell[nMaxCells]; - aSpace1 = (u8*)&szCell[nMaxCells]; + b.szCell = (u16*)&b.apCell[nMaxCells]; + aSpace1 = (u8*)&b.szCell[nMaxCells]; assert( EIGHT_BYTE_ALIGNMENT(aSpace1) ); /* ** Load pointers to all cells on sibling pages and the divider cells - ** into the local apCell[] array. Make copies of the divider cells + ** into the local b.apCell[] array. Make copies of the divider cells ** into space obtained from aSpace1[]. The divider cells have already ** been removed from pParent. ** ** If the siblings are on leaf pages, then the child pointers of the ** divider cells are stripped from the cells before they are copied - ** into aSpace1[]. In this way, all cells in apCell[] are without + ** into aSpace1[]. In this way, all cells in b.apCell[] are without ** child pointers. If siblings are not leaves, then all cell in - ** apCell[] include child pointers. Either way, all cells in apCell[] + ** b.apCell[] include child pointers. Either way, all cells in b.apCell[] ** are alike. ** ** leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf. ** leafData: 1 if pPage holds key+data and pParent holds only keys. */ - leafCorrection = apOld[0]->leaf*4; - leafData = apOld[0]->intKeyLeaf; + b.pRef = apOld[0]; + leafCorrection = b.pRef->leaf*4; + leafData = b.pRef->intKeyLeaf; for(i=0; inCell; + u8 *aData = pOld->aData; + u16 maskPage = pOld->maskPage; + u8 *piCell = aData + pOld->cellOffset; + u8 *piEnd; + + /* Verify that all sibling pages are of the same "type" (table-leaf, + ** table-interior, index-leaf, or index-interior). + */ + if( pOld->aData[0]!=apOld[0]->aData[0] ){ + rc = SQLITE_CORRUPT_BKPT; + goto balance_cleanup; + } - limit = pOld->nCell+pOld->nOverflow; + /* Load b.apCell[] with pointers to all cells in pOld. If pOld + ** constains overflow cells, include them in the b.apCell[] array + ** in the correct spot. + ** + ** Note that when there are multiple overflow cells, it is always the + ** case that they are sequential and adjacent. This invariant arises + ** because multiple overflows can only occurs when inserting divider + ** cells into a parent on a prior balance, and divider cells are always + ** adjacent and are inserted in order. There is an assert() tagged + ** with "NOTE 1" in the overflow cell insertion loop to prove this + ** invariant. + ** + ** This must be done in advance. Once the balance starts, the cell + ** offset section of the btree page will be overwritten and we will no + ** long be able to find the cells if a pointer to each cell is not saved + ** first. + */ + memset(&b.szCell[b.nCell], 0, sizeof(b.szCell[0])*limit); if( pOld->nOverflow>0 ){ + memset(&b.szCell[b.nCell+limit], 0, sizeof(b.szCell[0])*pOld->nOverflow); + limit = pOld->aiOvfl[0]; for(j=0; jaData; - u16 maskPage = pOld->maskPage; - u16 cellOffset = pOld->cellOffset; - for(j=0; jnOverflow; k++){ + assert( k==0 || pOld->aiOvfl[k-1]+1==pOld->aiOvfl[k] );/* NOTE 1 */ + b.apCell[b.nCell] = pOld->apOvfl[k]; + b.nCell++; } - } - cntOld[i] = nCell; + } + piEnd = aData + pOld->cellOffset + 2*pOld->nCell; + while( piCellmaxLocal+23 ); assert( iSpace1 <= (int)pBt->pageSize ); memcpy(pTemp, apDiv[i], sz); - apCell[nCell] = pTemp+leafCorrection; + b.apCell[b.nCell] = pTemp+leafCorrection; assert( leafCorrection==0 || leafCorrection==4 ); - szCell[nCell] = szCell[nCell] - leafCorrection; + b.szCell[b.nCell] = b.szCell[b.nCell] - leafCorrection; if( !pOld->leaf ){ assert( leafCorrection==0 ); assert( pOld->hdrOffset==0 ); /* The right pointer of the child page pOld becomes the left ** pointer of the divider cell */ - memcpy(apCell[nCell], &pOld->aData[8], 4); + memcpy(b.apCell[b.nCell], &pOld->aData[8], 4); }else{ assert( leafCorrection==4 ); - if( szCell[nCell]<4 ){ + while( b.szCell[b.nCell]<4 ){ /* Do not allow any cells smaller than 4 bytes. If a smaller cell ** does exist, pad it with 0x00 bytes. */ - assert( szCell[nCell]==3 ); - assert( apCell[nCell]==&aSpace1[iSpace1-3] ); + assert( b.szCell[b.nCell]==3 || CORRUPT_DB ); + assert( b.apCell[b.nCell]==&aSpace1[iSpace1-3] || CORRUPT_DB ); aSpace1[iSpace1++] = 0x00; - szCell[nCell] = 4; + b.szCell[b.nCell]++; } } - nCell++; + b.nCell++; } } /* - ** Figure out the number of pages needed to hold all nCell cells. + ** Figure out the number of pages needed to hold all b.nCell cells. ** Store this number in "k". Also compute szNew[] which is the total ** size of all cells on the i-th page and cntNew[] which is the index - ** in apCell[] of the cell that divides page i from page i+1. - ** cntNew[k] should equal nCell. + ** in b.apCell[] of the cell that divides page i from page i+1. + ** cntNew[k] should equal b.nCell. ** ** Values computed by this block: ** ** k: The total number of sibling pages ** szNew[i]: Spaced used on the i-th sibling page. - ** cntNew[i]: Index in apCell[] and szCell[] for the first cell to + ** cntNew[i]: Index in b.apCell[] and b.szCell[] for the first cell to ** the right of the i-th sibling page. ** usableSpace: Number of bytes of space available on each sibling. ** */ usableSpace = pBt->usableSize - 12 + leafCorrection; - for(subtotal=k=i=0; i usableSpace ){ - szNew[k] = subtotal - szCell[i] - 2; - cntNew[k] = i; - if( leafData ){ i--; } - subtotal = 0; - k++; - if( k>NB+1 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } - } - } - szNew[k] = subtotal; - cntNew[k] = nCell; - k++; + for(i=0; inFree; + if( szNew[i]<0 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } + for(j=0; jnOverflow; j++){ + szNew[i] += 2 + p->xCellSize(p, p->apOvfl[j]); + } + cntNew[i] = cntOld[i]; + } + k = nOld; + for(i=0; iusableSpace ){ + if( i+1>=k ){ + k = i+2; + if( k>NB+2 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } + szNew[k-1] = 0; + cntNew[k-1] = b.nCell; + } + sz = 2 + cachedCellSize(&b, cntNew[i]-1); + szNew[i] -= sz; + if( !leafData ){ + if( cntNew[i]usableSpace ) break; + szNew[i] += sz; + cntNew[i]++; + if( !leafData ){ + if( cntNew[i]=b.nCell ){ + k = i+1; + }else if( cntNew[i] <= (i>0 ? cntNew[i-1] : 0) ){ + rc = SQLITE_CORRUPT_BKPT; + goto balance_cleanup; + } + } /* ** The packing computed by the previous block is biased toward the siblings @@ -59927,19 +61068,27 @@ static int balance_nonroot( r = cntNew[i-1] - 1; d = r + 1 - leafData; - assert( d szLeft-(b.szCell[r]+2)) ){ + break; + } + szRight += b.szCell[d] + 2; + szLeft -= b.szCell[r] + 2; + cntNew[i-1] = r; + r--; + d--; + }while( r>=0 ); szNew[i] = szRight; szNew[i-1] = szLeft; + if( cntNew[i-1] <= (i>1 ? cntNew[i-2] : 0) ){ + rc = SQLITE_CORRUPT_BKPT; + goto balance_cleanup; + } } /* Sanity check: For a non-corrupt database file one of the follwing @@ -59959,10 +61108,6 @@ static int balance_nonroot( /* ** Allocate k new pages. Reuse old pages where possible. */ - if( apOld[0]->pgno<=1 ){ - rc = SQLITE_CORRUPT_BKPT; - goto balance_cleanup; - } pageFlags = apOld[0]->aData[0]; for(i=0; inCell + pOld->nOverflow + !leafData; @@ -60110,9 +61255,10 @@ static int balance_nonroot( if( !leafCorrection ){ ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno, &rc); } - if( szCell[i]>pNew->minLocal ){ + if( cachedCellSize(&b,i)>pNew->minLocal ){ ptrmapPutOvflPtr(pNew, pCell, &rc); } + if( rc ) goto balance_cleanup; } } } @@ -60126,20 +61272,21 @@ static int balance_nonroot( j = cntNew[i]; assert( jleaf ){ memcpy(&pNew->aData[8], pCell, 4); }else if( leafData ){ /* If the tree is a leaf-data tree, and the siblings are leaves, - ** then there is no divider cell in apCell[]. Instead, the divider + ** then there is no divider cell in b.apCell[]. Instead, the divider ** cell consists of the integer key for the right-most cell of ** the sibling-page assembled above only. */ CellInfo info; j--; - btreeParseCellPtr(pNew, apCell[j], &info); + pNew->xParseCell(pNew, b.apCell[j], &info); pCell = pTemp; sz = 4 + putVarint(&pCell[4], info.nKey); pTemp = 0; @@ -60156,9 +61303,9 @@ static int balance_nonroot( ** cells are at least 4 bytes. It only happens in b-trees used ** to evaluate "IN (SELECT ...)" and similar clauses. */ - if( szCell[j]==4 ){ + if( b.szCell[j]==4 ){ assert(leafCorrection==4); - sz = cellSizePtr(pParent, pCell); + sz = pParent->xCellSize(pParent, pCell); } } iOvflSpace += sz; @@ -60214,12 +61361,13 @@ static int balance_nonroot( iNew = iOld = 0; nNewCell = cntNew[0]; }else{ - iOld = iPgnFree = usableSpace-szNew[iPg]; assert( apNew[iPg]->nOverflow==0 ); @@ -60270,7 +61418,7 @@ static int balance_nonroot( assert( pParent->isInit ); TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n", - nOld, nNew, nCell)); + nOld, nNew, b.nCell)); /* Free any old pages that were not reused as new pages. */ @@ -60293,7 +61441,7 @@ static int balance_nonroot( ** Cleanup before returning. */ balance_cleanup: - sqlite3ScratchFree(apCell); + sqlite3ScratchFree(b.apCell); for(i=0; ipgnoRoot, pCur); - if( rc ) return rc; + if( pCur->curFlags & BTCF_Multiple ){ + rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); + if( rc ) return rc; + } if( pCur->pKeyInfo==0 ){ + assert( pKey==0 ); /* If this is an insert into a table b-tree, invalidate any incrblob ** cursors open on the row being replaced */ invalidateIncrblobCursors(p, nKey, 0); /* If the cursor is currently on the last row and we are appending a - ** new row onto the end, set the "loc" to avoid an unnecessary btreeMoveto() - ** call */ + ** new row onto the end, set the "loc" to avoid an unnecessary + ** btreeMoveto() call */ if( (pCur->curFlags&BTCF_ValidNKey)!=0 && nKey>0 && pCur->info.nKey==nKey-1 ){ - loc = -1; + loc = -1; + }else if( loc==0 ){ + rc = sqlite3BtreeMovetoUnpacked(pCur, 0, nKey, appendBias, &loc); + if( rc ) return rc; } - } - - if( !loc ){ + }else if( loc==0 ){ rc = btreeMoveto(pCur, pKey, nKey, appendBias, &loc); if( rc ) return rc; } @@ -60603,7 +61755,7 @@ SQLITE_PRIVATE int sqlite3BtreeInsert( assert( newCell!=0 ); rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew); if( rc ) goto end_insert; - assert( szNew==cellSizePtr(pPage, newCell) ); + assert( szNew==pPage->xCellSize(pPage, newCell) ); assert( szNew <= MX_CELL_SIZE(pBt) ); idx = pCur->aiIdx[pCur->iPage]; if( loc==0 ){ @@ -60687,12 +61839,8 @@ SQLITE_PRIVATE int sqlite3BtreeDelete(BtCursor *pCur){ assert( pCur->curFlags & BTCF_WriteFlag ); assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); assert( !hasReadConflicts(p, pCur->pgnoRoot) ); - - if( NEVER(pCur->aiIdx[pCur->iPage]>=pCur->apPage[pCur->iPage]->nCell) - || NEVER(pCur->eState!=CURSOR_VALID) - ){ - return SQLITE_ERROR; /* Something has gone awry. */ - } + assert( pCur->aiIdx[pCur->iPage]apPage[pCur->iPage]->nCell ); + assert( pCur->eState==CURSOR_VALID ); iCellDepth = pCur->iPage; iCellIdx = pCur->aiIdx[iCellDepth]; @@ -60717,8 +61865,10 @@ SQLITE_PRIVATE int sqlite3BtreeDelete(BtCursor *pCur){ ** deleted writable. Then free any overflow pages associated with the ** entry and finally remove the cell itself from within the page. */ - rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); - if( rc ) return rc; + if( pCur->curFlags & BTCF_Multiple ){ + rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); + if( rc ) return rc; + } /* If this is a delete operation to remove a row from a table b-tree, ** invalidate any incrblob cursors open on the row being deleted. */ @@ -60744,7 +61894,8 @@ SQLITE_PRIVATE int sqlite3BtreeDelete(BtCursor *pCur){ unsigned char *pTmp; pCell = findCell(pLeaf, pLeaf->nCell-1); - nCell = cellSizePtr(pLeaf, pCell); + if( pCell<&pLeaf->aData[4] ) return SQLITE_CORRUPT_BKPT; + nCell = pLeaf->xCellSize(pLeaf, pCell); assert( MX_CELL_SIZE(pBt) >= nCell ); pTmp = pBt->pTmpSpace; assert( pTmp!=0 ); @@ -60836,7 +61987,8 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){ pgnoRoot==PENDING_BYTE_PAGE(pBt) ){ pgnoRoot++; } - assert( pgnoRoot>=3 ); + assert( pgnoRoot>=3 || CORRUPT_DB ); + testcase( pgnoRoot<3 ); /* Allocate a page. The page that currently resides at pgnoRoot will ** be moved to the allocated page (unless the allocated page happens @@ -60965,7 +62117,7 @@ static int clearDatabasePage( if( pgno>btreePagecount(pBt) ){ return SQLITE_CORRUPT_BKPT; } - rc = getAndInitPage(pBt, pgno, &pPage, 0); + rc = getAndInitPage(pBt, pgno, &pPage, 0, 0); if( rc ) return rc; if( pPage->bBusy ){ rc = SQLITE_CORRUPT_BKPT; @@ -60986,7 +62138,8 @@ static int clearDatabasePage( rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange); if( rc ) goto cleardatabasepage_out; }else if( pnChange ){ - assert( pPage->intKey ); + assert( pPage->intKey || CORRUPT_DB ); + testcase( !pPage->intKey ); *pnChange += pPage->nCell; } if( freePageFlag ){ @@ -61504,6 +62657,57 @@ static void checkList( } #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ +/* +** An implementation of a min-heap. +** +** aHeap[0] is the number of elements on the heap. aHeap[1] is the +** root element. The daughter nodes of aHeap[N] are aHeap[N*2] +** and aHeap[N*2+1]. +** +** The heap property is this: Every node is less than or equal to both +** of its daughter nodes. A consequence of the heap property is that the +** root node aHeap[1] is always the minimum value currently in the heap. +** +** The btreeHeapInsert() routine inserts an unsigned 32-bit number onto +** the heap, preserving the heap property. The btreeHeapPull() routine +** removes the root element from the heap (the minimum value in the heap) +** and then moves other nodes around as necessary to preserve the heap +** property. +** +** This heap is used for cell overlap and coverage testing. Each u32 +** entry represents the span of a cell or freeblock on a btree page. +** The upper 16 bits are the index of the first byte of a range and the +** lower 16 bits are the index of the last byte of that range. +*/ +static void btreeHeapInsert(u32 *aHeap, u32 x){ + u32 j, i = ++aHeap[0]; + aHeap[i] = x; + while( (j = i/2)>0 && aHeap[j]>aHeap[i] ){ + x = aHeap[j]; + aHeap[j] = aHeap[i]; + aHeap[i] = x; + i = j; + } +} +static int btreeHeapPull(u32 *aHeap, u32 *pOut){ + u32 j, i, x; + if( (x = aHeap[0])==0 ) return 0; + *pOut = aHeap[1]; + aHeap[1] = aHeap[x]; + aHeap[x] = 0xffffffff; + aHeap[0]--; + i = 1; + while( (j = i*2)<=aHeap[0] ){ + if( aHeap[j]>aHeap[j+1] ) j++; + if( aHeap[i]zPfx; int saved_v1 = pCheck->v1; int saved_v2 = pCheck->v2; + u8 savedIsInit = 0; /* Check that the page exists */ @@ -61554,54 +62766,95 @@ static int checkTreePage( if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){ checkAppendMsg(pCheck, "unable to get the page. error code=%d", rc); - depth = -1; goto end_of_check; } /* Clear MemPage.isInit to make sure the corruption detection code in ** btreeInitPage() is executed. */ + savedIsInit = pPage->isInit; pPage->isInit = 0; if( (rc = btreeInitPage(pPage))!=0 ){ assert( rc==SQLITE_CORRUPT ); /* The only possible error from InitPage */ checkAppendMsg(pCheck, "btreeInitPage() returns error code %d", rc); - releasePage(pPage); - depth = -1; goto end_of_check; } + data = pPage->aData; + hdr = pPage->hdrOffset; - /* Check out all the cells. - */ - depth = 0; - for(i=0; inCell && pCheck->mxErr; i++){ - u8 *pCell; - u32 sz; + /* Set up for cell analysis */ + pCheck->zPfx = "On tree page %d cell %d: "; + contentOffset = get2byteNotZero(&data[hdr+5]); + assert( contentOffset<=usableSize ); /* Enforced by btreeInitPage() */ + + /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the + ** number of cells on the page. */ + nCell = get2byte(&data[hdr+3]); + assert( pPage->nCell==nCell ); + + /* EVIDENCE-OF: R-23882-45353 The cell pointer array of a b-tree page + ** immediately follows the b-tree page header. */ + cellStart = hdr + 12 - 4*pPage->leaf; + assert( pPage->aCellIdx==&data[cellStart] ); + pCellIdx = &data[cellStart + 2*(nCell-1)]; + + if( !pPage->leaf ){ + /* Analyze the right-child page of internal pages */ + pgno = get4byte(&data[hdr+8]); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pBt->autoVacuum ){ + pCheck->zPfx = "On page %d at right child: "; + checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); + } +#endif + depth = checkTreePage(pCheck, pgno, &maxKey, maxKey); + keyCanBeEqual = 0; + }else{ + /* For leaf pages, the coverage check will occur in the same loop + ** as the other cell checks, so initialize the heap. */ + heap = pCheck->heap; + heap[0] = 0; + } + + /* EVIDENCE-OF: R-02776-14802 The cell pointer array consists of K 2-byte + ** integer offsets to the cell contents. */ + for(i=nCell-1; i>=0 && pCheck->mxErr; i--){ CellInfo info; - /* Check payload overflow pages - */ - pCheck->zPfx = "On tree page %d cell %d: "; - pCheck->v1 = iPage; + /* Check cell size */ pCheck->v2 = i; - pCell = findCell(pPage,i); - btreeParseCellPtr(pPage, pCell, &info); - sz = info.nPayload; - /* For intKey pages, check that the keys are in order. - */ + assert( pCellIdx==&data[cellStart + i*2] ); + pc = get2byteAligned(pCellIdx); + pCellIdx -= 2; + if( pcusableSize-4 ){ + checkAppendMsg(pCheck, "Offset %d out of range %d..%d", + pc, contentOffset, usableSize-4); + doCoverageCheck = 0; + continue; + } + pCell = &data[pc]; + pPage->xParseCell(pPage, pCell, &info); + if( pc+info.nSize>usableSize ){ + checkAppendMsg(pCheck, "Extends off end of page"); + doCoverageCheck = 0; + continue; + } + + /* Check for integer primary key out of range */ if( pPage->intKey ){ - if( i==0 ){ - nMinKey = nMaxKey = info.nKey; - }else if( info.nKey <= nMaxKey ){ - checkAppendMsg(pCheck, - "Rowid %lld out of order (previous was %lld)", info.nKey, nMaxKey); + if( keyCanBeEqual ? (info.nKey > maxKey) : (info.nKey >= maxKey) ){ + checkAppendMsg(pCheck, "Rowid %lld out of order", info.nKey); } - nMaxKey = info.nKey; + maxKey = info.nKey; } - if( (sz>info.nLocal) - && (&pCell[info.iOverflow]<=&pPage->aData[pBt->usableSize]) - ){ - int nPage = (sz - info.nLocal + usableSize - 5)/(usableSize - 4); - Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]); + + /* Check the content overflow list */ + if( info.nPayload>info.nLocal ){ + int nPage; /* Number of pages on the overflow chain */ + Pgno pgnoOvfl; /* First page of the overflow chain */ + assert( pc + info.iOverflow <= usableSize ); + nPage = (info.nPayload - info.nLocal + usableSize - 5)/(usableSize - 4); + pgnoOvfl = get4byte(&pCell[info.iOverflow]); #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage); @@ -61610,119 +62863,57 @@ static int checkTreePage( checkList(pCheck, 0, pgnoOvfl, nPage); } - /* Check sanity of left child page. - */ if( !pPage->leaf ){ + /* Check sanity of left child page for internal pages */ pgno = get4byte(pCell); #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); } #endif - d2 = checkTreePage(pCheck, pgno, &nMinKey, i==0?NULL:&nMaxKey); - if( i>0 && d2!=depth ){ + d2 = checkTreePage(pCheck, pgno, &maxKey, maxKey); + keyCanBeEqual = 0; + if( d2!=depth ){ checkAppendMsg(pCheck, "Child page depth differs"); + depth = d2; } - depth = d2; - } - } - - if( !pPage->leaf ){ - pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); - pCheck->zPfx = "On page %d at right child: "; - pCheck->v1 = iPage; -#ifndef SQLITE_OMIT_AUTOVACUUM - if( pBt->autoVacuum ){ - checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); - } -#endif - checkTreePage(pCheck, pgno, NULL, !pPage->nCell?NULL:&nMaxKey); - } - - /* For intKey leaf pages, check that the min/max keys are in order - ** with any left/parent/right pages. - */ - pCheck->zPfx = "Page %d: "; - pCheck->v1 = iPage; - if( pPage->leaf && pPage->intKey ){ - /* if we are a left child page */ - if( pnParentMinKey ){ - /* if we are the left most child page */ - if( !pnParentMaxKey ){ - if( nMaxKey > *pnParentMinKey ){ - checkAppendMsg(pCheck, - "Rowid %lld out of order (max larger than parent min of %lld)", - nMaxKey, *pnParentMinKey); - } - }else{ - if( nMinKey <= *pnParentMinKey ){ - checkAppendMsg(pCheck, - "Rowid %lld out of order (min less than parent min of %lld)", - nMinKey, *pnParentMinKey); - } - if( nMaxKey > *pnParentMaxKey ){ - checkAppendMsg(pCheck, - "Rowid %lld out of order (max larger than parent max of %lld)", - nMaxKey, *pnParentMaxKey); - } - *pnParentMinKey = nMaxKey; - } - /* else if we're a right child page */ - } else if( pnParentMaxKey ){ - if( nMinKey <= *pnParentMaxKey ){ - checkAppendMsg(pCheck, - "Rowid %lld out of order (min less than parent max of %lld)", - nMinKey, *pnParentMaxKey); - } + }else{ + /* Populate the coverage-checking heap for leaf pages */ + btreeHeapInsert(heap, (pc<<16)|(pc+info.nSize-1)); } } + *piMinKey = maxKey; /* Check for complete coverage of the page */ - data = pPage->aData; - hdr = pPage->hdrOffset; - hit = sqlite3PageMalloc( pBt->pageSize ); pCheck->zPfx = 0; - if( hit==0 ){ - pCheck->mallocFailed = 1; - }else{ - int contentOffset = get2byteNotZero(&data[hdr+5]); - assert( contentOffset<=usableSize ); /* Enforced by btreeInitPage() */ - memset(hit+contentOffset, 0, usableSize-contentOffset); - memset(hit, 1, contentOffset); - /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the - ** number of cells on the page. */ - nCell = get2byte(&data[hdr+3]); - /* EVIDENCE-OF: R-23882-45353 The cell pointer array of a b-tree page - ** immediately follows the b-tree page header. */ - cellStart = hdr + 12 - 4*pPage->leaf; - /* EVIDENCE-OF: R-02776-14802 The cell pointer array consists of K 2-byte - ** integer offsets to the cell contents. */ - for(i=0; i=usableSize ){ - pCheck->zPfx = 0; - checkAppendMsg(pCheck, - "Corruption detected in cell %d on page %d",i,iPage); - }else{ - for(j=pc+size-1; j>=pc; j--) hit[j]++; + if( doCoverageCheck && pCheck->mxErr>0 ){ + /* For leaf pages, the min-heap has already been initialized and the + ** cells have already been inserted. But for internal pages, that has + ** not yet been done, so do it now */ + if( !pPage->leaf ){ + heap = pCheck->heap; + heap[0] = 0; + for(i=nCell-1; i>=0; i--){ + u32 size; + pc = get2byteAligned(&data[cellStart+i*2]); + size = pPage->xCellSize(pPage, &data[pc]); + btreeHeapInsert(heap, (pc<<16)|(pc+size-1)); } } - /* EVIDENCE-OF: R-20690-50594 The second field of the b-tree page header + /* Add the freeblocks to the min-heap + ** + ** EVIDENCE-OF: R-20690-50594 The second field of the b-tree page header ** is the offset of the first freeblock, or zero if there are no - ** freeblocks on the page. */ + ** freeblocks on the page. + */ i = get2byte(&data[hdr+1]); while( i>0 ){ int size, j; - assert( i<=usableSize-4 ); /* Enforced by btreeInitPage() */ + assert( (u32)i<=usableSize-4 ); /* Enforced by btreeInitPage() */ size = get2byte(&data[i+2]); - assert( i+size<=usableSize ); /* Enforced by btreeInitPage() */ - for(j=i+size-1; j>=i; j--) hit[j]++; + assert( (u32)(i+size)<=usableSize ); /* Enforced by btreeInitPage() */ + btreeHeapInsert(heap, (((u32)i)<<16)|(i+size-1)); /* EVIDENCE-OF: R-58208-19414 The first 2 bytes of a freeblock are a ** big-endian integer which is the offset in the b-tree page of the next ** freeblock in the chain, or zero if the freeblock is the last on the @@ -61731,33 +62922,50 @@ static int checkTreePage( /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of ** increasing offset. */ assert( j==0 || j>i+size ); /* Enforced by btreeInitPage() */ - assert( j<=usableSize-4 ); /* Enforced by btreeInitPage() */ + assert( (u32)j<=usableSize-4 ); /* Enforced by btreeInitPage() */ i = j; } - for(i=cnt=0; i1 ){ + /* Analyze the min-heap looking for overlap between cells and/or + ** freeblocks, and counting the number of untracked bytes in nFrag. + ** + ** Each min-heap entry is of the form: (start_address<<16)|end_address. + ** There is an implied first entry the covers the page header, the cell + ** pointer index, and the gap between the cell pointer index and the start + ** of cell content. + ** + ** The loop below pulls entries from the min-heap in order and compares + ** the start_address against the previous end_address. If there is an + ** overlap, that means bytes are used multiple times. If there is a gap, + ** that gap is added to the fragmentation count. + */ + nFrag = 0; + prev = contentOffset - 1; /* Implied first min-heap entry */ + while( btreeHeapPull(heap,&x) ){ + if( (prev&0xffff)>=(x>>16) ){ checkAppendMsg(pCheck, - "Multiple uses for byte %d of page %d", i, iPage); + "Multiple uses for byte %u of page %d", x>>16, iPage); break; + }else{ + nFrag += (x>>16) - (prev&0xffff) - 1; + prev = x; } } + nFrag += usableSize - (prev&0xffff) - 1; /* EVIDENCE-OF: R-43263-13491 The total number of bytes in all fragments ** is stored in the fifth field of the b-tree page header. ** EVIDENCE-OF: R-07161-27322 The one-byte integer at offset 7 gives the ** number of fragmented free bytes within the cell content area. */ - if( cnt!=data[hdr+7] ){ + if( heap[0]==0 && nFrag!=data[hdr+7] ){ checkAppendMsg(pCheck, "Fragmentation of %d bytes reported as %d on page %d", - cnt, data[hdr+7], iPage); + nFrag, data[hdr+7], iPage); } } - sqlite3PageFree(hit); - releasePage(pPage); end_of_check: + if( !doCoverageCheck ) pPage->isInit = savedIsInit; + releasePage(pPage); pCheck->zPfx = saved_zPfx; pCheck->v1 = saved_v1; pCheck->v2 = saved_v2; @@ -61787,14 +62995,15 @@ SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck( int *pnErr /* Write number of errors seen to this variable */ ){ Pgno i; - int nRef; IntegrityCk sCheck; BtShared *pBt = p->pBt; + int savedDbFlags = pBt->db->flags; char zErr[100]; + VVA_ONLY( int nRef ); sqlite3BtreeEnter(p); assert( p->inTrans>TRANS_NONE && pBt->inTransaction>TRANS_NONE ); - nRef = sqlite3PagerRefcount(pBt->pPager); + assert( (nRef = sqlite3PagerRefcount(pBt->pPager))>=0 ); sCheck.pBt = pBt; sCheck.pPager = pBt->pPager; sCheck.nPage = btreePagecount(sCheck.pBt); @@ -61804,22 +63013,26 @@ SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck( sCheck.zPfx = 0; sCheck.v1 = 0; sCheck.v2 = 0; - *pnErr = 0; + sCheck.aPgRef = 0; + sCheck.heap = 0; + sqlite3StrAccumInit(&sCheck.errMsg, 0, zErr, sizeof(zErr), SQLITE_MAX_LENGTH); if( sCheck.nPage==0 ){ - sqlite3BtreeLeave(p); - return 0; + goto integrity_ck_cleanup; } sCheck.aPgRef = sqlite3MallocZero((sCheck.nPage / 8)+ 1); if( !sCheck.aPgRef ){ - *pnErr = 1; - sqlite3BtreeLeave(p); - return 0; + sCheck.mallocFailed = 1; + goto integrity_ck_cleanup; } + sCheck.heap = (u32*)sqlite3PageMalloc( pBt->pageSize ); + if( sCheck.heap==0 ){ + sCheck.mallocFailed = 1; + goto integrity_ck_cleanup; + } + i = PENDING_BYTE_PAGE(pBt); if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i); - sqlite3StrAccumInit(&sCheck.errMsg, zErr, sizeof(zErr), SQLITE_MAX_LENGTH); - sCheck.errMsg.useMalloc = 2; /* Check the integrity of the freelist */ @@ -61830,17 +63043,19 @@ SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck( /* Check all the tables. */ + testcase( pBt->db->flags & SQLITE_CellSizeCk ); + pBt->db->flags &= ~SQLITE_CellSizeCk; for(i=0; (int)iautoVacuum && aRoot[i]>1 ){ checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0); } #endif - sCheck.zPfx = "List of tree roots: "; - checkTreePage(&sCheck, aRoot[i], NULL, NULL); - sCheck.zPfx = 0; + checkTreePage(&sCheck, aRoot[i], ¬Used, LARGEST_INT64); } + pBt->db->flags = savedDbFlags; /* Make sure every page in the file is referenced */ @@ -61864,28 +63079,20 @@ SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck( #endif } - /* Make sure this analysis did not leave any unref() pages. - ** This is an internal consistency check; an integrity check - ** of the integrity check. - */ - if( NEVER(nRef != sqlite3PagerRefcount(pBt->pPager)) ){ - checkAppendMsg(&sCheck, - "Outstanding page count goes from %d to %d during this analysis", - nRef, sqlite3PagerRefcount(pBt->pPager) - ); - } - /* Clean up and report errors. */ - sqlite3BtreeLeave(p); +integrity_ck_cleanup: + sqlite3PageFree(sCheck.heap); sqlite3_free(sCheck.aPgRef); if( sCheck.mallocFailed ){ sqlite3StrAccumReset(&sCheck.errMsg); - *pnErr = sCheck.nErr+1; - return 0; + sCheck.nErr++; } *pnErr = sCheck.nErr; if( sCheck.nErr==0 ) sqlite3StrAccumReset(&sCheck.errMsg); + /* Make sure this analysis did not leave any unref() pages. */ + assert( nRef==sqlite3PagerRefcount(pBt->pPager) ); + sqlite3BtreeLeave(p); return sqlite3StrAccumFinish(&sCheck.errMsg); } #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ @@ -62096,6 +63303,7 @@ SQLITE_PRIVATE int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void */ SQLITE_PRIVATE void sqlite3BtreeIncrblobCursor(BtCursor *pCur){ pCur->curFlags |= BTCF_Incrblob; + pCur->pBtree->hasIncrblobCur = 1; } #endif @@ -62181,6 +63389,8 @@ SQLITE_PRIVATE int sqlite3HeaderSizeBtree(void){ return ROUND8(sizeof(MemPage)); ** This file contains the implementation of the sqlite3_backup_XXX() ** API functions and the related features. */ +/* #include "sqliteInt.h" */ +/* #include "btreeInt.h" */ /* ** Structure allocated for each backup operation. @@ -62852,9 +64062,13 @@ SQLITE_API int SQLITE_STDCALL sqlite3_backup_pagecount(sqlite3_backup *p){ ** corresponding to the source database is held when this function is ** called. */ -SQLITE_PRIVATE void sqlite3BackupUpdate(sqlite3_backup *pBackup, Pgno iPage, const u8 *aData){ - sqlite3_backup *p; /* Iterator variable */ - for(p=pBackup; p; p=p->pNext){ +static SQLITE_NOINLINE void backupUpdate( + sqlite3_backup *p, + Pgno iPage, + const u8 *aData +){ + assert( p!=0 ); + do{ assert( sqlite3_mutex_held(p->pSrc->pBt->mutex) ); if( !isFatalError(p->rc) && iPageiNext ){ /* The backup process p has already copied page iPage. But now it @@ -62871,7 +64085,10 @@ SQLITE_PRIVATE void sqlite3BackupUpdate(sqlite3_backup *pBackup, Pgno iPage, con p->rc = rc; } } - } + }while( (p = p->pNext)!=0 ); +} +SQLITE_PRIVATE void sqlite3BackupUpdate(sqlite3_backup *pBackup, Pgno iPage, const u8 *aData){ + if( pBackup ) backupUpdate(pBackup, iPage, aData); } /* @@ -62972,6 +64189,8 @@ copy_finished: ** only within the VDBE. Interface routines refer to a Mem using the ** name sqlite_value */ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ #ifdef SQLITE_DEBUG /* @@ -63155,10 +64374,11 @@ SQLITE_PRIVATE int sqlite3VdbeMemMakeWriteable(Mem *pMem){ pMem->z[pMem->n] = 0; pMem->z[pMem->n+1] = 0; pMem->flags |= MEM_Term; + } + pMem->flags &= ~MEM_Ephem; #ifdef SQLITE_DEBUG - pMem->pScopyFrom = 0; + pMem->pScopyFrom = 0; #endif - } return SQLITE_OK; } @@ -63542,7 +64762,7 @@ SQLITE_PRIVATE int sqlite3VdbeMemNumerify(Mem *pMem){ SQLITE_PRIVATE void sqlite3VdbeMemCast(Mem *pMem, u8 aff, u8 encoding){ if( pMem->flags & MEM_Null ) return; switch( aff ){ - case SQLITE_AFF_NONE: { /* Really a cast to BLOB */ + case SQLITE_AFF_BLOB: { /* Really a cast to BLOB */ if( (pMem->flags & MEM_Blob)==0 ){ sqlite3ValueApplyAffinity(pMem, SQLITE_AFF_TEXT, encoding); assert( pMem->flags & MEM_Str || pMem->db->mallocFailed ); @@ -63724,10 +64944,6 @@ SQLITE_PRIVATE void sqlite3VdbeMemAboutToChange(Vdbe *pVdbe, Mem *pMem){ } #endif /* SQLITE_DEBUG */ -/* -** Size of struct Mem not including the Mem.zMalloc member. -*/ -#define MEMCELLSIZE offsetof(Mem,zMalloc) /* ** Make an shallow copy of pFrom into pTo. Prior contents of @@ -63735,10 +64951,15 @@ SQLITE_PRIVATE void sqlite3VdbeMemAboutToChange(Vdbe *pVdbe, Mem *pMem){ ** pFrom->z is used, then pTo->z points to the same thing as pFrom->z ** and flags gets srcType (either MEM_Ephem or MEM_Static). */ +static SQLITE_NOINLINE void vdbeClrCopy(Mem *pTo, const Mem *pFrom, int eType){ + vdbeMemClearExternAndSetNull(pTo); + assert( !VdbeMemDynamic(pTo) ); + sqlite3VdbeMemShallowCopy(pTo, pFrom, eType); +} SQLITE_PRIVATE void sqlite3VdbeMemShallowCopy(Mem *pTo, const Mem *pFrom, int srcType){ assert( (pFrom->flags & MEM_RowSet)==0 ); assert( pTo->db==pFrom->db ); - if( VdbeMemDynamic(pTo) ) vdbeMemClearExternAndSetNull(pTo); + if( VdbeMemDynamic(pTo) ){ vdbeClrCopy(pTo,pFrom,srcType); return; } memcpy(pTo, pFrom, MEMCELLSIZE); if( (pFrom->flags&MEM_Static)==0 ){ pTo->flags &= ~(MEM_Dyn|MEM_Static|MEM_Ephem); @@ -63754,7 +64975,10 @@ SQLITE_PRIVATE void sqlite3VdbeMemShallowCopy(Mem *pTo, const Mem *pFrom, int sr SQLITE_PRIVATE int sqlite3VdbeMemCopy(Mem *pTo, const Mem *pFrom){ int rc = SQLITE_OK; - assert( pTo->db==pFrom->db ); + /* The pFrom==0 case in the following assert() is when an sqlite3_value + ** from sqlite3_value_dup() is used as the argument + ** to sqlite3_result_value(). */ + assert( pTo->db==pFrom->db || pFrom->db==0 ); assert( (pFrom->flags & MEM_RowSet)==0 ); if( VdbeMemDynamic(pTo) ) vdbeMemClearExternAndSetNull(pTo); memcpy(pTo, pFrom, MEMCELLSIZE); @@ -63901,6 +65125,32 @@ SQLITE_PRIVATE int sqlite3VdbeMemSetStr( ** If this routine fails for any reason (malloc returns NULL or unable ** to read from the disk) then the pMem is left in an inconsistent state. */ +static SQLITE_NOINLINE int vdbeMemFromBtreeResize( + BtCursor *pCur, /* Cursor pointing at record to retrieve. */ + u32 offset, /* Offset from the start of data to return bytes from. */ + u32 amt, /* Number of bytes to return. */ + int key, /* If true, retrieve from the btree key, not data. */ + Mem *pMem /* OUT: Return data in this Mem structure. */ +){ + int rc; + pMem->flags = MEM_Null; + if( SQLITE_OK==(rc = sqlite3VdbeMemClearAndResize(pMem, amt+2)) ){ + if( key ){ + rc = sqlite3BtreeKey(pCur, offset, amt, pMem->z); + }else{ + rc = sqlite3BtreeData(pCur, offset, amt, pMem->z); + } + if( rc==SQLITE_OK ){ + pMem->z[amt] = 0; + pMem->z[amt+1] = 0; + pMem->flags = MEM_Blob|MEM_Term; + pMem->n = (int)amt; + }else{ + sqlite3VdbeMemRelease(pMem); + } + } + return rc; +} SQLITE_PRIVATE int sqlite3VdbeMemFromBtree( BtCursor *pCur, /* Cursor pointing at record to retrieve. */ u32 offset, /* Offset from the start of data to return bytes from. */ @@ -63930,22 +65180,7 @@ SQLITE_PRIVATE int sqlite3VdbeMemFromBtree( pMem->flags = MEM_Blob|MEM_Ephem; pMem->n = (int)amt; }else{ - pMem->flags = MEM_Null; - if( SQLITE_OK==(rc = sqlite3VdbeMemClearAndResize(pMem, amt+2)) ){ - if( key ){ - rc = sqlite3BtreeKey(pCur, offset, amt, pMem->z); - }else{ - rc = sqlite3BtreeData(pCur, offset, amt, pMem->z); - } - if( rc==SQLITE_OK ){ - pMem->z[amt] = 0; - pMem->z[amt+1] = 0; - pMem->flags = MEM_Blob|MEM_Term; - pMem->n = (int)amt; - }else{ - sqlite3VdbeMemRelease(pMem); - } - } + rc = vdbeMemFromBtreeResize(pCur, offset, amt, key, pMem); } return rc; @@ -64266,7 +65501,7 @@ static int valueFromExpr( if( zVal==0 ) goto no_mem; sqlite3ValueSetStr(pVal, -1, zVal, SQLITE_UTF8, SQLITE_DYNAMIC); } - if( (op==TK_INTEGER || op==TK_FLOAT ) && affinity==SQLITE_AFF_NONE ){ + if( (op==TK_INTEGER || op==TK_FLOAT ) && affinity==SQLITE_AFF_BLOB ){ sqlite3ValueApplyAffinity(pVal, SQLITE_AFF_NUMERIC, SQLITE_UTF8); }else{ sqlite3ValueApplyAffinity(pVal, affinity, SQLITE_UTF8); @@ -64602,7 +65837,7 @@ SQLITE_PRIVATE void sqlite3Stat4ProbeFree(UnpackedRecord *pRec){ Mem *aMem = pRec->aMem; sqlite3 *db = aMem[0].db; for(i=0; ipKeyInfo); sqlite3DbFree(db, pRec); @@ -64633,19 +65868,28 @@ SQLITE_PRIVATE void sqlite3ValueFree(sqlite3_value *v){ } /* -** Return the number of bytes in the sqlite3_value object assuming -** that it uses the encoding "enc" +** The sqlite3ValueBytes() routine returns the number of bytes in the +** sqlite3_value object assuming that it uses the encoding "enc". +** The valueBytes() routine is a helper function. */ +static SQLITE_NOINLINE int valueBytes(sqlite3_value *pVal, u8 enc){ + return valueToText(pVal, enc)!=0 ? pVal->n : 0; +} SQLITE_PRIVATE int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){ Mem *p = (Mem*)pVal; - if( (p->flags & MEM_Blob)!=0 || sqlite3ValueText(pVal, enc) ){ + assert( (p->flags & MEM_Null)==0 || (p->flags & (MEM_Str|MEM_Blob))==0 ); + if( (p->flags & MEM_Str)!=0 && pVal->enc==enc ){ + return p->n; + } + if( (p->flags & MEM_Blob)!=0 ){ if( p->flags & MEM_Zero ){ return p->n + p->u.nZero; }else{ return p->n; } } - return 0; + if( p->flags & MEM_Null ) return 0; + return valueBytes(pVal, enc); } /************** End of vdbemem.c *********************************************/ @@ -64664,6 +65908,8 @@ SQLITE_PRIVATE int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){ ** This file contains code used for creating, destroying, and populating ** a VDBE (or an "sqlite3_stmt" as it is known to the outside world.) */ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ /* ** Create a new virtual database engine. @@ -64688,6 +65934,17 @@ SQLITE_PRIVATE Vdbe *sqlite3VdbeCreate(Parse *pParse){ return p; } +/* +** Change the error string stored in Vdbe.zErrMsg +*/ +SQLITE_PRIVATE void sqlite3VdbeError(Vdbe *p, const char *zFormat, ...){ + va_list ap; + sqlite3DbFree(p->db, p->zErrMsg); + va_start(ap, zFormat); + p->zErrMsg = sqlite3VMPrintf(p->db, zFormat, ap); + va_end(ap); +} + /* ** Remember the SQL string for a prepared statement. */ @@ -64872,6 +66129,23 @@ SQLITE_PRIVATE int sqlite3VdbeAddOp4( return addr; } +/* +** Add an opcode that includes the p4 value with a P4_INT64 type. +*/ +SQLITE_PRIVATE int sqlite3VdbeAddOp4Dup8( + Vdbe *p, /* Add the opcode to this VM */ + int op, /* The new opcode */ + int p1, /* The P1 operand */ + int p2, /* The P2 operand */ + int p3, /* The P3 operand */ + const u8 *zP4, /* The P4 operand */ + int p4type /* P4 operand type */ +){ + char *p4copy = sqlite3DbMallocRaw(sqlite3VdbeDb(p), 8); + if( p4copy ) memcpy(p4copy, zP4, 8); + return sqlite3VdbeAddOp4(p, op, p1, p2, p3, p4copy, p4type); +} + /* ** Add an OP_ParseSchema opcode. This routine is broken out from ** sqlite3VdbeAddOp4() since it needs to also needs to mark all btrees @@ -65036,6 +66310,7 @@ static Op *opIterNext(VdbeOpIter *p){ ** * OP_VUpdate ** * OP_VRename ** * OP_FkCounter with P2==0 (immediate foreign key constraint) +** * OP_CreateTable and OP_InitCoroutine (for CREATE TABLE AS SELECT ...) ** ** Then check that the value of Parse.mayAbort is true if an ** ABORT may be thrown, or false otherwise. Return true if it does @@ -65047,6 +66322,8 @@ static Op *opIterNext(VdbeOpIter *p){ SQLITE_PRIVATE int sqlite3VdbeAssertMayAbort(Vdbe *v, int mayAbort){ int hasAbort = 0; int hasFkCounter = 0; + int hasCreateTable = 0; + int hasInitCoroutine = 0; Op *pOp; VdbeOpIter sIter; memset(&sIter, 0, sizeof(sIter)); @@ -65061,6 +66338,8 @@ SQLITE_PRIVATE int sqlite3VdbeAssertMayAbort(Vdbe *v, int mayAbort){ hasAbort = 1; break; } + if( opcode==OP_CreateTable ) hasCreateTable = 1; + if( opcode==OP_InitCoroutine ) hasInitCoroutine = 1; #ifndef SQLITE_OMIT_FOREIGN_KEY if( opcode==OP_FkCounter && pOp->p1==0 && pOp->p2==1 ){ hasFkCounter = 1; @@ -65074,7 +66353,8 @@ SQLITE_PRIVATE int sqlite3VdbeAssertMayAbort(Vdbe *v, int mayAbort){ ** through all opcodes and hasAbort may be set incorrectly. Return ** true for this case to prevent the assert() in the callers frame ** from failing. */ - return ( v->db->mallocFailed || hasAbort==mayAbort || hasFkCounter ); + return ( v->db->mallocFailed || hasAbort==mayAbort || hasFkCounter + || (hasCreateTable && hasInitCoroutine) ); } #endif /* SQLITE_DEBUG - the sqlite3AssertMayAbort() function */ @@ -65105,11 +66385,6 @@ static void resolveP2Values(Vdbe *p, int *pMaxFuncArgs){ /* NOTE: Be sure to update mkopcodeh.awk when adding or removing ** cases from this switch! */ switch( opcode ){ - case OP_Function: - case OP_AggStep: { - if( pOp->p5>nMaxArgs ) nMaxArgs = pOp->p5; - break; - } case OP_Transaction: { if( pOp->p2!=0 ) p->readOnly = 0; /* fall thru */ @@ -65353,6 +66628,10 @@ static void freeP4(sqlite3 *db, int p4type, void *p4){ if( p4 ){ assert( db ); switch( p4type ){ + case P4_FUNCCTX: { + freeEphemeralFunction(db, ((sqlite3_context*)p4)->pFunc); + /* Fall through into the next case */ + } case P4_REAL: case P4_INT64: case P4_DYNAMIC: @@ -65737,6 +67016,13 @@ static char *displayP4(Op *pOp, char *zTemp, int nTemp){ sqlite3_snprintf(nTemp, zTemp, "%s(%d)", pDef->zName, pDef->nArg); break; } +#ifdef SQLITE_DEBUG + case P4_FUNCCTX: { + FuncDef *pDef = pOp->p4.pCtx->pFunc; + sqlite3_snprintf(nTemp, zTemp, "%s(%d)", pDef->zName, pDef->nArg); + break; + } +#endif case P4_INT64: { sqlite3_snprintf(nTemp, zTemp, "%lld", *pOp->p4.pI64); break; @@ -65857,12 +67143,11 @@ SQLITE_PRIVATE void sqlite3VdbeEnter(Vdbe *p){ /* ** Unlock all of the btrees previously locked by a call to sqlite3VdbeEnter(). */ -SQLITE_PRIVATE void sqlite3VdbeLeave(Vdbe *p){ +static SQLITE_NOINLINE void vdbeLeave(Vdbe *p){ int i; sqlite3 *db; Db *aDb; int nDb; - if( DbMaskAllZero(p->lockMask) ) return; /* The common case */ db = p->db; aDb = db->aDb; nDb = db->nDb; @@ -65872,6 +67157,10 @@ SQLITE_PRIVATE void sqlite3VdbeLeave(Vdbe *p){ } } } +SQLITE_PRIVATE void sqlite3VdbeLeave(Vdbe *p){ + if( DbMaskAllZero(p->lockMask) ) return; /* The common case */ + vdbeLeave(p); +} #endif #if defined(VDBE_PROFILE) || defined(SQLITE_DEBUG) @@ -66044,7 +67333,7 @@ SQLITE_PRIVATE int sqlite3VdbeList( }else if( db->u1.isInterrupted ){ p->rc = SQLITE_INTERRUPT; rc = SQLITE_ERROR; - sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3ErrStr(p->rc)); + sqlite3VdbeError(p, sqlite3ErrStr(p->rc)); }else{ char *zP4; Op *pOp; @@ -66439,6 +67728,22 @@ SQLITE_PRIVATE void sqlite3VdbeFreeCursor(Vdbe *p, VdbeCursor *pCx){ #endif } +/* +** Close all cursors in the current frame. +*/ +static void closeCursorsInFrame(Vdbe *p){ + if( p->apCsr ){ + int i; + for(i=0; inCursor; i++){ + VdbeCursor *pC = p->apCsr[i]; + if( pC ){ + sqlite3VdbeFreeCursor(p, pC); + p->apCsr[i] = 0; + } + } + } +} + /* ** Copy the values stored in the VdbeFrame structure to its Vdbe. This ** is used, for example, when a trigger sub-program is halted to restore @@ -66446,6 +67751,7 @@ SQLITE_PRIVATE void sqlite3VdbeFreeCursor(Vdbe *p, VdbeCursor *pCx){ */ SQLITE_PRIVATE int sqlite3VdbeFrameRestore(VdbeFrame *pFrame){ Vdbe *v = pFrame->v; + closeCursorsInFrame(v); #ifdef SQLITE_ENABLE_STMT_SCANSTATUS v->anExec = pFrame->anExec; #endif @@ -66480,17 +67786,7 @@ static void closeAllCursors(Vdbe *p){ p->nFrame = 0; } assert( p->nFrame==0 ); - - if( p->apCsr ){ - int i; - for(i=0; inCursor; i++){ - VdbeCursor *pC = p->apCsr[i]; - if( pC ){ - sqlite3VdbeFreeCursor(p, pC); - p->apCsr[i] = 0; - } - } - } + closeCursorsInFrame(p); if( p->aMem ){ releaseMemArray(&p->aMem[1], p->nMem); } @@ -66940,7 +68236,7 @@ SQLITE_PRIVATE int sqlite3VdbeCheckFk(Vdbe *p, int deferred){ ){ p->rc = SQLITE_CONSTRAINT_FOREIGNKEY; p->errorAction = OE_Abort; - sqlite3SetString(&p->zErrMsg, db, "FOREIGN KEY constraint failed"); + sqlite3VdbeError(p, "FOREIGN KEY constraint failed"); return SQLITE_ERROR; } return SQLITE_OK; @@ -67561,6 +68857,13 @@ SQLITE_PRIVATE u32 sqlite3VdbeSerialType(Mem *pMem, int file_format){ return ((n*2) + 12 + ((flags&MEM_Str)!=0)); } +/* +** The sizes for serial types less than 12 +*/ +static const u8 sqlite3SmallTypeSizes[] = { + 0, 1, 2, 3, 4, 6, 8, 8, 0, 0, 0, 0 +}; + /* ** Return the length of the data corresponding to the supplied serial-type. */ @@ -67568,8 +68871,7 @@ SQLITE_PRIVATE u32 sqlite3VdbeSerialTypeLen(u32 serial_type){ if( serial_type>=12 ){ return (serial_type-12)/2; }else{ - static const u8 aSize[] = { 0, 1, 2, 3, 4, 6, 8, 8, 0, 0, 0, 0 }; - return aSize[serial_type]; + return sqlite3SmallTypeSizes[serial_type]; } } @@ -67653,7 +68955,7 @@ SQLITE_PRIVATE u32 sqlite3VdbeSerialPut(u8 *buf, Mem *pMem, u32 serial_type){ }else{ v = pMem->u.i; } - len = i = sqlite3VdbeSerialTypeLen(serial_type); + len = i = sqlite3SmallTypeSizes[serial_type]; assert( i>0 ); do{ buf[--i] = (u8)(v&0xFF); @@ -67938,6 +69240,7 @@ static int vdbeRecordCompareDebug( /* mem1.u.i = 0; // not needed, here to silence compiler warning */ idx1 = getVarint32(aKey1, szHdr1); + if( szHdr1>98307 ) return SQLITE_CORRUPT; d1 = szHdr1; assert( pKeyInfo->nField+pKeyInfo->nXField>=pPKey2->nField || CORRUPT_DB ); assert( pKeyInfo->aSortOrder!=0 ); @@ -68235,7 +69538,7 @@ static i64 vdbeRecordDecodeInt(u32 serial_type, const u8 *aKey){ ** pPKey2->errCode is set to SQLITE_NOMEM and, if it is not NULL, the ** malloc-failed flag set on database handle (pPKey2->pKeyInfo->db). */ -static int vdbeRecordCompareWithSkip( +SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip( int nKey1, const void *pKey1, /* Left key */ UnpackedRecord *pPKey2, /* Right key */ int bSkip /* If true, skip the first field */ @@ -68283,7 +69586,7 @@ static int vdbeRecordCompareWithSkip( if( pRhs->flags & MEM_Int ){ serial_type = aKey1[idx1]; testcase( serial_type==12 ); - if( serial_type>=12 ){ + if( serial_type>=10 ){ rc = +1; }else if( serial_type==0 ){ rc = -1; @@ -68309,7 +69612,11 @@ static int vdbeRecordCompareWithSkip( /* RHS is real */ else if( pRhs->flags & MEM_Real ){ serial_type = aKey1[idx1]; - if( serial_type>=12 ){ + if( serial_type>=10 ){ + /* Serial types 12 or greater are strings and blobs (greater than + ** numbers). Types 10 and 11 are currently "reserved for future + ** use", so it doesn't really matter what the results of comparing + ** them to numberic values are. */ rc = +1; }else if( serial_type==0 ){ rc = -1; @@ -68421,7 +69728,7 @@ SQLITE_PRIVATE int sqlite3VdbeRecordCompare( int nKey1, const void *pKey1, /* Left key */ UnpackedRecord *pPKey2 /* Right key */ ){ - return vdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 0); + return sqlite3VdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 0); } @@ -68509,7 +69816,7 @@ static int vdbeRecordCompareInt( }else if( pPKey2->nField>1 ){ /* The first fields of the two keys are equal. Compare the trailing ** fields. */ - res = vdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 1); + res = sqlite3VdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 1); }else{ /* The first fields of the two keys are equal and there are no trailing ** fields. Return pPKey2->default_rc in this case. */ @@ -68557,7 +69864,7 @@ static int vdbeRecordCompareString( res = nStr - pPKey2->aMem[0].n; if( res==0 ){ if( pPKey2->nField>1 ){ - res = vdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 1); + res = sqlite3VdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 1); }else{ res = pPKey2->default_rc; } @@ -68678,7 +69985,7 @@ SQLITE_PRIVATE int sqlite3VdbeIdxRowid(sqlite3 *db, BtCursor *pCur, i64 *rowid){ if( unlikely(typeRowid<1 || typeRowid>9 || typeRowid==7) ){ goto idx_rowid_corruption; } - lenRowid = sqlite3VdbeSerialTypeLen(typeRowid); + lenRowid = sqlite3SmallTypeSizes[typeRowid]; testcase( (u32)m.n==szHdr+lenRowid ); if( unlikely((u32)m.nstartTime>0 ); + assert( db->xProfile!=0 ); + assert( db->init.busy==0 ); + assert( p->zSql!=0 ); + sqlite3OsCurrentTimeInt64(db->pVfs, &iNow); + db->xProfile(db->pProfileArg, p->zSql, (iNow - p->startTime)*1000000); + p->startTime = 0; +} +/* +** The checkProfileCallback(DB,P) macro checks to see if a profile callback +** is needed, and it invokes the callback if it is needed. +*/ +# define checkProfileCallback(DB,P) \ + if( ((P)->startTime)>0 ){ invokeProfileCallback(DB,P); } +#else +# define checkProfileCallback(DB,P) /*no-op*/ +#endif + /* ** The following routine destroys a virtual machine that is created by ** the sqlite3_compile() routine. The integer returned is an SQLITE_ @@ -68909,6 +70243,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_finalize(sqlite3_stmt *pStmt){ sqlite3 *db = v->db; if( vdbeSafety(v) ) return SQLITE_MISUSE_BKPT; sqlite3_mutex_enter(db->mutex); + checkProfileCallback(db, v); rc = sqlite3VdbeFinalize(v); rc = sqlite3ApiExit(db, rc); sqlite3LeaveMutexAndCloseZombie(db); @@ -68930,12 +70265,14 @@ SQLITE_API int SQLITE_STDCALL sqlite3_reset(sqlite3_stmt *pStmt){ rc = SQLITE_OK; }else{ Vdbe *v = (Vdbe*)pStmt; - sqlite3_mutex_enter(v->db->mutex); + sqlite3 *db = v->db; + sqlite3_mutex_enter(db->mutex); + checkProfileCallback(db, v); rc = sqlite3VdbeReset(v); sqlite3VdbeRewind(v); - assert( (rc & (v->db->errMask))==rc ); - rc = sqlite3ApiExit(v->db, rc); - sqlite3_mutex_leave(v->db->mutex); + assert( (rc & (db->errMask))==rc ); + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); } return rc; } @@ -68970,7 +70307,10 @@ SQLITE_API int SQLITE_STDCALL sqlite3_clear_bindings(sqlite3_stmt *pStmt){ SQLITE_API const void *SQLITE_STDCALL sqlite3_value_blob(sqlite3_value *pVal){ Mem *p = (Mem*)pVal; if( p->flags & (MEM_Blob|MEM_Str) ){ - sqlite3VdbeMemExpandBlob(p); + if( sqlite3VdbeMemExpandBlob(p)!=SQLITE_OK ){ + assert( p->flags==MEM_Null && p->z==0 ); + return 0; + } p->flags |= MEM_Blob; return p->n ? p->z : 0; }else{ @@ -69048,6 +70388,36 @@ SQLITE_API int SQLITE_STDCALL sqlite3_value_type(sqlite3_value* pVal){ return aType[pVal->flags&MEM_AffMask]; } +/* Make a copy of an sqlite3_value object +*/ +SQLITE_API sqlite3_value *SQLITE_STDCALL sqlite3_value_dup(const sqlite3_value *pOrig){ + sqlite3_value *pNew; + if( pOrig==0 ) return 0; + pNew = sqlite3_malloc( sizeof(*pNew) ); + if( pNew==0 ) return 0; + memset(pNew, 0, sizeof(*pNew)); + memcpy(pNew, pOrig, MEMCELLSIZE); + pNew->flags &= ~MEM_Dyn; + pNew->db = 0; + if( pNew->flags&(MEM_Str|MEM_Blob) ){ + pNew->flags &= ~(MEM_Static|MEM_Dyn); + pNew->flags |= MEM_Ephem; + if( sqlite3VdbeMemMakeWriteable(pNew)!=SQLITE_OK ){ + sqlite3ValueFree(pNew); + pNew = 0; + } + } + return pNew; +} + +/* Destroy an sqlite3_value object previously obtained from +** sqlite3_value_dup(). +*/ +SQLITE_API void SQLITE_STDCALL sqlite3_value_free(sqlite3_value *pOld){ + sqlite3ValueFree(pOld); +} + + /**************************** sqlite3_result_ ******************************* ** The following routines are used by user-defined functions to specify ** the function result. @@ -69202,6 +70572,15 @@ SQLITE_API void SQLITE_STDCALL sqlite3_result_zeroblob(sqlite3_context *pCtx, in assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); sqlite3VdbeMemSetZeroBlob(pCtx->pOut, n); } +SQLITE_API int SQLITE_STDCALL sqlite3_result_zeroblob64(sqlite3_context *pCtx, u64 n){ + Mem *pOut = pCtx->pOut; + assert( sqlite3_mutex_held(pOut->db->mutex) ); + if( n>(u64)pOut->db->aLimit[SQLITE_LIMIT_LENGTH] ){ + return SQLITE_TOOBIG; + } + sqlite3VdbeMemSetZeroBlob(pCtx->pOut, (int)n); + return SQLITE_OK; +} SQLITE_API void SQLITE_STDCALL sqlite3_result_error_code(sqlite3_context *pCtx, int errCode){ pCtx->isError = errCode; pCtx->fErrorOrAux = 1; @@ -69256,6 +70635,7 @@ static int doWalCallbacks(sqlite3 *db){ return rc; } + /* ** Execute the statement pStmt, either until a row of data is ready, the ** statement is completely executed or an error occurs. @@ -69324,8 +70704,10 @@ static int sqlite3Step(Vdbe *p){ ); #ifndef SQLITE_OMIT_TRACE - if( db->xProfile && !db->init.busy ){ + if( db->xProfile && !db->init.busy && p->zSql ){ sqlite3OsCurrentTimeInt64(db->pVfs, &p->startTime); + }else{ + assert( p->startTime==0 ); } #endif @@ -69349,13 +70731,8 @@ static int sqlite3Step(Vdbe *p){ } #ifndef SQLITE_OMIT_TRACE - /* Invoke the profile callback if there is one - */ - if( rc!=SQLITE_ROW && db->xProfile && !db->init.busy && p->zSql ){ - sqlite3_int64 iNow; - sqlite3OsCurrentTimeInt64(db->pVfs, &iNow); - db->xProfile(db->pProfileArg, p->zSql, (iNow - p->startTime)*1000000); - } + /* If the statement completed successfully, invoke the profile callback */ + if( rc!=SQLITE_ROW ) checkProfileCallback(db, p); #endif if( rc==SQLITE_DONE ){ @@ -70183,6 +71560,20 @@ SQLITE_API int SQLITE_STDCALL sqlite3_bind_zeroblob(sqlite3_stmt *pStmt, int i, } return rc; } +SQLITE_API int SQLITE_STDCALL sqlite3_bind_zeroblob64(sqlite3_stmt *pStmt, int i, sqlite3_uint64 n){ + int rc; + Vdbe *p = (Vdbe *)pStmt; + sqlite3_mutex_enter(p->db->mutex); + if( n>(u64)p->db->aLimit[SQLITE_LIMIT_LENGTH] ){ + rc = SQLITE_TOOBIG; + }else{ + assert( (n & 0x7FFFFFFF)==n ); + rc = sqlite3_bind_zeroblob(pStmt, i, n); + } + rc = sqlite3ApiExit(p->db, rc); + sqlite3_mutex_leave(p->db->mutex); + return rc; +} /* ** Return the number of wildcards that can be potentially bound to. @@ -70432,6 +71823,8 @@ SQLITE_API void SQLITE_STDCALL sqlite3_stmt_scanstatus_reset(sqlite3_stmt *pStmt ** ** The Vdbe parse-tree explainer is also found here. */ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ #ifndef SQLITE_OMIT_TRACE @@ -70499,9 +71892,8 @@ SQLITE_PRIVATE char *sqlite3VdbeExpandSql( char zBase[100]; /* Initial working space */ db = p->db; - sqlite3StrAccumInit(&out, zBase, sizeof(zBase), + sqlite3StrAccumInit(&out, db, zBase, sizeof(zBase), db->aLimit[SQLITE_LIMIT_LENGTH]); - out.db = db; if( db->nVdbeExec>1 ){ while( *zRawSql ){ const char *zStart = zRawSql; @@ -70510,6 +71902,8 @@ SQLITE_PRIVATE char *sqlite3VdbeExpandSql( assert( (zRawSql - zStart) > 0 ); sqlite3StrAccumAppend(&out, zStart, (int)(zRawSql-zStart)); } + }else if( p->nVar==0 ){ + sqlite3StrAccumAppend(&out, zRawSql, sqlite3Strlen30(zRawSql)); }else{ while( zRawSql[0] ){ n = findNextHostParameter(zRawSql, &nToken); @@ -70526,10 +71920,12 @@ SQLITE_PRIVATE char *sqlite3VdbeExpandSql( idx = nextIndex; } }else{ - assert( zRawSql[0]==':' || zRawSql[0]=='$' || zRawSql[0]=='@' ); + assert( zRawSql[0]==':' || zRawSql[0]=='$' || + zRawSql[0]=='@' || zRawSql[0]=='#' ); testcase( zRawSql[0]==':' ); testcase( zRawSql[0]=='$' ); testcase( zRawSql[0]=='@' ); + testcase( zRawSql[0]=='#' ); idx = sqlite3VdbeParameterIndex(p, zRawSql, nToken); assert( idx>0 ); } @@ -70621,6 +72017,8 @@ SQLITE_PRIVATE char *sqlite3VdbeExpandSql( ** in this file for details. If in doubt, do not deviate from existing ** commenting and indentation practices when changing or adding code. */ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ /* ** Invoke this macro on memory cells just prior to changing the @@ -70871,7 +72269,7 @@ static void applyNumericAffinity(Mem *pRec, int bTryForInt){ ** SQLITE_AFF_TEXT: ** Convert pRec to a text representation. ** -** SQLITE_AFF_NONE: +** SQLITE_AFF_BLOB: ** No-op. pRec is unchanged. */ static void applyAffinity( @@ -70897,6 +72295,7 @@ static void applyAffinity( if( 0==(pRec->flags&MEM_Str) && (pRec->flags&(MEM_Real|MEM_Int)) ){ sqlite3VdbeMemStringify(pRec, enc, 1); } + pRec->flags &= ~(MEM_Real|MEM_Int); } } @@ -71204,6 +72603,21 @@ static int checkSavepointCount(sqlite3 *db){ } #endif +/* +** Return the register of pOp->p2 after first preparing it to be +** overwritten with an integer value. +*/ +static Mem *out2Prerelease(Vdbe *p, VdbeOp *pOp){ + Mem *pOut; + assert( pOp->p2>0 ); + assert( pOp->p2<=(p->nMem-p->nCursor) ); + pOut = &p->aMem[pOp->p2]; + memAboutToChange(p, pOut); + if( VdbeMemDynamic(pOut) ) sqlite3VdbeMemSetNull(pOut); + pOut->flags = MEM_Int; + return pOut; +} + /* ** Execute as much of a VDBE program as we can. @@ -71212,9 +72626,11 @@ static int checkSavepointCount(sqlite3 *db){ SQLITE_PRIVATE int sqlite3VdbeExec( Vdbe *p /* The VDBE */ ){ - int pc=0; /* The program counter */ Op *aOp = p->aOp; /* Copy of p->aOp */ - Op *pOp; /* Current operation */ + Op *pOp = aOp; /* Current operation */ +#if defined(SQLITE_DEBUG) || defined(VDBE_PROFILE) + Op *pOrigOp; /* Value of pOp at the top of the loop */ +#endif int rc = SQLITE_OK; /* Value to return */ sqlite3 *db = p->db; /* The database */ u8 resetSchemaOnFault = 0; /* Reset schema after an error if positive */ @@ -71254,13 +72670,9 @@ SQLITE_PRIVATE int sqlite3VdbeExec( sqlite3VdbeIOTraceSql(p); #ifndef SQLITE_OMIT_PROGRESS_CALLBACK if( db->xProgress ){ + u32 iPrior = p->aCounter[SQLITE_STMTSTATUS_VM_STEP]; assert( 0 < db->nProgressOps ); - nProgressLimit = (unsigned)p->aCounter[SQLITE_STMTSTATUS_VM_STEP]; - if( nProgressLimit==0 ){ - nProgressLimit = db->nProgressOps; - }else{ - nProgressLimit %= (unsigned)db->nProgressOps; - } + nProgressLimit = db->nProgressOps - (iPrior % db->nProgressOps); } #endif #ifdef SQLITE_DEBUG @@ -71290,23 +72702,22 @@ SQLITE_PRIVATE int sqlite3VdbeExec( } sqlite3EndBenignMalloc(); #endif - for(pc=p->pc; rc==SQLITE_OK; pc++){ - assert( pc>=0 && pcnOp ); + for(pOp=&aOp[p->pc]; rc==SQLITE_OK; pOp++){ + assert( pOp>=aOp && pOp<&aOp[p->nOp]); if( db->mallocFailed ) goto no_mem; #ifdef VDBE_PROFILE start = sqlite3Hwtime(); #endif nVmStep++; - pOp = &aOp[pc]; #ifdef SQLITE_ENABLE_STMT_SCANSTATUS - if( p->anExec ) p->anExec[pc]++; + if( p->anExec ) p->anExec[(int)(pOp-aOp)]++; #endif /* Only allow tracing if SQLITE_DEBUG is defined. */ #ifdef SQLITE_DEBUG if( db->flags & SQLITE_VdbeTrace ){ - sqlite3VdbePrintOp(stdout, pc, pOp); + sqlite3VdbePrintOp(stdout, (int)(pOp - aOp), pOp); } #endif @@ -71323,23 +72734,9 @@ SQLITE_PRIVATE int sqlite3VdbeExec( } #endif - /* On any opcode with the "out2-prerelease" tag, free any - ** external allocations out of mem[p2] and set mem[p2] to be - ** an undefined integer. Opcodes will either fill in the integer - ** value or convert mem[p2] to a different type. - */ - assert( pOp->opflags==sqlite3OpcodeProperty[pOp->opcode] ); - if( pOp->opflags & OPFLG_OUT2_PRERELEASE ){ - assert( pOp->p2>0 ); - assert( pOp->p2<=(p->nMem-p->nCursor) ); - pOut = &aMem[pOp->p2]; - memAboutToChange(p, pOut); - if( VdbeMemDynamic(pOut) ) sqlite3VdbeMemSetNull(pOut); - pOut->flags = MEM_Int; - } - /* Sanity checking on other operands */ #ifdef SQLITE_DEBUG + assert( pOp->opflags==sqlite3OpcodeProperty[pOp->opcode] ); if( (pOp->opflags & OPFLG_IN1)!=0 ){ assert( pOp->p1>0 ); assert( pOp->p1<=(p->nMem-p->nCursor) ); @@ -71372,6 +72769,9 @@ SQLITE_PRIVATE int sqlite3VdbeExec( memAboutToChange(p, &aMem[pOp->p3]); } #endif +#if defined(SQLITE_DEBUG) || defined(VDBE_PROFILE) + pOrigOp = pOp; +#endif switch( pOp->opcode ){ @@ -71395,7 +72795,7 @@ SQLITE_PRIVATE int sqlite3VdbeExec( ** ** Other keywords in the comment that follows each case are used to ** construct the OPFLG_INITIALIZER value that initializes opcodeProperty[]. -** Keywords include: in1, in2, in3, out2_prerelease, out2, out3. See +** Keywords include: in1, in2, in3, out2, out3. See ** the mkopcodeh.awk script for additional information. ** ** Documentation about VDBE opcodes is generated by scanning this file @@ -71423,7 +72823,8 @@ SQLITE_PRIVATE int sqlite3VdbeExec( ** to the current line should be indented for EXPLAIN output. */ case OP_Goto: { /* jump */ - pc = pOp->p2 - 1; +jump_to_p2_and_check_for_interrupt: + pOp = &aOp[pOp->p2 - 1]; /* Opcodes that are used as the bottom of a loop (OP_Next, OP_Prev, ** OP_VNext, OP_RowSetNext, or OP_SorterNext) all jump here upon @@ -71468,9 +72869,13 @@ case OP_Gosub: { /* jump */ assert( VdbeMemDynamic(pIn1)==0 ); memAboutToChange(p, pIn1); pIn1->flags = MEM_Int; - pIn1->u.i = pc; + pIn1->u.i = (int)(pOp-aOp); REGISTER_TRACE(pOp->p1, pIn1); - pc = pOp->p2 - 1; + + /* Most jump operations do a goto to this spot in order to update + ** the pOp pointer. */ +jump_to_p2: + pOp = &aOp[pOp->p2 - 1]; break; } @@ -71482,7 +72887,7 @@ case OP_Gosub: { /* jump */ case OP_Return: { /* in1 */ pIn1 = &aMem[pOp->p1]; assert( pIn1->flags==MEM_Int ); - pc = (int)pIn1->u.i; + pOp = &aOp[pIn1->u.i]; pIn1->flags = MEM_Undefined; break; } @@ -71506,7 +72911,7 @@ case OP_InitCoroutine: { /* jump */ assert( !VdbeMemDynamic(pOut) ); pOut->u.i = pOp->p3 - 1; pOut->flags = MEM_Int; - if( pOp->p2 ) pc = pOp->p2 - 1; + if( pOp->p2 ) goto jump_to_p2; break; } @@ -71526,7 +72931,7 @@ case OP_EndCoroutine: { /* in1 */ pCaller = &aOp[pIn1->u.i]; assert( pCaller->opcode==OP_Yield ); assert( pCaller->p2>=0 && pCaller->p2nOp ); - pc = pCaller->p2 - 1; + pOp = &aOp[pCaller->p2 - 1]; pIn1->flags = MEM_Undefined; break; } @@ -71550,9 +72955,9 @@ case OP_Yield: { /* in1, jump */ assert( VdbeMemDynamic(pIn1)==0 ); pIn1->flags = MEM_Int; pcDest = (int)pIn1->u.i; - pIn1->u.i = pc; + pIn1->u.i = (int)(pOp - aOp); REGISTER_TRACE(pOp->p1, pIn1); - pc = pcDest; + pOp = &aOp[pcDest]; break; } @@ -71603,30 +73008,34 @@ case OP_HaltIfNull: { /* in3 */ case OP_Halt: { const char *zType; const char *zLogFmt; + VdbeFrame *pFrame; + int pcx; + pcx = (int)(pOp - aOp); if( pOp->p1==SQLITE_OK && p->pFrame ){ /* Halt the sub-program. Return control to the parent frame. */ - VdbeFrame *pFrame = p->pFrame; + pFrame = p->pFrame; p->pFrame = pFrame->pParent; p->nFrame--; sqlite3VdbeSetChanges(db, p->nChange); - pc = sqlite3VdbeFrameRestore(pFrame); + pcx = sqlite3VdbeFrameRestore(pFrame); lastRowid = db->lastRowid; if( pOp->p2==OE_Ignore ){ - /* Instruction pc is the OP_Program that invoked the sub-program + /* Instruction pcx is the OP_Program that invoked the sub-program ** currently being halted. If the p2 instruction of this OP_Halt ** instruction is set to OE_Ignore, then the sub-program is throwing ** an IGNORE exception. In this case jump to the address specified ** as the p2 of the calling OP_Program. */ - pc = p->aOp[pc].p2-1; + pcx = p->aOp[pcx].p2-1; } aOp = p->aOp; aMem = p->aMem; + pOp = &aOp[pcx]; break; } p->rc = pOp->p1; p->errorAction = (u8)pOp->p2; - p->pc = pc; + p->pc = pcx; if( p->rc ){ if( pOp->p5 ){ static const char * const azType[] = { "NOT NULL", "UNIQUE", "CHECK", @@ -71643,14 +73052,13 @@ case OP_Halt: { assert( zType!=0 || pOp->p4.z!=0 ); zLogFmt = "abort at %d in [%s]: %s"; if( zType && pOp->p4.z ){ - sqlite3SetString(&p->zErrMsg, db, "%s constraint failed: %s", - zType, pOp->p4.z); + sqlite3VdbeError(p, "%s constraint failed: %s", zType, pOp->p4.z); }else if( pOp->p4.z ){ - sqlite3SetString(&p->zErrMsg, db, "%s", pOp->p4.z); + sqlite3VdbeError(p, "%s", pOp->p4.z); }else{ - sqlite3SetString(&p->zErrMsg, db, "%s constraint failed", zType); + sqlite3VdbeError(p, "%s constraint failed", zType); } - sqlite3_log(pOp->p1, zLogFmt, pc, p->zSql, p->zErrMsg); + sqlite3_log(pOp->p1, zLogFmt, pcx, p->zSql, p->zErrMsg); } rc = sqlite3VdbeHalt(p); assert( rc==SQLITE_BUSY || rc==SQLITE_OK || rc==SQLITE_ERROR ); @@ -71669,7 +73077,8 @@ case OP_Halt: { ** ** The 32-bit integer value P1 is written into register P2. */ -case OP_Integer: { /* out2-prerelease */ +case OP_Integer: { /* out2 */ + pOut = out2Prerelease(p, pOp); pOut->u.i = pOp->p1; break; } @@ -71680,7 +73089,8 @@ case OP_Integer: { /* out2-prerelease */ ** P4 is a pointer to a 64-bit integer value. ** Write that value into register P2. */ -case OP_Int64: { /* out2-prerelease */ +case OP_Int64: { /* out2 */ + pOut = out2Prerelease(p, pOp); assert( pOp->p4.pI64!=0 ); pOut->u.i = *pOp->p4.pI64; break; @@ -71693,7 +73103,8 @@ case OP_Int64: { /* out2-prerelease */ ** P4 is a pointer to a 64-bit floating point value. ** Write that value into register P2. */ -case OP_Real: { /* same as TK_FLOAT, out2-prerelease */ +case OP_Real: { /* same as TK_FLOAT, out2 */ + pOut = out2Prerelease(p, pOp); pOut->flags = MEM_Real; assert( !sqlite3IsNaN(*pOp->p4.pReal) ); pOut->u.r = *pOp->p4.pReal; @@ -71709,8 +73120,9 @@ case OP_Real: { /* same as TK_FLOAT, out2-prerelease */ ** this transformation, the length of string P4 is computed and stored ** as the P1 parameter. */ -case OP_String8: { /* same as TK_STRING, out2-prerelease */ +case OP_String8: { /* same as TK_STRING, out2 */ assert( pOp->p4.z!=0 ); + pOut = out2Prerelease(p, pOp); pOp->opcode = OP_String; pOp->p1 = sqlite3Strlen30(pOp->p4.z); @@ -71747,8 +73159,9 @@ case OP_String8: { /* same as TK_STRING, out2-prerelease */ ** the same sequence of bytes, it is merely interpreted as a BLOB instead ** of a string, as if it had been CAST. */ -case OP_String: { /* out2-prerelease */ +case OP_String: { /* out2 */ assert( pOp->p4.z!=0 ); + pOut = out2Prerelease(p, pOp); pOut->flags = MEM_Str|MEM_Static|MEM_Term; pOut->z = pOp->p4.z; pOut->n = pOp->p1; @@ -71776,9 +73189,10 @@ case OP_String: { /* out2-prerelease */ ** NULL values will not compare equal even if SQLITE_NULLEQ is set on ** OP_Ne or OP_Eq. */ -case OP_Null: { /* out2-prerelease */ +case OP_Null: { /* out2 */ int cnt; u16 nullFlag; + pOut = out2Prerelease(p, pOp); cnt = pOp->p3-pOp->p2; assert( pOp->p3<=(p->nMem-p->nCursor) ); pOut->flags = nullFlag = pOp->p1 ? (MEM_Null|MEM_Cleared) : MEM_Null; @@ -71813,8 +73227,9 @@ case OP_SoftNull: { ** P4 points to a blob of data P1 bytes long. Store this ** blob in register P2. */ -case OP_Blob: { /* out2-prerelease */ +case OP_Blob: { /* out2 */ assert( pOp->p1 <= SQLITE_MAX_LENGTH ); + pOut = out2Prerelease(p, pOp); sqlite3VdbeMemSetStr(pOut, pOp->p4.z, pOp->p1, 0, 0); pOut->enc = encoding; UPDATE_MAX_BLOBSIZE(pOut); @@ -71829,7 +73244,7 @@ case OP_Blob: { /* out2-prerelease */ ** If the parameter is named, then its name appears in P4. ** The P4 value is used by sqlite3_bind_parameter_name(). */ -case OP_Variable: { /* out2-prerelease */ +case OP_Variable: { /* out2 */ Mem *pVar; /* Value being transferred */ assert( pOp->p1>0 && pOp->p1<=p->nVar ); @@ -71838,6 +73253,7 @@ case OP_Variable: { /* out2-prerelease */ if( sqlite3VdbeMemTooBig(pVar) ){ goto too_big; } + pOut = out2Prerelease(p, pOp); sqlite3VdbeMemShallowCopy(pOut, pVar, MEM_Static); UPDATE_MAX_BLOBSIZE(pOut); break; @@ -71872,10 +73288,11 @@ case OP_Move: { memAboutToChange(p, pOut); sqlite3VdbeMemMove(pOut, pIn1); #ifdef SQLITE_DEBUG - if( pOut->pScopyFrom>=&aMem[p1] && pOut->pScopyFrom<&aMem[p1+pOp->p3] ){ - pOut->pScopyFrom += p1 - pOp->p2; + if( pOut->pScopyFrom>=&aMem[p1] && pOut->pScopyFrompScopyFrom += pOp->p2 - p1; } #endif + Deephemeralize(pOut); REGISTER_TRACE(p2++, pOut); pIn1++; pOut++; @@ -72014,7 +73431,7 @@ case OP_ResultRow: { /* Return SQLITE_ROW */ - p->pc = pc + 1; + p->pc = (int)(pOp - aOp) + 1; rc = SQLITE_ROW; goto vdbe_return; } @@ -72217,10 +73634,10 @@ case OP_CollSeq: { break; } -/* Opcode: Function P1 P2 P3 P4 P5 +/* Opcode: Function0 P1 P2 P3 P4 P5 ** Synopsis: r[P3]=func(r[P2@P5]) ** -** Invoke a user function (P4 is a pointer to a Function structure that +** Invoke a user function (P4 is a pointer to a FuncDef object that ** defines the function) with P5 arguments taken from register P2 and ** successors. The result of the function is stored in register P3. ** Register P3 must not be one of the function inputs. @@ -72232,59 +73649,100 @@ case OP_CollSeq: { ** sqlite3_set_auxdata() API may be safely retained until the next ** invocation of this opcode. ** -** See also: AggStep and AggFinal +** See also: Function, AggStep, AggFinal */ -case OP_Function: { - int i; - Mem *pArg; - sqlite3_context ctx; - sqlite3_value **apVal; +/* Opcode: Function P1 P2 P3 P4 P5 +** Synopsis: r[P3]=func(r[P2@P5]) +** +** Invoke a user function (P4 is a pointer to an sqlite3_context object that +** contains a pointer to the function to be run) with P5 arguments taken +** from register P2 and successors. The result of the function is stored +** in register P3. Register P3 must not be one of the function inputs. +** +** P1 is a 32-bit bitmask indicating whether or not each argument to the +** function was determined to be constant at compile time. If the first +** argument was constant then bit 0 of P1 is set. This is used to determine +** whether meta data associated with a user function argument using the +** sqlite3_set_auxdata() API may be safely retained until the next +** invocation of this opcode. +** +** SQL functions are initially coded as OP_Function0 with P4 pointing +** to a FuncDef object. But on first evaluation, the P4 operand is +** automatically converted into an sqlite3_context object and the operation +** changed to this OP_Function opcode. In this way, the initialization of +** the sqlite3_context object occurs only once, rather than once for each +** evaluation of the function. +** +** See also: Function0, AggStep, AggFinal +*/ +case OP_Function0: { int n; + sqlite3_context *pCtx; + assert( pOp->p4type==P4_FUNCDEF ); n = pOp->p5; - apVal = p->apArg; - assert( apVal || n==0 ); assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) ); - ctx.pOut = &aMem[pOp->p3]; - memAboutToChange(p, ctx.pOut); - assert( n==0 || (pOp->p2>0 && pOp->p2+n<=(p->nMem-p->nCursor)+1) ); assert( pOp->p3p2 || pOp->p3>=pOp->p2+n ); - pArg = &aMem[pOp->p2]; - for(i=0; ip2+i, pArg); + pCtx = sqlite3DbMallocRaw(db, sizeof(*pCtx) + (n-1)*sizeof(sqlite3_value*)); + if( pCtx==0 ) goto no_mem; + pCtx->pOut = 0; + pCtx->pFunc = pOp->p4.pFunc; + pCtx->iOp = (int)(pOp - aOp); + pCtx->pVdbe = p; + pCtx->argc = n; + pOp->p4type = P4_FUNCCTX; + pOp->p4.pCtx = pCtx; + pOp->opcode = OP_Function; + /* Fall through into OP_Function */ +} +case OP_Function: { + int i; + sqlite3_context *pCtx; + + assert( pOp->p4type==P4_FUNCCTX ); + pCtx = pOp->p4.pCtx; + + /* If this function is inside of a trigger, the register array in aMem[] + ** might change from one evaluation to the next. The next block of code + ** checks to see if the register array has changed, and if so it + ** reinitializes the relavant parts of the sqlite3_context object */ + pOut = &aMem[pOp->p3]; + if( pCtx->pOut != pOut ){ + pCtx->pOut = pOut; + for(i=pCtx->argc-1; i>=0; i--) pCtx->argv[i] = &aMem[pOp->p2+i]; } - assert( pOp->p4type==P4_FUNCDEF ); - ctx.pFunc = pOp->p4.pFunc; - ctx.iOp = pc; - ctx.pVdbe = p; - MemSetTypeFlag(ctx.pOut, MEM_Null); - ctx.fErrorOrAux = 0; + memAboutToChange(p, pCtx->pOut); +#ifdef SQLITE_DEBUG + for(i=0; iargc; i++){ + assert( memIsValid(pCtx->argv[i]) ); + REGISTER_TRACE(pOp->p2+i, pCtx->argv[i]); + } +#endif + MemSetTypeFlag(pCtx->pOut, MEM_Null); + pCtx->fErrorOrAux = 0; db->lastRowid = lastRowid; - (*ctx.pFunc->xFunc)(&ctx, n, apVal); /* IMP: R-24505-23230 */ + (*pCtx->pFunc->xFunc)(pCtx, pCtx->argc, pCtx->argv); /* IMP: R-24505-23230 */ lastRowid = db->lastRowid; /* Remember rowid changes made by xFunc */ /* If the function returned an error, throw an exception */ - if( ctx.fErrorOrAux ){ - if( ctx.isError ){ - sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(ctx.pOut)); - rc = ctx.isError; + if( pCtx->fErrorOrAux ){ + if( pCtx->isError ){ + sqlite3VdbeError(p, "%s", sqlite3_value_text(pCtx->pOut)); + rc = pCtx->isError; } - sqlite3VdbeDeleteAuxData(p, pc, pOp->p1); + sqlite3VdbeDeleteAuxData(p, pCtx->iOp, pOp->p1); } /* Copy the result of the function into register P3 */ - sqlite3VdbeChangeEncoding(ctx.pOut, encoding); - if( sqlite3VdbeMemTooBig(ctx.pOut) ){ - goto too_big; + if( pOut->flags & (MEM_Str|MEM_Blob) ){ + sqlite3VdbeChangeEncoding(pCtx->pOut, encoding); + if( sqlite3VdbeMemTooBig(pCtx->pOut) ) goto too_big; } - REGISTER_TRACE(pOp->p3, ctx.pOut); - UPDATE_MAX_BLOBSIZE(ctx.pOut); + REGISTER_TRACE(pOp->p3, pCtx->pOut); + UPDATE_MAX_BLOBSIZE(pCtx->pOut); break; } @@ -72403,8 +73861,7 @@ case OP_MustBeInt: { /* jump, in1 */ rc = SQLITE_MISMATCH; goto abort_due_to_error; }else{ - pc = pOp->p2 - 1; - break; + goto jump_to_p2; } } } @@ -72448,9 +73905,9 @@ case OP_RealAffinity: { /* in1 */ ** A NULL value is not changed by this routine. It remains NULL. */ case OP_Cast: { /* in1 */ - assert( pOp->p2>=SQLITE_AFF_NONE && pOp->p2<=SQLITE_AFF_REAL ); + assert( pOp->p2>=SQLITE_AFF_BLOB && pOp->p2<=SQLITE_AFF_REAL ); testcase( pOp->p2==SQLITE_AFF_TEXT ); - testcase( pOp->p2==SQLITE_AFF_NONE ); + testcase( pOp->p2==SQLITE_AFF_BLOB ); testcase( pOp->p2==SQLITE_AFF_NUMERIC ); testcase( pOp->p2==SQLITE_AFF_INTEGER ); testcase( pOp->p2==SQLITE_AFF_REAL ); @@ -72590,7 +74047,7 @@ case OP_Ge: { /* same as TK_GE, jump, in1, in3 */ }else{ VdbeBranchTaken(2,3); if( pOp->p5 & SQLITE_JUMPIFNULL ){ - pc = pOp->p2-1; + goto jump_to_p2; } } break; @@ -72642,6 +74099,12 @@ case OP_Ge: { /* same as TK_GE, jump, in1, in3 */ default: res = res>=0; break; } + /* Undo any changes made by applyAffinity() to the input registers. */ + assert( (pIn1->flags & MEM_Dyn) == (flags1 & MEM_Dyn) ); + pIn1->flags = flags1; + assert( (pIn3->flags & MEM_Dyn) == (flags3 & MEM_Dyn) ); + pIn3->flags = flags3; + if( pOp->p5 & SQLITE_STOREP2 ){ pOut = &aMem[pOp->p2]; memAboutToChange(p, pOut); @@ -72651,14 +74114,9 @@ case OP_Ge: { /* same as TK_GE, jump, in1, in3 */ }else{ VdbeBranchTaken(res!=0, (pOp->p5 & SQLITE_NULLEQ)?2:3); if( res ){ - pc = pOp->p2-1; + goto jump_to_p2; } } - /* Undo any changes made by applyAffinity() to the input registers. */ - assert( (pIn1->flags & MEM_Dyn) == (flags1 & MEM_Dyn) ); - pIn1->flags = flags1; - assert( (pIn3->flags & MEM_Dyn) == (flags3 & MEM_Dyn) ); - pIn3->flags = flags3; break; } @@ -72753,11 +74211,11 @@ case OP_Compare: { */ case OP_Jump: { /* jump */ if( iCompare<0 ){ - pc = pOp->p1 - 1; VdbeBranchTaken(0,3); + VdbeBranchTaken(0,3); pOp = &aOp[pOp->p1 - 1]; }else if( iCompare==0 ){ - pc = pOp->p2 - 1; VdbeBranchTaken(1,3); + VdbeBranchTaken(1,3); pOp = &aOp[pOp->p2 - 1]; }else{ - pc = pOp->p3 - 1; VdbeBranchTaken(2,3); + VdbeBranchTaken(2,3); pOp = &aOp[pOp->p3 - 1]; } break; } @@ -72867,7 +74325,7 @@ case OP_Once: { /* jump */ assert( pOp->p1nOnceFlag ); VdbeBranchTaken(p->aOnceFlag[pOp->p1]!=0, 2); if( p->aOnceFlag[pOp->p1] ){ - pc = pOp->p2-1; + goto jump_to_p2; }else{ p->aOnceFlag[pOp->p1] = 1; } @@ -72902,7 +74360,7 @@ case OP_IfNot: { /* jump, in1 */ } VdbeBranchTaken(c!=0, 2); if( c ){ - pc = pOp->p2-1; + goto jump_to_p2; } break; } @@ -72916,7 +74374,7 @@ case OP_IsNull: { /* same as TK_ISNULL, jump, in1 */ pIn1 = &aMem[pOp->p1]; VdbeBranchTaken( (pIn1->flags & MEM_Null)!=0, 2); if( (pIn1->flags & MEM_Null)!=0 ){ - pc = pOp->p2 - 1; + goto jump_to_p2; } break; } @@ -72930,7 +74388,7 @@ case OP_NotNull: { /* same as TK_NOTNULL, jump, in1 */ pIn1 = &aMem[pOp->p1]; VdbeBranchTaken( (pIn1->flags & MEM_Null)==0, 2); if( (pIn1->flags & MEM_Null)==0 ){ - pc = pOp->p2 - 1; + goto jump_to_p2; } break; } @@ -73144,7 +74602,7 @@ case OP_Column: { } } - /* If after trying to extra new entries from the header, nHdrParsed is + /* If after trying to extract new entries from the header, nHdrParsed is ** still not up to p2, that means that the record has fewer than p2 ** columns. So the result will be either the default value or a NULL. */ @@ -73260,7 +74718,7 @@ case OP_Affinity: { ** The mapping from character to affinity is given by the SQLITE_AFF_ ** macros defined in sqliteInt.h. ** -** If P4 is NULL then all index fields have the affinity NONE. +** If P4 is NULL then all index fields have the affinity BLOB. */ case OP_MakeRecord: { u8 *zNewRecord; /* A buffer to hold the data for the new record */ @@ -73268,7 +74726,7 @@ case OP_MakeRecord: { u64 nData; /* Number of bytes of data space */ int nHdr; /* Number of bytes of header space */ i64 nByte; /* Data space required for this record */ - int nZero; /* Number of zero bytes at the end of the record */ + i64 nZero; /* Number of zero bytes at the end of the record */ int nVarint; /* Number of bytes in a varint */ u32 serial_type; /* Type field */ Mem *pData0; /* First field to be combined into the record */ @@ -73332,7 +74790,7 @@ case OP_MakeRecord: { len = sqlite3VdbeSerialTypeLen(serial_type); if( pRec->flags & MEM_Zero ){ if( nData ){ - sqlite3VdbeMemExpandBlob(pRec); + if( sqlite3VdbeMemExpandBlob(pRec) ) goto no_mem; }else{ nZero += pRec->u.nZero; len -= pRec->u.nZero; @@ -73360,7 +74818,7 @@ case OP_MakeRecord: { if( nVarintdb->aLimit[SQLITE_LIMIT_LENGTH] ){ + if( nByte+nZero>db->aLimit[SQLITE_LIMIT_LENGTH] ){ goto too_big; } @@ -73411,7 +74869,7 @@ case OP_MakeRecord: { ** opened by cursor P1 in register P2 */ #ifndef SQLITE_OMIT_BTREECOUNT -case OP_Count: { /* out2-prerelease */ +case OP_Count: { /* out2 */ i64 nEntry; BtCursor *pCrsr; @@ -73419,6 +74877,7 @@ case OP_Count: { /* out2-prerelease */ assert( pCrsr ); nEntry = 0; /* Not needed. Only used to silence a warning. */ rc = sqlite3BtreeCount(pCrsr, &nEntry); + pOut = out2Prerelease(p, pOp); pOut->u.i = nEntry; break; } @@ -73457,8 +74916,7 @@ case OP_Savepoint: { /* A new savepoint cannot be created if there are active write ** statements (i.e. open read/write incremental blob handles). */ - sqlite3SetString(&p->zErrMsg, db, "cannot open savepoint - " - "SQL statements in progress"); + sqlite3VdbeError(p, "cannot open savepoint - SQL statements in progress"); rc = SQLITE_BUSY; }else{ nName = sqlite3Strlen30(zName); @@ -73509,15 +74967,14 @@ case OP_Savepoint: { iSavepoint++; } if( !pSavepoint ){ - sqlite3SetString(&p->zErrMsg, db, "no such savepoint: %s", zName); + sqlite3VdbeError(p, "no such savepoint: %s", zName); rc = SQLITE_ERROR; }else if( db->nVdbeWrite>0 && p1==SAVEPOINT_RELEASE ){ /* It is not possible to release (commit) a savepoint if there are ** active write statements. */ - sqlite3SetString(&p->zErrMsg, db, - "cannot release savepoint - SQL statements in progress" - ); + sqlite3VdbeError(p, "cannot release savepoint - " + "SQL statements in progress"); rc = SQLITE_BUSY; }else{ @@ -73532,7 +74989,7 @@ case OP_Savepoint: { } db->autoCommit = 1; if( sqlite3VdbeHalt(p)==SQLITE_BUSY ){ - p->pc = pc; + p->pc = (int)(pOp - aOp); db->autoCommit = 0; p->rc = rc = SQLITE_BUSY; goto vdbe_return; @@ -73591,7 +75048,7 @@ case OP_Savepoint: { db->nDeferredImmCons = pSavepoint->nDeferredImmCons; } - if( !isTransaction ){ + if( !isTransaction || p1==SAVEPOINT_ROLLBACK ){ rc = sqlite3VtabSavepoint(db, p1, iSavepoint); if( rc!=SQLITE_OK ) goto abort_due_to_error; } @@ -73623,23 +75080,12 @@ case OP_AutoCommit: { assert( db->nVdbeActive>0 ); /* At least this one VM is active */ assert( p->bIsReader ); -#if 0 - if( turnOnAC && iRollback && db->nVdbeActive>1 ){ - /* If this instruction implements a ROLLBACK and other VMs are - ** still running, and a transaction is active, return an error indicating - ** that the other VMs must complete first. - */ - sqlite3SetString(&p->zErrMsg, db, "cannot rollback transaction - " - "SQL statements in progress"); - rc = SQLITE_BUSY; - }else -#endif if( turnOnAC && !iRollback && db->nVdbeWrite>0 ){ /* If this instruction implements a COMMIT and other VMs are writing ** return an error indicating that the other VMs must complete first. */ - sqlite3SetString(&p->zErrMsg, db, "cannot commit transaction - " - "SQL statements in progress"); + sqlite3VdbeError(p, "cannot commit transaction - " + "SQL statements in progress"); rc = SQLITE_BUSY; }else if( desiredAutoCommit!=db->autoCommit ){ if( iRollback ){ @@ -73651,7 +75097,7 @@ case OP_AutoCommit: { }else{ db->autoCommit = (u8)desiredAutoCommit; if( sqlite3VdbeHalt(p)==SQLITE_BUSY ){ - p->pc = pc; + p->pc = (int)(pOp - aOp); db->autoCommit = (u8)(1-desiredAutoCommit); p->rc = rc = SQLITE_BUSY; goto vdbe_return; @@ -73666,7 +75112,7 @@ case OP_AutoCommit: { } goto vdbe_return; }else{ - sqlite3SetString(&p->zErrMsg, db, + sqlite3VdbeError(p, (!desiredAutoCommit)?"cannot start a transaction within a transaction":( (iRollback)?"cannot rollback - no transaction is active": "cannot commit - no transaction is active")); @@ -73728,7 +75174,7 @@ case OP_Transaction: { if( pBt ){ rc = sqlite3BtreeBeginTrans(pBt, pOp->p2); if( rc==SQLITE_BUSY ){ - p->pc = pc; + p->pc = (int)(pOp - aOp); p->rc = rc = SQLITE_BUSY; goto vdbe_return; } @@ -73807,7 +75253,7 @@ case OP_Transaction: { ** must be started or there must be an open cursor) before ** executing this instruction. */ -case OP_ReadCookie: { /* out2-prerelease */ +case OP_ReadCookie: { /* out2 */ int iMeta; int iDb; int iCookie; @@ -73821,6 +75267,7 @@ case OP_ReadCookie: { /* out2-prerelease */ assert( DbMaskTest(p->btreeMask, iDb) ); sqlite3BtreeGetMeta(db->aDb[iDb].pBt, iCookie, (u32 *)&iMeta); + pOut = out2Prerelease(p, pOp); pOut->u.i = iMeta; break; } @@ -74142,7 +75589,7 @@ case OP_SequenceTest: { pC = p->apCsr[pOp->p1]; assert( pC->pSorter ); if( (pC->seqCount++)==0 ){ - pc = pOp->p2 - 1; + goto jump_to_p2; } break; } @@ -74189,6 +75636,26 @@ case OP_Close: { break; } +#ifdef SQLITE_ENABLE_COLUMN_USED_MASK +/* Opcode: ColumnsUsed P1 * * P4 * +** +** This opcode (which only exists if SQLite was compiled with +** SQLITE_ENABLE_COLUMN_USED_MASK) identifies which columns of the +** table or index for cursor P1 are used. P4 is a 64-bit integer +** (P4_INT64) in which the first 63 bits are one for each of the +** first 63 columns of the table or index that are actually used +** by the cursor. The high-order bit is set if any column after +** the 64th is used. +*/ +case OP_ColumnsUsed: { + VdbeCursor *pC; + pC = p->apCsr[pOp->p1]; + assert( pC->pCursor ); + pC->maskUsed = *(u64*)pOp->p4.pI64; + break; +} +#endif + /* Opcode: SeekGE P1 P2 P3 P4 * ** Synopsis: key=r[P3@P4] ** @@ -74319,7 +75786,7 @@ case OP_SeekGT: { /* jump, in3 */ if( (pIn3->flags & MEM_Real)==0 ){ /* If the P3 value cannot be converted into any kind of a number, ** then the seek is not possible, so jump to P2 */ - pc = pOp->p2 - 1; VdbeBranchTaken(1,2); + VdbeBranchTaken(1,2); goto jump_to_p2; break; } @@ -74410,7 +75877,7 @@ case OP_SeekGT: { /* jump, in3 */ assert( pOp->p2>0 ); VdbeBranchTaken(res!=0,2); if( res ){ - pc = pOp->p2 - 1; + goto jump_to_p2; } break; } @@ -74504,6 +75971,7 @@ case OP_NoConflict: /* jump, in3 */ case OP_NotFound: /* jump, in3 */ case OP_Found: { /* jump, in3 */ int alreadyExists; + int takeJump; int ii; VdbeCursor *pC; int res; @@ -74526,7 +75994,7 @@ case OP_Found: { /* jump, in3 */ pIn3 = &aMem[pOp->p3]; assert( pC->pCursor!=0 ); assert( pC->isTable==0 ); - pFree = 0; /* Not needed. Only used to suppress a compiler warning. */ + pFree = 0; if( pOp->p4.i>0 ){ r.pKeyInfo = pC->pKeyInfo; r.nField = (u16)pOp->p4.i; @@ -74549,21 +76017,20 @@ case OP_Found: { /* jump, in3 */ sqlite3VdbeRecordUnpack(pC->pKeyInfo, pIn3->n, pIn3->z, pIdxKey); } pIdxKey->default_rc = 0; + takeJump = 0; if( pOp->opcode==OP_NoConflict ){ /* For the OP_NoConflict opcode, take the jump if any of the ** input fields are NULL, since any key with a NULL will not ** conflict */ for(ii=0; iinField; ii++){ if( pIdxKey->aMem[ii].flags & MEM_Null ){ - pc = pOp->p2 - 1; VdbeBranchTaken(1,2); + takeJump = 1; break; } } } rc = sqlite3BtreeMovetoUnpacked(pC->pCursor, pIdxKey, 0, 0, &res); - if( pOp->p4.i==0 ){ - sqlite3DbFree(db, pFree); - } + sqlite3DbFree(db, pFree); if( rc!=SQLITE_OK ){ break; } @@ -74574,10 +76041,10 @@ case OP_Found: { /* jump, in3 */ pC->cacheStatus = CACHE_STALE; if( pOp->opcode==OP_Found ){ VdbeBranchTaken(alreadyExists!=0,2); - if( alreadyExists ) pc = pOp->p2 - 1; + if( alreadyExists ) goto jump_to_p2; }else{ - VdbeBranchTaken(alreadyExists==0,2); - if( !alreadyExists ) pc = pOp->p2 - 1; + VdbeBranchTaken(takeJump||alreadyExists==0,2); + if( takeJump || !alreadyExists ) goto jump_to_p2; } break; } @@ -74626,10 +76093,8 @@ case OP_NotExists: { /* jump, in3 */ pC->cacheStatus = CACHE_STALE; pC->deferredMoveto = 0; VdbeBranchTaken(res!=0,2); - if( res!=0 ){ - pc = pOp->p2 - 1; - } pC->seekResult = res; + if( res!=0 ) goto jump_to_p2; break; } @@ -74641,9 +76106,10 @@ case OP_NotExists: { /* jump, in3 */ ** The sequence number on the cursor is incremented after this ** instruction. */ -case OP_Sequence: { /* out2-prerelease */ +case OP_Sequence: { /* out2 */ assert( pOp->p1>=0 && pOp->p1nCursor ); assert( p->apCsr[pOp->p1]!=0 ); + pOut = out2Prerelease(p, pOp); pOut->u.i = p->apCsr[pOp->p1]->seqCount++; break; } @@ -74664,7 +76130,7 @@ case OP_Sequence: { /* out2-prerelease */ ** generated record number. This P3 mechanism is used to help implement the ** AUTOINCREMENT feature. */ -case OP_NewRowid: { /* out2-prerelease */ +case OP_NewRowid: { /* out2 */ i64 v; /* The new rowid */ VdbeCursor *pC; /* Cursor of table to get the new rowid */ int res; /* Result of an sqlite3BtreeLast() */ @@ -74674,12 +76140,12 @@ case OP_NewRowid: { /* out2-prerelease */ v = 0; res = 0; + pOut = out2Prerelease(p, pOp); assert( pOp->p1>=0 && pOp->p1nCursor ); pC = p->apCsr[pOp->p1]; assert( pC!=0 ); - if( NEVER(pC->pCursor==0) ){ - /* The zero initialization above is all that is needed */ - }else{ + assert( pC->pCursor!=0 ); + { /* The next rowid or record number (different terms for the same ** thing) is obtained in a two-step algorithm. ** @@ -74987,9 +76453,7 @@ case OP_SorterCompare: { res = 0; rc = sqlite3VdbeSorterCompare(pC, pIn3, nKeyCol, &res); VdbeBranchTaken(res!=0,2); - if( res ){ - pc = pOp->p2-1; - } + if( res ) goto jump_to_p2; break; }; @@ -75118,12 +76582,13 @@ case OP_RowData: { ** be a separate OP_VRowid opcode for use with virtual tables, but this ** one opcode now works for both table types. */ -case OP_Rowid: { /* out2-prerelease */ +case OP_Rowid: { /* out2 */ VdbeCursor *pC; i64 v; sqlite3_vtab *pVtab; const sqlite3_module *pModule; + pOut = out2Prerelease(p, pOp); assert( pOp->p1>=0 && pOp->p1nCursor ); pC = p->apCsr[pOp->p1]; assert( pC!=0 ); @@ -75176,7 +76641,7 @@ case OP_NullRow: { break; } -/* Opcode: Last P1 P2 * * * +/* Opcode: Last P1 P2 P3 * * ** ** The next use of the Rowid or Column or Prev instruction for P1 ** will refer to the last entry in the database table or index. @@ -75203,12 +76668,13 @@ case OP_Last: { /* jump */ pC->nullRow = (u8)res; pC->deferredMoveto = 0; pC->cacheStatus = CACHE_STALE; + pC->seekResult = pOp->p3; #ifdef SQLITE_DEBUG pC->seekOp = OP_Last; #endif if( pOp->p2>0 ){ VdbeBranchTaken(res!=0,2); - if( res ) pc = pOp->p2 - 1; + if( res ) goto jump_to_p2; } break; } @@ -75272,9 +76738,7 @@ case OP_Rewind: { /* jump */ pC->nullRow = (u8)res; assert( pOp->p2>0 && pOp->p2nOp ); VdbeBranchTaken(res!=0,2); - if( res ){ - pc = pOp->p2 - 1; - } + if( res ) goto jump_to_p2; break; } @@ -75385,11 +76849,11 @@ next_tail: VdbeBranchTaken(res==0,2); if( res==0 ){ pC->nullRow = 0; - pc = pOp->p2 - 1; p->aCounter[pOp->p5]++; #ifdef SQLITE_TEST sqlite3_search_count++; #endif + goto jump_to_p2_and_check_for_interrupt; }else{ pC->nullRow = 1; } @@ -75420,7 +76884,6 @@ next_tail: case OP_SorterInsert: /* in2 */ case OP_IdxInsert: { /* in2 */ VdbeCursor *pC; - BtCursor *pCrsr; int nKey; const char *zKey; @@ -75430,18 +76893,17 @@ case OP_IdxInsert: { /* in2 */ assert( isSorter(pC)==(pOp->opcode==OP_SorterInsert) ); pIn2 = &aMem[pOp->p2]; assert( pIn2->flags & MEM_Blob ); - pCrsr = pC->pCursor; if( pOp->p5 & OPFLAG_NCHANGE ) p->nChange++; - assert( pCrsr!=0 ); + assert( pC->pCursor!=0 ); assert( pC->isTable==0 ); rc = ExpandBlob(pIn2); if( rc==SQLITE_OK ){ - if( isSorter(pC) ){ + if( pOp->opcode==OP_SorterInsert ){ rc = sqlite3VdbeSorterWrite(pC, pIn2); }else{ nKey = pIn2->n; zKey = pIn2->z; - rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3, + rc = sqlite3BtreeInsert(pC->pCursor, zKey, nKey, "", 0, 0, pOp->p3, ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0) ); assert( pC->deferredMoveto==0 ); @@ -75497,11 +76959,12 @@ case OP_IdxDelete: { ** ** See also: Rowid, MakeRecord. */ -case OP_IdxRowid: { /* out2-prerelease */ +case OP_IdxRowid: { /* out2 */ BtCursor *pCrsr; VdbeCursor *pC; i64 rowid; + pOut = out2Prerelease(p, pOp); assert( pOp->p1>=0 && pOp->p1nCursor ); pC = p->apCsr[pOp->p1]; assert( pC!=0 ); @@ -75614,9 +77077,7 @@ case OP_IdxGE: { /* jump */ res++; } VdbeBranchTaken(res>0,2); - if( res>0 ){ - pc = pOp->p2 - 1 ; - } + if( res>0 ) goto jump_to_p2; break; } @@ -75640,11 +77101,12 @@ case OP_IdxGE: { /* jump */ ** ** See also: Clear */ -case OP_Destroy: { /* out2-prerelease */ +case OP_Destroy: { /* out2 */ int iMoved; int iDb; assert( p->readOnly==0 ); + pOut = out2Prerelease(p, pOp); pOut->flags = MEM_Null; if( db->nVdbeRead > db->nVDestroy+1 ){ rc = SQLITE_LOCKED; @@ -75753,12 +77215,13 @@ case OP_ResetSorter: { ** ** See documentation on OP_CreateTable for additional information. */ -case OP_CreateIndex: /* out2-prerelease */ -case OP_CreateTable: { /* out2-prerelease */ +case OP_CreateIndex: /* out2 */ +case OP_CreateTable: { /* out2 */ int pgno; int flags; Db *pDb; + pOut = out2Prerelease(p, pOp); pgno = 0; assert( pOp->p1>=0 && pOp->p1nDb ); assert( DbMaskTest(p->btreeMask, pOp->p1) ); @@ -75984,12 +77447,12 @@ case OP_RowSetRead: { /* jump, in1, out3 */ ){ /* The boolean index is empty */ sqlite3VdbeMemSetNull(pIn1); - pc = pOp->p2 - 1; VdbeBranchTaken(1,2); + goto jump_to_p2_and_check_for_interrupt; }else{ /* A value was pulled from the index */ - sqlite3VdbeMemSetInt64(&aMem[pOp->p3], val); VdbeBranchTaken(0,2); + sqlite3VdbeMemSetInt64(&aMem[pOp->p3], val); } goto check_for_interrupt; } @@ -76040,10 +77503,7 @@ case OP_RowSetTest: { /* jump, in1, in3 */ if( iSet ){ exists = sqlite3RowSetTest(pIn1->u.pRowSet, iSet, pIn3->u.i); VdbeBranchTaken(exists!=0,2); - if( exists ){ - pc = pOp->p2 - 1; - break; - } + if( exists ) goto jump_to_p2; } if( iSet>=0 ){ sqlite3RowSetInsert(pIn1->u.pRowSet, pIn3->u.i); @@ -76102,7 +77562,7 @@ case OP_Program: { /* jump */ if( p->nFrame>=db->aLimit[SQLITE_LIMIT_TRIGGER_DEPTH] ){ rc = SQLITE_ERROR; - sqlite3SetString(&p->zErrMsg, db, "too many levels of trigger recursion"); + sqlite3VdbeError(p, "too many levels of trigger recursion"); break; } @@ -76132,7 +77592,7 @@ case OP_Program: { /* jump */ pFrame->v = p; pFrame->nChildMem = nMem; pFrame->nChildCsr = pProgram->nCsr; - pFrame->pc = pc; + pFrame->pc = (int)(pOp - aOp); pFrame->aMem = p->aMem; pFrame->nMem = p->nMem; pFrame->apCsr = p->apCsr; @@ -76155,7 +77615,7 @@ case OP_Program: { /* jump */ pFrame = pRt->u.pFrame; assert( pProgram->nMem+pProgram->nCsr==pFrame->nChildMem ); assert( pProgram->nCsr==pFrame->nChildCsr ); - assert( pc==pFrame->pc ); + assert( (int)(pOp - aOp)==pFrame->pc ); } p->nFrame++; @@ -76176,7 +77636,7 @@ case OP_Program: { /* jump */ #ifdef SQLITE_ENABLE_STMT_SCANSTATUS p->anExec = 0; #endif - pc = -1; + pOp = &aOp[-1]; memset(p->aOnceFlag, 0, p->nOnceFlag); break; @@ -76194,9 +77654,10 @@ case OP_Program: { /* jump */ ** the value of the P1 argument to the value of the P1 argument to the ** calling OP_Program instruction. */ -case OP_Param: { /* out2-prerelease */ +case OP_Param: { /* out2 */ VdbeFrame *pFrame; Mem *pIn; + pOut = out2Prerelease(p, pOp); pFrame = p->pFrame; pIn = &pFrame->aMem[pOp->p1 + pFrame->aOp[pFrame->pc].p1]; sqlite3VdbeMemShallowCopy(pOut, pIn, MEM_Ephem); @@ -76240,10 +77701,10 @@ case OP_FkCounter: { case OP_FkIfZero: { /* jump */ if( pOp->p1 ){ VdbeBranchTaken(db->nDeferredCons==0 && db->nDeferredImmCons==0, 2); - if( db->nDeferredCons==0 && db->nDeferredImmCons==0 ) pc = pOp->p2-1; + if( db->nDeferredCons==0 && db->nDeferredImmCons==0 ) goto jump_to_p2; }else{ VdbeBranchTaken(p->nFkConstraint==0 && db->nDeferredImmCons==0, 2); - if( p->nFkConstraint==0 && db->nDeferredImmCons==0 ) pc = pOp->p2-1; + if( p->nFkConstraint==0 && db->nDeferredImmCons==0 ) goto jump_to_p2; } break; } @@ -76294,9 +77755,7 @@ case OP_IfPos: { /* jump, in1 */ pIn1 = &aMem[pOp->p1]; assert( pIn1->flags&MEM_Int ); VdbeBranchTaken( pIn1->u.i>0, 2); - if( pIn1->u.i>0 ){ - pc = pOp->p2 - 1; - } + if( pIn1->u.i>0 ) goto jump_to_p2; break; } @@ -76311,9 +77770,7 @@ case OP_IfNeg: { /* jump, in1 */ assert( pIn1->flags&MEM_Int ); pIn1->u.i += pOp->p3; VdbeBranchTaken(pIn1->u.i<0, 2); - if( pIn1->u.i<0 ){ - pc = pOp->p2 - 1; - } + if( pIn1->u.i<0 ) goto jump_to_p2; break; } @@ -76330,7 +77787,7 @@ case OP_IfNotZero: { /* jump, in1 */ VdbeBranchTaken(pIn1->u.i<0, 2); if( pIn1->u.i ){ pIn1->u.i += pOp->p3; - pc = pOp->p2 - 1; + goto jump_to_p2; } break; } @@ -76346,9 +77803,7 @@ case OP_DecrJumpZero: { /* jump, in1 */ assert( pIn1->flags&MEM_Int ); pIn1->u.i--; VdbeBranchTaken(pIn1->u.i==0, 2); - if( pIn1->u.i==0 ){ - pc = pOp->p2 - 1; - } + if( pIn1->u.i==0 ) goto jump_to_p2; break; } @@ -76364,63 +77819,105 @@ case OP_JumpZeroIncr: { /* jump, in1 */ pIn1 = &aMem[pOp->p1]; assert( pIn1->flags&MEM_Int ); VdbeBranchTaken(pIn1->u.i==0, 2); - if( (pIn1->u.i++)==0 ){ - pc = pOp->p2 - 1; - } + if( (pIn1->u.i++)==0 ) goto jump_to_p2; break; } -/* Opcode: AggStep * P2 P3 P4 P5 +/* Opcode: AggStep0 * P2 P3 P4 P5 ** Synopsis: accum=r[P3] step(r[P2@P5]) ** ** Execute the step function for an aggregate. The ** function has P5 arguments. P4 is a pointer to the FuncDef -** structure that specifies the function. Use register -** P3 as the accumulator. +** structure that specifies the function. Register P3 is the +** accumulator. ** ** The P5 arguments are taken from register P2 and its ** successors. */ -case OP_AggStep: { +/* Opcode: AggStep * P2 P3 P4 P5 +** Synopsis: accum=r[P3] step(r[P2@P5]) +** +** Execute the step function for an aggregate. The +** function has P5 arguments. P4 is a pointer to an sqlite3_context +** object that is used to run the function. Register P3 is +** as the accumulator. +** +** The P5 arguments are taken from register P2 and its +** successors. +** +** This opcode is initially coded as OP_AggStep0. On first evaluation, +** the FuncDef stored in P4 is converted into an sqlite3_context and +** the opcode is changed. In this way, the initialization of the +** sqlite3_context only happens once, instead of on each call to the +** step function. +*/ +case OP_AggStep0: { int n; + sqlite3_context *pCtx; + + assert( pOp->p4type==P4_FUNCDEF ); + n = pOp->p5; + assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) ); + assert( n==0 || (pOp->p2>0 && pOp->p2+n<=(p->nMem-p->nCursor)+1) ); + assert( pOp->p3p2 || pOp->p3>=pOp->p2+n ); + pCtx = sqlite3DbMallocRaw(db, sizeof(*pCtx) + (n-1)*sizeof(sqlite3_value*)); + if( pCtx==0 ) goto no_mem; + pCtx->pMem = 0; + pCtx->pFunc = pOp->p4.pFunc; + pCtx->iOp = (int)(pOp - aOp); + pCtx->pVdbe = p; + pCtx->argc = n; + pOp->p4type = P4_FUNCCTX; + pOp->p4.pCtx = pCtx; + pOp->opcode = OP_AggStep; + /* Fall through into OP_AggStep */ +} +case OP_AggStep: { int i; + sqlite3_context *pCtx; Mem *pMem; - Mem *pRec; Mem t; - sqlite3_context ctx; - sqlite3_value **apVal; - n = pOp->p5; - assert( n>=0 ); - pRec = &aMem[pOp->p2]; - apVal = p->apArg; - assert( apVal || n==0 ); - for(i=0; ip4type==P4_FUNCCTX ); + pCtx = pOp->p4.pCtx; + pMem = &aMem[pOp->p3]; + + /* If this function is inside of a trigger, the register array in aMem[] + ** might change from one evaluation to the next. The next block of code + ** checks to see if the register array has changed, and if so it + ** reinitializes the relavant parts of the sqlite3_context object */ + if( pCtx->pMem != pMem ){ + pCtx->pMem = pMem; + for(i=pCtx->argc-1; i>=0; i--) pCtx->argv[i] = &aMem[pOp->p2+i]; } - ctx.pFunc = pOp->p4.pFunc; - assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) ); - ctx.pMem = pMem = &aMem[pOp->p3]; + +#ifdef SQLITE_DEBUG + for(i=0; iargc; i++){ + assert( memIsValid(pCtx->argv[i]) ); + REGISTER_TRACE(pOp->p2+i, pCtx->argv[i]); + } +#endif + pMem->n++; sqlite3VdbeMemInit(&t, db, MEM_Null); - ctx.pOut = &t; - ctx.isError = 0; - ctx.pVdbe = p; - ctx.iOp = pc; - ctx.skipFlag = 0; - (ctx.pFunc->xStep)(&ctx, n, apVal); /* IMP: R-24505-23230 */ - if( ctx.isError ){ - sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(&t)); - rc = ctx.isError; + pCtx->pOut = &t; + pCtx->fErrorOrAux = 0; + pCtx->skipFlag = 0; + (pCtx->pFunc->xStep)(pCtx,pCtx->argc,pCtx->argv); /* IMP: R-24505-23230 */ + if( pCtx->fErrorOrAux ){ + if( pCtx->isError ){ + sqlite3VdbeError(p, "%s", sqlite3_value_text(&t)); + rc = pCtx->isError; + } + sqlite3VdbeMemRelease(&t); + }else{ + assert( t.flags==MEM_Null ); } - if( ctx.skipFlag ){ + if( pCtx->skipFlag ){ assert( pOp[-1].opcode==OP_CollSeq ); i = pOp[-1].p1; if( i ) sqlite3VdbeMemSetInt64(&aMem[i], 1); } - sqlite3VdbeMemRelease(&t); break; } @@ -76444,7 +77941,7 @@ case OP_AggFinal: { assert( (pMem->flags & ~(MEM_Null|MEM_Agg))==0 ); rc = sqlite3VdbeMemFinalize(pMem, pOp->p4.pFunc); if( rc ){ - sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(pMem)); + sqlite3VdbeError(p, "%s", sqlite3_value_text(pMem)); } sqlite3VdbeChangeEncoding(pMem, encoding); UPDATE_MAX_BLOBSIZE(pMem); @@ -76503,7 +78000,7 @@ case OP_Checkpoint: { ** ** Write a string containing the final journal-mode to register P2. */ -case OP_JournalMode: { /* out2-prerelease */ +case OP_JournalMode: { /* out2 */ Btree *pBt; /* Btree to change journal mode of */ Pager *pPager; /* Pager associated with pBt */ int eNew; /* New journal mode */ @@ -76512,6 +78009,7 @@ case OP_JournalMode: { /* out2-prerelease */ const char *zFilename; /* Name of database file for pPager */ #endif + pOut = out2Prerelease(p, pOp); eNew = pOp->p3; assert( eNew==PAGER_JOURNALMODE_DELETE || eNew==PAGER_JOURNALMODE_TRUNCATE @@ -76548,7 +78046,7 @@ case OP_JournalMode: { /* out2-prerelease */ ){ if( !db->autoCommit || db->nVdbeRead>1 ){ rc = SQLITE_ERROR; - sqlite3SetString(&p->zErrMsg, db, + sqlite3VdbeError(p, "cannot change %s wal mode from within a transaction", (eNew==PAGER_JOURNALMODE_WAL ? "into" : "out of") ); @@ -76587,7 +78085,6 @@ case OP_JournalMode: { /* out2-prerelease */ } eNew = sqlite3PagerSetJournalMode(pPager, eNew); - pOut = &aMem[pOp->p2]; pOut->flags = MEM_Str|MEM_Static|MEM_Term; pOut->z = (char *)sqlite3JournalModename(eNew); pOut->n = sqlite3Strlen30(pOut->z); @@ -76628,8 +78125,8 @@ case OP_IncrVacuum: { /* jump */ rc = sqlite3BtreeIncrVacuum(pBt); VdbeBranchTaken(rc==SQLITE_DONE,2); if( rc==SQLITE_DONE ){ - pc = pOp->p2 - 1; rc = SQLITE_OK; + goto jump_to_p2; } break; } @@ -76680,7 +78177,7 @@ case OP_TableLock: { rc = sqlite3BtreeLockTable(db->aDb[p1].pBt, pOp->p2, isWriteLock); if( (rc&0xFF)==SQLITE_LOCKED ){ const char *z = pOp->p4.z; - sqlite3SetString(&p->zErrMsg, db, "database table is locked: %s", z); + sqlite3VdbeError(p, "database table is locked: %s", z); } } break; @@ -76782,8 +78279,9 @@ case OP_VOpen: { pCur->pVtabCursor = pVtabCursor; pVtab->nRef++; }else{ - db->mallocFailed = 1; + assert( db->mallocFailed ); pModule->xClose(pVtabCursor); + goto no_mem; } } break; @@ -76839,25 +78337,19 @@ case OP_VFilter: { /* jump */ iQuery = (int)pQuery->u.i; /* Invoke the xFilter method */ - { - res = 0; - apArg = p->apArg; - for(i = 0; ixFilter(pVtabCursor, iQuery, pOp->p4.z, nArg, apArg); - sqlite3VtabImportErrmsg(p, pVtab); - if( rc==SQLITE_OK ){ - res = pModule->xEof(pVtabCursor); - } - VdbeBranchTaken(res!=0,2); - if( res ){ - pc = pOp->p2 - 1; - } + res = 0; + apArg = p->apArg; + for(i = 0; ixFilter(pVtabCursor, iQuery, pOp->p4.z, nArg, apArg); + sqlite3VtabImportErrmsg(p, pVtab); + if( rc==SQLITE_OK ){ + res = pModule->xEof(pVtabCursor); } pCur->nullRow = 0; - + VdbeBranchTaken(res!=0,2); + if( res ) goto jump_to_p2; break; } #endif /* SQLITE_OMIT_VIRTUALTABLE */ @@ -76944,7 +78436,7 @@ case OP_VNext: { /* jump */ VdbeBranchTaken(!res,2); if( !res ){ /* If there is data, jump to P2 */ - pc = pOp->p2 - 1; + goto jump_to_p2_and_check_for_interrupt; } goto check_for_interrupt; } @@ -77067,7 +78559,8 @@ case OP_VUpdate: { ** ** Write the current number of pages in database P1 to memory cell P2. */ -case OP_Pagecount: { /* out2-prerelease */ +case OP_Pagecount: { /* out2 */ + pOut = out2Prerelease(p, pOp); pOut->u.i = sqlite3BtreeLastPage(db->aDb[pOp->p1].pBt); break; } @@ -77083,10 +78576,11 @@ case OP_Pagecount: { /* out2-prerelease */ ** ** Store the maximum page count after the change in register P2. */ -case OP_MaxPgcnt: { /* out2-prerelease */ +case OP_MaxPgcnt: { /* out2 */ unsigned int newMax; Btree *pBt; + pOut = out2Prerelease(p, pOp); pBt = db->aDb[pOp->p1].pBt; newMax = 0; if( pOp->p3 ){ @@ -77115,9 +78609,6 @@ case OP_Init: { /* jump */ char *zTrace; char *z; - if( pOp->p2 ){ - pc = pOp->p2 - 1; - } #ifndef SQLITE_OMIT_TRACE if( db->xTrace && !p->doingRerun @@ -77145,6 +78636,7 @@ case OP_Init: { /* jump */ } #endif /* SQLITE_DEBUG */ #endif /* SQLITE_OMIT_TRACE */ + if( pOp->p2 ) goto jump_to_p2; break; } @@ -77176,8 +78668,8 @@ default: { /* This is really OP_Noop and OP_Explain */ #ifdef VDBE_PROFILE { u64 endTime = sqlite3Hwtime(); - if( endTime>start ) pOp->cycles += endTime - start; - pOp->cnt++; + if( endTime>start ) pOrigOp->cycles += endTime - start; + pOrigOp->cnt++; } #endif @@ -77187,16 +78679,16 @@ default: { /* This is really OP_Noop and OP_Explain */ ** the evaluator loop. So we can leave it out when NDEBUG is defined. */ #ifndef NDEBUG - assert( pc>=-1 && pcnOp ); + assert( pOp>=&aOp[-1] && pOp<&aOp[p->nOp-1] ); #ifdef SQLITE_DEBUG if( db->flags & SQLITE_VdbeTrace ){ if( rc!=0 ) printf("rc=%d\n",rc); - if( pOp->opflags & (OPFLG_OUT2_PRERELEASE|OPFLG_OUT2) ){ - registerTrace(pOp->p2, &aMem[pOp->p2]); + if( pOrigOp->opflags & (OPFLG_OUT2) ){ + registerTrace(pOrigOp->p2, &aMem[pOrigOp->p2]); } - if( pOp->opflags & OPFLG_OUT3 ){ - registerTrace(pOp->p3, &aMem[pOp->p3]); + if( pOrigOp->opflags & OPFLG_OUT3 ){ + registerTrace(pOrigOp->p3, &aMem[pOrigOp->p3]); } } #endif /* SQLITE_DEBUG */ @@ -77211,7 +78703,7 @@ vdbe_error_halt: p->rc = rc; testcase( sqlite3GlobalConfig.xLog!=0 ); sqlite3_log(rc, "statement aborts at %d: [%s] %s", - pc, p->zSql, p->zErrMsg); + (int)(pOp - aOp), p->zSql, p->zErrMsg); sqlite3VdbeHalt(p); if( rc==SQLITE_IOERR_NOMEM ) db->mallocFailed = 1; rc = SQLITE_ERROR; @@ -77233,7 +78725,7 @@ vdbe_return: ** is encountered. */ too_big: - sqlite3SetString(&p->zErrMsg, db, "string or blob too big"); + sqlite3VdbeError(p, "string or blob too big"); rc = SQLITE_TOOBIG; goto vdbe_error_halt; @@ -77241,7 +78733,7 @@ too_big: */ no_mem: db->mallocFailed = 1; - sqlite3SetString(&p->zErrMsg, db, "out of memory"); + sqlite3VdbeError(p, "out of memory"); rc = SQLITE_NOMEM; goto vdbe_error_halt; @@ -77252,7 +78744,7 @@ abort_due_to_error: assert( p->zErrMsg==0 ); if( db->mallocFailed ) rc = SQLITE_NOMEM; if( rc!=SQLITE_IOERR_NOMEM ){ - sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3ErrStr(rc)); + sqlite3VdbeError(p, "%s", sqlite3ErrStr(rc)); } goto vdbe_error_halt; @@ -77263,7 +78755,7 @@ abort_due_to_interrupt: assert( db->u1.isInterrupted ); rc = SQLITE_INTERRUPT; p->rc = rc; - sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3ErrStr(rc)); + sqlite3VdbeError(p, "%s", sqlite3ErrStr(rc)); goto vdbe_error_halt; } @@ -77285,6 +78777,8 @@ abort_due_to_interrupt: ** This file contains code used to implement incremental BLOB I/O. */ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ #ifndef SQLITE_OMIT_INCRBLOB @@ -77883,6 +79377,8 @@ SQLITE_API int SQLITE_STDCALL sqlite3_blob_reopen(sqlite3_blob *pBlob, sqlite3_i ** thread to merge the output of each of the others to a single PMA for ** the main thread to read from. */ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ /* ** If SQLITE_DEBUG_SORTER_THREADS is defined, this module outputs various @@ -78037,6 +79533,7 @@ struct MergeEngine { ** after the thread has finished are not dire. So we don't worry about ** memory barriers and such here. */ +typedef int (*SorterCompare)(SortSubtask*,int*,const void*,int,const void*,int); struct SortSubtask { SQLiteThread *pThread; /* Background thread, if any */ int bDone; /* Set if thread is finished but not joined */ @@ -78044,10 +79541,12 @@ struct SortSubtask { UnpackedRecord *pUnpacked; /* Space to unpack a record */ SorterList list; /* List for thread to write to a PMA */ int nPMA; /* Number of PMAs currently in file */ + SorterCompare xCompare; /* Compare function to use */ SorterFile file; /* Temp file for level-0 PMAs */ SorterFile file2; /* Space for other PMAs */ }; + /* ** Main sorter structure. A single instance of this is allocated for each ** sorter cursor created by the VDBE. @@ -78074,9 +79573,13 @@ struct VdbeSorter { u8 bUseThreads; /* True to use background threads */ u8 iPrev; /* Previous thread used to flush PMA */ u8 nTask; /* Size of aTask[] array */ + u8 typeMask; SortSubtask aTask[1]; /* One or more subtasks */ }; +#define SORTER_TYPE_INTEGER 0x01 +#define SORTER_TYPE_TEXT 0x02 + /* ** An instance of the following object is used to read records out of a ** PMA, in sorted order. The next key to be read is cached in nKey/aKey. @@ -78488,32 +79991,162 @@ static int vdbePmaReaderInit( return rc; } +/* +** A version of vdbeSorterCompare() that assumes that it has already been +** determined that the first field of key1 is equal to the first field of +** key2. +*/ +static int vdbeSorterCompareTail( + SortSubtask *pTask, /* Subtask context (for pKeyInfo) */ + int *pbKey2Cached, /* True if pTask->pUnpacked is pKey2 */ + const void *pKey1, int nKey1, /* Left side of comparison */ + const void *pKey2, int nKey2 /* Right side of comparison */ +){ + UnpackedRecord *r2 = pTask->pUnpacked; + if( *pbKey2Cached==0 ){ + sqlite3VdbeRecordUnpack(pTask->pSorter->pKeyInfo, nKey2, pKey2, r2); + *pbKey2Cached = 1; + } + return sqlite3VdbeRecordCompareWithSkip(nKey1, pKey1, r2, 1); +} /* ** Compare key1 (buffer pKey1, size nKey1 bytes) with key2 (buffer pKey2, ** size nKey2 bytes). Use (pTask->pKeyInfo) for the collation sequences ** used by the comparison. Return the result of the comparison. ** -** Before returning, object (pTask->pUnpacked) is populated with the -** unpacked version of key2. Or, if pKey2 is passed a NULL pointer, then it -** is assumed that the (pTask->pUnpacked) structure already contains the -** unpacked key to use as key2. +** If IN/OUT parameter *pbKey2Cached is true when this function is called, +** it is assumed that (pTask->pUnpacked) contains the unpacked version +** of key2. If it is false, (pTask->pUnpacked) is populated with the unpacked +** version of key2 and *pbKey2Cached set to true before returning. ** ** If an OOM error is encountered, (pTask->pUnpacked->error_rc) is set ** to SQLITE_NOMEM. */ static int vdbeSorterCompare( SortSubtask *pTask, /* Subtask context (for pKeyInfo) */ + int *pbKey2Cached, /* True if pTask->pUnpacked is pKey2 */ const void *pKey1, int nKey1, /* Left side of comparison */ const void *pKey2, int nKey2 /* Right side of comparison */ ){ UnpackedRecord *r2 = pTask->pUnpacked; - if( pKey2 ){ + if( !*pbKey2Cached ){ sqlite3VdbeRecordUnpack(pTask->pSorter->pKeyInfo, nKey2, pKey2, r2); + *pbKey2Cached = 1; } return sqlite3VdbeRecordCompare(nKey1, pKey1, r2); } +/* +** A specially optimized version of vdbeSorterCompare() that assumes that +** the first field of each key is a TEXT value and that the collation +** sequence to compare them with is BINARY. +*/ +static int vdbeSorterCompareText( + SortSubtask *pTask, /* Subtask context (for pKeyInfo) */ + int *pbKey2Cached, /* True if pTask->pUnpacked is pKey2 */ + const void *pKey1, int nKey1, /* Left side of comparison */ + const void *pKey2, int nKey2 /* Right side of comparison */ +){ + const u8 * const p1 = (const u8 * const)pKey1; + const u8 * const p2 = (const u8 * const)pKey2; + const u8 * const v1 = &p1[ p1[0] ]; /* Pointer to value 1 */ + const u8 * const v2 = &p2[ p2[0] ]; /* Pointer to value 2 */ + + int n1; + int n2; + int res; + + getVarint32(&p1[1], n1); n1 = (n1 - 13) / 2; + getVarint32(&p2[1], n2); n2 = (n2 - 13) / 2; + res = memcmp(v1, v2, MIN(n1, n2)); + if( res==0 ){ + res = n1 - n2; + } + + if( res==0 ){ + if( pTask->pSorter->pKeyInfo->nField>1 ){ + res = vdbeSorterCompareTail( + pTask, pbKey2Cached, pKey1, nKey1, pKey2, nKey2 + ); + } + }else{ + if( pTask->pSorter->pKeyInfo->aSortOrder[0] ){ + res = res * -1; + } + } + + return res; +} + +/* +** A specially optimized version of vdbeSorterCompare() that assumes that +** the first field of each key is an INTEGER value. +*/ +static int vdbeSorterCompareInt( + SortSubtask *pTask, /* Subtask context (for pKeyInfo) */ + int *pbKey2Cached, /* True if pTask->pUnpacked is pKey2 */ + const void *pKey1, int nKey1, /* Left side of comparison */ + const void *pKey2, int nKey2 /* Right side of comparison */ +){ + const u8 * const p1 = (const u8 * const)pKey1; + const u8 * const p2 = (const u8 * const)pKey2; + const int s1 = p1[1]; /* Left hand serial type */ + const int s2 = p2[1]; /* Right hand serial type */ + const u8 * const v1 = &p1[ p1[0] ]; /* Pointer to value 1 */ + const u8 * const v2 = &p2[ p2[0] ]; /* Pointer to value 2 */ + int res; /* Return value */ + + assert( (s1>0 && s1<7) || s1==8 || s1==9 ); + assert( (s2>0 && s2<7) || s2==8 || s2==9 ); + + if( s1>7 && s2>7 ){ + res = s1 - s2; + }else{ + if( s1==s2 ){ + if( (*v1 ^ *v2) & 0x80 ){ + /* The two values have different signs */ + res = (*v1 & 0x80) ? -1 : +1; + }else{ + /* The two values have the same sign. Compare using memcmp(). */ + static const u8 aLen[] = {0, 1, 2, 3, 4, 6, 8 }; + int i; + res = 0; + for(i=0; i7 ){ + res = +1; + }else if( s1>7 ){ + res = -1; + }else{ + res = s1 - s2; + } + assert( res!=0 ); + + if( res>0 ){ + if( *v1 & 0x80 ) res = -1; + }else{ + if( *v2 & 0x80 ) res = +1; + } + } + } + + if( res==0 ){ + if( pTask->pSorter->pKeyInfo->nField>1 ){ + res = vdbeSorterCompareTail( + pTask, pbKey2Cached, pKey1, nKey1, pKey2, nKey2 + ); + } + }else if( pTask->pSorter->pKeyInfo->aSortOrder[0] ){ + res = res * -1; + } + + return res; +} + /* ** Initialize the temporary index cursor just opened as a sorter cursor. ** @@ -78581,9 +80214,13 @@ SQLITE_PRIVATE int sqlite3VdbeSorterInit( pSorter->pKeyInfo = pKeyInfo = (KeyInfo*)((u8*)pSorter + sz); memcpy(pKeyInfo, pCsr->pKeyInfo, szKeyInfo); pKeyInfo->db = 0; - if( nField && nWorker==0 ) pKeyInfo->nField = nField; + if( nField && nWorker==0 ){ + pKeyInfo->nXField += (pKeyInfo->nField - nField); + pKeyInfo->nField = nField; + } pSorter->pgsz = pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt); pSorter->nTask = nWorker + 1; + pSorter->iPrev = nWorker-1; pSorter->bUseThreads = (pSorter->nTask>1); pSorter->db = db; for(i=0; inTask; i++){ @@ -78609,6 +80246,12 @@ SQLITE_PRIVATE int sqlite3VdbeSorterInit( if( !pSorter->list.aMemory ) rc = SQLITE_NOMEM; } } + + if( (pKeyInfo->nField+pKeyInfo->nXField)<13 + && (pKeyInfo->aColl[0]==0 || pKeyInfo->aColl[0]==db->pDfltColl) + ){ + pSorter->typeMask = SORTER_TYPE_INTEGER | SORTER_TYPE_TEXT; + } } return rc; @@ -78633,30 +80276,24 @@ static void vdbeSorterRecordFree(sqlite3 *db, SorterRecord *pRecord){ */ static void vdbeSortSubtaskCleanup(sqlite3 *db, SortSubtask *pTask){ sqlite3DbFree(db, pTask->pUnpacked); - pTask->pUnpacked = 0; #if SQLITE_MAX_WORKER_THREADS>0 /* pTask->list.aMemory can only be non-zero if it was handed memory ** from the main thread. That only occurs SQLITE_MAX_WORKER_THREADS>0 */ if( pTask->list.aMemory ){ sqlite3_free(pTask->list.aMemory); - pTask->list.aMemory = 0; }else #endif { assert( pTask->list.aMemory==0 ); vdbeSorterRecordFree(0, pTask->list.pList); } - pTask->list.pList = 0; if( pTask->file.pFd ){ sqlite3OsCloseFree(pTask->file.pFd); - pTask->file.pFd = 0; - pTask->file.iEof = 0; } if( pTask->file2.pFd ){ sqlite3OsCloseFree(pTask->file2.pFd); - pTask->file2.pFd = 0; - pTask->file2.iEof = 0; } + memset(pTask, 0, sizeof(SortSubtask)); } #ifdef SQLITE_DEBUG_SORTER_THREADS @@ -78836,6 +80473,7 @@ SQLITE_PRIVATE void sqlite3VdbeSorterReset(sqlite3 *db, VdbeSorter *pSorter){ for(i=0; inTask; i++){ SortSubtask *pTask = &pSorter->aTask[i]; vdbeSortSubtaskCleanup(db, pTask); + pTask->pSorter = pSorter; } if( pSorter->list.aMemory==0 ){ vdbeSorterRecordFree(0, pSorter->list.pList); @@ -78945,28 +80583,42 @@ static void vdbeSorterMerge( ){ SorterRecord *pFinal = 0; SorterRecord **pp = &pFinal; - void *pVal2 = p2 ? SRVAL(p2) : 0; + int bCached = 0; while( p1 && p2 ){ int res; - res = vdbeSorterCompare(pTask, SRVAL(p1), p1->nVal, pVal2, p2->nVal); + res = pTask->xCompare( + pTask, &bCached, SRVAL(p1), p1->nVal, SRVAL(p2), p2->nVal + ); + if( res<=0 ){ *pp = p1; pp = &p1->u.pNext; p1 = p1->u.pNext; - pVal2 = 0; }else{ *pp = p2; - pp = &p2->u.pNext; + pp = &p2->u.pNext; p2 = p2->u.pNext; - if( p2==0 ) break; - pVal2 = SRVAL(p2); + bCached = 0; } } *pp = p1 ? p1 : p2; *ppOut = pFinal; } +/* +** Return the SorterCompare function to compare values collected by the +** sorter object passed as the only argument. +*/ +static SorterCompare vdbeSorterGetCompare(VdbeSorter *p){ + if( p->typeMask==SORTER_TYPE_INTEGER ){ + return vdbeSorterCompareInt; + }else if( p->typeMask==SORTER_TYPE_TEXT ){ + return vdbeSorterCompareText; + } + return vdbeSorterCompare; +} + /* ** Sort the linked list of records headed at pTask->pList. Return ** SQLITE_OK if successful, or an SQLite error code (i.e. SQLITE_NOMEM) if @@ -78981,12 +80633,14 @@ static int vdbeSorterSort(SortSubtask *pTask, SorterList *pList){ rc = vdbeSortAllocUnpacked(pTask); if( rc!=SQLITE_OK ) return rc; + p = pList->pList; + pTask->xCompare = vdbeSorterGetCompare(pTask->pSorter); + aSlot = (SorterRecord **)sqlite3MallocZero(64 * sizeof(SorterRecord *)); if( !aSlot ){ return SQLITE_NOMEM; } - p = pList->pList; while( p ){ SorterRecord *pNext; if( pList->aMemory ){ @@ -79200,13 +80854,12 @@ static int vdbeMergeEngineStep( int i; /* Index of aTree[] to recalculate */ PmaReader *pReadr1; /* First PmaReader to compare */ PmaReader *pReadr2; /* Second PmaReader to compare */ - u8 *pKey2; /* To pReadr2->aKey, or 0 if record cached */ + int bCached = 0; /* Find the first two PmaReaders to compare. The one that was just ** advanced (iPrev) and the one next to it in the array. */ pReadr1 = &pMerger->aReadr[(iPrev & 0xFFFE)]; pReadr2 = &pMerger->aReadr[(iPrev | 0x0001)]; - pKey2 = pReadr2->aKey; for(i=(pMerger->nTree+iPrev)/2; i>0; i=i/2){ /* Compare pReadr1 and pReadr2. Store the result in variable iRes. */ @@ -79216,8 +80869,8 @@ static int vdbeMergeEngineStep( }else if( pReadr2->pFd==0 ){ iRes = -1; }else{ - iRes = vdbeSorterCompare(pTask, - pReadr1->aKey, pReadr1->nKey, pKey2, pReadr2->nKey + iRes = pTask->xCompare(pTask, &bCached, + pReadr1->aKey, pReadr1->nKey, pReadr2->aKey, pReadr2->nKey ); } @@ -79239,9 +80892,9 @@ static int vdbeMergeEngineStep( if( iRes<0 || (iRes==0 && pReadr1aTree[i] = (int)(pReadr1 - pMerger->aReadr); pReadr2 = &pMerger->aReadr[ pMerger->aTree[i ^ 0x0001] ]; - pKey2 = pReadr2->aKey; + bCached = 0; }else{ - if( pReadr1->pFd ) pKey2 = 0; + if( pReadr1->pFd ) bCached = 0; pMerger->aTree[i] = (int)(pReadr2 - pMerger->aReadr); pReadr1 = &pMerger->aReadr[ pMerger->aTree[i ^ 0x0001] ]; } @@ -79348,6 +81001,16 @@ SQLITE_PRIVATE int sqlite3VdbeSorterWrite( int bFlush; /* True to flush contents of memory to PMA */ int nReq; /* Bytes of memory required */ int nPMA; /* Bytes of PMA space required */ + int t; /* serial type of first record field */ + + getVarint32((const u8*)&pVal->z[1], t); + if( t>0 && t<10 && t!=7 ){ + pSorter->typeMask &= SORTER_TYPE_INTEGER; + }else if( t>10 && (t & 0x01) ){ + pSorter->typeMask &= SORTER_TYPE_TEXT; + }else{ + pSorter->typeMask = 0; + } assert( pSorter ); @@ -79613,10 +81276,12 @@ static void vdbeMergeEngineCompare( }else if( p2->pFd==0 ){ iRes = i1; }else{ + SortSubtask *pTask = pMerger->pTask; + int bCached = 0; int res; - assert( pMerger->pTask->pUnpacked!=0 ); /* from vdbeSortSubtaskMain() */ - res = vdbeSorterCompare( - pMerger->pTask, p1->aKey, p1->nKey, p2->aKey, p2->nKey + assert( pTask->pUnpacked!=0 ); /* from vdbeSortSubtaskMain() */ + res = pTask->xCompare( + pTask, &bCached, p1->aKey, p1->nKey, p2->aKey, p2->nKey ); if( res<=0 ){ iRes = i1; @@ -79640,11 +81305,12 @@ static void vdbeMergeEngineCompare( #define INCRINIT_TASK 1 #define INCRINIT_ROOT 2 -/* Forward reference. -** The vdbeIncrMergeInit() and vdbePmaReaderIncrMergeInit() routines call each -** other (when building a merge tree). +/* +** Forward reference required as the vdbeIncrMergeInit() and +** vdbePmaReaderIncrInit() routines are called mutually recursively when +** building a merge tree. */ -static int vdbePmaReaderIncrMergeInit(PmaReader *pReadr, int eMode); +static int vdbePmaReaderIncrInit(PmaReader *pReadr, int eMode); /* ** Initialize the MergeEngine object passed as the second argument. Once this @@ -79691,7 +81357,7 @@ static int vdbeMergeEngineInit( ** better advantage of multi-processor hardware. */ rc = vdbePmaReaderNext(&pMerger->aReadr[nTree-i-1]); }else{ - rc = vdbePmaReaderIncrMergeInit(&pMerger->aReadr[i], INCRINIT_NORMAL); + rc = vdbePmaReaderIncrInit(&pMerger->aReadr[i], INCRINIT_NORMAL); } if( rc!=SQLITE_OK ) return rc; } @@ -79703,17 +81369,15 @@ static int vdbeMergeEngineInit( } /* -** Initialize the IncrMerge field of a PmaReader. -** -** If the PmaReader passed as the first argument is not an incremental-reader -** (if pReadr->pIncr==0), then this function is a no-op. Otherwise, it serves -** to open and/or initialize the temp file related fields of the IncrMerge +** The PmaReader passed as the first argument is guaranteed to be an +** incremental-reader (pReadr->pIncr!=0). This function serves to open +** and/or initialize the temp file related fields of the IncrMerge ** object at (pReadr->pIncr). ** ** If argument eMode is set to INCRINIT_NORMAL, then all PmaReaders -** in the sub-tree headed by pReadr are also initialized. Data is then loaded -** into the buffers belonging to pReadr and it is set to -** point to the first key in its range. +** in the sub-tree headed by pReadr are also initialized. Data is then +** loaded into the buffers belonging to pReadr and it is set to point to +** the first key in its range. ** ** If argument eMode is set to INCRINIT_TASK, then pReadr is guaranteed ** to be a multi-threaded PmaReader and this function is being called in a @@ -79740,59 +81404,62 @@ static int vdbeMergeEngineInit( static int vdbePmaReaderIncrMergeInit(PmaReader *pReadr, int eMode){ int rc = SQLITE_OK; IncrMerger *pIncr = pReadr->pIncr; + SortSubtask *pTask = pIncr->pTask; + sqlite3 *db = pTask->pSorter->db; /* eMode is always INCRINIT_NORMAL in single-threaded mode */ assert( SQLITE_MAX_WORKER_THREADS>0 || eMode==INCRINIT_NORMAL ); - if( pIncr ){ - SortSubtask *pTask = pIncr->pTask; - sqlite3 *db = pTask->pSorter->db; - - rc = vdbeMergeEngineInit(pTask, pIncr->pMerger, eMode); + rc = vdbeMergeEngineInit(pTask, pIncr->pMerger, eMode); - /* Set up the required files for pIncr. A multi-theaded IncrMerge object - ** requires two temp files to itself, whereas a single-threaded object - ** only requires a region of pTask->file2. */ - if( rc==SQLITE_OK ){ - int mxSz = pIncr->mxSz; + /* Set up the required files for pIncr. A multi-theaded IncrMerge object + ** requires two temp files to itself, whereas a single-threaded object + ** only requires a region of pTask->file2. */ + if( rc==SQLITE_OK ){ + int mxSz = pIncr->mxSz; #if SQLITE_MAX_WORKER_THREADS>0 - if( pIncr->bUseThread ){ - rc = vdbeSorterOpenTempFile(db, mxSz, &pIncr->aFile[0].pFd); - if( rc==SQLITE_OK ){ - rc = vdbeSorterOpenTempFile(db, mxSz, &pIncr->aFile[1].pFd); - } - }else + if( pIncr->bUseThread ){ + rc = vdbeSorterOpenTempFile(db, mxSz, &pIncr->aFile[0].pFd); + if( rc==SQLITE_OK ){ + rc = vdbeSorterOpenTempFile(db, mxSz, &pIncr->aFile[1].pFd); + } + }else #endif - /*if( !pIncr->bUseThread )*/{ - if( pTask->file2.pFd==0 ){ - assert( pTask->file2.iEof>0 ); - rc = vdbeSorterOpenTempFile(db, pTask->file2.iEof, &pTask->file2.pFd); - pTask->file2.iEof = 0; - } - if( rc==SQLITE_OK ){ - pIncr->aFile[1].pFd = pTask->file2.pFd; - pIncr->iStartOff = pTask->file2.iEof; - pTask->file2.iEof += mxSz; - } + /*if( !pIncr->bUseThread )*/{ + if( pTask->file2.pFd==0 ){ + assert( pTask->file2.iEof>0 ); + rc = vdbeSorterOpenTempFile(db, pTask->file2.iEof, &pTask->file2.pFd); + pTask->file2.iEof = 0; + } + if( rc==SQLITE_OK ){ + pIncr->aFile[1].pFd = pTask->file2.pFd; + pIncr->iStartOff = pTask->file2.iEof; + pTask->file2.iEof += mxSz; } } + } #if SQLITE_MAX_WORKER_THREADS>0 - if( rc==SQLITE_OK && pIncr->bUseThread ){ - /* Use the current thread to populate aFile[1], even though this - ** PmaReader is multi-threaded. The reason being that this function - ** is already running in background thread pIncr->pTask->thread. */ - assert( eMode==INCRINIT_ROOT || eMode==INCRINIT_TASK ); - rc = vdbeIncrPopulate(pIncr); - } + if( rc==SQLITE_OK && pIncr->bUseThread ){ + /* Use the current thread to populate aFile[1], even though this + ** PmaReader is multi-threaded. If this is an INCRINIT_TASK object, + ** then this function is already running in background thread + ** pIncr->pTask->thread. + ** + ** If this is the INCRINIT_ROOT object, then it is running in the + ** main VDBE thread. But that is Ok, as that thread cannot return + ** control to the VDBE or proceed with anything useful until the + ** first results are ready from this merger object anyway. + */ + assert( eMode==INCRINIT_ROOT || eMode==INCRINIT_TASK ); + rc = vdbeIncrPopulate(pIncr); + } #endif - if( rc==SQLITE_OK - && (SQLITE_MAX_WORKER_THREADS==0 || eMode!=INCRINIT_TASK) - ){ - rc = vdbePmaReaderNext(pReadr); - } + if( rc==SQLITE_OK && (SQLITE_MAX_WORKER_THREADS==0 || eMode!=INCRINIT_TASK) ){ + rc = vdbePmaReaderNext(pReadr); } + return rc; } @@ -79801,7 +81468,7 @@ static int vdbePmaReaderIncrMergeInit(PmaReader *pReadr, int eMode){ ** The main routine for vdbePmaReaderIncrMergeInit() operations run in ** background threads. */ -static void *vdbePmaReaderBgInit(void *pCtx){ +static void *vdbePmaReaderBgIncrInit(void *pCtx){ PmaReader *pReader = (PmaReader*)pCtx; void *pRet = SQLITE_INT_TO_PTR( vdbePmaReaderIncrMergeInit(pReader,INCRINIT_TASK) @@ -79809,20 +81476,36 @@ static void *vdbePmaReaderBgInit(void *pCtx){ pReader->pIncr->pTask->bDone = 1; return pRet; } +#endif /* -** Use a background thread to invoke vdbePmaReaderIncrMergeInit(INCRINIT_TASK) -** on the PmaReader object passed as the first argument. -** -** This call will initialize the various fields of the pReadr->pIncr -** structure and, if it is a multi-threaded IncrMerger, launch a -** background thread to populate aFile[1]. +** If the PmaReader passed as the first argument is not an incremental-reader +** (if pReadr->pIncr==0), then this function is a no-op. Otherwise, it invokes +** the vdbePmaReaderIncrMergeInit() function with the parameters passed to +** this routine to initialize the incremental merge. +** +** If the IncrMerger object is multi-threaded (IncrMerger.bUseThread==1), +** then a background thread is launched to call vdbePmaReaderIncrMergeInit(). +** Or, if the IncrMerger is single threaded, the same function is called +** using the current thread. */ -static int vdbePmaReaderBgIncrInit(PmaReader *pReadr){ - void *pCtx = (void*)pReadr; - return vdbeSorterCreateThread(pReadr->pIncr->pTask, vdbePmaReaderBgInit, pCtx); -} +static int vdbePmaReaderIncrInit(PmaReader *pReadr, int eMode){ + IncrMerger *pIncr = pReadr->pIncr; /* Incremental merger */ + int rc = SQLITE_OK; /* Return code */ + if( pIncr ){ +#if SQLITE_MAX_WORKER_THREADS>0 + assert( pIncr->bUseThread==0 || eMode==INCRINIT_TASK ); + if( pIncr->bUseThread ){ + void *pCtx = (void*)pReadr; + rc = vdbeSorterCreateThread(pIncr->pTask, vdbePmaReaderBgIncrInit, pCtx); + }else #endif + { + rc = vdbePmaReaderIncrMergeInit(pReadr, eMode); + } + } + return rc; +} /* ** Allocate a new MergeEngine object to merge the contents of nPMA level-0 @@ -80034,6 +81717,11 @@ static int vdbeSorterSetupMerge(VdbeSorter *pSorter){ MergeEngine *pMain = 0; #if SQLITE_MAX_WORKER_THREADS sqlite3 *db = pTask0->pSorter->db; + int i; + SorterCompare xCompare = vdbeSorterGetCompare(pSorter); + for(i=0; inTask; i++){ + pSorter->aTask[i].xCompare = xCompare; + } #endif rc = vdbeSorterMergeTreeBuild(pSorter, &pMain); @@ -80062,15 +81750,21 @@ static int vdbeSorterSetupMerge(VdbeSorter *pSorter){ } } for(iTask=0; rc==SQLITE_OK && iTasknTask; iTask++){ + /* Check that: + ** + ** a) The incremental merge object is configured to use the + ** right task, and + ** b) If it is using task (nTask-1), it is configured to run + ** in single-threaded mode. This is important, as the + ** root merge (INCRINIT_ROOT) will be using the same task + ** object. + */ PmaReader *p = &pMain->aReadr[iTask]; - assert( p->pIncr==0 || p->pIncr->pTask==&pSorter->aTask[iTask] ); - if( p->pIncr ){ - if( iTask==pSorter->nTask-1 ){ - rc = vdbePmaReaderIncrMergeInit(p, INCRINIT_TASK); - }else{ - rc = vdbePmaReaderBgIncrInit(p); - } - } + assert( p->pIncr==0 || ( + (p->pIncr->pTask==&pSorter->aTask[iTask]) /* a */ + && (iTask!=pSorter->nTask-1 || p->pIncr->bUseThread==0) /* b */ + )); + rc = vdbePmaReaderIncrInit(p, INCRINIT_TASK); } } pMain = 0; @@ -80301,6 +81995,7 @@ SQLITE_PRIVATE int sqlite3VdbeSorterCompare( ** 2) The sqlite3JournalCreate() function is called. */ #ifdef SQLITE_ENABLE_ATOMIC_WRITE +/* #include "sqliteInt.h" */ /* @@ -80548,6 +82243,7 @@ SQLITE_PRIVATE int sqlite3JournalSize(sqlite3_vfs *pVfs){ ** The in-memory rollback journal is used to journal transactions for ** ":memory:" databases and when the journal_mode=MEMORY pragma is used. */ +/* #include "sqliteInt.h" */ /* Forward references to internal structures */ typedef struct MemJournal MemJournal; @@ -80803,6 +82499,7 @@ SQLITE_PRIVATE int sqlite3MemJournalSize(void){ ** This file contains routines used for walking the parser tree for ** an SQL statement. */ +/* #include "sqliteInt.h" */ /* #include */ /* #include */ @@ -80961,6 +82658,7 @@ SQLITE_PRIVATE int sqlite3WalkSelect(Walker *pWalker, Select *p){ ** resolve all identifiers by associating them with a particular ** table and column. */ +/* #include "sqliteInt.h" */ /* #include */ /* #include */ @@ -81025,7 +82723,7 @@ static void incrAggFunctionDepth(Expr *pExpr, int N){ ** SELECT a+b, c+d FROM t1 ORDER BY (a+b) COLLATE nocase; ** ** The nSubquery parameter specifies how many levels of subquery the -** alias is removed from the original expression. The usually value is +** alias is removed from the original expression. The usual value is ** zero but it might be more if the alias is contained within a subquery ** of the original expression. The Expr.op2 field of TK_AGG_FUNCTION ** structures must be increased by the nSubquery amount. @@ -81045,7 +82743,6 @@ static void resolveAlias( assert( iCol>=0 && iColnExpr ); pOrig = pEList->a[iCol].pExpr; assert( pOrig!=0 ); - assert( pOrig->flags & EP_Resolved ); db = pParse->db; pDup = sqlite3ExprDup(db, pOrig, 0); if( pDup==0 ) return; @@ -81305,7 +83002,7 @@ static int lookupName( break; } } - if( iCol>=pTab->nCol && sqlite3IsRowid(zCol) && HasRowid(pTab) ){ + if( iCol>=pTab->nCol && sqlite3IsRowid(zCol) && VisibleRowid(pTab) ){ /* IMP: R-51414-32910 */ /* IMP: R-44911-55124 */ iCol = -1; @@ -81335,7 +83032,7 @@ static int lookupName( ** Perhaps the name is a reference to the ROWID */ if( cnt==0 && cntTab==1 && pMatch && sqlite3IsRowid(zCol) - && HasRowid(pMatch->pTab) ){ + && VisibleRowid(pMatch->pTab) ){ cnt = 1; pExpr->iColumn = -1; /* IMP: R-44911-55124 */ pExpr->affinity = SQLITE_AFF_INTEGER; @@ -81939,9 +83636,11 @@ static int resolveCompoundOrderBy( if( pItem->pExpr==pE ){ pItem->pExpr = pNew; }else{ - assert( pItem->pExpr->op==TK_COLLATE ); - assert( pItem->pExpr->pLeft==pE ); - pItem->pExpr->pLeft = pNew; + Expr *pParent = pItem->pExpr; + assert( pParent->op==TK_COLLATE ); + while( pParent->pLeft->op==TK_COLLATE ) pParent = pParent->pLeft; + assert( pParent->pLeft==pE ); + pParent->pLeft = pNew; } sqlite3ExprDelete(db, pE); pItem->u.x.iOrderByCol = (u16)iCol; @@ -82141,7 +83840,7 @@ static int resolveSelectStep(Walker *pWalker, Select *p){ ** after the names have been resolved. */ if( p->selFlags & SF_Converted ){ Select *pSub = p->pSrc->a[0].pSelect; - assert( p->pSrc->nSrc==1 && isCompound==0 && p->pOrderBy ); + assert( p->pSrc->nSrc==1 && p->pOrderBy ); assert( pSub->pPrior && pSub->pOrderBy==0 ); pSub->pOrderBy = p->pOrderBy; p->pOrderBy = 0; @@ -82243,8 +83942,15 @@ static int resolveSelectStep(Walker *pWalker, Select *p){ ** The ORDER BY clause for compounds SELECT statements is handled ** below, after all of the result-sets for all of the elements of ** the compound have been resolved. + ** + ** If there is an ORDER BY clause on a term of a compound-select other + ** than the right-most term, then that is a syntax error. But the error + ** is not detected until much later, and so we need to go ahead and + ** resolve those symbols on the incorrect ORDER BY for consistency. */ - if( !isCompound && resolveOrderGroupBy(&sNC, p, p->pOrderBy, "ORDER") ){ + if( isCompound<=nCompound /* Defer right-most ORDER BY of a compound */ + && resolveOrderGroupBy(&sNC, p, p->pOrderBy, "ORDER") + ){ return WRC_Abort; } if( db->mallocFailed ){ @@ -82269,6 +83975,13 @@ static int resolveSelectStep(Walker *pWalker, Select *p){ } } + /* If this is part of a compound SELECT, check that it has the right + ** number of expressions in the select list. */ + if( p->pNext && p->pEList->nExpr!=p->pNext->pEList->nExpr ){ + sqlite3SelectWrongNumTermsError(pParse, p->pNext); + return WRC_Abort; + } + /* Advance to the next term of the compound */ p = p->pPrior; @@ -82458,6 +84171,7 @@ SQLITE_PRIVATE void sqlite3ResolveSelfReference( ** This file contains routines used for analyzing expressions and ** for generating VDBE code that evaluates expressions in SQLite. */ +/* #include "sqliteInt.h" */ /* ** Return the 'affinity' of the expression pExpr if any. @@ -82636,13 +84350,13 @@ SQLITE_PRIVATE char sqlite3CompareAffinity(Expr *pExpr, char aff2){ if( sqlite3IsNumericAffinity(aff1) || sqlite3IsNumericAffinity(aff2) ){ return SQLITE_AFF_NUMERIC; }else{ - return SQLITE_AFF_NONE; + return SQLITE_AFF_BLOB; } }else if( !aff1 && !aff2 ){ /* Neither side of the comparison is a column. Compare the ** results directly. */ - return SQLITE_AFF_NONE; + return SQLITE_AFF_BLOB; }else{ /* One side is a column, the other is not. Use the columns affinity. */ assert( aff1==0 || aff2==0 ); @@ -82666,7 +84380,7 @@ static char comparisonAffinity(Expr *pExpr){ }else if( ExprHasProperty(pExpr, EP_xIsSelect) ){ aff = sqlite3CompareAffinity(pExpr->x.pSelect->pEList->a[0].pExpr, aff); }else if( !aff ){ - aff = SQLITE_AFF_NONE; + aff = SQLITE_AFF_BLOB; } return aff; } @@ -82680,7 +84394,7 @@ static char comparisonAffinity(Expr *pExpr){ SQLITE_PRIVATE int sqlite3IndexAffinityOk(Expr *pExpr, char idx_affinity){ char aff = comparisonAffinity(pExpr); switch( aff ){ - case SQLITE_AFF_NONE: + case SQLITE_AFF_BLOB: return 1; case SQLITE_AFF_TEXT: return idx_affinity==SQLITE_AFF_TEXT; @@ -83486,7 +85200,7 @@ SQLITE_PRIVATE SrcList *sqlite3SrcListDup(sqlite3 *db, SrcList *p, int flags){ pNewItem->isCorrelated = pOldItem->isCorrelated; pNewItem->viaCoroutine = pOldItem->viaCoroutine; pNewItem->isRecursive = pOldItem->isRecursive; - pNewItem->zIndex = sqlite3DbStrDup(db, pOldItem->zIndex); + pNewItem->zIndexedBy = sqlite3DbStrDup(db, pOldItem->zIndexedBy); pNewItem->notIndexed = pOldItem->notIndexed; pNewItem->pIndex = pOldItem->pIndex; pTab = pNewItem->pTab = pOldItem->pTab; @@ -83696,7 +85410,8 @@ SQLITE_PRIVATE u32 sqlite3ExprListFlags(const ExprList *pList){ u32 m = 0; if( pList ){ for(i=0; inExpr; i++){ - m |= pList->a[i].pExpr->flags; + Expr *pExpr = pList->a[i].pExpr; + if( ALWAYS(pExpr) ) m |= pExpr->flags; } } return m; @@ -83712,7 +85427,7 @@ SQLITE_PRIVATE u32 sqlite3ExprListFlags(const ExprList *pList){ ** ** sqlite3ExprIsConstant() pWalker->eCode==1 ** sqlite3ExprIsConstantNotJoin() pWalker->eCode==2 -** sqlite3ExprRefOneTableOnly() pWalker->eCode==3 +** sqlite3ExprIsTableConstant() pWalker->eCode==3 ** sqlite3ExprIsConstantOrFunction() pWalker->eCode==4 or 5 ** ** In all cases, the callbacks set Walker.eCode=0 and abort if the expression @@ -83820,7 +85535,7 @@ SQLITE_PRIVATE int sqlite3ExprIsConstantNotJoin(Expr *p){ } /* -** Walk an expression tree. Return non-zero if the expression constant +** Walk an expression tree. Return non-zero if the expression is constant ** for any single row of the table with cursor iCur. In other words, the ** expression must not refer to any non-deterministic function nor any ** table other than iCur. @@ -83926,7 +85641,7 @@ SQLITE_PRIVATE int sqlite3ExprCanBeNull(const Expr *p){ */ SQLITE_PRIVATE int sqlite3ExprNeedsNoAffinityChange(const Expr *p, char aff){ u8 op; - if( aff==SQLITE_AFF_NONE ) return 1; + if( aff==SQLITE_AFF_BLOB ) return 1; while( p->op==TK_UPLUS || p->op==TK_UMINUS ){ p = p->pLeft; } op = p->op; if( op==TK_REGISTER ) op = p->op2; @@ -84136,7 +85851,7 @@ SQLITE_PRIVATE int sqlite3FindInIndex(Parse *pParse, Expr *pX, u32 inFlags, int ** ephemeral table. */ p = (ExprHasProperty(pX, EP_xIsSelect) ? pX->x.pSelect : 0); - if( ALWAYS(pParse->nErr==0) && isCandidateForInOpt(p) ){ + if( pParse->nErr==0 && isCandidateForInOpt(p) ){ sqlite3 *db = pParse->db; /* Database connection */ Table *pTab; /* Table . */ Expr *pExpr; /* Expression */ @@ -84377,7 +86092,7 @@ SQLITE_PRIVATE int sqlite3CodeSubselect( int r1, r2, r3; if( !affinity ){ - affinity = SQLITE_AFF_NONE; + affinity = SQLITE_AFF_BLOB; } if( pKeyInfo ){ assert( sqlite3KeyInfoIsWriteable(pKeyInfo) ); @@ -84461,6 +86176,7 @@ SQLITE_PRIVATE int sqlite3CodeSubselect( pSel->pLimit = sqlite3PExpr(pParse, TK_INTEGER, 0, 0, &sqlite3IntTokens[1]); pSel->iLimit = 0; + pSel->selFlags &= ~SF_MultiValue; if( sqlite3Select(pParse, pSel, &dest) ){ return 0; } @@ -84651,17 +86367,6 @@ static void sqlite3ExprCodeIN( } #endif /* SQLITE_OMIT_SUBQUERY */ -/* -** Duplicate an 8-byte value -*/ -static char *dup8bytes(Vdbe *v, const char *in){ - char *out = sqlite3DbMallocRaw(sqlite3VdbeDb(v), 8); - if( out ){ - memcpy(out, in, 8); - } - return out; -} - #ifndef SQLITE_OMIT_FLOATING_POINT /* ** Generate an instruction that will put the floating point @@ -84674,12 +86379,10 @@ static char *dup8bytes(Vdbe *v, const char *in){ static void codeReal(Vdbe *v, const char *z, int negateFlag, int iMem){ if( ALWAYS(z!=0) ){ double value; - char *zV; sqlite3AtoF(z, &value, sqlite3Strlen30(z), SQLITE_UTF8); assert( !sqlite3IsNaN(value) ); /* The new AtoF never returns NaN */ if( negateFlag ) value = -value; - zV = dup8bytes(v, (char*)&value); - sqlite3VdbeAddOp4(v, OP_Real, 0, iMem, 0, zV, P4_REAL); + sqlite3VdbeAddOp4Dup8(v, OP_Real, 0, iMem, 0, (u8*)&value, P4_REAL); } } #endif @@ -84705,10 +86408,8 @@ static void codeInteger(Parse *pParse, Expr *pExpr, int negFlag, int iMem){ assert( z!=0 ); c = sqlite3DecOrHexToI64(z, &value); if( c==0 || (c==2 && negFlag) ){ - char *zV; if( negFlag ){ value = c==2 ? SMALLEST_INT64 : -value; } - zV = dup8bytes(v, (char*)&value); - sqlite3VdbeAddOp4(v, OP_Int64, 0, iMem, 0, zV, P4_INT64); + sqlite3VdbeAddOp4Dup8(v, OP_Int64, 0, iMem, 0, (u8*)&value, P4_INT64); }else{ #ifdef SQLITE_OMIT_FLOATING_POINT sqlite3ErrorMsg(pParse, "oversized integer: %s%s", negFlag ? "-" : "", z); @@ -85313,7 +87014,7 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target) */ if( pDef->funcFlags & SQLITE_FUNC_UNLIKELY ){ assert( nFarg>=1 ); - sqlite3ExprCode(pParse, pFarg->a[0].pExpr, target); + inReg = sqlite3ExprCodeTarget(pParse, pFarg->a[0].pExpr, target); break; } @@ -85383,7 +87084,7 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target) if( !pColl ) pColl = db->pDfltColl; sqlite3VdbeAddOp4(v, OP_CollSeq, 0, 0, 0, (char *)pColl, P4_COLLSEQ); } - sqlite3VdbeAddOp4(v, OP_Function, constMask, r1, target, + sqlite3VdbeAddOp4(v, OP_Function0, constMask, r1, target, (char*)pDef, P4_FUNCDEF); sqlite3VdbeChangeP5(v, (u8)nFarg); if( nFarg && constMask==0 ){ @@ -85754,268 +87455,6 @@ SQLITE_PRIVATE void sqlite3ExprCodeAndCache(Parse *pParse, Expr *pExpr, int targ exprToRegister(pExpr, iMem); } -#ifdef SQLITE_DEBUG -/* -** Generate a human-readable explanation of an expression tree. -*/ -SQLITE_PRIVATE void sqlite3TreeViewExpr(TreeView *pView, const Expr *pExpr, u8 moreToFollow){ - const char *zBinOp = 0; /* Binary operator */ - const char *zUniOp = 0; /* Unary operator */ - pView = sqlite3TreeViewPush(pView, moreToFollow); - if( pExpr==0 ){ - sqlite3TreeViewLine(pView, "nil"); - sqlite3TreeViewPop(pView); - return; - } - switch( pExpr->op ){ - case TK_AGG_COLUMN: { - sqlite3TreeViewLine(pView, "AGG{%d:%d}", - pExpr->iTable, pExpr->iColumn); - break; - } - case TK_COLUMN: { - if( pExpr->iTable<0 ){ - /* This only happens when coding check constraints */ - sqlite3TreeViewLine(pView, "COLUMN(%d)", pExpr->iColumn); - }else{ - sqlite3TreeViewLine(pView, "{%d:%d}", - pExpr->iTable, pExpr->iColumn); - } - break; - } - case TK_INTEGER: { - if( pExpr->flags & EP_IntValue ){ - sqlite3TreeViewLine(pView, "%d", pExpr->u.iValue); - }else{ - sqlite3TreeViewLine(pView, "%s", pExpr->u.zToken); - } - break; - } -#ifndef SQLITE_OMIT_FLOATING_POINT - case TK_FLOAT: { - sqlite3TreeViewLine(pView,"%s", pExpr->u.zToken); - break; - } -#endif - case TK_STRING: { - sqlite3TreeViewLine(pView,"%Q", pExpr->u.zToken); - break; - } - case TK_NULL: { - sqlite3TreeViewLine(pView,"NULL"); - break; - } -#ifndef SQLITE_OMIT_BLOB_LITERAL - case TK_BLOB: { - sqlite3TreeViewLine(pView,"%s", pExpr->u.zToken); - break; - } -#endif - case TK_VARIABLE: { - sqlite3TreeViewLine(pView,"VARIABLE(%s,%d)", - pExpr->u.zToken, pExpr->iColumn); - break; - } - case TK_REGISTER: { - sqlite3TreeViewLine(pView,"REGISTER(%d)", pExpr->iTable); - break; - } - case TK_AS: { - sqlite3TreeViewLine(pView,"AS %Q", pExpr->u.zToken); - sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); - break; - } - case TK_ID: { - sqlite3TreeViewLine(pView,"ID %Q", pExpr->u.zToken); - break; - } -#ifndef SQLITE_OMIT_CAST - case TK_CAST: { - /* Expressions of the form: CAST(pLeft AS token) */ - sqlite3TreeViewLine(pView,"CAST %Q", pExpr->u.zToken); - sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); - break; - } -#endif /* SQLITE_OMIT_CAST */ - case TK_LT: zBinOp = "LT"; break; - case TK_LE: zBinOp = "LE"; break; - case TK_GT: zBinOp = "GT"; break; - case TK_GE: zBinOp = "GE"; break; - case TK_NE: zBinOp = "NE"; break; - case TK_EQ: zBinOp = "EQ"; break; - case TK_IS: zBinOp = "IS"; break; - case TK_ISNOT: zBinOp = "ISNOT"; break; - case TK_AND: zBinOp = "AND"; break; - case TK_OR: zBinOp = "OR"; break; - case TK_PLUS: zBinOp = "ADD"; break; - case TK_STAR: zBinOp = "MUL"; break; - case TK_MINUS: zBinOp = "SUB"; break; - case TK_REM: zBinOp = "REM"; break; - case TK_BITAND: zBinOp = "BITAND"; break; - case TK_BITOR: zBinOp = "BITOR"; break; - case TK_SLASH: zBinOp = "DIV"; break; - case TK_LSHIFT: zBinOp = "LSHIFT"; break; - case TK_RSHIFT: zBinOp = "RSHIFT"; break; - case TK_CONCAT: zBinOp = "CONCAT"; break; - case TK_DOT: zBinOp = "DOT"; break; - - case TK_UMINUS: zUniOp = "UMINUS"; break; - case TK_UPLUS: zUniOp = "UPLUS"; break; - case TK_BITNOT: zUniOp = "BITNOT"; break; - case TK_NOT: zUniOp = "NOT"; break; - case TK_ISNULL: zUniOp = "ISNULL"; break; - case TK_NOTNULL: zUniOp = "NOTNULL"; break; - - case TK_COLLATE: { - sqlite3TreeViewLine(pView, "COLLATE %Q", pExpr->u.zToken); - sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); - break; - } - - case TK_AGG_FUNCTION: - case TK_FUNCTION: { - ExprList *pFarg; /* List of function arguments */ - if( ExprHasProperty(pExpr, EP_TokenOnly) ){ - pFarg = 0; - }else{ - pFarg = pExpr->x.pList; - } - if( pExpr->op==TK_AGG_FUNCTION ){ - sqlite3TreeViewLine(pView, "AGG_FUNCTION%d %Q", - pExpr->op2, pExpr->u.zToken); - }else{ - sqlite3TreeViewLine(pView, "FUNCTION %Q", pExpr->u.zToken); - } - if( pFarg ){ - sqlite3TreeViewExprList(pView, pFarg, 0, 0); - } - break; - } -#ifndef SQLITE_OMIT_SUBQUERY - case TK_EXISTS: { - sqlite3TreeViewLine(pView, "EXISTS-expr"); - sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0); - break; - } - case TK_SELECT: { - sqlite3TreeViewLine(pView, "SELECT-expr"); - sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0); - break; - } - case TK_IN: { - sqlite3TreeViewLine(pView, "IN"); - sqlite3TreeViewExpr(pView, pExpr->pLeft, 1); - if( ExprHasProperty(pExpr, EP_xIsSelect) ){ - sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0); - }else{ - sqlite3TreeViewExprList(pView, pExpr->x.pList, 0, 0); - } - break; - } -#endif /* SQLITE_OMIT_SUBQUERY */ - - /* - ** x BETWEEN y AND z - ** - ** This is equivalent to - ** - ** x>=y AND x<=z - ** - ** X is stored in pExpr->pLeft. - ** Y is stored in pExpr->pList->a[0].pExpr. - ** Z is stored in pExpr->pList->a[1].pExpr. - */ - case TK_BETWEEN: { - Expr *pX = pExpr->pLeft; - Expr *pY = pExpr->x.pList->a[0].pExpr; - Expr *pZ = pExpr->x.pList->a[1].pExpr; - sqlite3TreeViewLine(pView, "BETWEEN"); - sqlite3TreeViewExpr(pView, pX, 1); - sqlite3TreeViewExpr(pView, pY, 1); - sqlite3TreeViewExpr(pView, pZ, 0); - break; - } - case TK_TRIGGER: { - /* If the opcode is TK_TRIGGER, then the expression is a reference - ** to a column in the new.* or old.* pseudo-tables available to - ** trigger programs. In this case Expr.iTable is set to 1 for the - ** new.* pseudo-table, or 0 for the old.* pseudo-table. Expr.iColumn - ** is set to the column of the pseudo-table to read, or to -1 to - ** read the rowid field. - */ - sqlite3TreeViewLine(pView, "%s(%d)", - pExpr->iTable ? "NEW" : "OLD", pExpr->iColumn); - break; - } - case TK_CASE: { - sqlite3TreeViewLine(pView, "CASE"); - sqlite3TreeViewExpr(pView, pExpr->pLeft, 1); - sqlite3TreeViewExprList(pView, pExpr->x.pList, 0, 0); - break; - } -#ifndef SQLITE_OMIT_TRIGGER - case TK_RAISE: { - const char *zType = "unk"; - switch( pExpr->affinity ){ - case OE_Rollback: zType = "rollback"; break; - case OE_Abort: zType = "abort"; break; - case OE_Fail: zType = "fail"; break; - case OE_Ignore: zType = "ignore"; break; - } - sqlite3TreeViewLine(pView, "RAISE %s(%Q)", zType, pExpr->u.zToken); - break; - } -#endif - default: { - sqlite3TreeViewLine(pView, "op=%d", pExpr->op); - break; - } - } - if( zBinOp ){ - sqlite3TreeViewLine(pView, "%s", zBinOp); - sqlite3TreeViewExpr(pView, pExpr->pLeft, 1); - sqlite3TreeViewExpr(pView, pExpr->pRight, 0); - }else if( zUniOp ){ - sqlite3TreeViewLine(pView, "%s", zUniOp); - sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); - } - sqlite3TreeViewPop(pView); -} -#endif /* SQLITE_DEBUG */ - -#ifdef SQLITE_DEBUG -/* -** Generate a human-readable explanation of an expression list. -*/ -SQLITE_PRIVATE void sqlite3TreeViewExprList( - TreeView *pView, - const ExprList *pList, - u8 moreToFollow, - const char *zLabel -){ - int i; - pView = sqlite3TreeViewPush(pView, moreToFollow); - if( zLabel==0 || zLabel[0]==0 ) zLabel = "LIST"; - if( pList==0 ){ - sqlite3TreeViewLine(pView, "%s (empty)", zLabel); - }else{ - sqlite3TreeViewLine(pView, "%s", zLabel); - for(i=0; inExpr; i++){ - sqlite3TreeViewExpr(pView, pList->a[i].pExpr, inExpr-1); -#if 0 - if( pList->a[i].zName ){ - sqlite3ExplainPrintf(pOut, " AS %s", pList->a[i].zName); - } - if( pList->a[i].bSpanIsTab ){ - sqlite3ExplainPrintf(pOut, " (%s)", pList->a[i].zSpan); - } -#endif - } - } - sqlite3TreeViewPop(pView); -} -#endif /* SQLITE_DEBUG */ - /* ** Generate code that pushes the value of every element of the given ** expression list into a sequence of registers beginning at target. @@ -86407,6 +87846,21 @@ SQLITE_PRIVATE void sqlite3ExprIfFalse(Parse *pParse, Expr *pExpr, int dest, int sqlite3ReleaseTempReg(pParse, regFree2); } +/* +** Like sqlite3ExprIfFalse() except that a copy is made of pExpr before +** code generation, and that copy is deleted after code generation. This +** ensures that the original pExpr is unchanged. +*/ +SQLITE_PRIVATE void sqlite3ExprIfFalseDup(Parse *pParse, Expr *pExpr, int dest,int jumpIfNull){ + sqlite3 *db = pParse->db; + Expr *pCopy = sqlite3ExprDup(db, pExpr, 0); + if( db->mallocFailed==0 ){ + sqlite3ExprIfFalse(pParse, pCopy, dest, jumpIfNull); + } + sqlite3ExprDelete(db, pCopy); +} + + /* ** Do a deep comparison of two expression trees. Return 0 if the two ** expressions are completely identical. Return 1 if they differ only @@ -86461,7 +87915,7 @@ SQLITE_PRIVATE int sqlite3ExprCompare(Expr *pA, Expr *pB, int iTab){ if( sqlite3ExprCompare(pA->pLeft, pB->pLeft, iTab) ) return 2; if( sqlite3ExprCompare(pA->pRight, pB->pRight, iTab) ) return 2; if( sqlite3ExprListCompare(pA->x.pList, pB->x.pList, iTab) ) return 2; - if( ALWAYS((combinedFlags & EP_Reduced)==0) ){ + if( ALWAYS((combinedFlags & EP_Reduced)==0) && pA->op!=TK_STRING ){ if( pA->iColumn!=pB->iColumn ) return 2; if( pA->iTable!=pB->iTable && (pA->iTable!=iTab || NEVER(pB->iTable>=0)) ) return 2; @@ -86880,6 +88334,7 @@ SQLITE_PRIVATE void sqlite3ClearTempRegCache(Parse *pParse){ ** This file contains C code routines that used to generate VDBE code ** that implements the ALTER TABLE command. */ +/* #include "sqliteInt.h" */ /* ** The code in this file only exists if we are not omitting the @@ -86993,6 +88448,7 @@ static void renameParentFunc( n = sqlite3GetToken(z, &token); }while( token==TK_SPACE ); + if( token==TK_ILLEGAL ) break; zParent = sqlite3DbStrNDup(db, (const char *)z, n); if( zParent==0 ) break; sqlite3Dequote(zParent); @@ -87558,7 +89014,7 @@ SQLITE_PRIVATE void sqlite3AlterFinishAddColumn(Parse *pParse, Token *pColDef){ if( pDflt ){ sqlite3_value *pVal = 0; int rc; - rc = sqlite3ValueFromExpr(db, pDflt, SQLITE_UTF8, SQLITE_AFF_NONE, &pVal); + rc = sqlite3ValueFromExpr(db, pDflt, SQLITE_UTF8, SQLITE_AFF_BLOB, &pVal); assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); if( rc!=SQLITE_OK ){ db->mallocFailed = 1; @@ -87841,6 +89297,7 @@ exit_begin_add_column: ** integer in the equivalent columns in sqlite_stat4. */ #ifndef SQLITE_OMIT_ANALYZE +/* #include "sqliteInt.h" */ #if defined(SQLITE_ENABLE_STAT4) # define IsStat4 1 @@ -88643,7 +90100,7 @@ static void callStatGet(Vdbe *v, int regStat4, int iParam, int regOut){ #else UNUSED_PARAMETER( iParam ); #endif - sqlite3VdbeAddOp3(v, OP_Function, 0, regStat4, regOut); + sqlite3VdbeAddOp3(v, OP_Function0, 0, regStat4, regOut); sqlite3VdbeChangeP4(v, -1, (char*)&statGetFuncdef, P4_FUNCDEF); sqlite3VdbeChangeP5(v, 1 + IsStat34); } @@ -88798,7 +90255,7 @@ static void analyzeOneTable( #endif sqlite3VdbeAddOp2(v, OP_Integer, nCol, regStat4+1); sqlite3VdbeAddOp2(v, OP_Integer, pIdx->nKeyCol, regStat4+2); - sqlite3VdbeAddOp3(v, OP_Function, 0, regStat4+1, regStat4); + sqlite3VdbeAddOp3(v, OP_Function0, 0, regStat4+1, regStat4); sqlite3VdbeChangeP4(v, -1, (char*)&statInitFuncdef, P4_FUNCDEF); sqlite3VdbeChangeP5(v, 2+IsStat34); @@ -88894,7 +90351,7 @@ static void analyzeOneTable( } #endif assert( regChng==(regStat4+1) ); - sqlite3VdbeAddOp3(v, OP_Function, 1, regStat4, regTemp); + sqlite3VdbeAddOp3(v, OP_Function0, 1, regStat4, regTemp); sqlite3VdbeChangeP4(v, -1, (char*)&statPushFuncdef, P4_FUNCDEF); sqlite3VdbeChangeP5(v, 2+IsStat34); sqlite3VdbeAddOp2(v, OP_Next, iIdxCur, addrNextRow); VdbeCoverage(v); @@ -89219,14 +90676,17 @@ static int analysisLoader(void *pData, int argc, char **argv, char **NotUsed){ z = argv[2]; if( pIndex ){ + tRowcnt *aiRowEst = 0; int nCol = pIndex->nKeyCol+1; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 - tRowcnt * const aiRowEst = pIndex->aiRowEst = (tRowcnt*)sqlite3MallocZero( - sizeof(tRowcnt) * nCol - ); - if( aiRowEst==0 ) pInfo->db->mallocFailed = 1; -#else - tRowcnt * const aiRowEst = 0; + /* Index.aiRowEst may already be set here if there are duplicate + ** sqlite_stat1 entries for this index. In that case just clobber + ** the old data with the new instead of allocating a new array. */ + if( pIndex->aiRowEst==0 ){ + pIndex->aiRowEst = (tRowcnt*)sqlite3MallocZero(sizeof(tRowcnt) * nCol); + if( pIndex->aiRowEst==0 ) pInfo->db->mallocFailed = 1; + } + aiRowEst = pIndex->aiRowEst; #endif pIndex->bUnordered = 0; decodeIntArray((char*)z, nCol, aiRowEst, pIndex->aiRowLogEst, pIndex); @@ -89603,6 +91063,7 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ ************************************************************************* ** This file contains code used to implement the ATTACH and DETACH commands. */ +/* #include "sqliteInt.h" */ #ifndef SQLITE_OMIT_ATTACH /* @@ -89889,7 +91350,7 @@ static void detachFunc( sqlite3BtreeClose(pDb->pBt); pDb->pBt = 0; pDb->pSchema = 0; - sqlite3ResetAllSchemasOfConnection(db); + sqlite3CollapseDatabaseArray(db); return; detach_error: @@ -89923,7 +91384,6 @@ static void codeAttach( SQLITE_OK!=(rc = resolveAttachExpr(&sName, pDbname)) || SQLITE_OK!=(rc = resolveAttachExpr(&sName, pKey)) ){ - pParse->nErr++; goto attach_end; } @@ -89951,7 +91411,7 @@ static void codeAttach( assert( v || db->mallocFailed ); if( v ){ - sqlite3VdbeAddOp3(v, OP_Function, 0, regArgs+3-pFunc->nArg, regArgs+3); + sqlite3VdbeAddOp3(v, OP_Function0, 0, regArgs+3-pFunc->nArg, regArgs+3); assert( pFunc->nArg==-1 || (pFunc->nArg&0xff)==pFunc->nArg ); sqlite3VdbeChangeP5(v, (u8)(pFunc->nArg)); sqlite3VdbeChangeP4(v, -1, (char *)pFunc, P4_FUNCDEF); @@ -90193,6 +91653,7 @@ SQLITE_PRIVATE int sqlite3FixTriggerStep( ** systems that do not need this facility may omit it by recompiling ** the library with -DSQLITE_OMIT_AUTHORIZATION=1 */ +/* #include "sqliteInt.h" */ /* ** All of the code in this file may be omitted by defining a single @@ -90463,6 +91924,7 @@ SQLITE_PRIVATE void sqlite3AuthContextPop(AuthContext *pContext){ ** COMMIT ** ROLLBACK */ +/* #include "sqliteInt.h" */ /* ** This routine is called when a new SQL statement is beginning to @@ -90582,9 +92044,11 @@ SQLITE_PRIVATE void sqlite3FinishCoding(Parse *pParse){ assert( pParse->pToplevel==0 ); db = pParse->db; - if( db->mallocFailed ) return; if( pParse->nested ) return; - if( pParse->nErr ) return; + if( db->mallocFailed || pParse->nErr ){ + if( pParse->rc==SQLITE_OK ) pParse->rc = SQLITE_ERROR; + return; + } /* Begin by generating some termination code at the end of the ** vdbe program @@ -90666,7 +92130,7 @@ SQLITE_PRIVATE void sqlite3FinishCoding(Parse *pParse){ /* Get the VDBE program ready for execution */ - if( v && ALWAYS(pParse->nErr==0) && !db->mallocFailed ){ + if( v && pParse->nErr==0 && !db->mallocFailed ){ assert( pParse->iCacheLevel==0 ); /* Disables and re-enables match */ /* A minimum of one cursor is required if autoincrement is used * See ticket [a696379c1f08866] */ @@ -91201,14 +92665,12 @@ SQLITE_PRIVATE int sqlite3TwoPartName( if( ALWAYS(pName2!=0) && pName2->n>0 ){ if( db->init.busy ) { sqlite3ErrorMsg(pParse, "corrupt database"); - pParse->nErr++; return -1; } *pUnqual = pName2; iDb = sqlite3FindDb(db, pName1); if( iDb<0 ){ sqlite3ErrorMsg(pParse, "unknown database %T", pName1); - pParse->nErr++; return -1; } }else{ @@ -91367,7 +92829,7 @@ SQLITE_PRIVATE void sqlite3StartTable( if( !noErr ){ sqlite3ErrorMsg(pParse, "table %T already exists", pName); }else{ - assert( !db->init.busy ); + assert( !db->init.busy || CORRUPT_DB ); sqlite3CodeVerifySchema(pParse, iDb); } goto begin_table_error; @@ -91416,7 +92878,7 @@ SQLITE_PRIVATE void sqlite3StartTable( int j1; int fileFormat; int reg1, reg2, reg3; - sqlite3BeginWriteOperation(pParse, 0, iDb); + sqlite3BeginWriteOperation(pParse, 1, iDb); #ifndef SQLITE_OMIT_VIRTUALTABLE if( isVirtual ){ @@ -91532,10 +92994,10 @@ SQLITE_PRIVATE void sqlite3AddColumn(Parse *pParse, Token *pName){ pCol->zName = z; /* If there is no type specified, columns have the default affinity - ** 'NONE'. If there is a type specified, then sqlite3AddColumnType() will + ** 'BLOB'. If there is a type specified, then sqlite3AddColumnType() will ** be called next to set pCol->affinity correctly. */ - pCol->affinity = SQLITE_AFF_NONE; + pCol->affinity = SQLITE_AFF_BLOB; pCol->szEst = 1; p->nCol++; } @@ -91570,7 +93032,7 @@ SQLITE_PRIVATE void sqlite3AddNotNull(Parse *pParse, int onError){ ** 'CHAR' | SQLITE_AFF_TEXT ** 'CLOB' | SQLITE_AFF_TEXT ** 'TEXT' | SQLITE_AFF_TEXT -** 'BLOB' | SQLITE_AFF_NONE +** 'BLOB' | SQLITE_AFF_BLOB ** 'REAL' | SQLITE_AFF_REAL ** 'FLOA' | SQLITE_AFF_REAL ** 'DOUB' | SQLITE_AFF_REAL @@ -91596,7 +93058,7 @@ SQLITE_PRIVATE char sqlite3AffinityType(const char *zIn, u8 *pszEst){ aff = SQLITE_AFF_TEXT; }else if( h==(('b'<<24)+('l'<<16)+('o'<<8)+'b') /* BLOB */ && (aff==SQLITE_AFF_NUMERIC || aff==SQLITE_AFF_REAL) ){ - aff = SQLITE_AFF_NONE; + aff = SQLITE_AFF_BLOB; if( zIn[0]=='(' ) zChar = zIn; #ifndef SQLITE_OMIT_FLOATING_POINT }else if( h==(('r'<<24)+('e'<<16)+('a'<<8)+'l') /* REAL */ @@ -91656,7 +93118,8 @@ SQLITE_PRIVATE void sqlite3AddColumnType(Parse *pParse, Token *pType){ p = pParse->pNewTable; if( p==0 || NEVER(p->nCol<1) ) return; pCol = &p->aCol[p->nCol-1]; - assert( pCol->zType==0 ); + assert( pCol->zType==0 || CORRUPT_DB ); + sqlite3DbFree(pParse->db, pCol->zType); pCol->zType = sqlite3NameFromToken(pParse->db, pType); pCol->affinity = sqlite3AffinityType(pCol->zType, &pCol->szEst); } @@ -91764,14 +93227,11 @@ SQLITE_PRIVATE void sqlite3AddPrimaryKey( "INTEGER PRIMARY KEY"); #endif }else{ - Vdbe *v = pParse->pVdbe; Index *p; - if( v ) pParse->addrSkipPK = sqlite3VdbeAddOp0(v, OP_Noop); p = sqlite3CreateIndex(pParse, 0, 0, 0, pList, onError, 0, 0, sortOrder, 0); if( p ){ p->idxType = SQLITE_IDXTYPE_PRIMARYKEY; - if( v ) sqlite3VdbeJumpHere(v, pParse->addrSkipPK); } pList = 0; } @@ -91990,7 +93450,7 @@ static char *createTableStmt(sqlite3 *db, Table *p){ zStmt[k++] = '('; for(pCol=p->aCol, i=0; inCol; i++, pCol++){ static const char * const azType[] = { - /* SQLITE_AFF_NONE */ "", + /* SQLITE_AFF_BLOB */ "", /* SQLITE_AFF_TEXT */ " TEXT", /* SQLITE_AFF_NUMERIC */ " NUM", /* SQLITE_AFF_INTEGER */ " INT", @@ -92003,17 +93463,17 @@ static char *createTableStmt(sqlite3 *db, Table *p){ k += sqlite3Strlen30(&zStmt[k]); zSep = zSep2; identPut(zStmt, &k, pCol->zName); - assert( pCol->affinity-SQLITE_AFF_NONE >= 0 ); - assert( pCol->affinity-SQLITE_AFF_NONE < ArraySize(azType) ); - testcase( pCol->affinity==SQLITE_AFF_NONE ); + assert( pCol->affinity-SQLITE_AFF_BLOB >= 0 ); + assert( pCol->affinity-SQLITE_AFF_BLOB < ArraySize(azType) ); + testcase( pCol->affinity==SQLITE_AFF_BLOB ); testcase( pCol->affinity==SQLITE_AFF_TEXT ); testcase( pCol->affinity==SQLITE_AFF_NUMERIC ); testcase( pCol->affinity==SQLITE_AFF_INTEGER ); testcase( pCol->affinity==SQLITE_AFF_REAL ); - zType = azType[pCol->affinity - SQLITE_AFF_NONE]; + zType = azType[pCol->affinity - SQLITE_AFF_BLOB]; len = sqlite3Strlen30(zType); - assert( pCol->affinity==SQLITE_AFF_NONE + assert( pCol->affinity==SQLITE_AFF_BLOB || pCol->affinity==sqlite3AffinityType(zType, 0) ); memcpy(&zStmt[k], zType, len); k += len; @@ -92124,14 +93584,6 @@ static void convertToWithoutRowidTable(Parse *pParse, Table *pTab){ sqlite3VdbeGetOp(v, pParse->addrCrTab)->opcode = OP_CreateIndex; } - /* Bypass the creation of the PRIMARY KEY btree and the sqlite_master - ** table entry. - */ - if( pParse->addrSkipPK ){ - assert( v ); - sqlite3VdbeGetOp(v, pParse->addrSkipPK)->opcode = OP_Goto; - } - /* Locate the PRIMARY KEY index. Or, if this table was originally ** an INTEGER PRIMARY KEY table, create a new PRIMARY KEY index. */ @@ -92149,6 +93601,16 @@ static void convertToWithoutRowidTable(Parse *pParse, Table *pTab){ pTab->iPKey = -1; }else{ pPk = sqlite3PrimaryKeyIndex(pTab); + + /* Bypass the creation of the PRIMARY KEY btree and the sqlite_master + ** table entry. This is only required if currently generating VDBE + ** code for a CREATE TABLE (not when parsing one as part of reading + ** a database schema). */ + if( v ){ + assert( db->init.busy==0 ); + sqlite3VdbeGetOp(v, pPk->tnum)->opcode = OP_Goto; + } + /* ** Remove all redundant columns from the PRIMARY KEY. For example, change ** "PRIMARY KEY(a,b,a,b,c,b,c,d)" into just "PRIMARY KEY(a,b,c,d)". Later @@ -92284,7 +93746,7 @@ SQLITE_PRIVATE void sqlite3EndTable( if( (p->tabFlags & TF_HasPrimaryKey)==0 ){ sqlite3ErrorMsg(pParse, "PRIMARY KEY missing on table %s", p->zName); }else{ - p->tabFlags |= TF_WithoutRowid; + p->tabFlags |= TF_WithoutRowid | TF_NoVisibleRowid; convertToWithoutRowidTable(pParse, p); } } @@ -92352,26 +93814,46 @@ SQLITE_PRIVATE void sqlite3EndTable( ** be redundant. */ if( pSelect ){ - SelectDest dest; - Table *pSelTab; - + SelectDest dest; /* Where the SELECT should store results */ + int regYield; /* Register holding co-routine entry-point */ + int addrTop; /* Top of the co-routine */ + int regRec; /* A record to be insert into the new table */ + int regRowid; /* Rowid of the next row to insert */ + int addrInsLoop; /* Top of the loop for inserting rows */ + Table *pSelTab; /* A table that describes the SELECT results */ + + regYield = ++pParse->nMem; + regRec = ++pParse->nMem; + regRowid = ++pParse->nMem; assert(pParse->nTab==1); + sqlite3MayAbort(pParse); sqlite3VdbeAddOp3(v, OP_OpenWrite, 1, pParse->regRoot, iDb); sqlite3VdbeChangeP5(v, OPFLAG_P2ISREG); pParse->nTab = 2; - sqlite3SelectDestInit(&dest, SRT_Table, 1); + addrTop = sqlite3VdbeCurrentAddr(v) + 1; + sqlite3VdbeAddOp3(v, OP_InitCoroutine, regYield, 0, addrTop); + sqlite3SelectDestInit(&dest, SRT_Coroutine, regYield); sqlite3Select(pParse, pSelect, &dest); + sqlite3VdbeAddOp1(v, OP_EndCoroutine, regYield); + sqlite3VdbeJumpHere(v, addrTop - 1); + if( pParse->nErr ) return; + pSelTab = sqlite3ResultSetOfSelect(pParse, pSelect); + if( pSelTab==0 ) return; + assert( p->aCol==0 ); + p->nCol = pSelTab->nCol; + p->aCol = pSelTab->aCol; + pSelTab->nCol = 0; + pSelTab->aCol = 0; + sqlite3DeleteTable(db, pSelTab); + addrInsLoop = sqlite3VdbeAddOp1(v, OP_Yield, dest.iSDParm); + VdbeCoverage(v); + sqlite3VdbeAddOp3(v, OP_MakeRecord, dest.iSdst, dest.nSdst, regRec); + sqlite3TableAffinity(v, p, 0); + sqlite3VdbeAddOp2(v, OP_NewRowid, 1, regRowid); + sqlite3VdbeAddOp3(v, OP_Insert, 1, regRec, regRowid); + sqlite3VdbeAddOp2(v, OP_Goto, 0, addrInsLoop); + sqlite3VdbeJumpHere(v, addrInsLoop); sqlite3VdbeAddOp1(v, OP_Close, 1); - if( pParse->nErr==0 ){ - pSelTab = sqlite3ResultSetOfSelect(pParse, pSelect); - if( pSelTab==0 ) return; - assert( p->aCol==0 ); - p->nCol = pSelTab->nCol; - p->aCol = pSelTab->aCol; - pSelTab->nCol = 0; - pSelTab->aCol = 0; - sqlite3DeleteTable(db, pSelTab); - } } /* Compute the complete text of the CREATE statement */ @@ -92890,6 +94372,7 @@ SQLITE_PRIVATE void sqlite3DropTable(Parse *pParse, SrcList *pName, int isView, } assert( pParse->nErr==0 ); assert( pName->nSrc==1 ); + if( sqlite3ReadSchema(pParse) ) goto exit_drop_table; if( noErr ) db->suppressErr++; pTab = sqlite3LocateTableItem(pParse, isView, &pName->a[0]); if( noErr ) db->suppressErr--; @@ -93203,7 +94686,8 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){ addr2 = sqlite3VdbeCurrentAddr(v); } sqlite3VdbeAddOp3(v, OP_SorterData, iSorter, regRecord, iIdx); - sqlite3VdbeAddOp3(v, OP_IdxInsert, iIdx, regRecord, 1); + sqlite3VdbeAddOp3(v, OP_Last, iIdx, 0, -1); + sqlite3VdbeAddOp3(v, OP_IdxInsert, iIdx, regRecord, 0); sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); sqlite3ReleaseTempReg(pParse, regRecord); sqlite3VdbeAddOp2(v, OP_SorterNext, iSorter, addr2); VdbeCoverage(v); @@ -93296,8 +94780,7 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex( char *zExtra = 0; /* Extra space after the Index object */ Index *pPk = 0; /* PRIMARY KEY index for WITHOUT ROWID tables */ - assert( pParse->nErr==0 ); /* Never called with prior errors */ - if( db->mallocFailed || IN_DECLARE_VTAB ){ + if( db->mallocFailed || IN_DECLARE_VTAB || pParse->nErr>0 ){ goto exit_create_index; } if( SQLITE_OK!=sqlite3ReadSchema(pParse) ){ @@ -93669,10 +95152,15 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex( v = sqlite3GetVdbe(pParse); if( v==0 ) goto exit_create_index; - - /* Create the rootpage for the index - */ sqlite3BeginWriteOperation(pParse, 1, iDb); + + /* Create the rootpage for the index using CreateIndex. But before + ** doing so, code a Noop instruction and store its address in + ** Index.tnum. This is required in case this index is actually a + ** PRIMARY KEY and the table is actually a WITHOUT ROWID table. In + ** that case the convertToWithoutRowidTable() routine will replace + ** the Noop with a Goto to jump over the VDBE code generated below. */ + pIndex->tnum = sqlite3VdbeAddOp0(v, OP_Noop); sqlite3VdbeAddOp2(v, OP_CreateIndex, iDb, iMem); /* Gather the complete text of the CREATE INDEX statement into @@ -93712,6 +95200,8 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex( sqlite3MPrintf(db, "name='%q' AND type='index'", pIndex->zName)); sqlite3VdbeAddOp1(v, OP_Expire, 0); } + + sqlite3VdbeJumpHere(v, pIndex->tnum); } /* When adding an index to the list of indices for a table, make @@ -94114,7 +95604,7 @@ SQLITE_PRIVATE void sqlite3SrcListDelete(sqlite3 *db, SrcList *pList){ sqlite3DbFree(db, pItem->zDatabase); sqlite3DbFree(db, pItem->zName); sqlite3DbFree(db, pItem->zAlias); - sqlite3DbFree(db, pItem->zIndex); + sqlite3DbFree(db, pItem->zIndexedBy); sqlite3DeleteTable(db, pItem->pTab); sqlite3SelectDelete(db, pItem->pSelect); sqlite3ExprDelete(db, pItem->pOn); @@ -94187,13 +95677,13 @@ SQLITE_PRIVATE void sqlite3SrcListIndexedBy(Parse *pParse, SrcList *p, Token *pI assert( pIndexedBy!=0 ); if( p && ALWAYS(p->nSrc>0) ){ struct SrcList_item *pItem = &p->a[p->nSrc-1]; - assert( pItem->notIndexed==0 && pItem->zIndex==0 ); + assert( pItem->notIndexed==0 && pItem->zIndexedBy==0 ); if( pIndexedBy->n==1 && !pIndexedBy->z ){ /* A "NOT INDEXED" clause was supplied. See parse.y ** construct "indexed_opt" for details. */ pItem->notIndexed = 1; }else{ - pItem->zIndex = sqlite3NameFromToken(pParse->db, pIndexedBy); + pItem->zIndexedBy = sqlite3NameFromToken(pParse->db, pIndexedBy); } } } @@ -94216,7 +95706,6 @@ SQLITE_PRIVATE void sqlite3SrcListIndexedBy(Parse *pParse, SrcList *p, Token *pI SQLITE_PRIVATE void sqlite3SrcListShiftJoinType(SrcList *p){ if( p ){ int i; - assert( p->a || p->nSrc==0 ); for(i=p->nSrc-1; i>0; i--){ p->a[i].jointype = p->a[i-1].jointype; } @@ -94463,8 +95952,7 @@ SQLITE_PRIVATE void sqlite3UniqueConstraint( StrAccum errMsg; Table *pTab = pIdx->pTable; - sqlite3StrAccumInit(&errMsg, 0, 0, 200); - errMsg.db = pParse->db; + sqlite3StrAccumInit(&errMsg, pParse->db, 0, 0, 200); for(j=0; jnKeyCol; j++){ char *zCol = pTab->aCol[pIdx->aiColumn[j]].zName; if( j ) sqlite3StrAccumAppend(&errMsg, ", ", 2); @@ -94757,6 +96245,7 @@ SQLITE_PRIVATE void sqlite3WithDelete(sqlite3 *db, With *pWith){ ** of user defined functions and collation sequences. */ +/* #include "sqliteInt.h" */ /* ** Invoke the 'collation needed' callback to request a collation sequence @@ -95234,6 +96723,7 @@ SQLITE_PRIVATE Schema *sqlite3SchemaGet(sqlite3 *db, Btree *pBt){ ** This file contains C code routines that are called by the parser ** in order to generate code for DELETE FROM statements. */ +/* #include "sqliteInt.h" */ /* ** While a SrcList can in general represent multiple tables and subqueries @@ -96019,8 +97509,8 @@ SQLITE_PRIVATE int sqlite3GenerateIndexKey( *piPartIdxLabel = sqlite3VdbeMakeLabel(v); pParse->iPartIdxTab = iDataCur; sqlite3ExprCachePush(pParse); - sqlite3ExprIfFalse(pParse, pIdx->pPartIdxWhere, *piPartIdxLabel, - SQLITE_JUMPIFNULL); + sqlite3ExprIfFalseDup(pParse, pIdx->pPartIdxWhere, *piPartIdxLabel, + SQLITE_JUMPIFNULL); }else{ *piPartIdxLabel = 0; } @@ -96076,8 +97566,10 @@ SQLITE_PRIVATE void sqlite3ResolvePartIdxLabel(Parse *pParse, int iLabel){ ** functions of SQLite. (Some function, and in particular the date and ** time functions, are implemented separately.) */ +/* #include "sqliteInt.h" */ /* #include */ /* #include */ +/* #include "vdbeInt.h" */ /* ** Return the collating function associated with a function. @@ -96293,13 +97785,13 @@ static void printfFunc( StrAccum str; const char *zFormat; int n; + sqlite3 *db = sqlite3_context_db_handle(context); if( argc>=1 && (zFormat = (const char*)sqlite3_value_text(argv[0]))!=0 ){ x.nArg = argc-1; x.nUsed = 0; x.apArg = argv+1; - sqlite3StrAccumInit(&str, 0, 0, SQLITE_MAX_LENGTH); - str.db = sqlite3_context_db_handle(context); + sqlite3StrAccumInit(&str, db, 0, 0, db->aLimit[SQLITE_LIMIT_LENGTH]); sqlite3XPrintf(&str, SQLITE_PRINTF_SQLFUNC, zFormat, &x); n = str.nChar; sqlite3_result_text(context, sqlite3StrAccumFinish(&str), n, @@ -96449,7 +97941,7 @@ static void roundFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ #endif /* -** Allocate nByte bytes of space using sqlite3_malloc(). If the +** Allocate nByte bytes of space using sqlite3Malloc(). If the ** allocation fails, call sqlite3_result_error_nomem() to notify ** the database handle that malloc() has failed and return NULL. ** If nByte is larger than the maximum string or blob length, then @@ -96636,17 +98128,15 @@ struct compareInfo { /* ** For LIKE and GLOB matching on EBCDIC machines, assume that every -** character is exactly one byte in size. Also, all characters are -** able to participate in upper-case-to-lower-case mappings in EBCDIC -** whereas only characters less than 0x80 do in ASCII. +** character is exactly one byte in size. Also, provde the Utf8Read() +** macro for fast reading of the next character in the common case where +** the next character is ASCII. */ #if defined(SQLITE_EBCDIC) # define sqlite3Utf8Read(A) (*((*A)++)) -# define GlobUpperToLower(A) A = sqlite3UpperToLower[A] -# define GlobUpperToLowerAscii(A) A = sqlite3UpperToLower[A] +# define Utf8Read(A) (*(A++)) #else -# define GlobUpperToLower(A) if( A<=0x7f ){ A = sqlite3UpperToLower[A]; } -# define GlobUpperToLowerAscii(A) A = sqlite3UpperToLower[A] +# define Utf8Read(A) (A[0]<0x80?*(A++):sqlite3Utf8Read(&A)) #endif static const struct compareInfo globInfo = { '*', '?', '[', 0 }; @@ -96688,7 +98178,7 @@ static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 }; ** Ec Where E is the "esc" character and c is any other ** character, including '%', '_', and esc, match exactly c. ** -** The comments through this routine usually assume glob matching. +** The comments within this routine usually assume glob matching. ** ** This routine is usually quick, but can be N**2 in the worst case. */ @@ -96712,13 +98202,12 @@ static int patternCompare( */ matchOther = esc ? esc : pInfo->matchSet; - while( (c = sqlite3Utf8Read(&zPattern))!=0 ){ + while( (c = Utf8Read(zPattern))!=0 ){ if( c==matchAll ){ /* Match "*" */ /* Skip over multiple "*" characters in the pattern. If there ** are also "?" characters, skip those as well, but consume a ** single character of the input string for each "?" skipped */ - while( (c=sqlite3Utf8Read(&zPattern)) == matchAll - || c == matchOne ){ + while( (c=Utf8Read(zPattern)) == matchAll || c == matchOne ){ if( c==matchOne && sqlite3Utf8Read(&zString)==0 ){ return 0; } @@ -96763,7 +98252,7 @@ static int patternCompare( if( patternCompare(zPattern,zString,pInfo,esc) ) return 1; } }else{ - while( (c2 = sqlite3Utf8Read(&zString))!=0 ){ + while( (c2 = Utf8Read(zString))!=0 ){ if( c2!=c ) continue; if( patternCompare(zPattern,zString,pInfo,esc) ) return 1; } @@ -96809,7 +98298,7 @@ static int patternCompare( continue; } } - c2 = sqlite3Utf8Read(&zString); + c2 = Utf8Read(zString); if( c==c2 ) continue; if( noCase && c<0x80 && c2<0x80 && sqlite3Tolower(c)==sqlite3Tolower(c2) ){ continue; @@ -97118,7 +98607,7 @@ static void charFunc( ){ unsigned char *z, *zOut; int i; - zOut = z = sqlite3_malloc( argc*4+1 ); + zOut = z = sqlite3_malloc64( argc*4+1 ); if( z==0 ){ sqlite3_result_error_nomem(context); return; @@ -97186,16 +98675,14 @@ static void zeroblobFunc( sqlite3_value **argv ){ i64 n; - sqlite3 *db = sqlite3_context_db_handle(context); + int rc; assert( argc==1 ); UNUSED_PARAMETER(argc); n = sqlite3_value_int64(argv[0]); - testcase( n==db->aLimit[SQLITE_LIMIT_LENGTH] ); - testcase( n==db->aLimit[SQLITE_LIMIT_LENGTH]+1 ); - if( n>db->aLimit[SQLITE_LIMIT_LENGTH] ){ - sqlite3_result_error_toobig(context); - }else{ - sqlite3_result_zeroblob(context, (int)n); /* IMP: R-00293-64994 */ + if( n<0 ) n = 0; + rc = sqlite3_result_zeroblob64(context, n); /* IMP: R-00293-64994 */ + if( rc ){ + sqlite3_result_error_code(context, rc); } } @@ -97266,7 +98753,7 @@ static void replaceFunc( return; } zOld = zOut; - zOut = sqlite3_realloc(zOut, (int)nOut); + zOut = sqlite3_realloc64(zOut, (int)nOut); if( zOut==0 ){ sqlite3_result_error_nomem(context); sqlite3_free(zOld); @@ -97628,8 +99115,7 @@ static void groupConcatStep( if( pAccum ){ sqlite3 *db = sqlite3_context_db_handle(context); - int firstTerm = pAccum->useMalloc==0; - pAccum->useMalloc = 2; + int firstTerm = pAccum->mxAlloc==0; pAccum->mxAlloc = db->aLimit[SQLITE_LIMIT_LENGTH]; if( !firstTerm ){ if( argc==2 ){ @@ -97877,6 +99363,7 @@ SQLITE_PRIVATE void sqlite3RegisterGlobalFunctions(void){ ** This file contains code used by the compiler to add foreign key ** support to compiled SQL statements. */ +/* #include "sqliteInt.h" */ #ifndef SQLITE_OMIT_FOREIGN_KEY #ifndef SQLITE_OMIT_TRIGGER @@ -99049,7 +100536,8 @@ static Trigger *fkActionTrigger( iFromCol = aiCol ? aiCol[i] : pFKey->aCol[0].iFrom; assert( iFromCol>=0 ); - tToCol.z = pIdx ? pTab->aCol[pIdx->aiColumn[i]].zName : "oid"; + assert( pIdx!=0 || (pTab->iPKey>=0 && pTab->iPKeynCol) ); + tToCol.z = pTab->aCol[pIdx ? pIdx->aiColumn[i] : pTab->iPKey].zName; tFromCol.z = pFKey->pFrom->aCol[iFromCol].zName; tToCol.n = sqlite3Strlen30(tToCol.z); @@ -99061,10 +100549,10 @@ static Trigger *fkActionTrigger( ** parent table are used for the comparison. */ pEq = sqlite3PExpr(pParse, TK_EQ, sqlite3PExpr(pParse, TK_DOT, - sqlite3PExpr(pParse, TK_ID, 0, 0, &tOld), - sqlite3PExpr(pParse, TK_ID, 0, 0, &tToCol) + sqlite3ExprAlloc(db, TK_ID, &tOld, 0), + sqlite3ExprAlloc(db, TK_ID, &tToCol, 0) , 0), - sqlite3PExpr(pParse, TK_ID, 0, 0, &tFromCol) + sqlite3ExprAlloc(db, TK_ID, &tFromCol, 0) , 0); pWhere = sqlite3ExprAnd(db, pWhere, pEq); @@ -99076,12 +100564,12 @@ static Trigger *fkActionTrigger( if( pChanges ){ pEq = sqlite3PExpr(pParse, TK_IS, sqlite3PExpr(pParse, TK_DOT, - sqlite3PExpr(pParse, TK_ID, 0, 0, &tOld), - sqlite3PExpr(pParse, TK_ID, 0, 0, &tToCol), + sqlite3ExprAlloc(db, TK_ID, &tOld, 0), + sqlite3ExprAlloc(db, TK_ID, &tToCol, 0), 0), sqlite3PExpr(pParse, TK_DOT, - sqlite3PExpr(pParse, TK_ID, 0, 0, &tNew), - sqlite3PExpr(pParse, TK_ID, 0, 0, &tToCol), + sqlite3ExprAlloc(db, TK_ID, &tNew, 0), + sqlite3ExprAlloc(db, TK_ID, &tToCol, 0), 0), 0); pWhen = sqlite3ExprAnd(db, pWhen, pEq); @@ -99091,8 +100579,8 @@ static Trigger *fkActionTrigger( Expr *pNew; if( action==OE_Cascade ){ pNew = sqlite3PExpr(pParse, TK_DOT, - sqlite3PExpr(pParse, TK_ID, 0, 0, &tNew), - sqlite3PExpr(pParse, TK_ID, 0, 0, &tToCol) + sqlite3ExprAlloc(db, TK_ID, &tNew, 0), + sqlite3ExprAlloc(db, TK_ID, &tToCol, 0) , 0); }else if( action==OE_SetDflt ){ Expr *pDflt = pFKey->pFrom->aCol[iFromCol].pDflt; @@ -99139,13 +100627,12 @@ static Trigger *fkActionTrigger( pTrigger = (Trigger *)sqlite3DbMallocZero(db, sizeof(Trigger) + /* struct Trigger */ sizeof(TriggerStep) + /* Single step in trigger program */ - nFrom + 1 /* Space for pStep->target.z */ + nFrom + 1 /* Space for pStep->zTarget */ ); if( pTrigger ){ pStep = pTrigger->step_list = (TriggerStep *)&pTrigger[1]; - pStep->target.z = (char *)&pStep[1]; - pStep->target.n = nFrom; - memcpy((char *)pStep->target.z, zFrom, nFrom); + pStep->zTarget = (char *)&pStep[1]; + memcpy((char *)pStep->zTarget, zFrom, nFrom); pStep->pWhere = sqlite3ExprDup(db, pWhere, EXPRDUP_REDUCE); pStep->pExprList = sqlite3ExprListDup(db, pList, EXPRDUP_REDUCE); @@ -99281,6 +100768,7 @@ SQLITE_PRIVATE void sqlite3FkDelete(sqlite3 *db, Table *pTab){ ** This file contains C code routines that are called by the parser ** to handle INSERT statements in SQLite. */ +/* #include "sqliteInt.h" */ /* ** Generate code that will @@ -99310,7 +100798,7 @@ SQLITE_PRIVATE void sqlite3OpenTable( }else{ Index *pPk = sqlite3PrimaryKeyIndex(pTab); assert( pPk!=0 ); - assert( pPk->tnum=pTab->tnum ); + assert( pPk->tnum==pTab->tnum ); sqlite3VdbeAddOp3(v, opcode, iCur, pPk->tnum, iDb); sqlite3VdbeSetP4KeyInfo(pParse, pPk); VdbeComment((v, "%s", pTab->zName)); @@ -99324,7 +100812,7 @@ SQLITE_PRIVATE void sqlite3OpenTable( ** ** Character Column affinity ** ------------------------------ -** 'A' NONE +** 'A' BLOB ** 'B' TEXT ** 'C' NUMERIC ** 'D' INTEGER @@ -99367,9 +100855,9 @@ SQLITE_PRIVATE const char *sqlite3IndexAffinityStr(Vdbe *v, Index *pIdx){ /* ** Compute the affinity string for table pTab, if it has not already been -** computed. As an optimization, omit trailing SQLITE_AFF_NONE affinities. +** computed. As an optimization, omit trailing SQLITE_AFF_BLOB affinities. ** -** If the affinity exists (if it is no entirely SQLITE_AFF_NONE values) and +** If the affinity exists (if it is no entirely SQLITE_AFF_BLOB values) and ** if iReg>0 then code an OP_Affinity opcode that will set the affinities ** for register iReg and following. Or if affinities exists and iReg==0, ** then just set the P4 operand of the previous opcode (which should be @@ -99379,7 +100867,7 @@ SQLITE_PRIVATE const char *sqlite3IndexAffinityStr(Vdbe *v, Index *pIdx){ ** ** Character Column affinity ** ------------------------------ -** 'A' NONE +** 'A' BLOB ** 'B' TEXT ** 'C' NUMERIC ** 'D' INTEGER @@ -99401,7 +100889,7 @@ SQLITE_PRIVATE void sqlite3TableAffinity(Vdbe *v, Table *pTab, int iReg){ } do{ zColAff[i--] = 0; - }while( i>=0 && zColAff[i]==SQLITE_AFF_NONE ); + }while( i>=0 && zColAff[i]==SQLITE_AFF_BLOB ); pTab->zColAff = zColAff; } i = sqlite3Strlen30(zColAff); @@ -99610,20 +101098,23 @@ static int xferOptimization( /* ** This routine is called to handle SQL of the following forms: ** -** insert into TABLE (IDLIST) values(EXPRLIST) +** insert into TABLE (IDLIST) values(EXPRLIST),(EXPRLIST),... ** insert into TABLE (IDLIST) select +** insert into TABLE (IDLIST) default values ** ** The IDLIST following the table name is always optional. If omitted, -** then a list of all columns for the table is substituted. The IDLIST -** appears in the pColumn parameter. pColumn is NULL if IDLIST is omitted. +** then a list of all (non-hidden) columns for the table is substituted. +** The IDLIST appears in the pColumn parameter. pColumn is NULL if IDLIST +** is omitted. ** -** The pList parameter holds EXPRLIST in the first form of the INSERT -** statement above, and pSelect is NULL. For the second form, pList is -** NULL and pSelect is a pointer to the select statement used to generate -** data for the insert. +** For the pSelect parameter holds the values to be inserted for the +** first two forms shown above. A VALUES clause is really just short-hand +** for a SELECT statement that omits the FROM clause and everything else +** that follows. If the pSelect parameter is NULL, that means that the +** DEFAULT VALUES form of the INSERT statement is intended. ** ** The code generated follows one of four templates. For a simple -** insert with data coming from a VALUES clause, the code executes +** insert with data coming from a single-row VALUES clause, the code executes ** once straight down through. Pseudo-code follows (we call this ** the "1st template"): ** @@ -99730,7 +101221,7 @@ SQLITE_PRIVATE void sqlite3Insert( u8 useTempTable = 0; /* Store SELECT results in intermediate table */ u8 appendFlag = 0; /* True if the insert is likely to be an append */ u8 withoutRowid; /* 0 for normal table. 1 for WITHOUT ROWID table */ - u8 bIdListInOrder = 1; /* True if IDLIST is in table order */ + u8 bIdListInOrder; /* True if IDLIST is in table order */ ExprList *pList = 0; /* List of VALUES() to be inserted */ /* Register allocations */ @@ -99755,8 +101246,8 @@ SQLITE_PRIVATE void sqlite3Insert( } /* If the Select object is really just a simple VALUES() list with a - ** single row values (the common case) then keep that one row of values - ** and go ahead and discard the Select object + ** single row (the common case) then keep that one row of values + ** and discard the other (unused) parts of the pSelect object */ if( pSelect && (pSelect->selFlags & SF_Values)!=0 && pSelect->pPrior==0 ){ pList = pSelect->pEList; @@ -99864,6 +101355,7 @@ SQLITE_PRIVATE void sqlite3Insert( ** is appears in the original table. (The index of the INTEGER ** PRIMARY KEY in the original table is pTab->iPKey.) */ + bIdListInOrder = (pTab->tabFlags & TF_OOOHidden)==0; if( pColumn ){ for(i=0; inId; i++){ pColumn->a[i].idx = -1; @@ -99899,7 +101391,8 @@ SQLITE_PRIVATE void sqlite3Insert( ** co-routine is the common header to the 3rd and 4th templates. */ if( pSelect ){ - /* Data is coming from a SELECT. Generate a co-routine to run the SELECT */ + /* Data is coming from a SELECT or from a multi-row VALUES clause. + ** Generate a co-routine to run the SELECT. */ int regYield; /* Register holding co-routine entry-point */ int addrTop; /* Top of the co-routine */ int rc; /* Result code */ @@ -99912,8 +101405,7 @@ SQLITE_PRIVATE void sqlite3Insert( dest.nSdst = pTab->nCol; rc = sqlite3Select(pParse, pSelect, &dest); regFromSelect = dest.iSdst; - assert( pParse->nErr==0 || rc ); - if( rc || db->mallocFailed ) goto insert_cleanup; + if( rc || db->mallocFailed || pParse->nErr ) goto insert_cleanup; sqlite3VdbeAddOp1(v, OP_EndCoroutine, regYield); sqlite3VdbeJumpHere(v, addrTop - 1); /* label B: */ assert( pSelect->pEList ); @@ -99961,8 +101453,8 @@ SQLITE_PRIVATE void sqlite3Insert( sqlite3ReleaseTempReg(pParse, regTempRowid); } }else{ - /* This is the case if the data for the INSERT is coming from a VALUES - ** clause + /* This is the case if the data for the INSERT is coming from a + ** single-row VALUES clause */ NameContext sNC; memset(&sNC, 0, sizeof(sNC)); @@ -100645,8 +102137,8 @@ SQLITE_PRIVATE void sqlite3GenerateConstraintChecks( if( pIdx->pPartIdxWhere ){ sqlite3VdbeAddOp2(v, OP_Null, 0, aRegIdx[ix]); pParse->ckBase = regNewData+1; - sqlite3ExprIfFalse(pParse, pIdx->pPartIdxWhere, addrUniqueOk, - SQLITE_JUMPIFNULL); + sqlite3ExprIfFalseDup(pParse, pIdx->pPartIdxWhere, addrUniqueOk, + SQLITE_JUMPIFNULL); pParse->ckBase = 0; } @@ -101033,6 +102525,7 @@ static int xferOptimization( int onError, /* How to handle constraint errors */ int iDbDest /* The database of pDest */ ){ + sqlite3 *db = pParse->db; ExprList *pEList; /* The result set of the SELECT */ Table *pSrc; /* The table in the FROM clause of SELECT */ Index *pSrcIdx, *pDestIdx; /* Source and destination indices */ @@ -101180,11 +102673,11 @@ static int xferOptimization( ** the extra complication to make this rule less restrictive is probably ** not worth the effort. Ticket [6284df89debdfa61db8073e062908af0c9b6118e] */ - if( (pParse->db->flags & SQLITE_ForeignKeys)!=0 && pDest->pFKey!=0 ){ + if( (db->flags & SQLITE_ForeignKeys)!=0 && pDest->pFKey!=0 ){ return 0; } #endif - if( (pParse->db->flags & SQLITE_CountRows)!=0 ){ + if( (db->flags & SQLITE_CountRows)!=0 ){ return 0; /* xfer opt does not play well with PRAGMA count_changes */ } @@ -101195,7 +102688,7 @@ static int xferOptimization( #ifdef SQLITE_TEST sqlite3_xferopt_count++; #endif - iDbSrc = sqlite3SchemaToIndex(pParse->db, pSrc->pSchema); + iDbSrc = sqlite3SchemaToIndex(db, pSrc->pSchema); v = sqlite3GetVdbe(pParse); sqlite3CodeVerifySchema(pParse, iDbSrc); iSrc = pParse->nTab++; @@ -101205,14 +102698,18 @@ static int xferOptimization( regRowid = sqlite3GetTempReg(pParse); sqlite3OpenTable(pParse, iDest, iDbDest, pDest, OP_OpenWrite); assert( HasRowid(pDest) || destHasUniqueIdx ); - if( (pDest->iPKey<0 && pDest->pIndex!=0) /* (1) */ + if( (db->flags & SQLITE_Vacuum)==0 && ( + (pDest->iPKey<0 && pDest->pIndex!=0) /* (1) */ || destHasUniqueIdx /* (2) */ || (onError!=OE_Abort && onError!=OE_Rollback) /* (3) */ - ){ + )){ /* In some circumstances, we are able to run the xfer optimization - ** only if the destination table is initially empty. This code makes - ** that determination. Conditions under which the destination must - ** be empty: + ** only if the destination table is initially empty. Unless the + ** SQLITE_Vacuum flag is set, this block generates code to make + ** that determination. If SQLITE_Vacuum is set, then the destination + ** table is always empty. + ** + ** Conditions under which the destination must be empty: ** ** (1) There is no INTEGER PRIMARY KEY but there are indices. ** (If the destination is not initially empty, the rowid fields @@ -101255,6 +102752,7 @@ static int xferOptimization( sqlite3TableLock(pParse, iDbSrc, pSrc->tnum, 0, pSrc->zName); } for(pDestIdx=pDest->pIndex; pDestIdx; pDestIdx=pDestIdx->pNext){ + u8 idxInsFlags = 0; for(pSrcIdx=pSrc->pIndex; ALWAYS(pSrcIdx); pSrcIdx=pSrcIdx->pNext){ if( xferCompatibleIndex(pDestIdx, pSrcIdx) ) break; } @@ -101268,7 +102766,36 @@ static int xferOptimization( VdbeComment((v, "%s", pDestIdx->zName)); addr1 = sqlite3VdbeAddOp2(v, OP_Rewind, iSrc, 0); VdbeCoverage(v); sqlite3VdbeAddOp2(v, OP_RowKey, iSrc, regData); + if( db->flags & SQLITE_Vacuum ){ + /* This INSERT command is part of a VACUUM operation, which guarantees + ** that the destination table is empty. If all indexed columns use + ** collation sequence BINARY, then it can also be assumed that the + ** index will be populated by inserting keys in strictly sorted + ** order. In this case, instead of seeking within the b-tree as part + ** of every OP_IdxInsert opcode, an OP_Last is added before the + ** OP_IdxInsert to seek to the point within the b-tree where each key + ** should be inserted. This is faster. + ** + ** If any of the indexed columns use a collation sequence other than + ** BINARY, this optimization is disabled. This is because the user + ** might change the definition of a collation sequence and then run + ** a VACUUM command. In that case keys may not be written in strictly + ** sorted order. */ + for(i=0; inColumn; i++){ + char *zColl = pSrcIdx->azColl[i]; + assert( zColl!=0 ); + if( sqlite3_stricmp("BINARY", zColl) ) break; + } + if( i==pSrcIdx->nColumn ){ + idxInsFlags = OPFLAG_USESEEKRESULT; + sqlite3VdbeAddOp3(v, OP_Last, iDest, 0, -1); + } + } + if( !HasRowid(pSrc) && pDestIdx->idxType==2 ){ + idxInsFlags |= OPFLAG_NCHANGE; + } sqlite3VdbeAddOp3(v, OP_IdxInsert, iDest, regData, 1); + sqlite3VdbeChangeP5(v, idxInsFlags); sqlite3VdbeAddOp2(v, OP_Next, iSrc, addr1+1); VdbeCoverage(v); sqlite3VdbeJumpHere(v, addr1); sqlite3VdbeAddOp2(v, OP_Close, iSrc, 0); @@ -101307,6 +102834,7 @@ static int xferOptimization( ** accessed by users of the library. */ +/* #include "sqliteInt.h" */ /* ** Execute SQL code. Return one of the SQLITE_ success/failure @@ -101475,6 +103003,7 @@ exec_out: */ #ifndef _SQLITE3EXT_H_ #define _SQLITE3EXT_H_ +/* #include "sqlite3.h" */ typedef struct sqlite3_api_routines sqlite3_api_routines; @@ -101724,6 +103253,11 @@ struct sqlite3_api_routines { void (*result_text64)(sqlite3_context*,const char*,sqlite3_uint64, void(*)(void*), unsigned char); int (*strglob)(const char*,const char*); + /* Version 3.8.11 and later */ + sqlite3_value *(*value_dup)(const sqlite3_value*); + void (*value_free)(sqlite3_value*); + int (*result_zeroblob64)(sqlite3_context*,sqlite3_uint64); + int (*bind_zeroblob64)(sqlite3_stmt*, int, sqlite3_uint64); }; /* @@ -101954,6 +103488,11 @@ struct sqlite3_api_routines { #define sqlite3_result_blob64 sqlite3_api->result_blob64 #define sqlite3_result_text64 sqlite3_api->result_text64 #define sqlite3_strglob sqlite3_api->strglob +/* Version 3.8.11 and later */ +#define sqlite3_value_dup sqlite3_api->value_dup +#define sqlite3_value_free sqlite3_api->value_free +#define sqlite3_result_zeroblob64 sqlite3_api->result_zeroblob64 +#define sqlite3_bind_zeroblob64 sqlite3_api->bind_zeroblob64 #endif /* SQLITE_CORE */ #ifndef SQLITE_CORE @@ -101975,6 +103514,7 @@ struct sqlite3_api_routines { /************** End of sqlite3ext.h ******************************************/ /************** Continuing where we left off in loadext.c ********************/ +/* #include "sqliteInt.h" */ /* #include */ #ifndef SQLITE_OMIT_LOAD_EXTENSION @@ -102359,7 +103899,12 @@ static const sqlite3_api_routines sqlite3Apis = { sqlite3_reset_auto_extension, sqlite3_result_blob64, sqlite3_result_text64, - sqlite3_strglob + sqlite3_strglob, + /* Version 3.8.11 and later */ + (sqlite3_value*(*)(const sqlite3_value*))sqlite3_value_dup, + sqlite3_value_free, + sqlite3_result_zeroblob64, + sqlite3_bind_zeroblob64 }; /* @@ -102387,7 +103932,7 @@ static int sqlite3LoadExtension( const char *zEntry; char *zAltEntry = 0; void **aHandle; - int nMsg = 300 + sqlite3Strlen30(zFile); + u64 nMsg = 300 + sqlite3Strlen30(zFile); int ii; /* Shared library endings to try if zFile cannot be loaded as written */ @@ -102430,7 +103975,7 @@ static int sqlite3LoadExtension( #endif if( handle==0 ){ if( pzErrMsg ){ - *pzErrMsg = zErrmsg = sqlite3_malloc(nMsg); + *pzErrMsg = zErrmsg = sqlite3_malloc64(nMsg); if( zErrmsg ){ sqlite3_snprintf(nMsg, zErrmsg, "unable to open shared library [%s]", zFile); @@ -102456,7 +104001,7 @@ static int sqlite3LoadExtension( if( xInit==0 && zProc==0 ){ int iFile, iEntry, c; int ncFile = sqlite3Strlen30(zFile); - zAltEntry = sqlite3_malloc(ncFile+30); + zAltEntry = sqlite3_malloc64(ncFile+30); if( zAltEntry==0 ){ sqlite3OsDlClose(pVfs, handle); return SQLITE_NOMEM; @@ -102478,7 +104023,7 @@ static int sqlite3LoadExtension( if( xInit==0 ){ if( pzErrMsg ){ nMsg += sqlite3Strlen30(zEntry); - *pzErrMsg = zErrmsg = sqlite3_malloc(nMsg); + *pzErrMsg = zErrmsg = sqlite3_malloc64(nMsg); if( zErrmsg ){ sqlite3_snprintf(nMsg, zErrmsg, "no entry point [%s] in shared library [%s]", zEntry, zFile); @@ -102577,7 +104122,7 @@ static const sqlite3_api_routines sqlite3Apis = { 0 }; */ typedef struct sqlite3AutoExtList sqlite3AutoExtList; static SQLITE_WSD struct sqlite3AutoExtList { - int nExt; /* Number of entries in aExt[] */ + u32 nExt; /* Number of entries in aExt[] */ void (**aExt)(void); /* Pointers to the extension init functions */ } sqlite3Autoext = { 0, 0 }; @@ -102610,7 +104155,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_auto_extension(void (*xInit)(void)){ }else #endif { - int i; + u32 i; #if SQLITE_THREADSAFE sqlite3_mutex *mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); #endif @@ -102620,9 +104165,9 @@ SQLITE_API int SQLITE_STDCALL sqlite3_auto_extension(void (*xInit)(void)){ if( wsdAutoext.aExt[i]==xInit ) break; } if( i==wsdAutoext.nExt ){ - int nByte = (wsdAutoext.nExt+1)*sizeof(wsdAutoext.aExt[0]); + u64 nByte = (wsdAutoext.nExt+1)*sizeof(wsdAutoext.aExt[0]); void (**aNew)(void); - aNew = sqlite3_realloc(wsdAutoext.aExt, nByte); + aNew = sqlite3_realloc64(wsdAutoext.aExt, nByte); if( aNew==0 ){ rc = SQLITE_NOMEM; }else{ @@ -102654,7 +104199,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_cancel_auto_extension(void (*xInit)(void)) int n = 0; wsdAutoextInit; sqlite3_mutex_enter(mutex); - for(i=wsdAutoext.nExt-1; i>=0; i--){ + for(i=(int)wsdAutoext.nExt-1; i>=0; i--){ if( wsdAutoext.aExt[i]==xInit ){ wsdAutoext.nExt--; wsdAutoext.aExt[i] = wsdAutoext.aExt[wsdAutoext.nExt]; @@ -102692,7 +104237,7 @@ SQLITE_API void SQLITE_STDCALL sqlite3_reset_auto_extension(void){ ** If anything goes wrong, set an error in the database connection. */ SQLITE_PRIVATE void sqlite3AutoLoadExtensions(sqlite3 *db){ - int i; + u32 i; int go = 1; int rc; int (*xInit)(sqlite3*,char**,const sqlite3_api_routines*); @@ -102741,6 +104286,7 @@ SQLITE_PRIVATE void sqlite3AutoLoadExtensions(sqlite3 *db){ ************************************************************************* ** This file contains code used to implement the PRAGMA command. */ +/* #include "sqliteInt.h" */ #if !defined(SQLITE_ENABLE_LOCKING_STYLE) # if defined(__APPLE__) @@ -102847,7 +104393,7 @@ static const struct sPragmaNames { #if !defined(SQLITE_OMIT_PAGER_PRAGMAS) { /* zName: */ "cache_size", /* ePragTyp: */ PragTyp_CACHE_SIZE, - /* ePragFlag: */ PragFlag_NeedSchema, + /* ePragFlag: */ 0, /* iArg: */ 0 }, #endif #if !defined(SQLITE_OMIT_FLAG_PRAGMAS) @@ -102860,6 +104406,10 @@ static const struct sPragmaNames { /* ePragTyp: */ PragTyp_CASE_SENSITIVE_LIKE, /* ePragFlag: */ 0, /* iArg: */ 0 }, + { /* zName: */ "cell_size_check", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlag: */ 0, + /* iArg: */ SQLITE_CellSizeCk }, #if !defined(SQLITE_OMIT_FLAG_PRAGMAS) { /* zName: */ "checkpoint_fullfsync", /* ePragTyp: */ PragTyp_FLAG, @@ -103217,7 +104767,7 @@ static const struct sPragmaNames { /* iArg: */ SQLITE_WriteSchema|SQLITE_RecoveryMode }, #endif }; -/* Number of pragmas: 59 on by default, 72 total. */ +/* Number of pragmas: 60 on by default, 73 total. */ /************** End of pragma.h **********************************************/ /************** Continuing where we left off in pragma.c *********************/ @@ -103356,15 +104906,15 @@ static int changeTempStorage(Parse *pParse, const char *zStorageType){ */ static void returnSingleInt(Parse *pParse, const char *zLabel, i64 value){ Vdbe *v = sqlite3GetVdbe(pParse); - int mem = ++pParse->nMem; + int nMem = ++pParse->nMem; i64 *pI64 = sqlite3DbMallocRaw(pParse->db, sizeof(value)); if( pI64 ){ memcpy(pI64, &value, sizeof(value)); } - sqlite3VdbeAddOp4(v, OP_Int64, 0, mem, 0, (char*)pI64, P4_INT64); + sqlite3VdbeAddOp4(v, OP_Int64, 0, nMem, 0, (char*)pI64, P4_INT64); sqlite3VdbeSetNumCols(v, 1); sqlite3VdbeSetColName(v, 0, COLNAME_NAME, zLabel, SQLITE_STATIC); - sqlite3VdbeAddOp2(v, OP_ResultRow, mem, 1); + sqlite3VdbeAddOp2(v, OP_ResultRow, nMem, 1); } @@ -103529,11 +105079,11 @@ SQLITE_PRIVATE void sqlite3Pragma( rc = sqlite3_file_control(db, zDb, SQLITE_FCNTL_PRAGMA, (void*)aFcntl); if( rc==SQLITE_OK ){ if( aFcntl[0] ){ - int mem = ++pParse->nMem; - sqlite3VdbeAddOp4(v, OP_String8, 0, mem, 0, aFcntl[0], 0); + int nMem = ++pParse->nMem; + sqlite3VdbeAddOp4(v, OP_String8, 0, nMem, 0, aFcntl[0], 0); sqlite3VdbeSetNumCols(v, 1); sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "result", SQLITE_STATIC); - sqlite3VdbeAddOp2(v, OP_ResultRow, mem, 1); + sqlite3VdbeAddOp2(v, OP_ResultRow, nMem, 1); sqlite3_free(aFcntl[0]); } goto pragma_out; @@ -103913,11 +105463,13 @@ SQLITE_PRIVATE void sqlite3Pragma( case PragTyp_CACHE_SIZE: { assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); if( !zRight ){ + if( sqlite3ReadSchema(pParse) ) goto pragma_out; returnSingleInt(pParse, "cache_size", pDb->pSchema->cache_size); }else{ int size = sqlite3Atoi(zRight); pDb->pSchema->cache_size = size; sqlite3BtreeSetCacheSize(pDb->pBt, pDb->pSchema->cache_size); + if( sqlite3ReadSchema(pParse) ) goto pragma_out; } break; } @@ -104138,7 +105690,9 @@ SQLITE_PRIVATE void sqlite3Pragma( sqlite3ErrorMsg(pParse, "Safety level may not be changed inside a transaction"); }else{ - pDb->safety_level = getSafetyLevel(zRight,0,1)+1; + int iLevel = (getSafetyLevel(zRight,0,1)+1) & PAGER_SYNCHRONOUS_MASK; + if( iLevel==0 ) iLevel = 1; + pDb->safety_level = iLevel; setAllPagerFlags(db); } } @@ -104233,7 +105787,7 @@ SQLITE_PRIVATE void sqlite3Pragma( }else if( pPk==0 ){ k = 1; }else{ - for(k=1; ALWAYS(k<=pTab->nCol) && pPk->aiColumn[k-1]!=i; k++){} + for(k=1; k<=pTab->nCol && pPk->aiColumn[k-1]!=i; k++){} } sqlite3VdbeAddOp2(v, OP_Integer, k, 6); sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 6); @@ -105186,6 +106740,7 @@ pragma_out: ** interface, and routines that contribute to loading the database schema ** from disk. */ +/* #include "sqliteInt.h" */ /* ** Fill the InitData structure with an error message that indicates @@ -105198,13 +106753,13 @@ static void corruptSchema( ){ sqlite3 *db = pData->db; if( !db->mallocFailed && (db->flags & SQLITE_RecoveryMode)==0 ){ + char *z; if( zObj==0 ) zObj = "?"; - sqlite3SetString(pData->pzErrMsg, db, - "malformed database schema (%s)", zObj); - if( zExtra ){ - *pData->pzErrMsg = sqlite3MAppendf(db, *pData->pzErrMsg, - "%s - %s", *pData->pzErrMsg, zExtra); - } + z = sqlite3_mprintf("malformed database schema (%s)", zObj); + if( z && zExtra ) z = sqlite3_mprintf("%z - %s", z, zExtra); + sqlite3DbFree(db, *pData->pzErrMsg); + *pData->pzErrMsg = z; + if( z==0 ) db->mallocFailed = 1; } pData->rc = db->mallocFailed ? SQLITE_NOMEM : SQLITE_CORRUPT_BKPT; } @@ -105239,7 +106794,7 @@ SQLITE_PRIVATE int sqlite3InitCallback(void *pInit, int argc, char **argv, char if( argv==0 ) return 0; /* Might happen if EMPTY_RESULT_CALLBACKS are on */ if( argv[1]==0 ){ corruptSchema(pData, argv[0], 0); - }else if( argv[2] && argv[2][0] ){ + }else if( sqlite3_strnicmp(argv[2],"create ",7)==0 ){ /* Call the parser to process a CREATE TABLE, INDEX or VIEW. ** But because db->init.busy is set to 1, no VDBE code is generated ** or executed. All the parser does is build the internal data @@ -105270,8 +106825,8 @@ SQLITE_PRIVATE int sqlite3InitCallback(void *pInit, int argc, char **argv, char } } sqlite3_finalize(pStmt); - }else if( argv[0]==0 ){ - corruptSchema(pData, 0, 0); + }else if( argv[0]==0 || (argv[2]!=0 && argv[2][0]!=0) ){ + corruptSchema(pData, argv[0], 0); }else{ /* If the SQL column is blank it means this is an index that ** was created to be the PRIMARY KEY or to fulfill a UNIQUE @@ -105396,7 +106951,7 @@ static int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg){ if( !sqlite3BtreeIsInReadTrans(pDb->pBt) ){ rc = sqlite3BtreeBeginTrans(pDb->pBt, 0); if( rc!=SQLITE_OK ){ - sqlite3SetString(pzErrMsg, db, "%s", sqlite3ErrStr(rc)); + sqlite3SetString(pzErrMsg, db, sqlite3ErrStr(rc)); goto initone_error_out; } openedTransaction = 1; @@ -106080,6 +107635,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_prepare16_v2( ** This file contains C code routines that are called by the parser ** to handle SELECT statements in SQLite. */ +/* #include "sqliteInt.h" */ /* ** Trace output macros @@ -106088,7 +107644,8 @@ SQLITE_API int SQLITE_STDCALL sqlite3_prepare16_v2( /***/ int sqlite3SelectTrace = 0; # define SELECTTRACE(K,P,S,X) \ if(sqlite3SelectTrace&(K)) \ - sqlite3DebugPrintf("%*s%s.%p: ",(P)->nSelectIndent*2-2,"",(S)->zSelName,(S)),\ + sqlite3DebugPrintf("%*s%s.%p: ",(P)->nSelectIndent*2-2,"",\ + (S)->zSelName,(S)),\ sqlite3DebugPrintf X #else # define SELECTTRACE(K,P,S,X) @@ -106178,7 +107735,6 @@ SQLITE_PRIVATE Select *sqlite3SelectNew( Select standin; sqlite3 *db = pParse->db; pNew = sqlite3DbMallocZero(db, sizeof(*pNew) ); - assert( db->mallocFailed || !pOffset || pLimit ); /* OFFSET implies LIMIT */ if( pNew==0 ){ assert( db->mallocFailed ); pNew = &standin; @@ -106198,7 +107754,7 @@ SQLITE_PRIVATE Select *sqlite3SelectNew( pNew->op = TK_SELECT; pNew->pLimit = pLimit; pNew->pOffset = pOffset; - assert( pOffset==0 || pLimit!=0 ); + assert( pOffset==0 || pLimit!=0 || pParse->nErr>0 || db->mallocFailed!=0 ); pNew->addrOpenEphm[0] = -1; pNew->addrOpenEphm[1] = -1; if( db->mallocFailed ) { @@ -106433,6 +107989,12 @@ static void setJoinExpr(Expr *p, int iTable){ assert( !ExprHasProperty(p, EP_TokenOnly|EP_Reduced) ); ExprSetVVAProperty(p, EP_NoReduce); p->iRightJoinTable = (i16)iTable; + if( p->op==TK_FUNCTION && p->x.pList ){ + int i; + for(i=0; ix.pList->nExpr; i++){ + setJoinExpr(p->x.pList->a[i].pExpr, iTable); + } + } setJoinExpr(p->pLeft, iTable); p = p->pRight; } @@ -106781,8 +108343,13 @@ static void selectInnerLoop( /* If the destination is an EXISTS(...) expression, the actual ** values returned by the SELECT are not required. */ - sqlite3ExprCodeExprList(pParse, pEList, regResult, - (eDest==SRT_Output||eDest==SRT_Coroutine)?SQLITE_ECEL_DUP:0); + u8 ecelFlags; + if( eDest==SRT_Mem || eDest==SRT_Output || eDest==SRT_Coroutine ){ + ecelFlags = SQLITE_ECEL_DUP; + }else{ + ecelFlags = 0; + } + sqlite3ExprCodeExprList(pParse, pEList, regResult, ecelFlags); } /* If the DISTINCT keyword was present on the SELECT statement @@ -106837,7 +108404,8 @@ static void selectInnerLoop( default: { assert( pDistinct->eTnctType==WHERE_DISTINCT_UNORDERED ); - codeDistinct(pParse, pDistinct->tabTnct, iContinue, nResultCol, regResult); + codeDistinct(pParse, pDistinct->tabTnct, iContinue, nResultCol, + regResult); break; } } @@ -106879,6 +108447,8 @@ static void selectInnerLoop( int r1 = sqlite3GetTempRange(pParse, nPrefixReg+1); testcase( eDest==SRT_Table ); testcase( eDest==SRT_EphemTab ); + testcase( eDest==SRT_Fifo ); + testcase( eDest==SRT_DistFifo ); sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r1+nPrefixReg); #ifndef SQLITE_OMIT_CTE if( eDest==SRT_DistFifo ){ @@ -106888,7 +108458,8 @@ static void selectInnerLoop( ** current row to the index and proceed with writing it to the ** output table as well. */ int addr = sqlite3VdbeCurrentAddr(v) + 4; - sqlite3VdbeAddOp4Int(v, OP_Found, iParm+1, addr, r1, 0); VdbeCoverage(v); + sqlite3VdbeAddOp4Int(v, OP_Found, iParm+1, addr, r1, 0); + VdbeCoverage(v); sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm+1, r1); assert( pSort==0 ); } @@ -107134,7 +108705,6 @@ static KeyInfo *keyInfoFromExprList( return pInfo; } -#ifndef SQLITE_OMIT_COMPOUND_SELECT /* ** Name of the connection operator, used for error messages. */ @@ -107148,7 +108718,6 @@ static const char *selectOpName(int id){ } return z; } -#endif /* SQLITE_OMIT_COMPOUND_SELECT */ #ifndef SQLITE_OMIT_EXPLAIN /* @@ -107294,10 +108863,7 @@ static void generateSortTail( VdbeComment((v, "%s", aOutEx[i].zName ? aOutEx[i].zName : aOutEx[i].zSpan)); } switch( eDest ){ - case SRT_Table: case SRT_EphemTab: { - testcase( eDest==SRT_Table ); - testcase( eDest==SRT_EphemTab ); sqlite3VdbeAddOp2(v, OP_NewRowid, iParm, regRowid); sqlite3VdbeAddOp3(v, OP_Insert, iParm, regRow, regRowid); sqlite3VdbeChangeP5(v, OPFLAG_APPEND); @@ -107374,28 +108940,27 @@ static void generateSortTail( */ #ifdef SQLITE_ENABLE_COLUMN_METADATA # define columnType(A,B,C,D,E,F) columnTypeImpl(A,B,C,D,E,F) +#else /* if !defined(SQLITE_ENABLE_COLUMN_METADATA) */ +# define columnType(A,B,C,D,E,F) columnTypeImpl(A,B,F) +#endif static const char *columnTypeImpl( NameContext *pNC, Expr *pExpr, +#ifdef SQLITE_ENABLE_COLUMN_METADATA const char **pzOrigDb, const char **pzOrigTab, const char **pzOrigCol, +#endif u8 *pEstWidth ){ - char const *zOrigDb = 0; - char const *zOrigTab = 0; - char const *zOrigCol = 0; -#else /* if !defined(SQLITE_ENABLE_COLUMN_METADATA) */ -# define columnType(A,B,C,D,E,F) columnTypeImpl(A,B,F) -static const char *columnTypeImpl( - NameContext *pNC, - Expr *pExpr, - u8 *pEstWidth -){ -#endif /* !defined(SQLITE_ENABLE_COLUMN_METADATA) */ char const *zType = 0; int j; u8 estWidth = 1; +#ifdef SQLITE_ENABLE_COLUMN_METADATA + char const *zOrigDb = 0; + char const *zOrigTab = 0; + char const *zOrigCol = 0; +#endif if( NEVER(pExpr==0) || pNC->pSrcList==0 ) return 0; switch( pExpr->op ){ @@ -107452,6 +109017,9 @@ static const char *columnTypeImpl( /* If iCol is less than zero, then the expression requests the ** rowid of the sub-select or view. This expression is legal (see ** test case misc2.2.2) - it always evaluates to NULL. + ** + ** The ALWAYS() is because iCol>=pS->pEList->nExpr will have been + ** caught already by name resolution. */ NameContext sNC; Expr *p = pS->pEList->a[iCol].pExpr; @@ -107768,12 +109336,15 @@ static void selectAddColumnTypeAndCollation( a = pSelect->pEList->a; for(i=0, pCol=pTab->aCol; inCol; i++, pCol++){ p = a[i].pExpr; - pCol->zType = sqlite3DbStrDup(db, columnType(&sNC, p,0,0,0, &pCol->szEst)); + if( pCol->zType==0 ){ + pCol->zType = sqlite3DbStrDup(db, + columnType(&sNC, p,0,0,0, &pCol->szEst)); + } szAll += pCol->szEst; pCol->affinity = sqlite3ExprAffinity(p); - if( pCol->affinity==0 ) pCol->affinity = SQLITE_AFF_NONE; + if( pCol->affinity==0 ) pCol->affinity = SQLITE_AFF_BLOB; pColl = sqlite3ExprCollSeq(pParse, p); - if( pColl ){ + if( pColl && pCol->zColl==0 ){ pCol->zColl = sqlite3DbStrDup(db, pColl->zName); } } @@ -107927,7 +109498,10 @@ static CollSeq *multiSelectCollSeq(Parse *pParse, Select *p, int iCol){ pRet = 0; } assert( iCol>=0 ); - if( pRet==0 && iColpEList->nExpr ){ + /* iCol must be less than p->pEList->nExpr. Otherwise an error would + ** have been thrown during name resolution and we would not have gotten + ** this far */ + if( pRet==0 && ALWAYS(iColpEList->nExpr) ){ pRet = sqlite3ExprCollSeq(pParse, p->pEList->a[iCol].pExpr); } return pRet; @@ -108117,10 +109691,14 @@ static void generateWithRecursiveQuery( /* Execute the recursive SELECT taking the single row in Current as ** the value for the recursive-table. Store the results in the Queue. */ - p->pPrior = 0; - sqlite3Select(pParse, p, &destQueue); - assert( p->pPrior==0 ); - p->pPrior = pSetup; + if( p->selFlags & SF_Aggregate ){ + sqlite3ErrorMsg(pParse, "recursive aggregate queries not supported"); + }else{ + p->pPrior = 0; + sqlite3Select(pParse, p, &destQueue); + assert( p->pPrior==0 ); + p->pPrior = pSetup; + } /* Keep running the loop until the Queue is empty */ sqlite3VdbeAddOp2(v, OP_Goto, 0, addrTop); @@ -108142,19 +109720,6 @@ static int multiSelectOrderBy( SelectDest *pDest /* What to do with query results */ ); -/* -** Error message for when two or more terms of a compound select have different -** size result sets. -*/ -static void selectWrongNumTermsError(Parse *pParse, Select *p){ - if( p->selFlags & SF_Values ){ - sqlite3ErrorMsg(pParse, "all VALUES must have the same number of terms"); - }else{ - sqlite3ErrorMsg(pParse, "SELECTs to the left and right of %s" - " do not have the same number of result columns", selectOpName(p->op)); - } -} - /* ** Handle the special case of a compound-select that originates from a ** VALUES clause. By handling this as a special case, we avoid deep @@ -108172,20 +109737,15 @@ static int multiSelectValues( SelectDest *pDest /* What to do with query results */ ){ Select *pPrior; - int nExpr = p->pEList->nExpr; int nRow = 1; int rc = 0; - assert( p->pNext==0 ); - assert( p->selFlags & SF_AllValues ); + assert( p->selFlags & SF_MultiValue ); do{ assert( p->selFlags & SF_Values ); assert( p->op==TK_ALL || (p->op==TK_SELECT && p->pPrior==0) ); assert( p->pLimit==0 ); assert( p->pOffset==0 ); - if( p->pEList->nExpr!=nExpr ){ - selectWrongNumTermsError(pParse, p); - return 1; - } + assert( p->pNext==0 || p->pEList->nExpr==p->pNext->pEList->nExpr ); if( p->pPrior==0 ) break; assert( p->pPrior->pNext==p ); p = p->pPrior; @@ -108285,7 +109845,7 @@ static int multiSelect( /* Special handling for a compound-select that originates as a VALUES clause. */ - if( p->selFlags & SF_AllValues ){ + if( p->selFlags & SF_MultiValue ){ rc = multiSelectValues(pParse, p, &dest); goto multi_select_end; } @@ -108294,11 +109854,7 @@ static int multiSelect( ** in their result sets. */ assert( p->pEList && pPrior->pEList ); - if( p->pEList->nExpr!=pPrior->pEList->nExpr ){ - selectWrongNumTermsError(pParse, p); - rc = 1; - goto multi_select_end; - } + assert( p->pEList->nExpr==pPrior->pEList->nExpr ); #ifndef SQLITE_OMIT_CTE if( p->selFlags & SF_Recursive ){ @@ -108590,6 +110146,19 @@ multi_select_end: } #endif /* SQLITE_OMIT_COMPOUND_SELECT */ +/* +** Error message for when two or more terms of a compound select have different +** size result sets. +*/ +SQLITE_PRIVATE void sqlite3SelectWrongNumTermsError(Parse *pParse, Select *p){ + if( p->selFlags & SF_Values ){ + sqlite3ErrorMsg(pParse, "all VALUES must have the same number of terms"); + }else{ + sqlite3ErrorMsg(pParse, "SELECTs to the left and right of %s" + " do not have the same number of result columns", selectOpName(p->op)); + } +} + /* ** Code an output subroutine for a coroutine implementation of a ** SELECT statment. @@ -108645,15 +110214,14 @@ static int generateOutputSubroutine( */ codeOffset(v, p->iOffset, iContinue); + assert( pDest->eDest!=SRT_Exists ); + assert( pDest->eDest!=SRT_Table ); switch( pDest->eDest ){ /* Store the result as data using a unique key. */ - case SRT_Table: case SRT_EphemTab: { int r1 = sqlite3GetTempReg(pParse); int r2 = sqlite3GetTempReg(pParse); - testcase( pDest->eDest==SRT_Table ); - testcase( pDest->eDest==SRT_EphemTab ); sqlite3VdbeAddOp3(v, OP_MakeRecord, pIn->iSdst, pIn->nSdst, r1); sqlite3VdbeAddOp2(v, OP_NewRowid, pDest->iSDParm, r2); sqlite3VdbeAddOp3(v, OP_Insert, pDest->iSDParm, r1, r2); @@ -108670,7 +110238,7 @@ static int generateOutputSubroutine( */ case SRT_Set: { int r1; - assert( pIn->nSdst==1 ); + assert( pIn->nSdst==1 || pParse->nErr>0 ); pDest->affSdst = sqlite3CompareAffinity(p->pEList->a[0].pExpr, pDest->affSdst); r1 = sqlite3GetTempReg(pParse); @@ -108681,22 +110249,12 @@ static int generateOutputSubroutine( break; } -#if 0 /* Never occurs on an ORDER BY query */ - /* If any row exist in the result set, record that fact and abort. - */ - case SRT_Exists: { - sqlite3VdbeAddOp2(v, OP_Integer, 1, pDest->iSDParm); - /* The LIMIT clause will terminate the loop for us */ - break; - } -#endif - /* If this is a scalar select that is part of an expression, then ** store the results in the appropriate memory cell and break out ** of the scan loop. */ case SRT_Mem: { - assert( pIn->nSdst==1 ); + assert( pIn->nSdst==1 || pParse->nErr>0 ); testcase( pIn->nSdst!=1 ); sqlite3ExprCodeMove(pParse, pIn->iSdst, pDest->iSDParm, 1); /* The LIMIT clause will jump out of the loop for us */ break; @@ -108711,7 +110269,7 @@ static int generateOutputSubroutine( pDest->iSdst = sqlite3GetTempRange(pParse, pIn->nSdst); pDest->nSdst = pIn->nSdst; } - sqlite3ExprCodeMove(pParse, pIn->iSdst, pDest->iSdst, pDest->nSdst); + sqlite3ExprCodeMove(pParse, pIn->iSdst, pDest->iSdst, pIn->nSdst); sqlite3VdbeAddOp1(v, OP_Yield, pDest->iSDParm); break; } @@ -108927,8 +110485,8 @@ static int multiSelectOrderBy( if( aPermute ){ struct ExprList_item *pItem; for(i=0, pItem=pOrderBy->a; iu.x.iOrderByCol>0 - && pItem->u.x.iOrderByCol<=p->pEList->nExpr ); + assert( pItem->u.x.iOrderByCol>0 ); + assert( pItem->u.x.iOrderByCol<=p->pEList->nExpr ); aPermute[i] = pItem->u.x.iOrderByCol - 1; } pKeyMerge = multiSelectOrderByKeyInfo(pParse, p, 1); @@ -109138,7 +110696,7 @@ static int multiSelectOrderBy( /*** TBD: Insert subroutine calls to close cursors on incomplete **** subqueries ****/ explainComposite(pParse, p->op, iSub1, iSub2, 0); - return SQLITE_OK; + return pParse->nErr!=0; } #endif @@ -109287,8 +110845,8 @@ static void substSelect( ** ** (**) Restriction (10) was removed from the code on 2005-02-05 but we ** accidently carried the comment forward until 2014-09-15. Original -** text: "The subquery does not use aggregates or the outer query does not -** use LIMIT." +** text: "The subquery does not use aggregates or the outer query +** does not use LIMIT." ** ** (11) The subquery and the outer query do not both have ORDER BY clauses. ** @@ -109498,10 +111056,10 @@ static int flattenSubquery( testcase( (pSub1->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct ); testcase( (pSub1->selFlags & (SF_Distinct|SF_Aggregate))==SF_Aggregate ); assert( pSub->pSrc!=0 ); + assert( pSub->pEList->nExpr==pSub1->pEList->nExpr ); if( (pSub1->selFlags & (SF_Distinct|SF_Aggregate))!=0 || (pSub1->pPrior && pSub1->op!=TK_ALL) || pSub1->pSrc->nSrc<1 - || pSub->pEList->nExpr!=pSub1->pEList->nExpr ){ return 0; } @@ -109781,7 +111339,7 @@ static int flattenSubquery( #if SELECTTRACE_ENABLED if( sqlite3SelectTrace & 0x100 ){ - sqlite3DebugPrintf("After flattening:\n"); + SELECTTRACE(0x100,pParse,p,("After flattening:\n")); sqlite3TreeViewSelect(0, p, 0); } #endif @@ -109790,6 +111348,73 @@ static int flattenSubquery( } #endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */ + + +#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) +/* +** Make copies of relevant WHERE clause terms of the outer query into +** the WHERE clause of subquery. Example: +** +** SELECT * FROM (SELECT a AS x, c-d AS y FROM t1) WHERE x=5 AND y=10; +** +** Transformed into: +** +** SELECT * FROM (SELECT a AS x, c-d AS y FROM t1 WHERE a=5 AND c-d=10) +** WHERE x=5 AND y=10; +** +** The hope is that the terms added to the inner query will make it more +** efficient. +** +** Do not attempt this optimization if: +** +** (1) The inner query is an aggregate. (In that case, we'd really want +** to copy the outer WHERE-clause terms onto the HAVING clause of the +** inner query. But they probably won't help there so do not bother.) +** +** (2) The inner query is the recursive part of a common table expression. +** +** (3) The inner query has a LIMIT clause (since the changes to the WHERE +** close would change the meaning of the LIMIT). +** +** (4) The inner query is the right operand of a LEFT JOIN. (The caller +** enforces this restriction since this routine does not have enough +** information to know.) +** +** Return 0 if no changes are made and non-zero if one or more WHERE clause +** terms are duplicated into the subquery. +*/ +static int pushDownWhereTerms( + sqlite3 *db, /* The database connection (for malloc()) */ + Select *pSubq, /* The subquery whose WHERE clause is to be augmented */ + Expr *pWhere, /* The WHERE clause of the outer query */ + int iCursor /* Cursor number of the subquery */ +){ + Expr *pNew; + int nChng = 0; + if( pWhere==0 ) return 0; + if( (pSubq->selFlags & (SF_Aggregate|SF_Recursive))!=0 ){ + return 0; /* restrictions (1) and (2) */ + } + if( pSubq->pLimit!=0 ){ + return 0; /* restriction (3) */ + } + while( pWhere->op==TK_AND ){ + nChng += pushDownWhereTerms(db, pSubq, pWhere->pRight, iCursor); + pWhere = pWhere->pLeft; + } + if( sqlite3ExprIsTableConstant(pWhere, iCursor) ){ + nChng++; + while( pSubq ){ + pNew = sqlite3ExprDup(db, pWhere, 0); + pNew = substExpr(db, pNew, iCursor, pSubq->pEList); + pSubq->pWhere = sqlite3ExprAnd(db, pSubq->pWhere, pNew); + pSubq = pSubq->pPrior; + } + } + return nChng; +} +#endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */ + /* ** Based on the contents of the AggInfo structure indicated by the first ** argument, this function checks if the following are true: @@ -109873,16 +111498,16 @@ static Table *isSimpleCount(Select *p, AggInfo *pAggInfo){ ** pFrom->pIndex and return SQLITE_OK. */ SQLITE_PRIVATE int sqlite3IndexedByLookup(Parse *pParse, struct SrcList_item *pFrom){ - if( pFrom->pTab && pFrom->zIndex ){ + if( pFrom->pTab && pFrom->zIndexedBy ){ Table *pTab = pFrom->pTab; - char *zIndex = pFrom->zIndex; + char *zIndexedBy = pFrom->zIndexedBy; Index *pIdx; for(pIdx=pTab->pIndex; - pIdx && sqlite3StrICmp(pIdx->zName, zIndex); + pIdx && sqlite3StrICmp(pIdx->zName, zIndexedBy); pIdx=pIdx->pNext ); if( !pIdx ){ - sqlite3ErrorMsg(pParse, "no such index: %s", zIndex, 0); + sqlite3ErrorMsg(pParse, "no such index: %s", zIndexedBy, 0); pParse->checkSchema = 1; return SQLITE_ERROR; } @@ -109950,6 +111575,7 @@ static int convertCompoundSelectToSubquery(Walker *pWalker, Select *p){ pNew->pOrderBy = 0; p->pPrior = 0; p->pNext = 0; + p->pWith = 0; p->selFlags &= ~SF_Compound; assert( (p->selFlags & SF_Converted)==0 ); p->selFlags |= SF_Converted; @@ -110062,7 +111688,7 @@ static int withExpand( pTab->zName = sqlite3DbStrDup(db, pCte->zName); pTab->iPKey = -1; pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) ); - pTab->tabFlags |= TF_Ephemeral; + pTab->tabFlags |= TF_Ephemeral | TF_NoVisibleRowid; pFrom->pSelect = sqlite3SelectDup(db, pCte->pSelect, 0); if( db->mallocFailed ) return SQLITE_NOMEM; assert( pFrom->pSelect ); @@ -110307,13 +111933,6 @@ static int selectExpander(Walker *pWalker, Select *p){ int longNames = (flags & SQLITE_FullColNames)!=0 && (flags & SQLITE_ShortColNames)==0; - /* When processing FROM-clause subqueries, it is always the case - ** that full_column_names=OFF and short_column_names=ON. The - ** sqlite3ResultSetOfSelect() routine makes it so. */ - assert( (p->selFlags & SF_NestedFrom)==0 - || ((flags & SQLITE_FullColNames)==0 && - (flags & SQLITE_ShortColNames)!=0) ); - for(k=0; knExpr; k++){ pE = a[k].pExpr; pRight = pE->pRight; @@ -110488,7 +112107,7 @@ static void sqlite3SelectExpand(Parse *pParse, Select *pSelect){ sqlite3WalkSelect(&w, pSelect); } w.xSelectCallback = selectExpander; - if( (pSelect->selFlags & SF_AllValues)==0 ){ + if( (pSelect->selFlags & SF_MultiValue)==0 ){ w.xSelectCallback2 = selectPopWith; } sqlite3WalkSelect(&w, pSelect); @@ -110674,7 +112293,8 @@ static void updateAccumulator(Parse *pParse, AggInfo *pAggInfo){ } if( pF->iDistinct>=0 ){ addrNext = sqlite3VdbeMakeLabel(v); - assert( nArg==1 ); + testcase( nArg==0 ); /* Error condition */ + testcase( nArg>1 ); /* Also an error */ codeDistinct(pParse, pF->iDistinct, addrNext, 1, regAgg); } if( pF->pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL ){ @@ -110691,7 +112311,7 @@ static void updateAccumulator(Parse *pParse, AggInfo *pAggInfo){ if( regHit==0 && pAggInfo->nAccumulator ) regHit = ++pParse->nMem; sqlite3VdbeAddOp4(v, OP_CollSeq, regHit, 0, 0, (char *)pColl, P4_COLLSEQ); } - sqlite3VdbeAddOp4(v, OP_AggStep, 0, regAgg, pF->iMem, + sqlite3VdbeAddOp4(v, OP_AggStep0, 0, regAgg, pF->iMem, (void*)pF->pFunc, P4_FUNCDEF); sqlite3VdbeChangeP5(v, (u8)nArg); sqlite3ExprCacheAffinityChange(pParse, regAgg, nArg); @@ -110774,7 +112394,7 @@ SQLITE_PRIVATE int sqlite3Select( WhereInfo *pWInfo; /* Return from sqlite3WhereBegin() */ Vdbe *v; /* The virtual machine under construction */ int isAgg; /* True for select lists like "count(*)" */ - ExprList *pEList; /* List of columns to extract. */ + ExprList *pEList = 0; /* List of columns to extract. */ SrcList *pTabList; /* List of tables to select from */ Expr *pWhere; /* The WHERE clause. May be NULL */ ExprList *pGroupBy; /* The GROUP BY clause. May be NULL */ @@ -110824,12 +112444,11 @@ SQLITE_PRIVATE int sqlite3Select( memset(&sSort, 0, sizeof(sSort)); sSort.pOrderBy = p->pOrderBy; pTabList = p->pSrc; - pEList = p->pEList; if( pParse->nErr || db->mallocFailed ){ goto select_end; } + assert( p->pEList!=0 ); isAgg = (p->selFlags & SF_Aggregate)!=0; - assert( pEList!=0 ); #if SELECTTRACE_ENABLED if( sqlite3SelectTrace & 0x100 ){ SELECTTRACE(0x100,pParse,p, ("after name resolution:\n")); @@ -110838,29 +112457,67 @@ SQLITE_PRIVATE int sqlite3Select( #endif - /* Begin generating code. - */ - v = sqlite3GetVdbe(pParse); - if( v==0 ) goto select_end; - /* If writing to memory or generating a set ** only a single column may be output. */ #ifndef SQLITE_OMIT_SUBQUERY - if( checkForMultiColumnSelectError(pParse, pDest, pEList->nExpr) ){ + if( checkForMultiColumnSelectError(pParse, pDest, p->pEList->nExpr) ){ goto select_end; } #endif - /* Generate code for all sub-queries in the FROM clause + /* Try to flatten subqueries in the FROM clause up into the main query */ #if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) for(i=0; !p->pPrior && inSrc; i++){ struct SrcList_item *pItem = &pTabList->a[i]; - SelectDest dest; Select *pSub = pItem->pSelect; int isAggSub; + if( pSub==0 ) continue; + isAggSub = (pSub->selFlags & SF_Aggregate)!=0; + if( flattenSubquery(pParse, p, i, isAgg, isAggSub) ){ + /* This subquery can be absorbed into its parent. */ + if( isAggSub ){ + isAgg = 1; + p->selFlags |= SF_Aggregate; + } + i = -1; + } + pTabList = p->pSrc; + if( db->mallocFailed ) goto select_end; + if( !IgnorableOrderby(pDest) ){ + sSort.pOrderBy = p->pOrderBy; + } + } +#endif + + /* Get a pointer the VDBE under construction, allocating a new VDBE if one + ** does not already exist */ + v = sqlite3GetVdbe(pParse); + if( v==0 ) goto select_end; +#ifndef SQLITE_OMIT_COMPOUND_SELECT + /* Handle compound SELECT statements using the separate multiSelect() + ** procedure. + */ + if( p->pPrior ){ + rc = multiSelect(pParse, p, pDest); + explainSetInteger(pParse->iSelectId, iRestoreSelectId); +#if SELECTTRACE_ENABLED + SELECTTRACE(1,pParse,p,("end compound-select processing\n")); + pParse->nSelectIndent--; +#endif + return rc; + } +#endif + + /* Generate code for all sub-queries in the FROM clause + */ +#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) + for(i=0; inSrc; i++){ + struct SrcList_item *pItem = &pTabList->a[i]; + SelectDest dest; + Select *pSub = pItem->pSelect; if( pSub==0 ) continue; /* Sometimes the code for a subquery will be generated more than @@ -110885,16 +112542,25 @@ SQLITE_PRIVATE int sqlite3Select( */ pParse->nHeight += sqlite3SelectExprHeight(p); - isAggSub = (pSub->selFlags & SF_Aggregate)!=0; - if( flattenSubquery(pParse, p, i, isAgg, isAggSub) ){ - /* This subquery can be absorbed into its parent. */ - if( isAggSub ){ - isAgg = 1; - p->selFlags |= SF_Aggregate; + /* Make copies of constant WHERE-clause terms in the outer query down + ** inside the subquery. This can help the subquery to run more efficiently. + */ + if( (pItem->jointype & JT_OUTER)==0 + && pushDownWhereTerms(db, pSub, p->pWhere, pItem->iCursor) + ){ +#if SELECTTRACE_ENABLED + if( sqlite3SelectTrace & 0x100 ){ + SELECTTRACE(0x100,pParse,p,("After WHERE-clause push-down:\n")); + sqlite3TreeViewSelect(0, p, 0); } - i = -1; - }else if( pTabList->nSrc==1 - && OptimizationEnabled(db, SQLITE_SubqCoroutine) +#endif + } + + /* Generate code to implement the subquery + */ + if( pTabList->nSrc==1 + && (p->selFlags & SF_All)==0 + && OptimizationEnabled(db, SQLITE_SubqCoroutine) ){ /* Implement a co-routine that will return a single row of the result ** set on each invocation. @@ -110945,33 +112611,23 @@ SQLITE_PRIVATE int sqlite3Select( sqlite3VdbeChangeP1(v, topAddr, retAddr); sqlite3ClearTempRegCache(pParse); } - if( /*pParse->nErr ||*/ db->mallocFailed ){ - goto select_end; - } + if( db->mallocFailed ) goto select_end; pParse->nHeight -= sqlite3SelectExprHeight(p); - pTabList = p->pSrc; - if( !IgnorableOrderby(pDest) ){ - sSort.pOrderBy = p->pOrderBy; - } } - pEList = p->pEList; #endif + + /* Various elements of the SELECT copied into local variables for + ** convenience */ + pEList = p->pEList; pWhere = p->pWhere; pGroupBy = p->pGroupBy; pHaving = p->pHaving; sDistinct.isTnct = (p->selFlags & SF_Distinct)!=0; -#ifndef SQLITE_OMIT_COMPOUND_SELECT - /* If there is are a sequence of queries, do the earlier ones first. - */ - if( p->pPrior ){ - rc = multiSelect(pParse, p, pDest); - explainSetInteger(pParse->iSelectId, iRestoreSelectId); #if SELECTTRACE_ENABLED - SELECTTRACE(1,pParse,p,("end compound-select processing\n")); - pParse->nSelectIndent--; -#endif - return rc; + if( sqlite3SelectTrace & 0x400 ){ + SELECTTRACE(0x400,pParse,p,("After all FROM-clause analysis:\n")); + sqlite3TreeViewSelect(0, p, 0); } #endif @@ -110991,23 +112647,23 @@ SQLITE_PRIVATE int sqlite3Select( ** BY and DISTINCT, and an index or separate temp-table for the other. */ if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct - && sqlite3ExprListCompare(sSort.pOrderBy, p->pEList, -1)==0 + && sqlite3ExprListCompare(sSort.pOrderBy, pEList, -1)==0 ){ p->selFlags &= ~SF_Distinct; - p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0); - pGroupBy = p->pGroupBy; + pGroupBy = p->pGroupBy = sqlite3ExprListDup(db, pEList, 0); /* Notice that even thought SF_Distinct has been cleared from p->selFlags, ** the sDistinct.isTnct is still set. Hence, isTnct represents the ** original setting of the SF_Distinct flag, not the current setting */ assert( sDistinct.isTnct ); } - /* If there is an ORDER BY clause, then this sorting - ** index might end up being unused if the data can be - ** extracted in pre-sorted order. If that is the case, then the - ** OP_OpenEphemeral instruction will be changed to an OP_Noop once - ** we figure out that the sorting index is not needed. The addrSortIndex - ** variable is used to facilitate that change. + /* If there is an ORDER BY clause, then create an ephemeral index to + ** do the sorting. But this sorting ephemeral index might end up + ** being unused if the data can be extracted in pre-sorted order. + ** If that is the case, then the OP_OpenEphemeral instruction will be + ** changed to an OP_Noop once we figure out that the sorting index is + ** not needed. The sSort.addrSortIndex variable is used to facilitate + ** that change. */ if( sSort.pOrderBy ){ KeyInfo *pKeyInfo; @@ -111038,14 +112694,14 @@ SQLITE_PRIVATE int sqlite3Select( sSort.sortFlags |= SORTFLAG_UseSorter; } - /* Open a virtual index to use for the distinct set. + /* Open an ephemeral index to use for the distinct set. */ if( p->selFlags & SF_Distinct ){ sDistinct.tabTnct = pParse->nTab++; sDistinct.addrTnct = sqlite3VdbeAddOp4(v, OP_OpenEphemeral, - sDistinct.tabTnct, 0, 0, - (char*)keyInfoFromExprList(pParse, p->pEList,0,0), - P4_KEYINFO); + sDistinct.tabTnct, 0, 0, + (char*)keyInfoFromExprList(pParse, p->pEList,0,0), + P4_KEYINFO); sqlite3VdbeChangeP5(v, BTREE_UNORDERED); sDistinct.eTnctType = WHERE_DISTINCT_UNORDERED; }else{ @@ -111123,11 +112779,10 @@ SQLITE_PRIVATE int sqlite3Select( p->nSelectRow = 1; } - /* If there is both a GROUP BY and an ORDER BY clause and they are ** identical, then it may be possible to disable the ORDER BY clause ** on the grounds that the GROUP BY will cause elements to come out - ** in the correct order. It also may not - the GROUP BY may use a + ** in the correct order. It also may not - the GROUP BY might use a ** database index that causes rows to be grouped together as required ** but not actually sorted. Either way, record the fact that the ** ORDER BY and GROUP BY clauses are the same by setting the orderByGrp @@ -111305,7 +112960,8 @@ SQLITE_PRIVATE int sqlite3Select( addrTopOfLoop = sqlite3VdbeCurrentAddr(v); sqlite3ExprCacheClear(pParse); if( groupBySort ){ - sqlite3VdbeAddOp3(v, OP_SorterData, sAggInfo.sortingIdx, sortOut,sortPTab); + sqlite3VdbeAddOp3(v, OP_SorterData, sAggInfo.sortingIdx, + sortOut, sortPTab); } for(j=0; jnExpr; j++){ if( groupBySort ){ @@ -111377,7 +113033,8 @@ SQLITE_PRIVATE int sqlite3Select( sqlite3VdbeAddOp1(v, OP_Return, regOutputRow); sqlite3VdbeResolveLabel(v, addrOutputRow); addrOutputRow = sqlite3VdbeCurrentAddr(v); - sqlite3VdbeAddOp2(v, OP_IfPos, iUseFlag, addrOutputRow+2); VdbeCoverage(v); + sqlite3VdbeAddOp2(v, OP_IfPos, iUseFlag, addrOutputRow+2); + VdbeCoverage(v); VdbeComment((v, "Groupby result generator entry point")); sqlite3VdbeAddOp1(v, OP_Return, regOutputRow); finalizeAggFunctions(pParse, &sAggInfo); @@ -111541,7 +113198,8 @@ SQLITE_PRIVATE int sqlite3Select( ** and send them to the callback one by one. */ if( sSort.pOrderBy ){ - explainTempTable(pParse, sSort.nOBSat>0 ? "RIGHT PART OF ORDER BY":"ORDER BY"); + explainTempTable(pParse, + sSort.nOBSat>0 ? "RIGHT PART OF ORDER BY":"ORDER BY"); generateSortTail(pParse, p, &sSort, pEList->nExpr, pDest); } @@ -111549,10 +113207,9 @@ SQLITE_PRIVATE int sqlite3Select( */ sqlite3VdbeResolveLabel(v, iEnd); - /* The SELECT was successfully coded. Set the return code to 0 - ** to indicate no errors. - */ - rc = 0; + /* The SELECT has been coded. If there is an error in the Parse structure, + ** set the return code to 1. Otherwise 0. */ + rc = (pParse->nErr>0); /* Control jumps to here if an error is encountered above, or upon ** successful coding of the SELECT. @@ -111575,100 +113232,6 @@ select_end: return rc; } -#ifdef SQLITE_DEBUG -/* -** Generate a human-readable description of a the Select object. -*/ -SQLITE_PRIVATE void sqlite3TreeViewSelect(TreeView *pView, const Select *p, u8 moreToFollow){ - int n = 0; - pView = sqlite3TreeViewPush(pView, moreToFollow); - sqlite3TreeViewLine(pView, "SELECT%s%s (0x%p)", - ((p->selFlags & SF_Distinct) ? " DISTINCT" : ""), - ((p->selFlags & SF_Aggregate) ? " agg_flag" : ""), p - ); - if( p->pSrc && p->pSrc->nSrc ) n++; - if( p->pWhere ) n++; - if( p->pGroupBy ) n++; - if( p->pHaving ) n++; - if( p->pOrderBy ) n++; - if( p->pLimit ) n++; - if( p->pOffset ) n++; - if( p->pPrior ) n++; - sqlite3TreeViewExprList(pView, p->pEList, (n--)>0, "result-set"); - if( p->pSrc && p->pSrc->nSrc ){ - int i; - pView = sqlite3TreeViewPush(pView, (n--)>0); - sqlite3TreeViewLine(pView, "FROM"); - for(i=0; ipSrc->nSrc; i++){ - struct SrcList_item *pItem = &p->pSrc->a[i]; - StrAccum x; - char zLine[100]; - sqlite3StrAccumInit(&x, zLine, sizeof(zLine), 0); - sqlite3XPrintf(&x, 0, "{%d,*}", pItem->iCursor); - if( pItem->zDatabase ){ - sqlite3XPrintf(&x, 0, " %s.%s", pItem->zDatabase, pItem->zName); - }else if( pItem->zName ){ - sqlite3XPrintf(&x, 0, " %s", pItem->zName); - } - if( pItem->pTab ){ - sqlite3XPrintf(&x, 0, " tabname=%Q", pItem->pTab->zName); - } - if( pItem->zAlias ){ - sqlite3XPrintf(&x, 0, " (AS %s)", pItem->zAlias); - } - if( pItem->jointype & JT_LEFT ){ - sqlite3XPrintf(&x, 0, " LEFT-JOIN"); - } - sqlite3StrAccumFinish(&x); - sqlite3TreeViewItem(pView, zLine, ipSrc->nSrc-1); - if( pItem->pSelect ){ - sqlite3TreeViewSelect(pView, pItem->pSelect, 0); - } - sqlite3TreeViewPop(pView); - } - sqlite3TreeViewPop(pView); - } - if( p->pWhere ){ - sqlite3TreeViewItem(pView, "WHERE", (n--)>0); - sqlite3TreeViewExpr(pView, p->pWhere, 0); - sqlite3TreeViewPop(pView); - } - if( p->pGroupBy ){ - sqlite3TreeViewExprList(pView, p->pGroupBy, (n--)>0, "GROUPBY"); - } - if( p->pHaving ){ - sqlite3TreeViewItem(pView, "HAVING", (n--)>0); - sqlite3TreeViewExpr(pView, p->pHaving, 0); - sqlite3TreeViewPop(pView); - } - if( p->pOrderBy ){ - sqlite3TreeViewExprList(pView, p->pOrderBy, (n--)>0, "ORDERBY"); - } - if( p->pLimit ){ - sqlite3TreeViewItem(pView, "LIMIT", (n--)>0); - sqlite3TreeViewExpr(pView, p->pLimit, 0); - sqlite3TreeViewPop(pView); - } - if( p->pOffset ){ - sqlite3TreeViewItem(pView, "OFFSET", (n--)>0); - sqlite3TreeViewExpr(pView, p->pOffset, 0); - sqlite3TreeViewPop(pView); - } - if( p->pPrior ){ - const char *zOp = "UNION"; - switch( p->op ){ - case TK_ALL: zOp = "UNION ALL"; break; - case TK_INTERSECT: zOp = "INTERSECT"; break; - case TK_EXCEPT: zOp = "EXCEPT"; break; - } - sqlite3TreeViewItem(pView, zOp, (n--)>0); - sqlite3TreeViewSelect(pView, p->pPrior, 0); - sqlite3TreeViewPop(pView); - } - sqlite3TreeViewPop(pView); -} -#endif /* SQLITE_DEBUG */ - /************** End of select.c **********************************************/ /************** Begin file table.c *******************************************/ /* @@ -111689,6 +113252,7 @@ SQLITE_PRIVATE void sqlite3TreeViewSelect(TreeView *pView, const Select *p, u8 m ** These routines are in a separate files so that they will not be linked ** if they are not used. */ +/* #include "sqliteInt.h" */ /* #include */ /* #include */ @@ -111762,7 +113326,7 @@ static int sqlite3_get_table_cb(void *pArg, int nCol, char **argv, char **colv){ z = 0; }else{ int n = sqlite3Strlen30(argv[i])+1; - z = sqlite3_malloc( n ); + z = sqlite3_malloc64( n ); if( z==0 ) goto malloc_failed; memcpy(z, argv[i], n); } @@ -111811,7 +113375,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_get_table( res.nData = 1; res.nAlloc = 20; res.rc = SQLITE_OK; - res.azResult = sqlite3_malloc(sizeof(char*)*res.nAlloc ); + res.azResult = sqlite3_malloc64(sizeof(char*)*res.nAlloc ); if( res.azResult==0 ){ db->errCode = SQLITE_NOMEM; return SQLITE_NOMEM; @@ -111839,7 +113403,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_get_table( } if( res.nAlloc>res.nData ){ char **azNew; - azNew = sqlite3_realloc( res.azResult, sizeof(char*)*res.nData ); + azNew = sqlite3_realloc64( res.azResult, sizeof(char*)*res.nData ); if( azNew==0 ){ sqlite3_free_table(&res.azResult[1]); db->errCode = SQLITE_NOMEM; @@ -111885,6 +113449,7 @@ SQLITE_API void SQLITE_STDCALL sqlite3_free_table( ************************************************************************* ** This file contains the implementation for TRIGGERs */ +/* #include "sqliteInt.h" */ #ifndef SQLITE_OMIT_TRIGGER /* @@ -112067,7 +113632,6 @@ SQLITE_PRIVATE void sqlite3BeginTrigger( /* Do not create a trigger on a system table */ if( sqlite3StrNICmp(pTab->zName, "sqlite_", 7)==0 ){ sqlite3ErrorMsg(pParse, "cannot create trigger on system table"); - pParse->nErr++; goto trigger_cleanup; } @@ -112247,12 +113811,12 @@ static TriggerStep *triggerStepAllocate( ){ TriggerStep *pTriggerStep; - pTriggerStep = sqlite3DbMallocZero(db, sizeof(TriggerStep) + pName->n); + pTriggerStep = sqlite3DbMallocZero(db, sizeof(TriggerStep) + pName->n + 1); if( pTriggerStep ){ char *z = (char*)&pTriggerStep[1]; memcpy(z, pName->z, pName->n); - pTriggerStep->target.z = z; - pTriggerStep->target.n = pName->n; + sqlite3Dequote(z); + pTriggerStep->zTarget = z; pTriggerStep->op = op; } return pTriggerStep; @@ -112535,7 +114099,7 @@ SQLITE_PRIVATE Trigger *sqlite3TriggersExist( } /* -** Convert the pStep->target token into a SrcList and return a pointer +** Convert the pStep->zTarget string into a SrcList and return a pointer ** to that SrcList. ** ** This routine adds a specific database name, if needed, to the target when @@ -112548,17 +114112,17 @@ static SrcList *targetSrcList( Parse *pParse, /* The parsing context */ TriggerStep *pStep /* The trigger containing the target token */ ){ + sqlite3 *db = pParse->db; int iDb; /* Index of the database to use */ SrcList *pSrc; /* SrcList to be returned */ - pSrc = sqlite3SrcListAppend(pParse->db, 0, &pStep->target, 0); + pSrc = sqlite3SrcListAppend(db, 0, 0, 0); if( pSrc ){ assert( pSrc->nSrc>0 ); - assert( pSrc->a!=0 ); - iDb = sqlite3SchemaToIndex(pParse->db, pStep->pTrig->pSchema); + pSrc->a[pSrc->nSrc-1].zName = sqlite3DbStrDup(db, pStep->zTarget); + iDb = sqlite3SchemaToIndex(db, pStep->pTrig->pSchema); if( iDb==0 || iDb>=2 ){ - sqlite3 *db = pParse->db; - assert( iDbdb->nDb ); + assert( iDbnDb ); pSrc->a[pSrc->nSrc-1].zDatabase = sqlite3DbStrDup(db, db->aDb[iDb].zName); } } @@ -112670,6 +114234,7 @@ static void transferParseError(Parse *pTo, Parse *pFrom){ if( pTo->nErr==0 ){ pTo->zErrMsg = pFrom->zErrMsg; pTo->nErr = pFrom->nErr; + pTo->rc = pFrom->rc; }else{ sqlite3DbFree(pFrom->db, pFrom->zErrMsg); } @@ -113008,6 +114573,7 @@ SQLITE_PRIVATE u32 sqlite3TriggerColmask( ** This file contains C code routines that are called by the parser ** to handle UPDATE statements. */ +/* #include "sqliteInt.h" */ #ifndef SQLITE_OMIT_VIRTUALTABLE /* Forward declaration */ @@ -113738,12 +115304,10 @@ static void updateVirtualTable( */ assert( v ); ephemTab = pParse->nTab++; - sqlite3VdbeAddOp2(v, OP_OpenEphemeral, ephemTab, pTab->nCol+1+(pRowid!=0)); - sqlite3VdbeChangeP5(v, BTREE_UNORDERED); /* fill the ephemeral table */ - sqlite3SelectDestInit(&dest, SRT_Table, ephemTab); + sqlite3SelectDestInit(&dest, SRT_EphemTab, ephemTab); sqlite3Select(pParse, pSelect, &dest); /* Generate code to scan the ephemeral table and call VUpdate. */ @@ -113786,6 +115350,8 @@ static void updateVirtualTable( ** Most of the code in this file may be omitted by defining the ** SQLITE_OMIT_VACUUM macro. */ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ #if !defined(SQLITE_OMIT_VACUUM) && !defined(SQLITE_OMIT_ATTACH) /* @@ -114020,6 +115586,8 @@ SQLITE_PRIVATE int sqlite3RunVacuum(char **pzErrMsg, sqlite3 *db){ ** an "INSERT INTO vacuum_db.xxx SELECT * FROM main.xxx;" to copy ** the contents to the temporary database. */ + assert( (db->flags & SQLITE_Vacuum)==0 ); + db->flags |= SQLITE_Vacuum; rc = execExecSql(db, pzErrMsg, "SELECT 'INSERT INTO vacuum_db.' || quote(name) " "|| ' SELECT * FROM main.' || quote(name) || ';'" @@ -114027,6 +115595,8 @@ SQLITE_PRIVATE int sqlite3RunVacuum(char **pzErrMsg, sqlite3 *db){ "WHERE type = 'table' AND name!='sqlite_sequence' " " AND coalesce(rootpage,1)>0" ); + assert( (db->flags & SQLITE_Vacuum)!=0 ); + db->flags &= ~SQLITE_Vacuum; if( rc!=SQLITE_OK ) goto end_of_vacuum; /* Copy over the sequence table @@ -114154,6 +115724,7 @@ end_of_vacuum: ** This file contains code used to help implement virtual tables. */ #ifndef SQLITE_OMIT_VIRTUALTABLE +/* #include "sqliteInt.h" */ /* ** Before a virtual table xCreate() or xConnect() method is invoked, the @@ -114165,6 +115736,8 @@ end_of_vacuum: struct VtabCtx { VTable *pVTable; /* The virtual table being constructed */ Table *pTab; /* The Table object to which the virtual table belongs */ + VtabCtx *pPrior; /* Parent context (if any) */ + int bDeclared; /* True after sqlite3_declare_vtab() is called */ }; /* @@ -114611,7 +116184,7 @@ SQLITE_PRIVATE void sqlite3VtabArgExtend(Parse *pParse, Token *p){ pArg->z = p->z; pArg->n = p->n; }else{ - assert(pArg->z < p->z); + assert(pArg->z <= p->z); pArg->n = (int)(&p->z[p->n] - pArg->z); } } @@ -114628,15 +116201,27 @@ static int vtabCallConstructor( int (*xConstruct)(sqlite3*,void*,int,const char*const*,sqlite3_vtab**,char**), char **pzErr ){ - VtabCtx sCtx, *pPriorCtx; + VtabCtx sCtx; VTable *pVTable; int rc; const char *const*azArg = (const char *const*)pTab->azModuleArg; int nArg = pTab->nModuleArg; char *zErr = 0; - char *zModuleName = sqlite3MPrintf(db, "%s", pTab->zName); + char *zModuleName; int iDb; + VtabCtx *pCtx; + + /* Check that the virtual-table is not already being initialized */ + for(pCtx=db->pVtabCtx; pCtx; pCtx=pCtx->pPrior){ + if( pCtx->pTab==pTab ){ + *pzErr = sqlite3MPrintf(db, + "vtable constructor called recursively: %s", pTab->zName + ); + return SQLITE_LOCKED; + } + } + zModuleName = sqlite3MPrintf(db, "%s", pTab->zName); if( !zModuleName ){ return SQLITE_NOMEM; } @@ -114657,11 +116242,13 @@ static int vtabCallConstructor( assert( xConstruct ); sCtx.pTab = pTab; sCtx.pVTable = pVTable; - pPriorCtx = db->pVtabCtx; + sCtx.pPrior = db->pVtabCtx; + sCtx.bDeclared = 0; db->pVtabCtx = &sCtx; rc = xConstruct(db, pMod->pAux, nArg, azArg, &pVTable->pVtab, &zErr); - db->pVtabCtx = pPriorCtx; + db->pVtabCtx = sCtx.pPrior; if( rc==SQLITE_NOMEM ) db->mallocFailed = 1; + assert( sCtx.pTab==pTab ); if( SQLITE_OK!=rc ){ if( zErr==0 ){ @@ -114677,13 +116264,14 @@ static int vtabCallConstructor( memset(pVTable->pVtab, 0, sizeof(pVTable->pVtab[0])); pVTable->pVtab->pModule = pMod->pModule; pVTable->nRef = 1; - if( sCtx.pTab ){ + if( sCtx.bDeclared==0 ){ const char *zFormat = "vtable constructor did not declare schema: %s"; *pzErr = sqlite3MPrintf(db, zFormat, pTab->zName); sqlite3VtabUnlock(pVTable); rc = SQLITE_ERROR; }else{ int iCol; + u8 oooHidden = 0; /* If everything went according to plan, link the new VTable structure ** into the linked list headed by pTab->pVTable. Then loop through the ** columns of the table to see if any of them contain the token "hidden". @@ -114696,7 +116284,10 @@ static int vtabCallConstructor( char *zType = pTab->aCol[iCol].zType; int nType; int i = 0; - if( !zType ) continue; + if( !zType ){ + pTab->tabFlags |= oooHidden; + continue; + } nType = sqlite3Strlen30(zType); if( sqlite3StrNICmp("hidden", zType, 6)||(zType[6] && zType[6]!=' ') ){ for(i=0; iaCol[iCol].colFlags |= COLFLAG_HIDDEN; + oooHidden = TF_OOOHidden; + }else{ + pTab->tabFlags |= oooHidden; } } } @@ -114847,8 +116441,8 @@ SQLITE_PRIVATE int sqlite3VtabCallCreate(sqlite3 *db, int iDb, const char *zTab, ** virtual table module. */ SQLITE_API int SQLITE_STDCALL sqlite3_declare_vtab(sqlite3 *db, const char *zCreateTable){ + VtabCtx *pCtx; Parse *pParse; - int rc = SQLITE_OK; Table *pTab; char *zErr = 0; @@ -114859,11 +116453,13 @@ SQLITE_API int SQLITE_STDCALL sqlite3_declare_vtab(sqlite3 *db, const char *zCre } #endif sqlite3_mutex_enter(db->mutex); - if( !db->pVtabCtx || !(pTab = db->pVtabCtx->pTab) ){ + pCtx = db->pVtabCtx; + if( !pCtx || pCtx->bDeclared ){ sqlite3Error(db, SQLITE_MISUSE); sqlite3_mutex_leave(db->mutex); return SQLITE_MISUSE_BKPT; } + pTab = pCtx->pTab; assert( (pTab->tabFlags & TF_Virtual)!=0 ); pParse = sqlite3StackAllocZero(db, sizeof(*pParse)); @@ -114886,7 +116482,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_declare_vtab(sqlite3 *db, const char *zCre pParse->pNewTable->nCol = 0; pParse->pNewTable->aCol = 0; } - db->pVtabCtx->pTab = 0; + pCtx->bDeclared = 1; }else{ sqlite3ErrorWithMsg(db, SQLITE_ERROR, (zErr ? "%s" : 0), zErr); sqlite3DbFree(db, zErr); @@ -114953,8 +116549,10 @@ SQLITE_PRIVATE int sqlite3VtabCallDestroy(sqlite3 *db, int iDb, const char *zTab static void callFinaliser(sqlite3 *db, int offset){ int i; if( db->aVTrans ){ + VTable **aVTrans = db->aVTrans; + db->aVTrans = 0; for(i=0; inVTrans; i++){ - VTable *pVTab = db->aVTrans[i]; + VTable *pVTab = aVTrans[i]; sqlite3_vtab *p = pVTab->pVtab; if( p ){ int (*x)(sqlite3_vtab *); @@ -114964,9 +116562,8 @@ static void callFinaliser(sqlite3 *db, int offset){ pVTab->iSavepoint = 0; sqlite3VtabUnlock(pVTab); } - sqlite3DbFree(db, db->aVTrans); + sqlite3DbFree(db, aVTrans); db->nVTrans = 0; - db->aVTrans = 0; } } @@ -115080,7 +116677,7 @@ SQLITE_PRIVATE int sqlite3VtabSavepoint(sqlite3 *db, int op, int iSavepoint){ int rc = SQLITE_OK; assert( op==SAVEPOINT_RELEASE||op==SAVEPOINT_ROLLBACK||op==SAVEPOINT_BEGIN ); - assert( iSavepoint>=0 ); + assert( iSavepoint>=-1 ); if( db->aVTrans ){ int i; for(i=0; rc==SQLITE_OK && inVTrans; i++){ @@ -115198,7 +116795,7 @@ SQLITE_PRIVATE void sqlite3VtabMakeWritable(Parse *pParse, Table *pTab){ if( pTab==pToplevel->apVtabLock[i] ) return; } n = (pToplevel->nVtabLock+1)*sizeof(pToplevel->apVtabLock[0]); - apVtabLock = sqlite3_realloc(pToplevel->apVtabLock, n); + apVtabLock = sqlite3_realloc64(pToplevel->apVtabLock, n); if( apVtabLock ){ pToplevel->apVtabLock = apVtabLock; pToplevel->apVtabLock[pToplevel->nVtabLock++] = pTab; @@ -115266,9 +116863,9 @@ SQLITE_API int SQLITE_CDECL sqlite3_vtab_config(sqlite3 *db, int op, ...){ #endif /* SQLITE_OMIT_VIRTUALTABLE */ /************** End of vtab.c ************************************************/ -/************** Begin file where.c *******************************************/ +/************** Begin file wherecode.c ***************************************/ /* -** 2001 September 15 +** 2015-06-06 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: @@ -115279,13 +116876,15 @@ SQLITE_API int SQLITE_CDECL sqlite3_vtab_config(sqlite3 *db, int op, ...){ ** ************************************************************************* ** This module contains C code that generates VDBE code used to process -** the WHERE clause of SQL statements. This module is responsible for -** generating the code that loops through a table looking for applicable -** rows. Indices are selected and used to speed the search when doing -** so is applicable. Because this module is responsible for selecting -** indices, you might also think of this module as the "query optimizer". +** the WHERE clause of SQL statements. +** +** This file was split off from where.c on 2015-06-06 in order to reduce the +** size of where.c and make it easier to edit. This file contains the routines +** that actually generate the bulk of the WHERE loop code. The original where.c +** file retains the code that does query planning and analysis. */ -/************** Include whereInt.h in the middle of where.c ******************/ +/* #include "sqliteInt.h" */ +/************** Include whereInt.h in the middle of wherecode.c **************/ /************** Begin file whereInt.h ****************************************/ /* ** 2013-11-12 @@ -115308,7 +116907,7 @@ SQLITE_API int SQLITE_CDECL sqlite3_vtab_config(sqlite3 *db, int op, ...){ ** Trace output macros */ #if defined(SQLITE_TEST) || defined(SQLITE_DEBUG) -/***/ int sqlite3WhereTrace = 0; +/***/ int sqlite3WhereTrace; #endif #if defined(SQLITE_DEBUG) \ && (defined(SQLITE_TEST) || defined(SQLITE_ENABLE_WHERETRACE)) @@ -115450,10 +117049,6 @@ struct WhereOrSet { WhereOrCost a[N_OR_COST]; /* Set of best costs */ }; - -/* Forward declaration of methods */ -static int whereLoopResize(sqlite3*, WhereLoop*, int); - /* ** Each instance of this object holds a sequence of WhereLoop objects ** that implement some or all of a query plan. @@ -115569,6 +117164,7 @@ struct WhereTerm { #define TERM_LIKEOPT 0x100 /* Virtual terms from the LIKE optimization */ #define TERM_LIKECOND 0x200 /* Conditionally this LIKE operator term */ #define TERM_LIKE 0x400 /* The original LIKE operator */ +#define TERM_IS 0x800 /* Term.pExpr is an IS operator */ /* ** An instance of the WhereScan object is used as an iterator for locating @@ -115660,6 +117256,11 @@ struct WhereMaskSet { int ix[BMS]; /* Cursor assigned to each bit */ }; +/* +** Initialize a WhereMaskSet object +*/ +#define initMaskSet(P) (P)->n=0 + /* ** This object is a convenience wrapper holding all information needed ** to construct WhereLoop objects for a particular query. @@ -115711,27 +117312,84 @@ struct WhereInfo { WhereLevel a[1]; /* Information about each nest loop in WHERE */ }; +/* +** Private interfaces - callable only by other where.c routines. +** +** where.c: +*/ +SQLITE_PRIVATE Bitmask sqlite3WhereGetMask(WhereMaskSet*,int); +SQLITE_PRIVATE WhereTerm *sqlite3WhereFindTerm( + WhereClause *pWC, /* The WHERE clause to be searched */ + int iCur, /* Cursor number of LHS */ + int iColumn, /* Column number of LHS */ + Bitmask notReady, /* RHS must not overlap with this mask */ + u32 op, /* Mask of WO_xx values describing operator */ + Index *pIdx /* Must be compatible with this index, if not NULL */ +); + +/* wherecode.c: */ +#ifndef SQLITE_OMIT_EXPLAIN +SQLITE_PRIVATE int sqlite3WhereExplainOneScan( + Parse *pParse, /* Parse context */ + SrcList *pTabList, /* Table list this loop refers to */ + WhereLevel *pLevel, /* Scan to write OP_Explain opcode for */ + int iLevel, /* Value for "level" column of output */ + int iFrom, /* Value for "from" column of output */ + u16 wctrlFlags /* Flags passed to sqlite3WhereBegin() */ +); +#else +# define sqlite3WhereExplainOneScan(u,v,w,x,y,z) 0 +#endif /* SQLITE_OMIT_EXPLAIN */ +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS +SQLITE_PRIVATE void sqlite3WhereAddScanStatus( + Vdbe *v, /* Vdbe to add scanstatus entry to */ + SrcList *pSrclist, /* FROM clause pLvl reads data from */ + WhereLevel *pLvl, /* Level to add scanstatus() entry for */ + int addrExplain /* Address of OP_Explain (or 0) */ +); +#else +# define sqlite3WhereAddScanStatus(a, b, c, d) ((void)d) +#endif +SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( + WhereInfo *pWInfo, /* Complete information about the WHERE clause */ + int iLevel, /* Which level of pWInfo->a[] should be coded */ + Bitmask notReady /* Which tables are currently available */ +); + +/* whereexpr.c: */ +SQLITE_PRIVATE void sqlite3WhereClauseInit(WhereClause*,WhereInfo*); +SQLITE_PRIVATE void sqlite3WhereClauseClear(WhereClause*); +SQLITE_PRIVATE void sqlite3WhereSplit(WhereClause*,Expr*,u8); +SQLITE_PRIVATE Bitmask sqlite3WhereExprUsage(WhereMaskSet*, Expr*); +SQLITE_PRIVATE Bitmask sqlite3WhereExprListUsage(WhereMaskSet*, ExprList*); +SQLITE_PRIVATE void sqlite3WhereExprAnalyze(SrcList*, WhereClause*); + + + + + /* ** Bitmasks for the operators on WhereTerm objects. These are all ** operators that are of interest to the query planner. An ** OR-ed combination of these values can be used when searching for ** particular WhereTerms within a WhereClause. */ -#define WO_IN 0x001 -#define WO_EQ 0x002 +#define WO_IN 0x0001 +#define WO_EQ 0x0002 #define WO_LT (WO_EQ<<(TK_LT-TK_EQ)) #define WO_LE (WO_EQ<<(TK_LE-TK_EQ)) #define WO_GT (WO_EQ<<(TK_GT-TK_EQ)) #define WO_GE (WO_EQ<<(TK_GE-TK_EQ)) -#define WO_MATCH 0x040 -#define WO_ISNULL 0x080 -#define WO_OR 0x100 /* Two or more OR-connected terms */ -#define WO_AND 0x200 /* Two or more AND-connected terms */ -#define WO_EQUIV 0x400 /* Of the form A==B, both columns */ -#define WO_NOOP 0x800 /* This term does not restrict search space */ +#define WO_MATCH 0x0040 +#define WO_IS 0x0080 +#define WO_ISNULL 0x0100 +#define WO_OR 0x0200 /* Two or more OR-connected terms */ +#define WO_AND 0x0400 /* Two or more AND-connected terms */ +#define WO_EQUIV 0x0800 /* Of the form A==B, both columns */ +#define WO_NOOP 0x1000 /* This term does not restrict search space */ -#define WO_ALL 0xfff /* Mask of all possible WO_* values */ -#define WO_SINGLE 0x0ff /* Mask of all non-compound WO_* values */ +#define WO_ALL 0x1fff /* Mask of all possible WO_* values */ +#define WO_SINGLE 0x01ff /* Mask of all non-compound WO_* values */ /* ** These are definitions of bits in the WhereLoop.wsFlags field. @@ -115759,4133 +117417,4299 @@ struct WhereInfo { #define WHERE_PARTIALIDX 0x00020000 /* The automatic index is partial */ /************** End of whereInt.h ********************************************/ -/************** Continuing where we left off in where.c **********************/ - -/* -** Return the estimated number of output rows from a WHERE clause -*/ -SQLITE_PRIVATE u64 sqlite3WhereOutputRowCount(WhereInfo *pWInfo){ - return sqlite3LogEstToInt(pWInfo->nRowOut); -} - -/* -** Return one of the WHERE_DISTINCT_xxxxx values to indicate how this -** WHERE clause returns outputs for DISTINCT processing. -*/ -SQLITE_PRIVATE int sqlite3WhereIsDistinct(WhereInfo *pWInfo){ - return pWInfo->eDistinct; -} - -/* -** Return TRUE if the WHERE clause returns rows in ORDER BY order. -** Return FALSE if the output needs to be sorted. -*/ -SQLITE_PRIVATE int sqlite3WhereIsOrdered(WhereInfo *pWInfo){ - return pWInfo->nOBSat; -} - -/* -** Return the VDBE address or label to jump to in order to continue -** immediately with the next row of a WHERE clause. -*/ -SQLITE_PRIVATE int sqlite3WhereContinueLabel(WhereInfo *pWInfo){ - assert( pWInfo->iContinue!=0 ); - return pWInfo->iContinue; -} +/************** Continuing where we left off in wherecode.c ******************/ +#ifndef SQLITE_OMIT_EXPLAIN /* -** Return the VDBE address or label to jump to in order to break -** out of a WHERE loop. +** This routine is a helper for explainIndexRange() below +** +** pStr holds the text of an expression that we are building up one term +** at a time. This routine adds a new term to the end of the expression. +** Terms are separated by AND so add the "AND" text for second and subsequent +** terms only. */ -SQLITE_PRIVATE int sqlite3WhereBreakLabel(WhereInfo *pWInfo){ - return pWInfo->iBreak; +static void explainAppendTerm( + StrAccum *pStr, /* The text expression being built */ + int iTerm, /* Index of this term. First is zero */ + const char *zColumn, /* Name of the column */ + const char *zOp /* Name of the operator */ +){ + if( iTerm ) sqlite3StrAccumAppend(pStr, " AND ", 5); + sqlite3StrAccumAppendAll(pStr, zColumn); + sqlite3StrAccumAppend(pStr, zOp, 1); + sqlite3StrAccumAppend(pStr, "?", 1); } /* -** Return TRUE if an UPDATE or DELETE statement can operate directly on -** the rowids returned by a WHERE clause. Return FALSE if doing an -** UPDATE or DELETE might change subsequent WHERE clause results. +** Argument pLevel describes a strategy for scanning table pTab. This +** function appends text to pStr that describes the subset of table +** rows scanned by the strategy in the form of an SQL expression. ** -** If the ONEPASS optimization is used (if this routine returns true) -** then also write the indices of open cursors used by ONEPASS -** into aiCur[0] and aiCur[1]. iaCur[0] gets the cursor of the data -** table and iaCur[1] gets the cursor used by an auxiliary index. -** Either value may be -1, indicating that cursor is not used. -** Any cursors returned will have been opened for writing. +** For example, if the query: ** -** aiCur[0] and aiCur[1] both get -1 if the where-clause logic is -** unable to use the ONEPASS optimization. -*/ -SQLITE_PRIVATE int sqlite3WhereOkOnePass(WhereInfo *pWInfo, int *aiCur){ - memcpy(aiCur, pWInfo->aiCurOnePass, sizeof(int)*2); - return pWInfo->okOnePass; -} - -/* -** Move the content of pSrc into pDest -*/ -static void whereOrMove(WhereOrSet *pDest, WhereOrSet *pSrc){ - pDest->n = pSrc->n; - memcpy(pDest->a, pSrc->a, pDest->n*sizeof(pDest->a[0])); -} - -/* -** Try to insert a new prerequisite/cost entry into the WhereOrSet pSet. +** SELECT * FROM t1 WHERE a=1 AND b>2; ** -** The new entry might overwrite an existing entry, or it might be -** appended, or it might be discarded. Do whatever is the right thing -** so that pSet keeps the N_OR_COST best entries seen so far. +** is run and there is an index on (a, b), then this function returns a +** string similar to: +** +** "a=? AND b>?" */ -static int whereOrInsert( - WhereOrSet *pSet, /* The WhereOrSet to be updated */ - Bitmask prereq, /* Prerequisites of the new entry */ - LogEst rRun, /* Run-cost of the new entry */ - LogEst nOut /* Number of outputs for the new entry */ -){ - u16 i; - WhereOrCost *p; - for(i=pSet->n, p=pSet->a; i>0; i--, p++){ - if( rRun<=p->rRun && (prereq & p->prereq)==prereq ){ - goto whereOrInsert_done; - } - if( p->rRun<=rRun && (p->prereq & prereq)==p->prereq ){ - return 0; +static void explainIndexRange(StrAccum *pStr, WhereLoop *pLoop, Table *pTab){ + Index *pIndex = pLoop->u.btree.pIndex; + u16 nEq = pLoop->u.btree.nEq; + u16 nSkip = pLoop->nSkip; + int i, j; + Column *aCol = pTab->aCol; + i16 *aiColumn = pIndex->aiColumn; + + if( nEq==0 && (pLoop->wsFlags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))==0 ) return; + sqlite3StrAccumAppend(pStr, " (", 2); + for(i=0; i=nSkip ){ + explainAppendTerm(pStr, i, z, "="); + }else{ + if( i ) sqlite3StrAccumAppend(pStr, " AND ", 5); + sqlite3XPrintf(pStr, 0, "ANY(%s)", z); } } - if( pSet->na[pSet->n++]; - p->nOut = nOut; - }else{ - p = pSet->a; - for(i=1; in; i++){ - if( p->rRun>pSet->a[i].rRun ) p = pSet->a + i; - } - if( p->rRun<=rRun ) return 0; + + j = i; + if( pLoop->wsFlags&WHERE_BTM_LIMIT ){ + char *z = aiColumn[j] < 0 ? "rowid" : aCol[aiColumn[j]].zName; + explainAppendTerm(pStr, i++, z, ">"); } -whereOrInsert_done: - p->prereq = prereq; - p->rRun = rRun; - if( p->nOut>nOut ) p->nOut = nOut; - return 1; + if( pLoop->wsFlags&WHERE_TOP_LIMIT ){ + char *z = aiColumn[j] < 0 ? "rowid" : aCol[aiColumn[j]].zName; + explainAppendTerm(pStr, i, z, "<"); + } + sqlite3StrAccumAppend(pStr, ")", 1); } /* -** Initialize a preallocated WhereClause structure. +** This function is a no-op unless currently processing an EXPLAIN QUERY PLAN +** command, or if either SQLITE_DEBUG or SQLITE_ENABLE_STMT_SCANSTATUS was +** defined at compile-time. If it is not a no-op, a single OP_Explain opcode +** is added to the output to describe the table scan strategy in pLevel. +** +** If an OP_Explain opcode is added to the VM, its address is returned. +** Otherwise, if no OP_Explain is coded, zero is returned. */ -static void whereClauseInit( - WhereClause *pWC, /* The WhereClause to be initialized */ - WhereInfo *pWInfo /* The WHERE processing context */ +SQLITE_PRIVATE int sqlite3WhereExplainOneScan( + Parse *pParse, /* Parse context */ + SrcList *pTabList, /* Table list this loop refers to */ + WhereLevel *pLevel, /* Scan to write OP_Explain opcode for */ + int iLevel, /* Value for "level" column of output */ + int iFrom, /* Value for "from" column of output */ + u16 wctrlFlags /* Flags passed to sqlite3WhereBegin() */ ){ - pWC->pWInfo = pWInfo; - pWC->pOuter = 0; - pWC->nTerm = 0; - pWC->nSlot = ArraySize(pWC->aStatic); - pWC->a = pWC->aStatic; -} - -/* Forward reference */ -static void whereClauseClear(WhereClause*); + int ret = 0; +#if !defined(SQLITE_DEBUG) && !defined(SQLITE_ENABLE_STMT_SCANSTATUS) + if( pParse->explain==2 ) +#endif + { + struct SrcList_item *pItem = &pTabList->a[pLevel->iFrom]; + Vdbe *v = pParse->pVdbe; /* VM being constructed */ + sqlite3 *db = pParse->db; /* Database handle */ + int iId = pParse->iSelectId; /* Select id (left-most output column) */ + int isSearch; /* True for a SEARCH. False for SCAN. */ + WhereLoop *pLoop; /* The controlling WhereLoop object */ + u32 flags; /* Flags that describe this loop */ + char *zMsg; /* Text to add to EQP output */ + StrAccum str; /* EQP output string */ + char zBuf[100]; /* Initial space for EQP output string */ -/* -** Deallocate all memory associated with a WhereOrInfo object. -*/ -static void whereOrInfoDelete(sqlite3 *db, WhereOrInfo *p){ - whereClauseClear(&p->wc); - sqlite3DbFree(db, p); -} + pLoop = pLevel->pWLoop; + flags = pLoop->wsFlags; + if( (flags&WHERE_MULTI_OR) || (wctrlFlags&WHERE_ONETABLE_ONLY) ) return 0; -/* -** Deallocate all memory associated with a WhereAndInfo object. -*/ -static void whereAndInfoDelete(sqlite3 *db, WhereAndInfo *p){ - whereClauseClear(&p->wc); - sqlite3DbFree(db, p); -} + isSearch = (flags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))!=0 + || ((flags&WHERE_VIRTUALTABLE)==0 && (pLoop->u.btree.nEq>0)) + || (wctrlFlags&(WHERE_ORDERBY_MIN|WHERE_ORDERBY_MAX)); -/* -** Deallocate a WhereClause structure. The WhereClause structure -** itself is not freed. This routine is the inverse of whereClauseInit(). -*/ -static void whereClauseClear(WhereClause *pWC){ - int i; - WhereTerm *a; - sqlite3 *db = pWC->pWInfo->pParse->db; - for(i=pWC->nTerm-1, a=pWC->a; i>=0; i--, a++){ - if( a->wtFlags & TERM_DYNAMIC ){ - sqlite3ExprDelete(db, a->pExpr); + sqlite3StrAccumInit(&str, db, zBuf, sizeof(zBuf), SQLITE_MAX_LENGTH); + sqlite3StrAccumAppendAll(&str, isSearch ? "SEARCH" : "SCAN"); + if( pItem->pSelect ){ + sqlite3XPrintf(&str, 0, " SUBQUERY %d", pItem->iSelectId); + }else{ + sqlite3XPrintf(&str, 0, " TABLE %s", pItem->zName); } - if( a->wtFlags & TERM_ORINFO ){ - whereOrInfoDelete(db, a->u.pOrInfo); - }else if( a->wtFlags & TERM_ANDINFO ){ - whereAndInfoDelete(db, a->u.pAndInfo); + + if( pItem->zAlias ){ + sqlite3XPrintf(&str, 0, " AS %s", pItem->zAlias); } - } - if( pWC->a!=pWC->aStatic ){ - sqlite3DbFree(db, pWC->a); - } -} + if( (flags & (WHERE_IPK|WHERE_VIRTUALTABLE))==0 ){ + const char *zFmt = 0; + Index *pIdx; -/* -** Add a single new WhereTerm entry to the WhereClause object pWC. -** The new WhereTerm object is constructed from Expr p and with wtFlags. -** The index in pWC->a[] of the new WhereTerm is returned on success. -** 0 is returned if the new WhereTerm could not be added due to a memory -** allocation error. The memory allocation failure will be recorded in -** the db->mallocFailed flag so that higher-level functions can detect it. -** -** This routine will increase the size of the pWC->a[] array as necessary. -** -** If the wtFlags argument includes TERM_DYNAMIC, then responsibility -** for freeing the expression p is assumed by the WhereClause object pWC. -** This is true even if this routine fails to allocate a new WhereTerm. -** -** WARNING: This routine might reallocate the space used to store -** WhereTerms. All pointers to WhereTerms should be invalidated after -** calling this routine. Such pointers may be reinitialized by referencing -** the pWC->a[] array. -*/ -static int whereClauseInsert(WhereClause *pWC, Expr *p, u16 wtFlags){ - WhereTerm *pTerm; - int idx; - testcase( wtFlags & TERM_VIRTUAL ); - if( pWC->nTerm>=pWC->nSlot ){ - WhereTerm *pOld = pWC->a; - sqlite3 *db = pWC->pWInfo->pParse->db; - pWC->a = sqlite3DbMallocRaw(db, sizeof(pWC->a[0])*pWC->nSlot*2 ); - if( pWC->a==0 ){ - if( wtFlags & TERM_DYNAMIC ){ - sqlite3ExprDelete(db, p); + assert( pLoop->u.btree.pIndex!=0 ); + pIdx = pLoop->u.btree.pIndex; + assert( !(flags&WHERE_AUTO_INDEX) || (flags&WHERE_IDX_ONLY) ); + if( !HasRowid(pItem->pTab) && IsPrimaryKeyIndex(pIdx) ){ + if( isSearch ){ + zFmt = "PRIMARY KEY"; + } + }else if( flags & WHERE_PARTIALIDX ){ + zFmt = "AUTOMATIC PARTIAL COVERING INDEX"; + }else if( flags & WHERE_AUTO_INDEX ){ + zFmt = "AUTOMATIC COVERING INDEX"; + }else if( flags & WHERE_IDX_ONLY ){ + zFmt = "COVERING INDEX %s"; + }else{ + zFmt = "INDEX %s"; } - pWC->a = pOld; - return 0; + if( zFmt ){ + sqlite3StrAccumAppend(&str, " USING ", 7); + sqlite3XPrintf(&str, 0, zFmt, pIdx->zName); + explainIndexRange(&str, pLoop, pItem->pTab); + } + }else if( (flags & WHERE_IPK)!=0 && (flags & WHERE_CONSTRAINT)!=0 ){ + const char *zRange; + if( flags&(WHERE_COLUMN_EQ|WHERE_COLUMN_IN) ){ + zRange = "(rowid=?)"; + }else if( (flags&WHERE_BOTH_LIMIT)==WHERE_BOTH_LIMIT ){ + zRange = "(rowid>? AND rowid?)"; + }else{ + assert( flags&WHERE_TOP_LIMIT); + zRange = "(rowida, pOld, sizeof(pWC->a[0])*pWC->nTerm); - if( pOld!=pWC->aStatic ){ - sqlite3DbFree(db, pOld); +#ifndef SQLITE_OMIT_VIRTUALTABLE + else if( (flags & WHERE_VIRTUALTABLE)!=0 ){ + sqlite3XPrintf(&str, 0, " VIRTUAL TABLE INDEX %d:%s", + pLoop->u.vtab.idxNum, pLoop->u.vtab.idxStr); } - pWC->nSlot = sqlite3DbMallocSize(db, pWC->a)/sizeof(pWC->a[0]); - memset(&pWC->a[pWC->nTerm], 0, sizeof(pWC->a[0])*(pWC->nSlot-pWC->nTerm)); - } - pTerm = &pWC->a[idx = pWC->nTerm++]; - if( p && ExprHasProperty(p, EP_Unlikely) ){ - pTerm->truthProb = sqlite3LogEst(p->iTable) - 270; - }else{ - pTerm->truthProb = 1; +#endif +#ifdef SQLITE_EXPLAIN_ESTIMATED_ROWS + if( pLoop->nOut>=10 ){ + sqlite3XPrintf(&str, 0, " (~%llu rows)", sqlite3LogEstToInt(pLoop->nOut)); + }else{ + sqlite3StrAccumAppend(&str, " (~1 row)", 9); + } +#endif + zMsg = sqlite3StrAccumFinish(&str); + ret = sqlite3VdbeAddOp4(v, OP_Explain, iId, iLevel, iFrom, zMsg,P4_DYNAMIC); } - pTerm->pExpr = sqlite3ExprSkipCollate(p); - pTerm->wtFlags = wtFlags; - pTerm->pWC = pWC; - pTerm->iParent = -1; - return idx; + return ret; } +#endif /* SQLITE_OMIT_EXPLAIN */ +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS /* -** This routine identifies subexpressions in the WHERE clause where -** each subexpression is separated by the AND operator or some other -** operator specified in the op parameter. The WhereClause structure -** is filled with pointers to subexpressions. For example: -** -** WHERE a=='hello' AND coalesce(b,11)<10 AND (c+12!=d OR c==22) -** \________/ \_______________/ \________________/ -** slot[0] slot[1] slot[2] -** -** The original WHERE clause in pExpr is unaltered. All this routine -** does is make slot[] entries point to substructure within pExpr. +** Configure the VM passed as the first argument with an +** sqlite3_stmt_scanstatus() entry corresponding to the scan used to +** implement level pLvl. Argument pSrclist is a pointer to the FROM +** clause that the scan reads data from. ** -** In the previous sentence and in the diagram, "slot[]" refers to -** the WhereClause.a[] array. The slot[] array grows as needed to contain -** all terms of the WHERE clause. +** If argument addrExplain is not 0, it must be the address of an +** OP_Explain instruction that describes the same loop. */ -static void whereSplit(WhereClause *pWC, Expr *pExpr, u8 op){ - pWC->op = op; - if( pExpr==0 ) return; - if( pExpr->op!=op ){ - whereClauseInsert(pWC, pExpr, 0); +SQLITE_PRIVATE void sqlite3WhereAddScanStatus( + Vdbe *v, /* Vdbe to add scanstatus entry to */ + SrcList *pSrclist, /* FROM clause pLvl reads data from */ + WhereLevel *pLvl, /* Level to add scanstatus() entry for */ + int addrExplain /* Address of OP_Explain (or 0) */ +){ + const char *zObj = 0; + WhereLoop *pLoop = pLvl->pWLoop; + if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 && pLoop->u.btree.pIndex!=0 ){ + zObj = pLoop->u.btree.pIndex->zName; }else{ - whereSplit(pWC, pExpr->pLeft, op); - whereSplit(pWC, pExpr->pRight, op); + zObj = pSrclist->a[pLvl->iFrom].zName; } + sqlite3VdbeScanStatus( + v, addrExplain, pLvl->addrBody, pLvl->addrVisit, pLoop->nOut, zObj + ); } +#endif -/* -** Initialize a WhereMaskSet object -*/ -#define initMaskSet(P) (P)->n=0 /* -** Return the bitmask for the given cursor number. Return 0 if -** iCursor is not in the set. +** Disable a term in the WHERE clause. Except, do not disable the term +** if it controls a LEFT OUTER JOIN and it did not originate in the ON +** or USING clause of that join. +** +** Consider the term t2.z='ok' in the following queries: +** +** (1) SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x WHERE t2.z='ok' +** (2) SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x AND t2.z='ok' +** (3) SELECT * FROM t1, t2 WHERE t1.a=t2.x AND t2.z='ok' +** +** The t2.z='ok' is disabled in the in (2) because it originates +** in the ON clause. The term is disabled in (3) because it is not part +** of a LEFT OUTER JOIN. In (1), the term is not disabled. +** +** Disabling a term causes that term to not be tested in the inner loop +** of the join. Disabling is an optimization. When terms are satisfied +** by indices, we disable them to prevent redundant tests in the inner +** loop. We would get the correct results if nothing were ever disabled, +** but joins might run a little slower. The trick is to disable as much +** as we can without disabling too much. If we disabled in (1), we'd get +** the wrong answer. See ticket #813. +** +** If all the children of a term are disabled, then that term is also +** automatically disabled. In this way, terms get disabled if derived +** virtual terms are tested first. For example: +** +** x GLOB 'abc*' AND x>='abc' AND x<'acd' +** \___________/ \______/ \_____/ +** parent child1 child2 +** +** Only the parent term was in the original WHERE clause. The child1 +** and child2 terms were added by the LIKE optimization. If both of +** the virtual child terms are valid, then testing of the parent can be +** skipped. +** +** Usually the parent term is marked as TERM_CODED. But if the parent +** term was originally TERM_LIKE, then the parent gets TERM_LIKECOND instead. +** The TERM_LIKECOND marking indicates that the term should be coded inside +** a conditional such that is only evaluated on the second pass of a +** LIKE-optimization loop, when scanning BLOBs instead of strings. */ -static Bitmask getMask(WhereMaskSet *pMaskSet, int iCursor){ - int i; - assert( pMaskSet->n<=(int)sizeof(Bitmask)*8 ); - for(i=0; in; i++){ - if( pMaskSet->ix[i]==iCursor ){ - return MASKBIT(i); +static void disableTerm(WhereLevel *pLevel, WhereTerm *pTerm){ + int nLoop = 0; + while( pTerm + && (pTerm->wtFlags & TERM_CODED)==0 + && (pLevel->iLeftJoin==0 || ExprHasProperty(pTerm->pExpr, EP_FromJoin)) + && (pLevel->notReady & pTerm->prereqAll)==0 + ){ + if( nLoop && (pTerm->wtFlags & TERM_LIKE)!=0 ){ + pTerm->wtFlags |= TERM_LIKECOND; + }else{ + pTerm->wtFlags |= TERM_CODED; } + if( pTerm->iParent<0 ) break; + pTerm = &pTerm->pWC->a[pTerm->iParent]; + pTerm->nChild--; + if( pTerm->nChild!=0 ) break; + nLoop++; } - return 0; } /* -** Create a new mask for cursor iCursor. +** Code an OP_Affinity opcode to apply the column affinity string zAff +** to the n registers starting at base. ** -** There is one cursor per table in the FROM clause. The number of -** tables in the FROM clause is limited by a test early in the -** sqlite3WhereBegin() routine. So we know that the pMaskSet->ix[] -** array will never overflow. -*/ -static void createMask(WhereMaskSet *pMaskSet, int iCursor){ - assert( pMaskSet->n < ArraySize(pMaskSet->ix) ); - pMaskSet->ix[pMaskSet->n++] = iCursor; -} - -/* -** These routines walk (recursively) an expression tree and generate -** a bitmask indicating which tables are used in that expression -** tree. +** As an optimization, SQLITE_AFF_BLOB entries (which are no-ops) at the +** beginning and end of zAff are ignored. If all entries in zAff are +** SQLITE_AFF_BLOB, then no code gets generated. +** +** This routine makes its own copy of zAff so that the caller is free +** to modify zAff after this routine returns. */ -static Bitmask exprListTableUsage(WhereMaskSet*, ExprList*); -static Bitmask exprSelectTableUsage(WhereMaskSet*, Select*); -static Bitmask exprTableUsage(WhereMaskSet *pMaskSet, Expr *p){ - Bitmask mask = 0; - if( p==0 ) return 0; - if( p->op==TK_COLUMN ){ - mask = getMask(pMaskSet, p->iTable); - return mask; +static void codeApplyAffinity(Parse *pParse, int base, int n, char *zAff){ + Vdbe *v = pParse->pVdbe; + if( zAff==0 ){ + assert( pParse->db->mallocFailed ); + return; } - mask = exprTableUsage(pMaskSet, p->pRight); - mask |= exprTableUsage(pMaskSet, p->pLeft); - if( ExprHasProperty(p, EP_xIsSelect) ){ - mask |= exprSelectTableUsage(pMaskSet, p->x.pSelect); - }else{ - mask |= exprListTableUsage(pMaskSet, p->x.pList); + assert( v!=0 ); + + /* Adjust base and n to skip over SQLITE_AFF_BLOB entries at the beginning + ** and end of the affinity string. + */ + while( n>0 && zAff[0]==SQLITE_AFF_BLOB ){ + n--; + base++; + zAff++; } - return mask; -} -static Bitmask exprListTableUsage(WhereMaskSet *pMaskSet, ExprList *pList){ - int i; - Bitmask mask = 0; - if( pList ){ - for(i=0; inExpr; i++){ - mask |= exprTableUsage(pMaskSet, pList->a[i].pExpr); - } + while( n>1 && zAff[n-1]==SQLITE_AFF_BLOB ){ + n--; } - return mask; -} -static Bitmask exprSelectTableUsage(WhereMaskSet *pMaskSet, Select *pS){ - Bitmask mask = 0; - while( pS ){ - SrcList *pSrc = pS->pSrc; - mask |= exprListTableUsage(pMaskSet, pS->pEList); - mask |= exprListTableUsage(pMaskSet, pS->pGroupBy); - mask |= exprListTableUsage(pMaskSet, pS->pOrderBy); - mask |= exprTableUsage(pMaskSet, pS->pWhere); - mask |= exprTableUsage(pMaskSet, pS->pHaving); - if( ALWAYS(pSrc!=0) ){ - int i; - for(i=0; inSrc; i++){ - mask |= exprSelectTableUsage(pMaskSet, pSrc->a[i].pSelect); - mask |= exprTableUsage(pMaskSet, pSrc->a[i].pOn); - } - } - pS = pS->pPrior; + + /* Code the OP_Affinity opcode if there is anything left to do. */ + if( n>0 ){ + sqlite3VdbeAddOp2(v, OP_Affinity, base, n); + sqlite3VdbeChangeP4(v, -1, zAff, n); + sqlite3ExprCacheAffinityChange(pParse, base, n); } - return mask; } -/* -** Return TRUE if the given operator is one of the operators that is -** allowed for an indexable WHERE clause term. The allowed operators are -** "=", "<", ">", "<=", ">=", "IN", and "IS NULL" -*/ -static int allowedOp(int op){ - assert( TK_GT>TK_EQ && TK_GTTK_EQ && TK_LTTK_EQ && TK_LE=TK_EQ && op<=TK_GE) || op==TK_ISNULL; -} /* -** Commute a comparison operator. Expressions of the form "X op Y" -** are converted into "Y op X". +** Generate code for a single equality term of the WHERE clause. An equality +** term can be either X=expr or X IN (...). pTerm is the term to be +** coded. ** -** If left/right precedence rules come into play when determining the -** collating sequence, then COLLATE operators are adjusted to ensure -** that the collating sequence does not change. For example: -** "Y collate NOCASE op X" becomes "X op Y" because any collation sequence on -** the left hand side of a comparison overrides any collation sequence -** attached to the right. For the same reason the EP_Collate flag -** is not commuted. +** The current value for the constraint is left in register iReg. +** +** For a constraint of the form X=expr, the expression is evaluated and its +** result is left on the stack. For constraints of the form X IN (...) +** this routine sets up a loop that will iterate over all values of X. */ -static void exprCommute(Parse *pParse, Expr *pExpr){ - u16 expRight = (pExpr->pRight->flags & EP_Collate); - u16 expLeft = (pExpr->pLeft->flags & EP_Collate); - assert( allowedOp(pExpr->op) && pExpr->op!=TK_IN ); - if( expRight==expLeft ){ - /* Either X and Y both have COLLATE operator or neither do */ - if( expRight ){ - /* Both X and Y have COLLATE operators. Make sure X is always - ** used by clearing the EP_Collate flag from Y. */ - pExpr->pRight->flags &= ~EP_Collate; - }else if( sqlite3ExprCollSeq(pParse, pExpr->pLeft)!=0 ){ - /* Neither X nor Y have COLLATE operators, but X has a non-default - ** collating sequence. So add the EP_Collate marker on X to cause - ** it to be searched first. */ - pExpr->pLeft->flags |= EP_Collate; - } - } - SWAP(Expr*,pExpr->pRight,pExpr->pLeft); - if( pExpr->op>=TK_GT ){ - assert( TK_LT==TK_GT+2 ); - assert( TK_GE==TK_LE+2 ); - assert( TK_GT>TK_EQ ); - assert( TK_GTop>=TK_GT && pExpr->op<=TK_GE ); - pExpr->op = ((pExpr->op-TK_GT)^2)+TK_GT; - } -} +static int codeEqualityTerm( + Parse *pParse, /* The parsing context */ + WhereTerm *pTerm, /* The term of the WHERE clause to be coded */ + WhereLevel *pLevel, /* The level of the FROM clause we are working on */ + int iEq, /* Index of the equality term within this level */ + int bRev, /* True for reverse-order IN operations */ + int iTarget /* Attempt to leave results in this register */ +){ + Expr *pX = pTerm->pExpr; + Vdbe *v = pParse->pVdbe; + int iReg; /* Register holding results */ -/* -** Translate from TK_xx operator to WO_xx bitmask. -*/ -static u16 operatorMask(int op){ - u16 c; - assert( allowedOp(op) ); - if( op==TK_IN ){ - c = WO_IN; - }else if( op==TK_ISNULL ){ - c = WO_ISNULL; + assert( iTarget>0 ); + if( pX->op==TK_EQ || pX->op==TK_IS ){ + iReg = sqlite3ExprCodeTarget(pParse, pX->pRight, iTarget); + }else if( pX->op==TK_ISNULL ){ + iReg = iTarget; + sqlite3VdbeAddOp2(v, OP_Null, 0, iReg); +#ifndef SQLITE_OMIT_SUBQUERY }else{ - assert( (WO_EQ<<(op-TK_EQ)) < 0x7fff ); - c = (u16)(WO_EQ<<(op-TK_EQ)); - } - assert( op!=TK_ISNULL || c==WO_ISNULL ); - assert( op!=TK_IN || c==WO_IN ); - assert( op!=TK_EQ || c==WO_EQ ); - assert( op!=TK_LT || c==WO_LT ); - assert( op!=TK_LE || c==WO_LE ); - assert( op!=TK_GT || c==WO_GT ); - assert( op!=TK_GE || c==WO_GE ); - return c; -} - -/* -** Advance to the next WhereTerm that matches according to the criteria -** established when the pScan object was initialized by whereScanInit(). -** Return NULL if there are no more matching WhereTerms. -*/ -static WhereTerm *whereScanNext(WhereScan *pScan){ - int iCur; /* The cursor on the LHS of the term */ - int iColumn; /* The column on the LHS of the term. -1 for IPK */ - Expr *pX; /* An expression being tested */ - WhereClause *pWC; /* Shorthand for pScan->pWC */ - WhereTerm *pTerm; /* The term being tested */ - int k = pScan->k; /* Where to start scanning */ + int eType; + int iTab; + struct InLoop *pIn; + WhereLoop *pLoop = pLevel->pWLoop; - while( pScan->iEquiv<=pScan->nEquiv ){ - iCur = pScan->aEquiv[pScan->iEquiv-2]; - iColumn = pScan->aEquiv[pScan->iEquiv-1]; - while( (pWC = pScan->pWC)!=0 ){ - for(pTerm=pWC->a+k; knTerm; k++, pTerm++){ - if( pTerm->leftCursor==iCur - && pTerm->u.leftColumn==iColumn - && (pScan->iEquiv<=2 || !ExprHasProperty(pTerm->pExpr, EP_FromJoin)) - ){ - if( (pTerm->eOperator & WO_EQUIV)!=0 - && pScan->nEquivaEquiv) - ){ - int j; - pX = sqlite3ExprSkipCollate(pTerm->pExpr->pRight); - assert( pX->op==TK_COLUMN ); - for(j=0; jnEquiv; j+=2){ - if( pScan->aEquiv[j]==pX->iTable - && pScan->aEquiv[j+1]==pX->iColumn ){ - break; - } - } - if( j==pScan->nEquiv ){ - pScan->aEquiv[j] = pX->iTable; - pScan->aEquiv[j+1] = pX->iColumn; - pScan->nEquiv += 2; - } - } - if( (pTerm->eOperator & pScan->opMask)!=0 ){ - /* Verify the affinity and collating sequence match */ - if( pScan->zCollName && (pTerm->eOperator & WO_ISNULL)==0 ){ - CollSeq *pColl; - Parse *pParse = pWC->pWInfo->pParse; - pX = pTerm->pExpr; - if( !sqlite3IndexAffinityOk(pX, pScan->idxaff) ){ - continue; - } - assert(pX->pLeft); - pColl = sqlite3BinaryCompareCollSeq(pParse, - pX->pLeft, pX->pRight); - if( pColl==0 ) pColl = pParse->db->pDfltColl; - if( sqlite3StrICmp(pColl->zName, pScan->zCollName) ){ - continue; - } - } - if( (pTerm->eOperator & WO_EQ)!=0 - && (pX = pTerm->pExpr->pRight)->op==TK_COLUMN - && pX->iTable==pScan->aEquiv[0] - && pX->iColumn==pScan->aEquiv[1] - ){ - continue; - } - pScan->k = k+1; - return pTerm; - } - } + if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 + && pLoop->u.btree.pIndex!=0 + && pLoop->u.btree.pIndex->aSortOrder[iEq] + ){ + testcase( iEq==0 ); + testcase( bRev ); + bRev = !bRev; + } + assert( pX->op==TK_IN ); + iReg = iTarget; + eType = sqlite3FindInIndex(pParse, pX, IN_INDEX_LOOP, 0); + if( eType==IN_INDEX_INDEX_DESC ){ + testcase( bRev ); + bRev = !bRev; + } + iTab = pX->iTable; + sqlite3VdbeAddOp2(v, bRev ? OP_Last : OP_Rewind, iTab, 0); + VdbeCoverageIf(v, bRev); + VdbeCoverageIf(v, !bRev); + assert( (pLoop->wsFlags & WHERE_MULTI_OR)==0 ); + pLoop->wsFlags |= WHERE_IN_ABLE; + if( pLevel->u.in.nIn==0 ){ + pLevel->addrNxt = sqlite3VdbeMakeLabel(v); + } + pLevel->u.in.nIn++; + pLevel->u.in.aInLoop = + sqlite3DbReallocOrFree(pParse->db, pLevel->u.in.aInLoop, + sizeof(pLevel->u.in.aInLoop[0])*pLevel->u.in.nIn); + pIn = pLevel->u.in.aInLoop; + if( pIn ){ + pIn += pLevel->u.in.nIn - 1; + pIn->iCur = iTab; + if( eType==IN_INDEX_ROWID ){ + pIn->addrInTop = sqlite3VdbeAddOp2(v, OP_Rowid, iTab, iReg); + }else{ + pIn->addrInTop = sqlite3VdbeAddOp3(v, OP_Column, iTab, 0, iReg); } - pScan->pWC = pScan->pWC->pOuter; - k = 0; + pIn->eEndLoopOp = bRev ? OP_PrevIfOpen : OP_NextIfOpen; + sqlite3VdbeAddOp1(v, OP_IsNull, iReg); VdbeCoverage(v); + }else{ + pLevel->u.in.nIn = 0; } - pScan->pWC = pScan->pOrigWC; - k = 0; - pScan->iEquiv += 2; +#endif } - return 0; + disableTerm(pLevel, pTerm); + return iReg; } /* -** Initialize a WHERE clause scanner object. Return a pointer to the -** first match. Return NULL if there are no matches. +** Generate code that will evaluate all == and IN constraints for an +** index scan. ** -** The scanner will be searching the WHERE clause pWC. It will look -** for terms of the form "X " where X is column iColumn of table -** iCur. The must be one of the operators described by opMask. +** For example, consider table t1(a,b,c,d,e,f) with index i1(a,b,c). +** Suppose the WHERE clause is this: a==5 AND b IN (1,2,3) AND c>5 AND c<10 +** The index has as many as three equality constraints, but in this +** example, the third "c" value is an inequality. So only two +** constraints are coded. This routine will generate code to evaluate +** a==5 and b IN (1,2,3). The current values for a and b will be stored +** in consecutive registers and the index of the first register is returned. ** -** If the search is for X and the WHERE clause contains terms of the -** form X=Y then this routine might also return terms of the form -** "Y ". The number of levels of transitivity is limited, -** but is enough to handle most commonly occurring SQL statements. +** In the example above nEq==2. But this subroutine works for any value +** of nEq including 0. If nEq==0, this routine is nearly a no-op. +** The only thing it does is allocate the pLevel->iMem memory cell and +** compute the affinity string. ** -** If X is not the INTEGER PRIMARY KEY then X must be compatible with -** index pIdx. -*/ -static WhereTerm *whereScanInit( - WhereScan *pScan, /* The WhereScan object being initialized */ - WhereClause *pWC, /* The WHERE clause to be scanned */ - int iCur, /* Cursor to scan for */ - int iColumn, /* Column to scan for */ - u32 opMask, /* Operator(s) to scan for */ - Index *pIdx /* Must be compatible with this index */ -){ - int j; - - /* memset(pScan, 0, sizeof(*pScan)); */ - pScan->pOrigWC = pWC; - pScan->pWC = pWC; - if( pIdx && iColumn>=0 ){ - pScan->idxaff = pIdx->pTable->aCol[iColumn].affinity; - for(j=0; pIdx->aiColumn[j]!=iColumn; j++){ - if( NEVER(j>pIdx->nColumn) ) return 0; - } - pScan->zCollName = pIdx->azColl[j]; - }else{ - pScan->idxaff = 0; - pScan->zCollName = 0; - } - pScan->opMask = opMask; - pScan->k = 0; - pScan->aEquiv[0] = iCur; - pScan->aEquiv[1] = iColumn; - pScan->nEquiv = 2; - pScan->iEquiv = 2; - return whereScanNext(pScan); -} - -/* -** Search for a term in the WHERE clause that is of the form "X " -** where X is a reference to the iColumn of table iCur and is one of -** the WO_xx operator codes specified by the op parameter. -** Return a pointer to the term. Return 0 if not found. +** The nExtraReg parameter is 0 or 1. It is 0 if all WHERE clause constraints +** are == or IN and are covered by the nEq. nExtraReg is 1 if there is +** an inequality constraint (such as the "c>=5 AND c<10" in the example) that +** occurs after the nEq quality constraints. ** -** The term returned might by Y= if there is another constraint in -** the WHERE clause that specifies that X=Y. Any such constraints will be -** identified by the WO_EQUIV bit in the pTerm->eOperator field. The -** aEquiv[] array holds X and all its equivalents, with each SQL variable -** taking up two slots in aEquiv[]. The first slot is for the cursor number -** and the second is for the column number. There are 22 slots in aEquiv[] -** so that means we can look for X plus up to 10 other equivalent values. -** Hence a search for X will return if X=A1 and A1=A2 and A2=A3 -** and ... and A9=A10 and A10=. +** This routine allocates a range of nEq+nExtraReg memory cells and returns +** the index of the first memory cell in that range. The code that +** calls this routine will use that memory range to store keys for +** start and termination conditions of the loop. +** key value of the loop. If one or more IN operators appear, then +** this routine allocates an additional nEq memory cells for internal +** use. ** -** If there are multiple terms in the WHERE clause of the form "X " -** then try for the one with no dependencies on - in other words where -** is a constant expression of some kind. Only return entries of -** the form "X Y" where Y is a column in another table if no terms of -** the form "X " exist. If no terms with a constant RHS -** exist, try to return a term that does not use WO_EQUIV. +** Before returning, *pzAff is set to point to a buffer containing a +** copy of the column affinity string of the index allocated using +** sqlite3DbMalloc(). Except, entries in the copy of the string associated +** with equality constraints that use BLOB or NONE affinity are set to +** SQLITE_AFF_BLOB. This is to deal with SQL such as the following: +** +** CREATE TABLE t1(a TEXT PRIMARY KEY, b); +** SELECT ... FROM t1 AS t2, t1 WHERE t1.a = t2.b; +** +** In the example above, the index on t1(a) has TEXT affinity. But since +** the right hand side of the equality constraint (t2.b) has BLOB/NONE affinity, +** no conversion should be attempted before using a t2.b value as part of +** a key to search the index. Hence the first byte in the returned affinity +** string in this example would be set to SQLITE_AFF_BLOB. */ -static WhereTerm *findTerm( - WhereClause *pWC, /* The WHERE clause to be searched */ - int iCur, /* Cursor number of LHS */ - int iColumn, /* Column number of LHS */ - Bitmask notReady, /* RHS must not overlap with this mask */ - u32 op, /* Mask of WO_xx values describing operator */ - Index *pIdx /* Must be compatible with this index, if not NULL */ +static int codeAllEqualityTerms( + Parse *pParse, /* Parsing context */ + WhereLevel *pLevel, /* Which nested loop of the FROM we are coding */ + int bRev, /* Reverse the order of IN operators */ + int nExtraReg, /* Number of extra registers to allocate */ + char **pzAff /* OUT: Set to point to affinity string */ ){ - WhereTerm *pResult = 0; - WhereTerm *p; - WhereScan scan; + u16 nEq; /* The number of == or IN constraints to code */ + u16 nSkip; /* Number of left-most columns to skip */ + Vdbe *v = pParse->pVdbe; /* The vm under construction */ + Index *pIdx; /* The index being used for this loop */ + WhereTerm *pTerm; /* A single constraint term */ + WhereLoop *pLoop; /* The WhereLoop object */ + int j; /* Loop counter */ + int regBase; /* Base register */ + int nReg; /* Number of registers to allocate */ + char *zAff; /* Affinity string to return */ - p = whereScanInit(&scan, pWC, iCur, iColumn, op, pIdx); - while( p ){ - if( (p->prereqRight & notReady)==0 ){ - if( p->prereqRight==0 && (p->eOperator&WO_EQ)!=0 ){ - return p; - } - if( pResult==0 ) pResult = p; - } - p = whereScanNext(&scan); - } - return pResult; -} + /* This module is only called on query plans that use an index. */ + pLoop = pLevel->pWLoop; + assert( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 ); + nEq = pLoop->u.btree.nEq; + nSkip = pLoop->nSkip; + pIdx = pLoop->u.btree.pIndex; + assert( pIdx!=0 ); -/* Forward reference */ -static void exprAnalyze(SrcList*, WhereClause*, int); - -/* -** Call exprAnalyze on all terms in a WHERE clause. -*/ -static void exprAnalyzeAll( - SrcList *pTabList, /* the FROM clause */ - WhereClause *pWC /* the WHERE clause to be analyzed */ -){ - int i; - for(i=pWC->nTerm-1; i>=0; i--){ - exprAnalyze(pTabList, pWC, i); - } -} - -#ifndef SQLITE_OMIT_LIKE_OPTIMIZATION -/* -** Check to see if the given expression is a LIKE or GLOB operator that -** can be optimized using inequality constraints. Return TRUE if it is -** so and false if not. -** -** In order for the operator to be optimizible, the RHS must be a string -** literal that does not begin with a wildcard. The LHS must be a column -** that may only be NULL, a string, or a BLOB, never a number. (This means -** that virtual tables cannot participate in the LIKE optimization.) If the -** collating sequence for the column on the LHS must be appropriate for -** the operator. -*/ -static int isLikeOrGlob( - Parse *pParse, /* Parsing and code generating context */ - Expr *pExpr, /* Test this expression */ - Expr **ppPrefix, /* Pointer to TK_STRING expression with pattern prefix */ - int *pisComplete, /* True if the only wildcard is % in the last character */ - int *pnoCase /* True if uppercase is equivalent to lowercase */ -){ - const char *z = 0; /* String on RHS of LIKE operator */ - Expr *pRight, *pLeft; /* Right and left size of LIKE operator */ - ExprList *pList; /* List of operands to the LIKE operator */ - int c; /* One character in z[] */ - int cnt; /* Number of non-wildcard prefix characters */ - char wc[3]; /* Wildcard characters */ - sqlite3 *db = pParse->db; /* Database connection */ - sqlite3_value *pVal = 0; - int op; /* Opcode of pRight */ + /* Figure out how many memory cells we will need then allocate them. + */ + regBase = pParse->nMem + 1; + nReg = pLoop->u.btree.nEq + nExtraReg; + pParse->nMem += nReg; - if( !sqlite3IsLikeFunction(db, pExpr, pnoCase, wc) ){ - return 0; - } -#ifdef SQLITE_EBCDIC - if( *pnoCase ) return 0; -#endif - pList = pExpr->x.pList; - pLeft = pList->a[1].pExpr; - if( pLeft->op!=TK_COLUMN - || sqlite3ExprAffinity(pLeft)!=SQLITE_AFF_TEXT - || IsVirtual(pLeft->pTab) /* Value might be numeric */ - ){ - /* IMP: R-02065-49465 The left-hand side of the LIKE or GLOB operator must - ** be the name of an indexed column with TEXT affinity. */ - return 0; + zAff = sqlite3DbStrDup(pParse->db, sqlite3IndexAffinityStr(v, pIdx)); + if( !zAff ){ + pParse->db->mallocFailed = 1; } - assert( pLeft->iColumn!=(-1) ); /* Because IPK never has AFF_TEXT */ - pRight = sqlite3ExprSkipCollate(pList->a[0].pExpr); - op = pRight->op; - if( op==TK_VARIABLE ){ - Vdbe *pReprepare = pParse->pReprepare; - int iCol = pRight->iColumn; - pVal = sqlite3VdbeGetBoundValue(pReprepare, iCol, SQLITE_AFF_NONE); - if( pVal && sqlite3_value_type(pVal)==SQLITE_TEXT ){ - z = (char *)sqlite3_value_text(pVal); + if( nSkip ){ + int iIdxCur = pLevel->iIdxCur; + sqlite3VdbeAddOp1(v, (bRev?OP_Last:OP_Rewind), iIdxCur); + VdbeCoverageIf(v, bRev==0); + VdbeCoverageIf(v, bRev!=0); + VdbeComment((v, "begin skip-scan on %s", pIdx->zName)); + j = sqlite3VdbeAddOp0(v, OP_Goto); + pLevel->addrSkip = sqlite3VdbeAddOp4Int(v, (bRev?OP_SeekLT:OP_SeekGT), + iIdxCur, 0, regBase, nSkip); + VdbeCoverageIf(v, bRev==0); + VdbeCoverageIf(v, bRev!=0); + sqlite3VdbeJumpHere(v, j); + for(j=0; jaiColumn[j]>=0 ); + VdbeComment((v, "%s", pIdx->pTable->aCol[pIdx->aiColumn[j]].zName)); } - sqlite3VdbeSetVarmask(pParse->pVdbe, iCol); - assert( pRight->op==TK_VARIABLE || pRight->op==TK_REGISTER ); - }else if( op==TK_STRING ){ - z = pRight->u.zToken; - } - if( z ){ - cnt = 0; - while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){ - cnt++; + } + + /* Evaluate the equality constraints + */ + assert( zAff==0 || (int)strlen(zAff)>=nEq ); + for(j=nSkip; jaLTerm[j]; + assert( pTerm!=0 ); + /* The following testcase is true for indices with redundant columns. + ** Ex: CREATE INDEX i1 ON t1(a,b,a); SELECT * FROM t1 WHERE a=0 AND b=0; */ + testcase( (pTerm->wtFlags & TERM_CODED)!=0 ); + testcase( pTerm->wtFlags & TERM_VIRTUAL ); + r1 = codeEqualityTerm(pParse, pTerm, pLevel, j, bRev, regBase+j); + if( r1!=regBase+j ){ + if( nReg==1 ){ + sqlite3ReleaseTempReg(pParse, regBase); + regBase = r1; + }else{ + sqlite3VdbeAddOp2(v, OP_SCopy, r1, regBase+j); + } } - if( cnt!=0 && 255!=(u8)z[cnt-1] ){ - Expr *pPrefix; - *pisComplete = c==wc[0] && z[cnt+1]==0; - pPrefix = sqlite3Expr(db, TK_STRING, z); - if( pPrefix ) pPrefix->u.zToken[cnt] = 0; - *ppPrefix = pPrefix; - if( op==TK_VARIABLE ){ - Vdbe *v = pParse->pVdbe; - sqlite3VdbeSetVarmask(v, pRight->iColumn); - if( *pisComplete && pRight->u.zToken[1] ){ - /* If the rhs of the LIKE expression is a variable, and the current - ** value of the variable means there is no need to invoke the LIKE - ** function, then no OP_Variable will be added to the program. - ** This causes problems for the sqlite3_bind_parameter_name() - ** API. To work around them, add a dummy OP_Variable here. - */ - int r1 = sqlite3GetTempReg(pParse); - sqlite3ExprCodeTarget(pParse, pRight, r1); - sqlite3VdbeChangeP3(v, sqlite3VdbeCurrentAddr(v)-1, 0); - sqlite3ReleaseTempReg(pParse, r1); + testcase( pTerm->eOperator & WO_ISNULL ); + testcase( pTerm->eOperator & WO_IN ); + if( (pTerm->eOperator & (WO_ISNULL|WO_IN))==0 ){ + Expr *pRight = pTerm->pExpr->pRight; + if( (pTerm->wtFlags & TERM_IS)==0 && sqlite3ExprCanBeNull(pRight) ){ + sqlite3VdbeAddOp2(v, OP_IsNull, regBase+j, pLevel->addrBrk); + VdbeCoverage(v); + } + if( zAff ){ + if( sqlite3CompareAffinity(pRight, zAff[j])==SQLITE_AFF_BLOB ){ + zAff[j] = SQLITE_AFF_BLOB; + } + if( sqlite3ExprNeedsNoAffinityChange(pRight, zAff[j]) ){ + zAff[j] = SQLITE_AFF_BLOB; } } - }else{ - z = 0; } } - - sqlite3ValueFree(pVal); - return (z!=0); + *pzAff = zAff; + return regBase; } -#endif /* SQLITE_OMIT_LIKE_OPTIMIZATION */ - -#ifndef SQLITE_OMIT_VIRTUALTABLE /* -** Check to see if the given expression is of the form -** -** column MATCH expr +** If the most recently coded instruction is a constant range contraint +** that originated from the LIKE optimization, then change the P3 to be +** pLoop->iLikeRepCntr and set P5. ** -** If it is then return TRUE. If not, return FALSE. +** The LIKE optimization trys to evaluate "x LIKE 'abc%'" as a range +** expression: "x>='ABC' AND x<'abd'". But this requires that the range +** scan loop run twice, once for strings and a second time for BLOBs. +** The OP_String opcodes on the second pass convert the upper and lower +** bound string contants to blobs. This routine makes the necessary changes +** to the OP_String opcodes for that to happen. */ -static int isMatchOfColumn( - Expr *pExpr /* Test this expression */ +static void whereLikeOptimizationStringFixup( + Vdbe *v, /* prepared statement under construction */ + WhereLevel *pLevel, /* The loop that contains the LIKE operator */ + WhereTerm *pTerm /* The upper or lower bound just coded */ ){ - ExprList *pList; - - if( pExpr->op!=TK_FUNCTION ){ - return 0; - } - if( sqlite3StrICmp(pExpr->u.zToken,"match")!=0 ){ - return 0; - } - pList = pExpr->x.pList; - if( pList->nExpr!=2 ){ - return 0; - } - if( pList->a[1].pExpr->op != TK_COLUMN ){ - return 0; + if( pTerm->wtFlags & TERM_LIKEOPT ){ + VdbeOp *pOp; + assert( pLevel->iLikeRepCntr>0 ); + pOp = sqlite3VdbeGetOp(v, -1); + assert( pOp!=0 ); + assert( pOp->opcode==OP_String8 + || pTerm->pWC->pWInfo->pParse->db->mallocFailed ); + pOp->p3 = pLevel->iLikeRepCntr; + pOp->p5 = 1; } - return 1; } -#endif /* SQLITE_OMIT_VIRTUALTABLE */ -/* -** If the pBase expression originated in the ON or USING clause of -** a join, then transfer the appropriate markings over to derived. -*/ -static void transferJoinMarkings(Expr *pDerived, Expr *pBase){ - if( pDerived ){ - pDerived->flags |= pBase->flags & EP_FromJoin; - pDerived->iRightJoinTable = pBase->iRightJoinTable; - } -} /* -** Mark term iChild as being a child of term iParent +** Generate code for the start of the iLevel-th loop in the WHERE clause +** implementation described by pWInfo. */ -static void markTermAsChild(WhereClause *pWC, int iChild, int iParent){ - pWC->a[iChild].iParent = iParent; - pWC->a[iChild].truthProb = pWC->a[iParent].truthProb; - pWC->a[iParent].nChild++; -} +SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( + WhereInfo *pWInfo, /* Complete information about the WHERE clause */ + int iLevel, /* Which level of pWInfo->a[] should be coded */ + Bitmask notReady /* Which tables are currently available */ +){ + int j, k; /* Loop counters */ + int iCur; /* The VDBE cursor for the table */ + int addrNxt; /* Where to jump to continue with the next IN case */ + int omitTable; /* True if we use the index only */ + int bRev; /* True if we need to scan in reverse order */ + WhereLevel *pLevel; /* The where level to be coded */ + WhereLoop *pLoop; /* The WhereLoop object being coded */ + WhereClause *pWC; /* Decomposition of the entire WHERE clause */ + WhereTerm *pTerm; /* A WHERE clause term */ + Parse *pParse; /* Parsing context */ + sqlite3 *db; /* Database connection */ + Vdbe *v; /* The prepared stmt under constructions */ + struct SrcList_item *pTabItem; /* FROM clause term being coded */ + int addrBrk; /* Jump here to break out of the loop */ + int addrCont; /* Jump here to continue with next cycle */ + int iRowidReg = 0; /* Rowid is stored in this register, if not zero */ + int iReleaseReg = 0; /* Temp register to free before returning */ -/* -** Return the N-th AND-connected subterm of pTerm. Or if pTerm is not -** a conjunction, then return just pTerm when N==0. If N is exceeds -** the number of available subterms, return NULL. -*/ -static WhereTerm *whereNthSubterm(WhereTerm *pTerm, int N){ - if( pTerm->eOperator!=WO_AND ){ - return N==0 ? pTerm : 0; - } - if( Nu.pAndInfo->wc.nTerm ){ - return &pTerm->u.pAndInfo->wc.a[N]; - } - return 0; -} + pParse = pWInfo->pParse; + v = pParse->pVdbe; + pWC = &pWInfo->sWC; + db = pParse->db; + pLevel = &pWInfo->a[iLevel]; + pLoop = pLevel->pWLoop; + pTabItem = &pWInfo->pTabList->a[pLevel->iFrom]; + iCur = pTabItem->iCursor; + pLevel->notReady = notReady & ~sqlite3WhereGetMask(&pWInfo->sMaskSet, iCur); + bRev = (pWInfo->revMask>>iLevel)&1; + omitTable = (pLoop->wsFlags & WHERE_IDX_ONLY)!=0 + && (pWInfo->wctrlFlags & WHERE_FORCE_TABLE)==0; + VdbeModuleComment((v, "Begin WHERE-loop%d: %s",iLevel,pTabItem->pTab->zName)); -/* -** Subterms pOne and pTwo are contained within WHERE clause pWC. The -** two subterms are in disjunction - they are OR-ed together. -** -** If these two terms are both of the form: "A op B" with the same -** A and B values but different operators and if the operators are -** compatible (if one is = and the other is <, for example) then -** add a new virtual AND term to pWC that is the combination of the -** two. -** -** Some examples: -** -** x x<=y -** x=y OR x=y --> x=y -** x<=y OR x x<=y -** -** The following is NOT generated: -** -** xy --> x!=y -*/ -static void whereCombineDisjuncts( - SrcList *pSrc, /* the FROM clause */ - WhereClause *pWC, /* The complete WHERE clause */ - WhereTerm *pOne, /* First disjunct */ - WhereTerm *pTwo /* Second disjunct */ -){ - u16 eOp = pOne->eOperator | pTwo->eOperator; - sqlite3 *db; /* Database connection (for malloc) */ - Expr *pNew; /* New virtual expression */ - int op; /* Operator for the combined expression */ - int idxNew; /* Index in pWC of the next virtual term */ + /* Create labels for the "break" and "continue" instructions + ** for the current loop. Jump to addrBrk to break out of a loop. + ** Jump to cont to go immediately to the next iteration of the + ** loop. + ** + ** When there is an IN operator, we also have a "addrNxt" label that + ** means to continue with the next IN value combination. When + ** there are no IN operators in the constraints, the "addrNxt" label + ** is the same as "addrBrk". + */ + addrBrk = pLevel->addrBrk = pLevel->addrNxt = sqlite3VdbeMakeLabel(v); + addrCont = pLevel->addrCont = sqlite3VdbeMakeLabel(v); - if( (pOne->eOperator & (WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE))==0 ) return; - if( (pTwo->eOperator & (WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE))==0 ) return; - if( (eOp & (WO_EQ|WO_LT|WO_LE))!=eOp - && (eOp & (WO_EQ|WO_GT|WO_GE))!=eOp ) return; - assert( pOne->pExpr->pLeft!=0 && pOne->pExpr->pRight!=0 ); - assert( pTwo->pExpr->pLeft!=0 && pTwo->pExpr->pRight!=0 ); - if( sqlite3ExprCompare(pOne->pExpr->pLeft, pTwo->pExpr->pLeft, -1) ) return; - if( sqlite3ExprCompare(pOne->pExpr->pRight, pTwo->pExpr->pRight, -1) )return; - /* If we reach this point, it means the two subterms can be combined */ - if( (eOp & (eOp-1))!=0 ){ - if( eOp & (WO_LT|WO_LE) ){ - eOp = WO_LE; - }else{ - assert( eOp & (WO_GT|WO_GE) ); - eOp = WO_GE; - } + /* If this is the right table of a LEFT OUTER JOIN, allocate and + ** initialize a memory cell that records if this table matches any + ** row of the left table of the join. + */ + if( pLevel->iFrom>0 && (pTabItem[0].jointype & JT_LEFT)!=0 ){ + pLevel->iLeftJoin = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Integer, 0, pLevel->iLeftJoin); + VdbeComment((v, "init LEFT JOIN no-match flag")); } - db = pWC->pWInfo->pParse->db; - pNew = sqlite3ExprDup(db, pOne->pExpr, 0); - if( pNew==0 ) return; - for(op=TK_EQ; eOp!=(WO_EQ<<(op-TK_EQ)); op++){ assert( opop = op; - idxNew = whereClauseInsert(pWC, pNew, TERM_VIRTUAL|TERM_DYNAMIC); - exprAnalyze(pSrc, pWC, idxNew); -} -#if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY) -/* -** Analyze a term that consists of two or more OR-connected -** subterms. So in: -** -** ... WHERE (a=5) AND (b=7 OR c=9 OR d=13) AND (d=13) -** ^^^^^^^^^^^^^^^^^^^^ -** -** This routine analyzes terms such as the middle term in the above example. -** A WhereOrTerm object is computed and attached to the term under -** analysis, regardless of the outcome of the analysis. Hence: -** -** WhereTerm.wtFlags |= TERM_ORINFO -** WhereTerm.u.pOrInfo = a dynamically allocated WhereOrTerm object -** -** The term being analyzed must have two or more of OR-connected subterms. -** A single subterm might be a set of AND-connected sub-subterms. -** Examples of terms under analysis: -** -** (A) t1.x=t2.y OR t1.x=t2.z OR t1.y=15 OR t1.z=t3.a+5 -** (B) x=expr1 OR expr2=x OR x=expr3 -** (C) t1.x=t2.y OR (t1.x=t2.z AND t1.y=15) -** (D) x=expr1 OR (y>11 AND y<22 AND z LIKE '*hello*') -** (E) (p.a=1 AND q.b=2 AND r.c=3) OR (p.x=4 AND q.y=5 AND r.z=6) -** (F) x>A OR (x=A AND y>=B) -** -** CASE 1: -** -** If all subterms are of the form T.C=expr for some single column of C and -** a single table T (as shown in example B above) then create a new virtual -** term that is an equivalent IN expression. In other words, if the term -** being analyzed is: -** -** x = expr1 OR expr2 = x OR x = expr3 -** -** then create a new virtual term like this: -** -** x IN (expr1,expr2,expr3) -** -** CASE 2: -** -** If there are exactly two disjuncts one side has x>A and the other side -** has x=A (for the same x and A) then add a new virtual conjunct term to the -** WHERE clause of the form "x>=A". Example: -** -** x>A OR (x=A AND y>B) adds: x>=A -** -** The added conjunct can sometimes be helpful in query planning. -** -** CASE 3: -** -** If all subterms are indexable by a single table T, then set -** -** WhereTerm.eOperator = WO_OR -** WhereTerm.u.pOrInfo->indexable |= the cursor number for table T -** -** A subterm is "indexable" if it is of the form -** "T.C " where C is any column of table T and -** is one of "=", "<", "<=", ">", ">=", "IS NULL", or "IN". -** A subterm is also indexable if it is an AND of two or more -** subsubterms at least one of which is indexable. Indexable AND -** subterms have their eOperator set to WO_AND and they have -** u.pAndInfo set to a dynamically allocated WhereAndTerm object. -** -** From another point of view, "indexable" means that the subterm could -** potentially be used with an index if an appropriate index exists. -** This analysis does not consider whether or not the index exists; that -** is decided elsewhere. This analysis only looks at whether subterms -** appropriate for indexing exist. -** -** All examples A through E above satisfy case 2. But if a term -** also satisfies case 1 (such as B) we know that the optimizer will -** always prefer case 1, so in that case we pretend that case 2 is not -** satisfied. -** -** It might be the case that multiple tables are indexable. For example, -** (E) above is indexable on tables P, Q, and R. -** -** Terms that satisfy case 2 are candidates for lookup by using -** separate indices to find rowids for each subterm and composing -** the union of all rowids using a RowSet object. This is similar -** to "bitmap indices" in other database engines. -** -** OTHERWISE: -** -** If neither case 1 nor case 2 apply, then leave the eOperator set to -** zero. This term is not useful for search. -*/ -static void exprAnalyzeOrTerm( - SrcList *pSrc, /* the FROM clause */ - WhereClause *pWC, /* the complete WHERE clause */ - int idxTerm /* Index of the OR-term to be analyzed */ -){ - WhereInfo *pWInfo = pWC->pWInfo; /* WHERE clause processing context */ - Parse *pParse = pWInfo->pParse; /* Parser context */ - sqlite3 *db = pParse->db; /* Database connection */ - WhereTerm *pTerm = &pWC->a[idxTerm]; /* The term to be analyzed */ - Expr *pExpr = pTerm->pExpr; /* The expression of the term */ - int i; /* Loop counters */ - WhereClause *pOrWc; /* Breakup of pTerm into subterms */ - WhereTerm *pOrTerm; /* A Sub-term within the pOrWc */ - WhereOrInfo *pOrInfo; /* Additional information associated with pTerm */ - Bitmask chngToIN; /* Tables that might satisfy case 1 */ - Bitmask indexable; /* Tables that are indexable, satisfying case 2 */ + /* Special case of a FROM clause subquery implemented as a co-routine */ + if( pTabItem->viaCoroutine ){ + int regYield = pTabItem->regReturn; + sqlite3VdbeAddOp3(v, OP_InitCoroutine, regYield, 0, pTabItem->addrFillSub); + pLevel->p2 = sqlite3VdbeAddOp2(v, OP_Yield, regYield, addrBrk); + VdbeCoverage(v); + VdbeComment((v, "next row of \"%s\"", pTabItem->pTab->zName)); + pLevel->op = OP_Goto; + }else - /* - ** Break the OR clause into its separate subterms. The subterms are - ** stored in a WhereClause structure containing within the WhereOrInfo - ** object that is attached to the original OR clause term. - */ - assert( (pTerm->wtFlags & (TERM_DYNAMIC|TERM_ORINFO|TERM_ANDINFO))==0 ); - assert( pExpr->op==TK_OR ); - pTerm->u.pOrInfo = pOrInfo = sqlite3DbMallocZero(db, sizeof(*pOrInfo)); - if( pOrInfo==0 ) return; - pTerm->wtFlags |= TERM_ORINFO; - pOrWc = &pOrInfo->wc; - whereClauseInit(pOrWc, pWInfo); - whereSplit(pOrWc, pExpr, TK_OR); - exprAnalyzeAll(pSrc, pOrWc); - if( db->mallocFailed ) return; - assert( pOrWc->nTerm>=2 ); +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)!=0 ){ + /* Case 1: The table is a virtual-table. Use the VFilter and VNext + ** to access the data. + */ + int iReg; /* P3 Value for OP_VFilter */ + int addrNotFound; + int nConstraint = pLoop->nLTerm; - /* - ** Compute the set of tables that might satisfy cases 1 or 2. - */ - indexable = ~(Bitmask)0; - chngToIN = ~(Bitmask)0; - for(i=pOrWc->nTerm-1, pOrTerm=pOrWc->a; i>=0 && indexable; i--, pOrTerm++){ - if( (pOrTerm->eOperator & WO_SINGLE)==0 ){ - WhereAndInfo *pAndInfo; - assert( (pOrTerm->wtFlags & (TERM_ANDINFO|TERM_ORINFO))==0 ); - chngToIN = 0; - pAndInfo = sqlite3DbMallocRaw(db, sizeof(*pAndInfo)); - if( pAndInfo ){ - WhereClause *pAndWC; - WhereTerm *pAndTerm; - int j; - Bitmask b = 0; - pOrTerm->u.pAndInfo = pAndInfo; - pOrTerm->wtFlags |= TERM_ANDINFO; - pOrTerm->eOperator = WO_AND; - pAndWC = &pAndInfo->wc; - whereClauseInit(pAndWC, pWC->pWInfo); - whereSplit(pAndWC, pOrTerm->pExpr, TK_AND); - exprAnalyzeAll(pSrc, pAndWC); - pAndWC->pOuter = pWC; - testcase( db->mallocFailed ); - if( !db->mallocFailed ){ - for(j=0, pAndTerm=pAndWC->a; jnTerm; j++, pAndTerm++){ - assert( pAndTerm->pExpr ); - if( allowedOp(pAndTerm->pExpr->op) ){ - b |= getMask(&pWInfo->sMaskSet, pAndTerm->leftCursor); - } - } - } - indexable &= b; + sqlite3ExprCachePush(pParse); + iReg = sqlite3GetTempRange(pParse, nConstraint+2); + addrNotFound = pLevel->addrBrk; + for(j=0; jaLTerm[j]; + if( pTerm==0 ) continue; + if( pTerm->eOperator & WO_IN ){ + codeEqualityTerm(pParse, pTerm, pLevel, j, bRev, iTarget); + addrNotFound = pLevel->addrNxt; + }else{ + sqlite3ExprCode(pParse, pTerm->pExpr->pRight, iTarget); } - }else if( pOrTerm->wtFlags & TERM_COPIED ){ - /* Skip this term for now. We revisit it when we process the - ** corresponding TERM_VIRTUAL term */ - }else{ - Bitmask b; - b = getMask(&pWInfo->sMaskSet, pOrTerm->leftCursor); - if( pOrTerm->wtFlags & TERM_VIRTUAL ){ - WhereTerm *pOther = &pOrWc->a[pOrTerm->iParent]; - b |= getMask(&pWInfo->sMaskSet, pOther->leftCursor); + } + sqlite3VdbeAddOp2(v, OP_Integer, pLoop->u.vtab.idxNum, iReg); + sqlite3VdbeAddOp2(v, OP_Integer, nConstraint, iReg+1); + sqlite3VdbeAddOp4(v, OP_VFilter, iCur, addrNotFound, iReg, + pLoop->u.vtab.idxStr, + pLoop->u.vtab.needFree ? P4_MPRINTF : P4_STATIC); + VdbeCoverage(v); + pLoop->u.vtab.needFree = 0; + for(j=0; ju.vtab.omitMask>>j)&1 ){ + disableTerm(pLevel, pLoop->aLTerm[j]); } - indexable &= b; - if( (pOrTerm->eOperator & WO_EQ)==0 ){ - chngToIN = 0; + } + pLevel->op = OP_VNext; + pLevel->p1 = iCur; + pLevel->p2 = sqlite3VdbeCurrentAddr(v); + sqlite3ReleaseTempRange(pParse, iReg, nConstraint+2); + sqlite3ExprCachePop(pParse); + }else +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + + if( (pLoop->wsFlags & WHERE_IPK)!=0 + && (pLoop->wsFlags & (WHERE_COLUMN_IN|WHERE_COLUMN_EQ))!=0 + ){ + /* Case 2: We can directly reference a single row using an + ** equality comparison against the ROWID field. Or + ** we reference multiple rows using a "rowid IN (...)" + ** construct. + */ + assert( pLoop->u.btree.nEq==1 ); + pTerm = pLoop->aLTerm[0]; + assert( pTerm!=0 ); + assert( pTerm->pExpr!=0 ); + assert( omitTable==0 ); + testcase( pTerm->wtFlags & TERM_VIRTUAL ); + iReleaseReg = ++pParse->nMem; + iRowidReg = codeEqualityTerm(pParse, pTerm, pLevel, 0, bRev, iReleaseReg); + if( iRowidReg!=iReleaseReg ) sqlite3ReleaseTempReg(pParse, iReleaseReg); + addrNxt = pLevel->addrNxt; + sqlite3VdbeAddOp2(v, OP_MustBeInt, iRowidReg, addrNxt); VdbeCoverage(v); + sqlite3VdbeAddOp3(v, OP_NotExists, iCur, addrNxt, iRowidReg); + VdbeCoverage(v); + sqlite3ExprCacheAffinityChange(pParse, iRowidReg, 1); + sqlite3ExprCacheStore(pParse, iCur, -1, iRowidReg); + VdbeComment((v, "pk")); + pLevel->op = OP_Noop; + }else if( (pLoop->wsFlags & WHERE_IPK)!=0 + && (pLoop->wsFlags & WHERE_COLUMN_RANGE)!=0 + ){ + /* Case 3: We have an inequality comparison against the ROWID field. + */ + int testOp = OP_Noop; + int start; + int memEndValue = 0; + WhereTerm *pStart, *pEnd; + + assert( omitTable==0 ); + j = 0; + pStart = pEnd = 0; + if( pLoop->wsFlags & WHERE_BTM_LIMIT ) pStart = pLoop->aLTerm[j++]; + if( pLoop->wsFlags & WHERE_TOP_LIMIT ) pEnd = pLoop->aLTerm[j++]; + assert( pStart!=0 || pEnd!=0 ); + if( bRev ){ + pTerm = pStart; + pStart = pEnd; + pEnd = pTerm; + } + if( pStart ){ + Expr *pX; /* The expression that defines the start bound */ + int r1, rTemp; /* Registers for holding the start boundary */ + + /* The following constant maps TK_xx codes into corresponding + ** seek opcodes. It depends on a particular ordering of TK_xx + */ + const u8 aMoveOp[] = { + /* TK_GT */ OP_SeekGT, + /* TK_LE */ OP_SeekLE, + /* TK_LT */ OP_SeekLT, + /* TK_GE */ OP_SeekGE + }; + assert( TK_LE==TK_GT+1 ); /* Make sure the ordering.. */ + assert( TK_LT==TK_GT+2 ); /* ... of the TK_xx values... */ + assert( TK_GE==TK_GT+3 ); /* ... is correcct. */ + + assert( (pStart->wtFlags & TERM_VNULL)==0 ); + testcase( pStart->wtFlags & TERM_VIRTUAL ); + pX = pStart->pExpr; + assert( pX!=0 ); + testcase( pStart->leftCursor!=iCur ); /* transitive constraints */ + r1 = sqlite3ExprCodeTemp(pParse, pX->pRight, &rTemp); + sqlite3VdbeAddOp3(v, aMoveOp[pX->op-TK_GT], iCur, addrBrk, r1); + VdbeComment((v, "pk")); + VdbeCoverageIf(v, pX->op==TK_GT); + VdbeCoverageIf(v, pX->op==TK_LE); + VdbeCoverageIf(v, pX->op==TK_LT); + VdbeCoverageIf(v, pX->op==TK_GE); + sqlite3ExprCacheAffinityChange(pParse, r1, 1); + sqlite3ReleaseTempReg(pParse, rTemp); + disableTerm(pLevel, pStart); + }else{ + sqlite3VdbeAddOp2(v, bRev ? OP_Last : OP_Rewind, iCur, addrBrk); + VdbeCoverageIf(v, bRev==0); + VdbeCoverageIf(v, bRev!=0); + } + if( pEnd ){ + Expr *pX; + pX = pEnd->pExpr; + assert( pX!=0 ); + assert( (pEnd->wtFlags & TERM_VNULL)==0 ); + testcase( pEnd->leftCursor!=iCur ); /* Transitive constraints */ + testcase( pEnd->wtFlags & TERM_VIRTUAL ); + memEndValue = ++pParse->nMem; + sqlite3ExprCode(pParse, pX->pRight, memEndValue); + if( pX->op==TK_LT || pX->op==TK_GT ){ + testOp = bRev ? OP_Le : OP_Ge; }else{ - chngToIN &= b; + testOp = bRev ? OP_Lt : OP_Gt; } + disableTerm(pLevel, pEnd); } - } + start = sqlite3VdbeCurrentAddr(v); + pLevel->op = bRev ? OP_Prev : OP_Next; + pLevel->p1 = iCur; + pLevel->p2 = start; + assert( pLevel->p5==0 ); + if( testOp!=OP_Noop ){ + iRowidReg = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Rowid, iCur, iRowidReg); + sqlite3ExprCacheStore(pParse, iCur, -1, iRowidReg); + sqlite3VdbeAddOp3(v, testOp, memEndValue, addrBrk, iRowidReg); + VdbeCoverageIf(v, testOp==OP_Le); + VdbeCoverageIf(v, testOp==OP_Lt); + VdbeCoverageIf(v, testOp==OP_Ge); + VdbeCoverageIf(v, testOp==OP_Gt); + sqlite3VdbeChangeP5(v, SQLITE_AFF_NUMERIC | SQLITE_JUMPIFNULL); + } + }else if( pLoop->wsFlags & WHERE_INDEXED ){ + /* Case 4: A scan using an index. + ** + ** The WHERE clause may contain zero or more equality + ** terms ("==" or "IN" operators) that refer to the N + ** left-most columns of the index. It may also contain + ** inequality constraints (>, <, >= or <=) on the indexed + ** column that immediately follows the N equalities. Only + ** the right-most column can be an inequality - the rest must + ** use the "==" and "IN" operators. For example, if the + ** index is on (x,y,z), then the following clauses are all + ** optimized: + ** + ** x=5 + ** x=5 AND y=10 + ** x=5 AND y<10 + ** x=5 AND y>5 AND y<10 + ** x=5 AND y=5 AND z<=10 + ** + ** The z<10 term of the following cannot be used, only + ** the x=5 term: + ** + ** x=5 AND z<10 + ** + ** N may be zero if there are inequality constraints. + ** If there are no inequality constraints, then N is at + ** least one. + ** + ** This case is also used when there are no WHERE clause + ** constraints but an index is selected anyway, in order + ** to force the output order to conform to an ORDER BY. + */ + static const u8 aStartOp[] = { + 0, + 0, + OP_Rewind, /* 2: (!start_constraints && startEq && !bRev) */ + OP_Last, /* 3: (!start_constraints && startEq && bRev) */ + OP_SeekGT, /* 4: (start_constraints && !startEq && !bRev) */ + OP_SeekLT, /* 5: (start_constraints && !startEq && bRev) */ + OP_SeekGE, /* 6: (start_constraints && startEq && !bRev) */ + OP_SeekLE /* 7: (start_constraints && startEq && bRev) */ + }; + static const u8 aEndOp[] = { + OP_IdxGE, /* 0: (end_constraints && !bRev && !endEq) */ + OP_IdxGT, /* 1: (end_constraints && !bRev && endEq) */ + OP_IdxLE, /* 2: (end_constraints && bRev && !endEq) */ + OP_IdxLT, /* 3: (end_constraints && bRev && endEq) */ + }; + u16 nEq = pLoop->u.btree.nEq; /* Number of == or IN terms */ + int regBase; /* Base register holding constraint values */ + WhereTerm *pRangeStart = 0; /* Inequality constraint at range start */ + WhereTerm *pRangeEnd = 0; /* Inequality constraint at range end */ + int startEq; /* True if range start uses ==, >= or <= */ + int endEq; /* True if range end uses ==, >= or <= */ + int start_constraints; /* Start of range is constrained */ + int nConstraint; /* Number of constraint terms */ + Index *pIdx; /* The index we will be using */ + int iIdxCur; /* The VDBE cursor for the index */ + int nExtraReg = 0; /* Number of extra registers needed */ + int op; /* Instruction opcode */ + char *zStartAff; /* Affinity for start of range constraint */ + char cEndAff = 0; /* Affinity for end of range constraint */ + u8 bSeekPastNull = 0; /* True to seek past initial nulls */ + u8 bStopAtNull = 0; /* Add condition to terminate at NULLs */ - /* - ** Record the set of tables that satisfy case 3. The set might be - ** empty. - */ - pOrInfo->indexable = indexable; - pTerm->eOperator = indexable==0 ? 0 : WO_OR; + pIdx = pLoop->u.btree.pIndex; + iIdxCur = pLevel->iIdxCur; + assert( nEq>=pLoop->nSkip ); - /* For a two-way OR, attempt to implementation case 2. - */ - if( indexable && pOrWc->nTerm==2 ){ - int iOne = 0; - WhereTerm *pOne; - while( (pOne = whereNthSubterm(&pOrWc->a[0],iOne++))!=0 ){ - int iTwo = 0; - WhereTerm *pTwo; - while( (pTwo = whereNthSubterm(&pOrWc->a[1],iTwo++))!=0 ){ - whereCombineDisjuncts(pSrc, pWC, pOne, pTwo); + /* If this loop satisfies a sort order (pOrderBy) request that + ** was passed to this function to implement a "SELECT min(x) ..." + ** query, then the caller will only allow the loop to run for + ** a single iteration. This means that the first row returned + ** should not have a NULL value stored in 'x'. If column 'x' is + ** the first one after the nEq equality constraints in the index, + ** this requires some special handling. + */ + assert( pWInfo->pOrderBy==0 + || pWInfo->pOrderBy->nExpr==1 + || (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)==0 ); + if( (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)!=0 + && pWInfo->nOBSat>0 + && (pIdx->nKeyCol>nEq) + ){ + assert( pLoop->nSkip==0 ); + bSeekPastNull = 1; + nExtraReg = 1; + } + + /* Find any inequality constraint terms for the start and end + ** of the range. + */ + j = nEq; + if( pLoop->wsFlags & WHERE_BTM_LIMIT ){ + pRangeStart = pLoop->aLTerm[j++]; + nExtraReg = 1; + /* Like optimization range constraints always occur in pairs */ + assert( (pRangeStart->wtFlags & TERM_LIKEOPT)==0 || + (pLoop->wsFlags & WHERE_TOP_LIMIT)!=0 ); + } + if( pLoop->wsFlags & WHERE_TOP_LIMIT ){ + pRangeEnd = pLoop->aLTerm[j++]; + nExtraReg = 1; + if( (pRangeEnd->wtFlags & TERM_LIKEOPT)!=0 ){ + assert( pRangeStart!=0 ); /* LIKE opt constraints */ + assert( pRangeStart->wtFlags & TERM_LIKEOPT ); /* occur in pairs */ + pLevel->iLikeRepCntr = ++pParse->nMem; + testcase( bRev ); + testcase( pIdx->aSortOrder[nEq]==SQLITE_SO_DESC ); + sqlite3VdbeAddOp2(v, OP_Integer, + bRev ^ (pIdx->aSortOrder[nEq]==SQLITE_SO_DESC), + pLevel->iLikeRepCntr); + VdbeComment((v, "LIKE loop counter")); + pLevel->addrLikeRep = sqlite3VdbeCurrentAddr(v); + } + if( pRangeStart==0 + && (j = pIdx->aiColumn[nEq])>=0 + && pIdx->pTable->aCol[j].notNull==0 + ){ + bSeekPastNull = 1; } } - } + assert( pRangeEnd==0 || (pRangeEnd->wtFlags & TERM_VNULL)==0 ); - /* - ** chngToIN holds a set of tables that *might* satisfy case 1. But - ** we have to do some additional checking to see if case 1 really - ** is satisfied. - ** - ** chngToIN will hold either 0, 1, or 2 bits. The 0-bit case means - ** that there is no possibility of transforming the OR clause into an - ** IN operator because one or more terms in the OR clause contain - ** something other than == on a column in the single table. The 1-bit - ** case means that every term of the OR clause is of the form - ** "table.column=expr" for some single table. The one bit that is set - ** will correspond to the common table. We still need to check to make - ** sure the same column is used on all terms. The 2-bit case is when - ** the all terms are of the form "table1.column=table2.column". It - ** might be possible to form an IN operator with either table1.column - ** or table2.column as the LHS if either is common to every term of - ** the OR clause. - ** - ** Note that terms of the form "table.column1=table.column2" (the - ** same table on both sizes of the ==) cannot be optimized. - */ - if( chngToIN ){ - int okToChngToIN = 0; /* True if the conversion to IN is valid */ - int iColumn = -1; /* Column index on lhs of IN operator */ - int iCursor = -1; /* Table cursor common to all terms */ - int j = 0; /* Loop counter */ + /* Generate code to evaluate all constraint terms using == or IN + ** and store the values of those terms in an array of registers + ** starting at regBase. + */ + regBase = codeAllEqualityTerms(pParse,pLevel,bRev,nExtraReg,&zStartAff); + assert( zStartAff==0 || sqlite3Strlen30(zStartAff)>=nEq ); + if( zStartAff ) cEndAff = zStartAff[nEq]; + addrNxt = pLevel->addrNxt; - /* Search for a table and column that appears on one side or the - ** other of the == operator in every subterm. That table and column - ** will be recorded in iCursor and iColumn. There might not be any - ** such table and column. Set okToChngToIN if an appropriate table - ** and column is found but leave okToChngToIN false if not found. + /* If we are doing a reverse order scan on an ascending index, or + ** a forward order scan on a descending index, interchange the + ** start and end terms (pRangeStart and pRangeEnd). */ - for(j=0; j<2 && !okToChngToIN; j++){ - pOrTerm = pOrWc->a; - for(i=pOrWc->nTerm-1; i>=0; i--, pOrTerm++){ - assert( pOrTerm->eOperator & WO_EQ ); - pOrTerm->wtFlags &= ~TERM_OR_OK; - if( pOrTerm->leftCursor==iCursor ){ - /* This is the 2-bit case and we are on the second iteration and - ** current term is from the first iteration. So skip this term. */ - assert( j==1 ); - continue; + if( (nEqnKeyCol && bRev==(pIdx->aSortOrder[nEq]==SQLITE_SO_ASC)) + || (bRev && pIdx->nKeyCol==nEq) + ){ + SWAP(WhereTerm *, pRangeEnd, pRangeStart); + SWAP(u8, bSeekPastNull, bStopAtNull); + } + + testcase( pRangeStart && (pRangeStart->eOperator & WO_LE)!=0 ); + testcase( pRangeStart && (pRangeStart->eOperator & WO_GE)!=0 ); + testcase( pRangeEnd && (pRangeEnd->eOperator & WO_LE)!=0 ); + testcase( pRangeEnd && (pRangeEnd->eOperator & WO_GE)!=0 ); + startEq = !pRangeStart || pRangeStart->eOperator & (WO_LE|WO_GE); + endEq = !pRangeEnd || pRangeEnd->eOperator & (WO_LE|WO_GE); + start_constraints = pRangeStart || nEq>0; + + /* Seek the index cursor to the start of the range. */ + nConstraint = nEq; + if( pRangeStart ){ + Expr *pRight = pRangeStart->pExpr->pRight; + sqlite3ExprCode(pParse, pRight, regBase+nEq); + whereLikeOptimizationStringFixup(v, pLevel, pRangeStart); + if( (pRangeStart->wtFlags & TERM_VNULL)==0 + && sqlite3ExprCanBeNull(pRight) + ){ + sqlite3VdbeAddOp2(v, OP_IsNull, regBase+nEq, addrNxt); + VdbeCoverage(v); + } + if( zStartAff ){ + if( sqlite3CompareAffinity(pRight, zStartAff[nEq])==SQLITE_AFF_BLOB){ + /* Since the comparison is to be performed with no conversions + ** applied to the operands, set the affinity to apply to pRight to + ** SQLITE_AFF_BLOB. */ + zStartAff[nEq] = SQLITE_AFF_BLOB; } - if( (chngToIN & getMask(&pWInfo->sMaskSet, pOrTerm->leftCursor))==0 ){ - /* This term must be of the form t1.a==t2.b where t2 is in the - ** chngToIN set but t1 is not. This term will be either preceded - ** or follwed by an inverted copy (t2.b==t1.a). Skip this term - ** and use its inversion. */ - testcase( pOrTerm->wtFlags & TERM_COPIED ); - testcase( pOrTerm->wtFlags & TERM_VIRTUAL ); - assert( pOrTerm->wtFlags & (TERM_COPIED|TERM_VIRTUAL) ); - continue; + if( sqlite3ExprNeedsNoAffinityChange(pRight, zStartAff[nEq]) ){ + zStartAff[nEq] = SQLITE_AFF_BLOB; } - iColumn = pOrTerm->u.leftColumn; - iCursor = pOrTerm->leftCursor; - break; + } + nConstraint++; + testcase( pRangeStart->wtFlags & TERM_VIRTUAL ); + }else if( bSeekPastNull ){ + sqlite3VdbeAddOp2(v, OP_Null, 0, regBase+nEq); + nConstraint++; + startEq = 0; + start_constraints = 1; + } + codeApplyAffinity(pParse, regBase, nConstraint - bSeekPastNull, zStartAff); + op = aStartOp[(start_constraints<<2) + (startEq<<1) + bRev]; + assert( op!=0 ); + sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, nConstraint); + VdbeCoverage(v); + VdbeCoverageIf(v, op==OP_Rewind); testcase( op==OP_Rewind ); + VdbeCoverageIf(v, op==OP_Last); testcase( op==OP_Last ); + VdbeCoverageIf(v, op==OP_SeekGT); testcase( op==OP_SeekGT ); + VdbeCoverageIf(v, op==OP_SeekGE); testcase( op==OP_SeekGE ); + VdbeCoverageIf(v, op==OP_SeekLE); testcase( op==OP_SeekLE ); + VdbeCoverageIf(v, op==OP_SeekLT); testcase( op==OP_SeekLT ); + + /* Load the value for the inequality constraint at the end of the + ** range (if any). + */ + nConstraint = nEq; + if( pRangeEnd ){ + Expr *pRight = pRangeEnd->pExpr->pRight; + sqlite3ExprCacheRemove(pParse, regBase+nEq, 1); + sqlite3ExprCode(pParse, pRight, regBase+nEq); + whereLikeOptimizationStringFixup(v, pLevel, pRangeEnd); + if( (pRangeEnd->wtFlags & TERM_VNULL)==0 + && sqlite3ExprCanBeNull(pRight) + ){ + sqlite3VdbeAddOp2(v, OP_IsNull, regBase+nEq, addrNxt); + VdbeCoverage(v); } - if( i<0 ){ - /* No candidate table+column was found. This can only occur - ** on the second iteration */ - assert( j==1 ); - assert( IsPowerOfTwo(chngToIN) ); - assert( chngToIN==getMask(&pWInfo->sMaskSet, iCursor) ); - break; + if( sqlite3CompareAffinity(pRight, cEndAff)!=SQLITE_AFF_BLOB + && !sqlite3ExprNeedsNoAffinityChange(pRight, cEndAff) + ){ + codeApplyAffinity(pParse, regBase+nEq, 1, &cEndAff); } - testcase( j==1 ); + nConstraint++; + testcase( pRangeEnd->wtFlags & TERM_VIRTUAL ); + }else if( bStopAtNull ){ + sqlite3VdbeAddOp2(v, OP_Null, 0, regBase+nEq); + endEq = 0; + nConstraint++; + } + sqlite3DbFree(db, zStartAff); - /* We have found a candidate table and column. Check to see if that - ** table and column is common to every term in the OR clause */ - okToChngToIN = 1; - for(; i>=0 && okToChngToIN; i--, pOrTerm++){ - assert( pOrTerm->eOperator & WO_EQ ); - if( pOrTerm->leftCursor!=iCursor ){ - pOrTerm->wtFlags &= ~TERM_OR_OK; - }else if( pOrTerm->u.leftColumn!=iColumn ){ - okToChngToIN = 0; - }else{ - int affLeft, affRight; - /* If the right-hand side is also a column, then the affinities - ** of both right and left sides must be such that no type - ** conversions are required on the right. (Ticket #2249) - */ - affRight = sqlite3ExprAffinity(pOrTerm->pExpr->pRight); - affLeft = sqlite3ExprAffinity(pOrTerm->pExpr->pLeft); - if( affRight!=0 && affRight!=affLeft ){ - okToChngToIN = 0; - }else{ - pOrTerm->wtFlags |= TERM_OR_OK; - } - } + /* Top of the loop body */ + pLevel->p2 = sqlite3VdbeCurrentAddr(v); + + /* Check if the index cursor is past the end of the range. */ + if( nConstraint ){ + op = aEndOp[bRev*2 + endEq]; + sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, nConstraint); + testcase( op==OP_IdxGT ); VdbeCoverageIf(v, op==OP_IdxGT ); + testcase( op==OP_IdxGE ); VdbeCoverageIf(v, op==OP_IdxGE ); + testcase( op==OP_IdxLT ); VdbeCoverageIf(v, op==OP_IdxLT ); + testcase( op==OP_IdxLE ); VdbeCoverageIf(v, op==OP_IdxLE ); + } + + /* Seek the table cursor, if required */ + disableTerm(pLevel, pRangeStart); + disableTerm(pLevel, pRangeEnd); + if( omitTable ){ + /* pIdx is a covering index. No need to access the main table. */ + }else if( HasRowid(pIdx->pTable) ){ + iRowidReg = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_IdxRowid, iIdxCur, iRowidReg); + sqlite3ExprCacheStore(pParse, iCur, -1, iRowidReg); + sqlite3VdbeAddOp2(v, OP_Seek, iCur, iRowidReg); /* Deferred seek */ + }else if( iCur!=iIdxCur ){ + Index *pPk = sqlite3PrimaryKeyIndex(pIdx->pTable); + iRowidReg = sqlite3GetTempRange(pParse, pPk->nKeyCol); + for(j=0; jnKeyCol; j++){ + k = sqlite3ColumnOfIndex(pIdx, pPk->aiColumn[j]); + sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, k, iRowidReg+j); } + sqlite3VdbeAddOp4Int(v, OP_NotFound, iCur, addrCont, + iRowidReg, pPk->nKeyCol); VdbeCoverage(v); } - /* At this point, okToChngToIN is true if original pTerm satisfies - ** case 1. In that case, construct a new virtual term that is - ** pTerm converted into an IN operator. + /* Record the instruction used to terminate the loop. Disable + ** WHERE clause terms made redundant by the index range scan. */ - if( okToChngToIN ){ - Expr *pDup; /* A transient duplicate expression */ - ExprList *pList = 0; /* The RHS of the IN operator */ - Expr *pLeft = 0; /* The LHS of the IN operator */ - Expr *pNew; /* The complete IN operator */ + if( pLoop->wsFlags & WHERE_ONEROW ){ + pLevel->op = OP_Noop; + }else if( bRev ){ + pLevel->op = OP_Prev; + }else{ + pLevel->op = OP_Next; + } + pLevel->p1 = iIdxCur; + pLevel->p3 = (pLoop->wsFlags&WHERE_UNQ_WANTED)!=0 ? 1:0; + if( (pLoop->wsFlags & WHERE_CONSTRAINT)==0 ){ + pLevel->p5 = SQLITE_STMTSTATUS_FULLSCAN_STEP; + }else{ + assert( pLevel->p5==0 ); + } + }else - for(i=pOrWc->nTerm-1, pOrTerm=pOrWc->a; i>=0; i--, pOrTerm++){ - if( (pOrTerm->wtFlags & TERM_OR_OK)==0 ) continue; - assert( pOrTerm->eOperator & WO_EQ ); - assert( pOrTerm->leftCursor==iCursor ); - assert( pOrTerm->u.leftColumn==iColumn ); - pDup = sqlite3ExprDup(db, pOrTerm->pExpr->pRight, 0); - pList = sqlite3ExprListAppend(pWInfo->pParse, pList, pDup); - pLeft = pOrTerm->pExpr->pLeft; - } - assert( pLeft!=0 ); - pDup = sqlite3ExprDup(db, pLeft, 0); - pNew = sqlite3PExpr(pParse, TK_IN, pDup, 0, 0); - if( pNew ){ - int idxNew; - transferJoinMarkings(pNew, pExpr); - assert( !ExprHasProperty(pNew, EP_xIsSelect) ); - pNew->x.pList = pList; - idxNew = whereClauseInsert(pWC, pNew, TERM_VIRTUAL|TERM_DYNAMIC); - testcase( idxNew==0 ); - exprAnalyze(pSrc, pWC, idxNew); - pTerm = &pWC->a[idxTerm]; - markTermAsChild(pWC, idxNew, idxTerm); - }else{ - sqlite3ExprListDelete(db, pList); - } - pTerm->eOperator = WO_NOOP; /* case 1 trumps case 3 */ - } - } -} -#endif /* !SQLITE_OMIT_OR_OPTIMIZATION && !SQLITE_OMIT_SUBQUERY */ +#ifndef SQLITE_OMIT_OR_OPTIMIZATION + if( pLoop->wsFlags & WHERE_MULTI_OR ){ + /* Case 5: Two or more separately indexed terms connected by OR + ** + ** Example: + ** + ** CREATE TABLE t1(a,b,c,d); + ** CREATE INDEX i1 ON t1(a); + ** CREATE INDEX i2 ON t1(b); + ** CREATE INDEX i3 ON t1(c); + ** + ** SELECT * FROM t1 WHERE a=5 OR b=7 OR (c=11 AND d=13) + ** + ** In the example, there are three indexed terms connected by OR. + ** The top of the loop looks like this: + ** + ** Null 1 # Zero the rowset in reg 1 + ** + ** Then, for each indexed term, the following. The arguments to + ** RowSetTest are such that the rowid of the current row is inserted + ** into the RowSet. If it is already present, control skips the + ** Gosub opcode and jumps straight to the code generated by WhereEnd(). + ** + ** sqlite3WhereBegin() + ** RowSetTest # Insert rowid into rowset + ** Gosub 2 A + ** sqlite3WhereEnd() + ** + ** Following the above, code to terminate the loop. Label A, the target + ** of the Gosub above, jumps to the instruction right after the Goto. + ** + ** Null 1 # Zero the rowset in reg 1 + ** Goto B # The loop is finished. + ** + ** A: # Return data, whatever. + ** + ** Return 2 # Jump back to the Gosub + ** + ** B: + ** + ** Added 2014-05-26: If the table is a WITHOUT ROWID table, then + ** use an ephemeral index instead of a RowSet to record the primary + ** keys of the rows we have already seen. + ** + */ + WhereClause *pOrWc; /* The OR-clause broken out into subterms */ + SrcList *pOrTab; /* Shortened table list or OR-clause generation */ + Index *pCov = 0; /* Potential covering index (or NULL) */ + int iCovCur = pParse->nTab++; /* Cursor used for index scans (if any) */ -/* -** The input to this routine is an WhereTerm structure with only the -** "pExpr" field filled in. The job of this routine is to analyze the -** subexpression and populate all the other fields of the WhereTerm -** structure. -** -** If the expression is of the form " X" it gets commuted -** to the standard form of "X ". -** -** If the expression is of the form "X Y" where both X and Y are -** columns, then the original expression is unchanged and a new virtual -** term of the form "Y X" is added to the WHERE clause and -** analyzed separately. The original term is marked with TERM_COPIED -** and the new term is marked with TERM_DYNAMIC (because it's pExpr -** needs to be freed with the WhereClause) and TERM_VIRTUAL (because it -** is a commuted copy of a prior term.) The original term has nChild=1 -** and the copy has idxParent set to the index of the original term. -*/ -static void exprAnalyze( - SrcList *pSrc, /* the FROM clause */ - WhereClause *pWC, /* the WHERE clause */ - int idxTerm /* Index of the term to be analyzed */ -){ - WhereInfo *pWInfo = pWC->pWInfo; /* WHERE clause processing context */ - WhereTerm *pTerm; /* The term to be analyzed */ - WhereMaskSet *pMaskSet; /* Set of table index masks */ - Expr *pExpr; /* The expression to be analyzed */ - Bitmask prereqLeft; /* Prerequesites of the pExpr->pLeft */ - Bitmask prereqAll; /* Prerequesites of pExpr */ - Bitmask extraRight = 0; /* Extra dependencies on LEFT JOIN */ - Expr *pStr1 = 0; /* RHS of LIKE/GLOB operator */ - int isComplete = 0; /* RHS of LIKE/GLOB ends with wildcard */ - int noCase = 0; /* uppercase equivalent to lowercase */ - int op; /* Top-level operator. pExpr->op */ - Parse *pParse = pWInfo->pParse; /* Parsing context */ - sqlite3 *db = pParse->db; /* Database connection */ + int regReturn = ++pParse->nMem; /* Register used with OP_Gosub */ + int regRowset = 0; /* Register for RowSet object */ + int regRowid = 0; /* Register holding rowid */ + int iLoopBody = sqlite3VdbeMakeLabel(v); /* Start of loop body */ + int iRetInit; /* Address of regReturn init */ + int untestedTerms = 0; /* Some terms not completely tested */ + int ii; /* Loop counter */ + u16 wctrlFlags; /* Flags for sub-WHERE clause */ + Expr *pAndExpr = 0; /* An ".. AND (...)" expression */ + Table *pTab = pTabItem->pTab; + + pTerm = pLoop->aLTerm[0]; + assert( pTerm!=0 ); + assert( pTerm->eOperator & WO_OR ); + assert( (pTerm->wtFlags & TERM_ORINFO)!=0 ); + pOrWc = &pTerm->u.pOrInfo->wc; + pLevel->op = OP_Return; + pLevel->p1 = regReturn; - if( db->mallocFailed ){ - return; - } - pTerm = &pWC->a[idxTerm]; - pMaskSet = &pWInfo->sMaskSet; - pExpr = pTerm->pExpr; - assert( pExpr->op!=TK_AS && pExpr->op!=TK_COLLATE ); - prereqLeft = exprTableUsage(pMaskSet, pExpr->pLeft); - op = pExpr->op; - if( op==TK_IN ){ - assert( pExpr->pRight==0 ); - if( ExprHasProperty(pExpr, EP_xIsSelect) ){ - pTerm->prereqRight = exprSelectTableUsage(pMaskSet, pExpr->x.pSelect); + /* Set up a new SrcList in pOrTab containing the table being scanned + ** by this loop in the a[0] slot and all notReady tables in a[1..] slots. + ** This becomes the SrcList in the recursive call to sqlite3WhereBegin(). + */ + if( pWInfo->nLevel>1 ){ + int nNotReady; /* The number of notReady tables */ + struct SrcList_item *origSrc; /* Original list of tables */ + nNotReady = pWInfo->nLevel - iLevel - 1; + pOrTab = sqlite3StackAllocRaw(db, + sizeof(*pOrTab)+ nNotReady*sizeof(pOrTab->a[0])); + if( pOrTab==0 ) return notReady; + pOrTab->nAlloc = (u8)(nNotReady + 1); + pOrTab->nSrc = pOrTab->nAlloc; + memcpy(pOrTab->a, pTabItem, sizeof(*pTabItem)); + origSrc = pWInfo->pTabList->a; + for(k=1; k<=nNotReady; k++){ + memcpy(&pOrTab->a[k], &origSrc[pLevel[k].iFrom], sizeof(pOrTab->a[k])); + } }else{ - pTerm->prereqRight = exprListTableUsage(pMaskSet, pExpr->x.pList); - } - }else if( op==TK_ISNULL ){ - pTerm->prereqRight = 0; - }else{ - pTerm->prereqRight = exprTableUsage(pMaskSet, pExpr->pRight); - } - prereqAll = exprTableUsage(pMaskSet, pExpr); - if( ExprHasProperty(pExpr, EP_FromJoin) ){ - Bitmask x = getMask(pMaskSet, pExpr->iRightJoinTable); - prereqAll |= x; - extraRight = x-1; /* ON clause terms may not be used with an index - ** on left table of a LEFT JOIN. Ticket #3015 */ - } - pTerm->prereqAll = prereqAll; - pTerm->leftCursor = -1; - pTerm->iParent = -1; - pTerm->eOperator = 0; - if( allowedOp(op) ){ - Expr *pLeft = sqlite3ExprSkipCollate(pExpr->pLeft); - Expr *pRight = sqlite3ExprSkipCollate(pExpr->pRight); - u16 opMask = (pTerm->prereqRight & prereqLeft)==0 ? WO_ALL : WO_EQUIV; - if( pLeft->op==TK_COLUMN ){ - pTerm->leftCursor = pLeft->iTable; - pTerm->u.leftColumn = pLeft->iColumn; - pTerm->eOperator = operatorMask(op) & opMask; + pOrTab = pWInfo->pTabList; } - if( pRight && pRight->op==TK_COLUMN ){ - WhereTerm *pNew; - Expr *pDup; - u16 eExtraOp = 0; /* Extra bits for pNew->eOperator */ - if( pTerm->leftCursor>=0 ){ - int idxNew; - pDup = sqlite3ExprDup(db, pExpr, 0); - if( db->mallocFailed ){ - sqlite3ExprDelete(db, pDup); - return; - } - idxNew = whereClauseInsert(pWC, pDup, TERM_VIRTUAL|TERM_DYNAMIC); - if( idxNew==0 ) return; - pNew = &pWC->a[idxNew]; - markTermAsChild(pWC, idxNew, idxTerm); - pTerm = &pWC->a[idxTerm]; - pTerm->wtFlags |= TERM_COPIED; - if( pExpr->op==TK_EQ - && !ExprHasProperty(pExpr, EP_FromJoin) - && OptimizationEnabled(db, SQLITE_Transitive) - ){ - pTerm->eOperator |= WO_EQUIV; - eExtraOp = WO_EQUIV; - } + + /* Initialize the rowset register to contain NULL. An SQL NULL is + ** equivalent to an empty rowset. Or, create an ephemeral index + ** capable of holding primary keys in the case of a WITHOUT ROWID. + ** + ** Also initialize regReturn to contain the address of the instruction + ** immediately following the OP_Return at the bottom of the loop. This + ** is required in a few obscure LEFT JOIN cases where control jumps + ** over the top of the loop into the body of it. In this case the + ** correct response for the end-of-loop code (the OP_Return) is to + ** fall through to the next instruction, just as an OP_Next does if + ** called on an uninitialized cursor. + */ + if( (pWInfo->wctrlFlags & WHERE_DUPLICATES_OK)==0 ){ + if( HasRowid(pTab) ){ + regRowset = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Null, 0, regRowset); }else{ - pDup = pExpr; - pNew = pTerm; + Index *pPk = sqlite3PrimaryKeyIndex(pTab); + regRowset = pParse->nTab++; + sqlite3VdbeAddOp2(v, OP_OpenEphemeral, regRowset, pPk->nKeyCol); + sqlite3VdbeSetP4KeyInfo(pParse, pPk); } - exprCommute(pParse, pDup); - pLeft = sqlite3ExprSkipCollate(pDup->pLeft); - pNew->leftCursor = pLeft->iTable; - pNew->u.leftColumn = pLeft->iColumn; - testcase( (prereqLeft | extraRight) != prereqLeft ); - pNew->prereqRight = prereqLeft | extraRight; - pNew->prereqAll = prereqAll; - pNew->eOperator = (operatorMask(pDup->op) + eExtraOp) & opMask; + regRowid = ++pParse->nMem; } - } + iRetInit = sqlite3VdbeAddOp2(v, OP_Integer, 0, regReturn); -#ifndef SQLITE_OMIT_BETWEEN_OPTIMIZATION - /* If a term is the BETWEEN operator, create two new virtual terms - ** that define the range that the BETWEEN implements. For example: - ** - ** a BETWEEN b AND c - ** - ** is converted into: - ** - ** (a BETWEEN b AND c) AND (a>=b) AND (a<=c) - ** - ** The two new terms are added onto the end of the WhereClause object. - ** The new terms are "dynamic" and are children of the original BETWEEN - ** term. That means that if the BETWEEN term is coded, the children are - ** skipped. Or, if the children are satisfied by an index, the original - ** BETWEEN term is skipped. - */ - else if( pExpr->op==TK_BETWEEN && pWC->op==TK_AND ){ - ExprList *pList = pExpr->x.pList; - int i; - static const u8 ops[] = {TK_GE, TK_LE}; - assert( pList!=0 ); - assert( pList->nExpr==2 ); - for(i=0; i<2; i++){ - Expr *pNewExpr; - int idxNew; - pNewExpr = sqlite3PExpr(pParse, ops[i], - sqlite3ExprDup(db, pExpr->pLeft, 0), - sqlite3ExprDup(db, pList->a[i].pExpr, 0), 0); - transferJoinMarkings(pNewExpr, pExpr); - idxNew = whereClauseInsert(pWC, pNewExpr, TERM_VIRTUAL|TERM_DYNAMIC); - testcase( idxNew==0 ); - exprAnalyze(pSrc, pWC, idxNew); - pTerm = &pWC->a[idxTerm]; - markTermAsChild(pWC, idxNew, idxTerm); + /* If the original WHERE clause is z of the form: (x1 OR x2 OR ...) AND y + ** Then for every term xN, evaluate as the subexpression: xN AND z + ** That way, terms in y that are factored into the disjunction will + ** be picked up by the recursive calls to sqlite3WhereBegin() below. + ** + ** Actually, each subexpression is converted to "xN AND w" where w is + ** the "interesting" terms of z - terms that did not originate in the + ** ON or USING clause of a LEFT JOIN, and terms that are usable as + ** indices. + ** + ** This optimization also only applies if the (x1 OR x2 OR ...) term + ** is not contained in the ON clause of a LEFT JOIN. + ** See ticket http://www.sqlite.org/src/info/f2369304e4 + */ + if( pWC->nTerm>1 ){ + int iTerm; + for(iTerm=0; iTermnTerm; iTerm++){ + Expr *pExpr = pWC->a[iTerm].pExpr; + if( &pWC->a[iTerm] == pTerm ) continue; + if( ExprHasProperty(pExpr, EP_FromJoin) ) continue; + if( (pWC->a[iTerm].wtFlags & TERM_VIRTUAL)!=0 ) continue; + if( (pWC->a[iTerm].eOperator & WO_ALL)==0 ) continue; + testcase( pWC->a[iTerm].wtFlags & TERM_ORINFO ); + pExpr = sqlite3ExprDup(db, pExpr, 0); + pAndExpr = sqlite3ExprAnd(db, pAndExpr, pExpr); + } + if( pAndExpr ){ + pAndExpr = sqlite3PExpr(pParse, TK_AND, 0, pAndExpr, 0); + } } - } -#endif /* SQLITE_OMIT_BETWEEN_OPTIMIZATION */ -#if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY) - /* Analyze a term that is composed of two or more subterms connected by - ** an OR operator. - */ - else if( pExpr->op==TK_OR ){ - assert( pWC->op==TK_AND ); - exprAnalyzeOrTerm(pSrc, pWC, idxTerm); - pTerm = &pWC->a[idxTerm]; - } -#endif /* SQLITE_OMIT_OR_OPTIMIZATION */ + /* Run a separate WHERE clause for each term of the OR clause. After + ** eliminating duplicates from other WHERE clauses, the action for each + ** sub-WHERE clause is to to invoke the main loop body as a subroutine. + */ + wctrlFlags = WHERE_OMIT_OPEN_CLOSE + | WHERE_FORCE_TABLE + | WHERE_ONETABLE_ONLY + | WHERE_NO_AUTOINDEX; + for(ii=0; iinTerm; ii++){ + WhereTerm *pOrTerm = &pOrWc->a[ii]; + if( pOrTerm->leftCursor==iCur || (pOrTerm->eOperator & WO_AND)!=0 ){ + WhereInfo *pSubWInfo; /* Info for single OR-term scan */ + Expr *pOrExpr = pOrTerm->pExpr; /* Current OR clause term */ + int j1 = 0; /* Address of jump operation */ + if( pAndExpr && !ExprHasProperty(pOrExpr, EP_FromJoin) ){ + pAndExpr->pLeft = pOrExpr; + pOrExpr = pAndExpr; + } + /* Loop through table entries that match term pOrTerm. */ + WHERETRACE(0xffff, ("Subplan for OR-clause:\n")); + pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrExpr, 0, 0, + wctrlFlags, iCovCur); + assert( pSubWInfo || pParse->nErr || db->mallocFailed ); + if( pSubWInfo ){ + WhereLoop *pSubLoop; + int addrExplain = sqlite3WhereExplainOneScan( + pParse, pOrTab, &pSubWInfo->a[0], iLevel, pLevel->iFrom, 0 + ); + sqlite3WhereAddScanStatus(v, pOrTab, &pSubWInfo->a[0], addrExplain); -#ifndef SQLITE_OMIT_LIKE_OPTIMIZATION - /* Add constraints to reduce the search space on a LIKE or GLOB - ** operator. - ** - ** A like pattern of the form "x LIKE 'aBc%'" is changed into constraints - ** - ** x>='ABC' AND x<'abd' AND x LIKE 'aBc%' - ** - ** The last character of the prefix "abc" is incremented to form the - ** termination condition "abd". If case is not significant (the default - ** for LIKE) then the lower-bound is made all uppercase and the upper- - ** bound is made all lowercase so that the bounds also work when comparing - ** BLOBs. - */ - if( pWC->op==TK_AND - && isLikeOrGlob(pParse, pExpr, &pStr1, &isComplete, &noCase) - ){ - Expr *pLeft; /* LHS of LIKE/GLOB operator */ - Expr *pStr2; /* Copy of pStr1 - RHS of LIKE/GLOB operator */ - Expr *pNewExpr1; - Expr *pNewExpr2; - int idxNew1; - int idxNew2; - const char *zCollSeqName; /* Name of collating sequence */ - const u16 wtFlags = TERM_LIKEOPT | TERM_VIRTUAL | TERM_DYNAMIC; + /* This is the sub-WHERE clause body. First skip over + ** duplicate rows from prior sub-WHERE clauses, and record the + ** rowid (or PRIMARY KEY) for the current row so that the same + ** row will be skipped in subsequent sub-WHERE clauses. + */ + if( (pWInfo->wctrlFlags & WHERE_DUPLICATES_OK)==0 ){ + int r; + int iSet = ((ii==pOrWc->nTerm-1)?-1:ii); + if( HasRowid(pTab) ){ + r = sqlite3ExprCodeGetColumn(pParse, pTab, -1, iCur, regRowid, 0); + j1 = sqlite3VdbeAddOp4Int(v, OP_RowSetTest, regRowset, 0, r,iSet); + VdbeCoverage(v); + }else{ + Index *pPk = sqlite3PrimaryKeyIndex(pTab); + int nPk = pPk->nKeyCol; + int iPk; - pLeft = pExpr->x.pList->a[1].pExpr; - pStr2 = sqlite3ExprDup(db, pStr1, 0); + /* Read the PK into an array of temp registers. */ + r = sqlite3GetTempRange(pParse, nPk); + for(iPk=0; iPkaiColumn[iPk]; + int rx; + rx = sqlite3ExprCodeGetColumn(pParse, pTab, iCol, iCur,r+iPk,0); + if( rx!=r+iPk ){ + sqlite3VdbeAddOp2(v, OP_SCopy, rx, r+iPk); + } + } - /* Convert the lower bound to upper-case and the upper bound to - ** lower-case (upper-case is less than lower-case in ASCII) so that - ** the range constraints also work for BLOBs - */ - if( noCase && !pParse->db->mallocFailed ){ - int i; - char c; - pTerm->wtFlags |= TERM_LIKE; - for(i=0; (c = pStr1->u.zToken[i])!=0; i++){ - pStr1->u.zToken[i] = sqlite3Toupper(c); - pStr2->u.zToken[i] = sqlite3Tolower(c); - } - } + /* Check if the temp table already contains this key. If so, + ** the row has already been included in the result set and + ** can be ignored (by jumping past the Gosub below). Otherwise, + ** insert the key into the temp table and proceed with processing + ** the row. + ** + ** Use some of the same optimizations as OP_RowSetTest: If iSet + ** is zero, assume that the key cannot already be present in + ** the temp table. And if iSet is -1, assume that there is no + ** need to insert the key into the temp table, as it will never + ** be tested for. */ + if( iSet ){ + j1 = sqlite3VdbeAddOp4Int(v, OP_Found, regRowset, 0, r, nPk); + VdbeCoverage(v); + } + if( iSet>=0 ){ + sqlite3VdbeAddOp3(v, OP_MakeRecord, r, nPk, regRowid); + sqlite3VdbeAddOp3(v, OP_IdxInsert, regRowset, regRowid, 0); + if( iSet ) sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); + } - if( !db->mallocFailed ){ - u8 c, *pC; /* Last character before the first wildcard */ - pC = (u8*)&pStr2->u.zToken[sqlite3Strlen30(pStr2->u.zToken)-1]; - c = *pC; - if( noCase ){ - /* The point is to increment the last character before the first - ** wildcard. But if we increment '@', that will push it into the - ** alphabetic range where case conversions will mess up the - ** inequality. To avoid this, make sure to also run the full - ** LIKE on all candidate expressions by clearing the isComplete flag - */ - if( c=='A'-1 ) isComplete = 0; - c = sqlite3UpperToLower[c]; + /* Release the array of temp registers */ + sqlite3ReleaseTempRange(pParse, r, nPk); + } + } + + /* Invoke the main loop body as a subroutine */ + sqlite3VdbeAddOp2(v, OP_Gosub, regReturn, iLoopBody); + + /* Jump here (skipping the main loop body subroutine) if the + ** current sub-WHERE row is a duplicate from prior sub-WHEREs. */ + if( j1 ) sqlite3VdbeJumpHere(v, j1); + + /* The pSubWInfo->untestedTerms flag means that this OR term + ** contained one or more AND term from a notReady table. The + ** terms from the notReady table could not be tested and will + ** need to be tested later. + */ + if( pSubWInfo->untestedTerms ) untestedTerms = 1; + + /* If all of the OR-connected terms are optimized using the same + ** index, and the index is opened using the same cursor number + ** by each call to sqlite3WhereBegin() made by this loop, it may + ** be possible to use that index as a covering index. + ** + ** If the call to sqlite3WhereBegin() above resulted in a scan that + ** uses an index, and this is either the first OR-connected term + ** processed or the index is the same as that used by all previous + ** terms, set pCov to the candidate covering index. Otherwise, set + ** pCov to NULL to indicate that no candidate covering index will + ** be available. + */ + pSubLoop = pSubWInfo->a[0].pWLoop; + assert( (pSubLoop->wsFlags & WHERE_AUTO_INDEX)==0 ); + if( (pSubLoop->wsFlags & WHERE_INDEXED)!=0 + && (ii==0 || pSubLoop->u.btree.pIndex==pCov) + && (HasRowid(pTab) || !IsPrimaryKeyIndex(pSubLoop->u.btree.pIndex)) + ){ + assert( pSubWInfo->a[0].iIdxCur==iCovCur ); + pCov = pSubLoop->u.btree.pIndex; + wctrlFlags |= WHERE_REOPEN_IDX; + }else{ + pCov = 0; + } + + /* Finish the loop through table entries that match term pOrTerm. */ + sqlite3WhereEnd(pSubWInfo); + } } - *pC = c + 1; } - zCollSeqName = noCase ? "NOCASE" : "BINARY"; - pNewExpr1 = sqlite3ExprDup(db, pLeft, 0); - pNewExpr1 = sqlite3PExpr(pParse, TK_GE, - sqlite3ExprAddCollateString(pParse,pNewExpr1,zCollSeqName), - pStr1, 0); - transferJoinMarkings(pNewExpr1, pExpr); - idxNew1 = whereClauseInsert(pWC, pNewExpr1, wtFlags); - testcase( idxNew1==0 ); - exprAnalyze(pSrc, pWC, idxNew1); - pNewExpr2 = sqlite3ExprDup(db, pLeft, 0); - pNewExpr2 = sqlite3PExpr(pParse, TK_LT, - sqlite3ExprAddCollateString(pParse,pNewExpr2,zCollSeqName), - pStr2, 0); - transferJoinMarkings(pNewExpr2, pExpr); - idxNew2 = whereClauseInsert(pWC, pNewExpr2, wtFlags); - testcase( idxNew2==0 ); - exprAnalyze(pSrc, pWC, idxNew2); - pTerm = &pWC->a[idxTerm]; - if( isComplete ){ - markTermAsChild(pWC, idxNew1, idxTerm); - markTermAsChild(pWC, idxNew2, idxTerm); + pLevel->u.pCovidx = pCov; + if( pCov ) pLevel->iIdxCur = iCovCur; + if( pAndExpr ){ + pAndExpr->pLeft = 0; + sqlite3ExprDelete(db, pAndExpr); + } + sqlite3VdbeChangeP1(v, iRetInit, sqlite3VdbeCurrentAddr(v)); + sqlite3VdbeAddOp2(v, OP_Goto, 0, pLevel->addrBrk); + sqlite3VdbeResolveLabel(v, iLoopBody); + + if( pWInfo->nLevel>1 ) sqlite3StackFree(db, pOrTab); + if( !untestedTerms ) disableTerm(pLevel, pTerm); + }else +#endif /* SQLITE_OMIT_OR_OPTIMIZATION */ + + { + /* Case 6: There is no usable index. We must do a complete + ** scan of the entire table. + */ + static const u8 aStep[] = { OP_Next, OP_Prev }; + static const u8 aStart[] = { OP_Rewind, OP_Last }; + assert( bRev==0 || bRev==1 ); + if( pTabItem->isRecursive ){ + /* Tables marked isRecursive have only a single row that is stored in + ** a pseudo-cursor. No need to Rewind or Next such cursors. */ + pLevel->op = OP_Noop; + }else{ + pLevel->op = aStep[bRev]; + pLevel->p1 = iCur; + pLevel->p2 = 1 + sqlite3VdbeAddOp2(v, aStart[bRev], iCur, addrBrk); + VdbeCoverageIf(v, bRev==0); + VdbeCoverageIf(v, bRev!=0); + pLevel->p5 = SQLITE_STMTSTATUS_FULLSCAN_STEP; } } -#endif /* SQLITE_OMIT_LIKE_OPTIMIZATION */ -#ifndef SQLITE_OMIT_VIRTUALTABLE - /* Add a WO_MATCH auxiliary term to the constraint set if the - ** current expression is of the form: column MATCH expr. - ** This information is used by the xBestIndex methods of - ** virtual tables. The native query optimizer does not attempt - ** to do anything with MATCH functions. - */ - if( isMatchOfColumn(pExpr) ){ - int idxNew; - Expr *pRight, *pLeft; - WhereTerm *pNewTerm; - Bitmask prereqColumn, prereqExpr; +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + pLevel->addrVisit = sqlite3VdbeCurrentAddr(v); +#endif - pRight = pExpr->x.pList->a[0].pExpr; - pLeft = pExpr->x.pList->a[1].pExpr; - prereqExpr = exprTableUsage(pMaskSet, pRight); - prereqColumn = exprTableUsage(pMaskSet, pLeft); - if( (prereqExpr & prereqColumn)==0 ){ - Expr *pNewExpr; - pNewExpr = sqlite3PExpr(pParse, TK_MATCH, - 0, sqlite3ExprDup(db, pRight, 0), 0); - idxNew = whereClauseInsert(pWC, pNewExpr, TERM_VIRTUAL|TERM_DYNAMIC); - testcase( idxNew==0 ); - pNewTerm = &pWC->a[idxNew]; - pNewTerm->prereqRight = prereqExpr; - pNewTerm->leftCursor = pLeft->iTable; - pNewTerm->u.leftColumn = pLeft->iColumn; - pNewTerm->eOperator = WO_MATCH; - markTermAsChild(pWC, idxNew, idxTerm); - pTerm = &pWC->a[idxTerm]; - pTerm->wtFlags |= TERM_COPIED; - pNewTerm->prereqAll = pTerm->prereqAll; + /* Insert code to test every subexpression that can be completely + ** computed using the current set of tables. + */ + for(pTerm=pWC->a, j=pWC->nTerm; j>0; j--, pTerm++){ + Expr *pE; + int skipLikeAddr = 0; + testcase( pTerm->wtFlags & TERM_VIRTUAL ); + testcase( pTerm->wtFlags & TERM_CODED ); + if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; + if( (pTerm->prereqAll & pLevel->notReady)!=0 ){ + testcase( pWInfo->untestedTerms==0 + && (pWInfo->wctrlFlags & WHERE_ONETABLE_ONLY)!=0 ); + pWInfo->untestedTerms = 1; + continue; + } + pE = pTerm->pExpr; + assert( pE!=0 ); + if( pLevel->iLeftJoin && !ExprHasProperty(pE, EP_FromJoin) ){ + continue; + } + if( pTerm->wtFlags & TERM_LIKECOND ){ + assert( pLevel->iLikeRepCntr>0 ); + skipLikeAddr = sqlite3VdbeAddOp1(v, OP_IfNot, pLevel->iLikeRepCntr); + VdbeCoverage(v); } + sqlite3ExprIfFalse(pParse, pE, addrCont, SQLITE_JUMPIFNULL); + if( skipLikeAddr ) sqlite3VdbeJumpHere(v, skipLikeAddr); + pTerm->wtFlags |= TERM_CODED; } -#endif /* SQLITE_OMIT_VIRTUALTABLE */ -#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 - /* When sqlite_stat3 histogram data is available an operator of the - ** form "x IS NOT NULL" can sometimes be evaluated more efficiently - ** as "x>NULL" if x is not an INTEGER PRIMARY KEY. So construct a - ** virtual term of that form. + /* Insert code to test for implied constraints based on transitivity + ** of the "==" operator. ** - ** Note that the virtual term must be tagged with TERM_VNULL. This - ** TERM_VNULL tag will suppress the not-null check at the beginning - ** of the loop. Without the TERM_VNULL flag, the not-null check at - ** the start of the loop will prevent any results from being returned. + ** Example: If the WHERE clause contains "t1.a=t2.b" and "t2.b=123" + ** and we are coding the t1 loop and the t2 loop has not yet coded, + ** then we cannot use the "t1.a=t2.b" constraint, but we can code + ** the implied "t1.a=123" constraint. */ - if( pExpr->op==TK_NOTNULL - && pExpr->pLeft->op==TK_COLUMN - && pExpr->pLeft->iColumn>=0 - && OptimizationEnabled(db, SQLITE_Stat34) - ){ - Expr *pNewExpr; - Expr *pLeft = pExpr->pLeft; - int idxNew; - WhereTerm *pNewTerm; - - pNewExpr = sqlite3PExpr(pParse, TK_GT, - sqlite3ExprDup(db, pLeft, 0), - sqlite3PExpr(pParse, TK_NULL, 0, 0, 0), 0); - - idxNew = whereClauseInsert(pWC, pNewExpr, - TERM_VIRTUAL|TERM_DYNAMIC|TERM_VNULL); - if( idxNew ){ - pNewTerm = &pWC->a[idxNew]; - pNewTerm->prereqRight = 0; - pNewTerm->leftCursor = pLeft->iTable; - pNewTerm->u.leftColumn = pLeft->iColumn; - pNewTerm->eOperator = WO_GT; - markTermAsChild(pWC, idxNew, idxTerm); - pTerm = &pWC->a[idxTerm]; - pTerm->wtFlags |= TERM_COPIED; - pNewTerm->prereqAll = pTerm->prereqAll; + for(pTerm=pWC->a, j=pWC->nTerm; j>0; j--, pTerm++){ + Expr *pE, *pEAlt; + WhereTerm *pAlt; + if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; + if( (pTerm->eOperator & (WO_EQ|WO_IS))==0 ) continue; + if( (pTerm->eOperator & WO_EQUIV)==0 ) continue; + if( pTerm->leftCursor!=iCur ) continue; + if( pLevel->iLeftJoin ) continue; + pE = pTerm->pExpr; + assert( !ExprHasProperty(pE, EP_FromJoin) ); + assert( (pTerm->prereqRight & pLevel->notReady)!=0 ); + pAlt = sqlite3WhereFindTerm(pWC, iCur, pTerm->u.leftColumn, notReady, + WO_EQ|WO_IN|WO_IS, 0); + if( pAlt==0 ) continue; + if( pAlt->wtFlags & (TERM_CODED) ) continue; + testcase( pAlt->eOperator & WO_EQ ); + testcase( pAlt->eOperator & WO_IS ); + testcase( pAlt->eOperator & WO_IN ); + VdbeModuleComment((v, "begin transitive constraint")); + pEAlt = sqlite3StackAllocRaw(db, sizeof(*pEAlt)); + if( pEAlt ){ + *pEAlt = *pAlt->pExpr; + pEAlt->pLeft = pE->pLeft; + sqlite3ExprIfFalse(pParse, pEAlt, addrCont, SQLITE_JUMPIFNULL); + sqlite3StackFree(db, pEAlt); } } -#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ - /* Prevent ON clause terms of a LEFT JOIN from being used to drive - ** an index for tables to the left of the join. + /* For a LEFT OUTER JOIN, generate code that will record the fact that + ** at least one row of the right table has matched the left table. */ - pTerm->prereqRight |= extraRight; -} - -/* -** This function searches pList for an entry that matches the iCol-th column -** of index pIdx. -** -** If such an expression is found, its index in pList->a[] is returned. If -** no expression is found, -1 is returned. -*/ -static int findIndexCol( - Parse *pParse, /* Parse context */ - ExprList *pList, /* Expression list to search */ - int iBase, /* Cursor for table associated with pIdx */ - Index *pIdx, /* Index to match column of */ - int iCol /* Column of index to match */ -){ - int i; - const char *zColl = pIdx->azColl[iCol]; - - for(i=0; inExpr; i++){ - Expr *p = sqlite3ExprSkipCollate(pList->a[i].pExpr); - if( p->op==TK_COLUMN - && p->iColumn==pIdx->aiColumn[iCol] - && p->iTable==iBase - ){ - CollSeq *pColl = sqlite3ExprCollSeq(pParse, pList->a[i].pExpr); - if( ALWAYS(pColl) && 0==sqlite3StrICmp(pColl->zName, zColl) ){ - return i; + if( pLevel->iLeftJoin ){ + pLevel->addrFirst = sqlite3VdbeCurrentAddr(v); + sqlite3VdbeAddOp2(v, OP_Integer, 1, pLevel->iLeftJoin); + VdbeComment((v, "record LEFT JOIN hit")); + sqlite3ExprCacheClear(pParse); + for(pTerm=pWC->a, j=0; jnTerm; j++, pTerm++){ + testcase( pTerm->wtFlags & TERM_VIRTUAL ); + testcase( pTerm->wtFlags & TERM_CODED ); + if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; + if( (pTerm->prereqAll & pLevel->notReady)!=0 ){ + assert( pWInfo->untestedTerms ); + continue; } + assert( pTerm->pExpr ); + sqlite3ExprIfFalse(pParse, pTerm->pExpr, addrCont, SQLITE_JUMPIFNULL); + pTerm->wtFlags |= TERM_CODED; } } - return -1; + return pLevel->notReady; } +/************** End of wherecode.c *******************************************/ +/************** Begin file whereexpr.c ***************************************/ /* -** Return true if the DISTINCT expression-list passed as the third argument -** is redundant. +** 2015-06-08 ** -** A DISTINCT list is redundant if the database contains some subset of -** columns that are unique and non-null. +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This module contains C code that generates VDBE code used to process +** the WHERE clause of SQL statements. +** +** This file was originally part of where.c but was split out to improve +** readability and editabiliity. This file contains utility routines for +** analyzing Expr objects in the WHERE clause. */ -static int isDistinctRedundant( - Parse *pParse, /* Parsing context */ - SrcList *pTabList, /* The FROM clause */ - WhereClause *pWC, /* The WHERE clause */ - ExprList *pDistinct /* The result set that needs to be DISTINCT */ -){ - Table *pTab; - Index *pIdx; - int i; - int iBase; +/* #include "sqliteInt.h" */ +/* #include "whereInt.h" */ - /* If there is more than one table or sub-select in the FROM clause of - ** this query, then it will not be possible to show that the DISTINCT - ** clause is redundant. */ - if( pTabList->nSrc!=1 ) return 0; - iBase = pTabList->a[0].iCursor; - pTab = pTabList->a[0].pTab; +/* Forward declarations */ +static void exprAnalyze(SrcList*, WhereClause*, int); - /* If any of the expressions is an IPK column on table iBase, then return - ** true. Note: The (p->iTable==iBase) part of this test may be false if the - ** current SELECT is a correlated sub-query. - */ - for(i=0; inExpr; i++){ - Expr *p = sqlite3ExprSkipCollate(pDistinct->a[i].pExpr); - if( p->op==TK_COLUMN && p->iTable==iBase && p->iColumn<0 ) return 1; - } +/* +** Deallocate all memory associated with a WhereOrInfo object. +*/ +static void whereOrInfoDelete(sqlite3 *db, WhereOrInfo *p){ + sqlite3WhereClauseClear(&p->wc); + sqlite3DbFree(db, p); +} - /* Loop through all indices on the table, checking each to see if it makes - ** the DISTINCT qualifier redundant. It does so if: - ** - ** 1. The index is itself UNIQUE, and - ** - ** 2. All of the columns in the index are either part of the pDistinct - ** list, or else the WHERE clause contains a term of the form "col=X", - ** where X is a constant value. The collation sequences of the - ** comparison and select-list expressions must match those of the index. - ** - ** 3. All of those index columns for which the WHERE clause does not - ** contain a "col=X" term are subject to a NOT NULL constraint. - */ - for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ - if( !IsUniqueIndex(pIdx) ) continue; - for(i=0; inKeyCol; i++){ - i16 iCol = pIdx->aiColumn[i]; - if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){ - int iIdxCol = findIndexCol(pParse, pDistinct, iBase, pIdx, i); - if( iIdxCol<0 || pTab->aCol[iCol].notNull==0 ){ - break; - } +/* +** Deallocate all memory associated with a WhereAndInfo object. +*/ +static void whereAndInfoDelete(sqlite3 *db, WhereAndInfo *p){ + sqlite3WhereClauseClear(&p->wc); + sqlite3DbFree(db, p); +} + +/* +** Add a single new WhereTerm entry to the WhereClause object pWC. +** The new WhereTerm object is constructed from Expr p and with wtFlags. +** The index in pWC->a[] of the new WhereTerm is returned on success. +** 0 is returned if the new WhereTerm could not be added due to a memory +** allocation error. The memory allocation failure will be recorded in +** the db->mallocFailed flag so that higher-level functions can detect it. +** +** This routine will increase the size of the pWC->a[] array as necessary. +** +** If the wtFlags argument includes TERM_DYNAMIC, then responsibility +** for freeing the expression p is assumed by the WhereClause object pWC. +** This is true even if this routine fails to allocate a new WhereTerm. +** +** WARNING: This routine might reallocate the space used to store +** WhereTerms. All pointers to WhereTerms should be invalidated after +** calling this routine. Such pointers may be reinitialized by referencing +** the pWC->a[] array. +*/ +static int whereClauseInsert(WhereClause *pWC, Expr *p, u16 wtFlags){ + WhereTerm *pTerm; + int idx; + testcase( wtFlags & TERM_VIRTUAL ); + if( pWC->nTerm>=pWC->nSlot ){ + WhereTerm *pOld = pWC->a; + sqlite3 *db = pWC->pWInfo->pParse->db; + pWC->a = sqlite3DbMallocRaw(db, sizeof(pWC->a[0])*pWC->nSlot*2 ); + if( pWC->a==0 ){ + if( wtFlags & TERM_DYNAMIC ){ + sqlite3ExprDelete(db, p); } + pWC->a = pOld; + return 0; } - if( i==pIdx->nKeyCol ){ - /* This index implies that the DISTINCT qualifier is redundant. */ - return 1; + memcpy(pWC->a, pOld, sizeof(pWC->a[0])*pWC->nTerm); + if( pOld!=pWC->aStatic ){ + sqlite3DbFree(db, pOld); } + pWC->nSlot = sqlite3DbMallocSize(db, pWC->a)/sizeof(pWC->a[0]); + memset(&pWC->a[pWC->nTerm], 0, sizeof(pWC->a[0])*(pWC->nSlot-pWC->nTerm)); } - - return 0; + pTerm = &pWC->a[idx = pWC->nTerm++]; + if( p && ExprHasProperty(p, EP_Unlikely) ){ + pTerm->truthProb = sqlite3LogEst(p->iTable) - 270; + }else{ + pTerm->truthProb = 1; + } + pTerm->pExpr = sqlite3ExprSkipCollate(p); + pTerm->wtFlags = wtFlags; + pTerm->pWC = pWC; + pTerm->iParent = -1; + return idx; } - /* -** Estimate the logarithm of the input value to base 2. +** Return TRUE if the given operator is one of the operators that is +** allowed for an indexable WHERE clause term. The allowed operators are +** "=", "<", ">", "<=", ">=", "IN", and "IS NULL" */ -static LogEst estLog(LogEst N){ - return N<=10 ? 0 : sqlite3LogEst(N) - 33; +static int allowedOp(int op){ + assert( TK_GT>TK_EQ && TK_GTTK_EQ && TK_LTTK_EQ && TK_LE=TK_EQ && op<=TK_GE) || op==TK_ISNULL || op==TK_IS; } /* -** Two routines for printing the content of an sqlite3_index_info -** structure. Used for testing and debugging only. If neither -** SQLITE_TEST or SQLITE_DEBUG are defined, then these routines -** are no-ops. +** Commute a comparison operator. Expressions of the form "X op Y" +** are converted into "Y op X". +** +** If left/right precedence rules come into play when determining the +** collating sequence, then COLLATE operators are adjusted to ensure +** that the collating sequence does not change. For example: +** "Y collate NOCASE op X" becomes "X op Y" because any collation sequence on +** the left hand side of a comparison overrides any collation sequence +** attached to the right. For the same reason the EP_Collate flag +** is not commuted. */ -#if !defined(SQLITE_OMIT_VIRTUALTABLE) && defined(WHERETRACE_ENABLED) -static void TRACE_IDX_INPUTS(sqlite3_index_info *p){ - int i; - if( !sqlite3WhereTrace ) return; - for(i=0; inConstraint; i++){ - sqlite3DebugPrintf(" constraint[%d]: col=%d termid=%d op=%d usabled=%d\n", - i, - p->aConstraint[i].iColumn, - p->aConstraint[i].iTermOffset, - p->aConstraint[i].op, - p->aConstraint[i].usable); - } - for(i=0; inOrderBy; i++){ - sqlite3DebugPrintf(" orderby[%d]: col=%d desc=%d\n", - i, - p->aOrderBy[i].iColumn, - p->aOrderBy[i].desc); +static void exprCommute(Parse *pParse, Expr *pExpr){ + u16 expRight = (pExpr->pRight->flags & EP_Collate); + u16 expLeft = (pExpr->pLeft->flags & EP_Collate); + assert( allowedOp(pExpr->op) && pExpr->op!=TK_IN ); + if( expRight==expLeft ){ + /* Either X and Y both have COLLATE operator or neither do */ + if( expRight ){ + /* Both X and Y have COLLATE operators. Make sure X is always + ** used by clearing the EP_Collate flag from Y. */ + pExpr->pRight->flags &= ~EP_Collate; + }else if( sqlite3ExprCollSeq(pParse, pExpr->pLeft)!=0 ){ + /* Neither X nor Y have COLLATE operators, but X has a non-default + ** collating sequence. So add the EP_Collate marker on X to cause + ** it to be searched first. */ + pExpr->pLeft->flags |= EP_Collate; + } } -} -static void TRACE_IDX_OUTPUTS(sqlite3_index_info *p){ - int i; - if( !sqlite3WhereTrace ) return; - for(i=0; inConstraint; i++){ - sqlite3DebugPrintf(" usage[%d]: argvIdx=%d omit=%d\n", - i, - p->aConstraintUsage[i].argvIndex, - p->aConstraintUsage[i].omit); + SWAP(Expr*,pExpr->pRight,pExpr->pLeft); + if( pExpr->op>=TK_GT ){ + assert( TK_LT==TK_GT+2 ); + assert( TK_GE==TK_LE+2 ); + assert( TK_GT>TK_EQ ); + assert( TK_GTop>=TK_GT && pExpr->op<=TK_GE ); + pExpr->op = ((pExpr->op-TK_GT)^2)+TK_GT; } - sqlite3DebugPrintf(" idxNum=%d\n", p->idxNum); - sqlite3DebugPrintf(" idxStr=%s\n", p->idxStr); - sqlite3DebugPrintf(" orderByConsumed=%d\n", p->orderByConsumed); - sqlite3DebugPrintf(" estimatedCost=%g\n", p->estimatedCost); - sqlite3DebugPrintf(" estimatedRows=%lld\n", p->estimatedRows); } -#else -#define TRACE_IDX_INPUTS(A) -#define TRACE_IDX_OUTPUTS(A) -#endif -#ifndef SQLITE_OMIT_AUTOMATIC_INDEX /* -** Return TRUE if the WHERE clause term pTerm is of a form where it -** could be used with an index to access pSrc, assuming an appropriate -** index existed. +** Translate from TK_xx operator to WO_xx bitmask. */ -static int termCanDriveIndex( - WhereTerm *pTerm, /* WHERE clause term to check */ - struct SrcList_item *pSrc, /* Table we are trying to access */ - Bitmask notReady /* Tables in outer loops of the join */ -){ - char aff; - if( pTerm->leftCursor!=pSrc->iCursor ) return 0; - if( (pTerm->eOperator & WO_EQ)==0 ) return 0; - if( (pTerm->prereqRight & notReady)!=0 ) return 0; - if( pTerm->u.leftColumn<0 ) return 0; - aff = pSrc->pTab->aCol[pTerm->u.leftColumn].affinity; - if( !sqlite3IndexAffinityOk(pTerm->pExpr, aff) ) return 0; - return 1; +static u16 operatorMask(int op){ + u16 c; + assert( allowedOp(op) ); + if( op==TK_IN ){ + c = WO_IN; + }else if( op==TK_ISNULL ){ + c = WO_ISNULL; + }else if( op==TK_IS ){ + c = WO_IS; + }else{ + assert( (WO_EQ<<(op-TK_EQ)) < 0x7fff ); + c = (u16)(WO_EQ<<(op-TK_EQ)); + } + assert( op!=TK_ISNULL || c==WO_ISNULL ); + assert( op!=TK_IN || c==WO_IN ); + assert( op!=TK_EQ || c==WO_EQ ); + assert( op!=TK_LT || c==WO_LT ); + assert( op!=TK_LE || c==WO_LE ); + assert( op!=TK_GT || c==WO_GT ); + assert( op!=TK_GE || c==WO_GE ); + assert( op!=TK_IS || c==WO_IS ); + return c; } -#endif -#ifndef SQLITE_OMIT_AUTOMATIC_INDEX +#ifndef SQLITE_OMIT_LIKE_OPTIMIZATION /* -** Generate code to construct the Index object for an automatic index -** and to set up the WhereLevel object pLevel so that the code generator -** makes use of the automatic index. +** Check to see if the given expression is a LIKE or GLOB operator that +** can be optimized using inequality constraints. Return TRUE if it is +** so and false if not. +** +** In order for the operator to be optimizible, the RHS must be a string +** literal that does not begin with a wildcard. The LHS must be a column +** that may only be NULL, a string, or a BLOB, never a number. (This means +** that virtual tables cannot participate in the LIKE optimization.) The +** collating sequence for the column on the LHS must be appropriate for +** the operator. */ -static void constructAutomaticIndex( - Parse *pParse, /* The parsing context */ - WhereClause *pWC, /* The WHERE clause */ - struct SrcList_item *pSrc, /* The FROM clause term to get the next index */ - Bitmask notReady, /* Mask of cursors that are not available */ - WhereLevel *pLevel /* Write new index here */ +static int isLikeOrGlob( + Parse *pParse, /* Parsing and code generating context */ + Expr *pExpr, /* Test this expression */ + Expr **ppPrefix, /* Pointer to TK_STRING expression with pattern prefix */ + int *pisComplete, /* True if the only wildcard is % in the last character */ + int *pnoCase /* True if uppercase is equivalent to lowercase */ ){ - int nKeyCol; /* Number of columns in the constructed index */ - WhereTerm *pTerm; /* A single term of the WHERE clause */ - WhereTerm *pWCEnd; /* End of pWC->a[] */ - Index *pIdx; /* Object describing the transient index */ - Vdbe *v; /* Prepared statement under construction */ - int addrInit; /* Address of the initialization bypass jump */ - Table *pTable; /* The table being indexed */ - int addrTop; /* Top of the index fill loop */ - int regRecord; /* Register holding an index record */ - int n; /* Column counter */ - int i; /* Loop counter */ - int mxBitCol; /* Maximum column in pSrc->colUsed */ - CollSeq *pColl; /* Collating sequence to on a column */ - WhereLoop *pLoop; /* The Loop object */ - char *zNotUsed; /* Extra space on the end of pIdx */ - Bitmask idxCols; /* Bitmap of columns used for indexing */ - Bitmask extraCols; /* Bitmap of additional columns */ - u8 sentWarning = 0; /* True if a warnning has been issued */ - Expr *pPartial = 0; /* Partial Index Expression */ - int iContinue = 0; /* Jump here to skip excluded rows */ - - /* Generate code to skip over the creation and initialization of the - ** transient index on 2nd and subsequent iterations of the loop. */ - v = pParse->pVdbe; - assert( v!=0 ); - addrInit = sqlite3CodeOnce(pParse); VdbeCoverage(v); + const char *z = 0; /* String on RHS of LIKE operator */ + Expr *pRight, *pLeft; /* Right and left size of LIKE operator */ + ExprList *pList; /* List of operands to the LIKE operator */ + int c; /* One character in z[] */ + int cnt; /* Number of non-wildcard prefix characters */ + char wc[3]; /* Wildcard characters */ + sqlite3 *db = pParse->db; /* Database connection */ + sqlite3_value *pVal = 0; + int op; /* Opcode of pRight */ - /* Count the number of columns that will be added to the index - ** and used to match WHERE clause constraints */ - nKeyCol = 0; - pTable = pSrc->pTab; - pWCEnd = &pWC->a[pWC->nTerm]; - pLoop = pLevel->pWLoop; - idxCols = 0; - for(pTerm=pWC->a; pTermpExpr; - assert( !ExprHasProperty(pExpr, EP_FromJoin) /* prereq always non-zero */ - || pExpr->iRightJoinTable!=pSrc->iCursor /* for the right-hand */ - || pLoop->prereq!=0 ); /* table of a LEFT JOIN */ - if( pLoop->prereq==0 - && (pTerm->wtFlags & TERM_VIRTUAL)==0 - && !ExprHasProperty(pExpr, EP_FromJoin) - && sqlite3ExprIsTableConstant(pExpr, pSrc->iCursor) ){ - pPartial = sqlite3ExprAnd(pParse->db, pPartial, - sqlite3ExprDup(pParse->db, pExpr, 0)); - } - if( termCanDriveIndex(pTerm, pSrc, notReady) ){ - int iCol = pTerm->u.leftColumn; - Bitmask cMask = iCol>=BMS ? MASKBIT(BMS-1) : MASKBIT(iCol); - testcase( iCol==BMS ); - testcase( iCol==BMS-1 ); - if( !sentWarning ){ - sqlite3_log(SQLITE_WARNING_AUTOINDEX, - "automatic index on %s(%s)", pTable->zName, - pTable->aCol[iCol].zName); - sentWarning = 1; - } - if( (idxCols & cMask)==0 ){ - if( whereLoopResize(pParse->db, pLoop, nKeyCol+1) ){ - goto end_auto_index_create; - } - pLoop->aLTerm[nKeyCol++] = pTerm; - idxCols |= cMask; - } - } - } - assert( nKeyCol>0 ); - pLoop->u.btree.nEq = pLoop->nLTerm = nKeyCol; - pLoop->wsFlags = WHERE_COLUMN_EQ | WHERE_IDX_ONLY | WHERE_INDEXED - | WHERE_AUTO_INDEX; - - /* Count the number of additional columns needed to create a - ** covering index. A "covering index" is an index that contains all - ** columns that are needed by the query. With a covering index, the - ** original table never needs to be accessed. Automatic indices must - ** be a covering index because the index will not be updated if the - ** original table changes and the index and table cannot both be used - ** if they go out of sync. - */ - extraCols = pSrc->colUsed & (~idxCols | MASKBIT(BMS-1)); - mxBitCol = MIN(BMS-1,pTable->nCol); - testcase( pTable->nCol==BMS-1 ); - testcase( pTable->nCol==BMS-2 ); - for(i=0; icolUsed & MASKBIT(BMS-1) ){ - nKeyCol += pTable->nCol - BMS + 1; +#ifdef SQLITE_EBCDIC + if( *pnoCase ) return 0; +#endif + pList = pExpr->x.pList; + pLeft = pList->a[1].pExpr; + if( pLeft->op!=TK_COLUMN + || sqlite3ExprAffinity(pLeft)!=SQLITE_AFF_TEXT + || IsVirtual(pLeft->pTab) /* Value might be numeric */ + ){ + /* IMP: R-02065-49465 The left-hand side of the LIKE or GLOB operator must + ** be the name of an indexed column with TEXT affinity. */ + return 0; } + assert( pLeft->iColumn!=(-1) ); /* Because IPK never has AFF_TEXT */ - /* Construct the Index object to describe this index */ - pIdx = sqlite3AllocateIndexObject(pParse->db, nKeyCol+1, 0, &zNotUsed); - if( pIdx==0 ) goto end_auto_index_create; - pLoop->u.btree.pIndex = pIdx; - pIdx->zName = "auto-index"; - pIdx->pTable = pTable; - n = 0; - idxCols = 0; - for(pTerm=pWC->a; pTermu.leftColumn; - Bitmask cMask = iCol>=BMS ? MASKBIT(BMS-1) : MASKBIT(iCol); - testcase( iCol==BMS-1 ); - testcase( iCol==BMS ); - if( (idxCols & cMask)==0 ){ - Expr *pX = pTerm->pExpr; - idxCols |= cMask; - pIdx->aiColumn[n] = pTerm->u.leftColumn; - pColl = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pX->pRight); - pIdx->azColl[n] = ALWAYS(pColl) ? pColl->zName : "BINARY"; - n++; - } + pRight = sqlite3ExprSkipCollate(pList->a[0].pExpr); + op = pRight->op; + if( op==TK_VARIABLE ){ + Vdbe *pReprepare = pParse->pReprepare; + int iCol = pRight->iColumn; + pVal = sqlite3VdbeGetBoundValue(pReprepare, iCol, SQLITE_AFF_BLOB); + if( pVal && sqlite3_value_type(pVal)==SQLITE_TEXT ){ + z = (char *)sqlite3_value_text(pVal); } + sqlite3VdbeSetVarmask(pParse->pVdbe, iCol); + assert( pRight->op==TK_VARIABLE || pRight->op==TK_REGISTER ); + }else if( op==TK_STRING ){ + z = pRight->u.zToken; } - assert( (u32)n==pLoop->u.btree.nEq ); - - /* Add additional columns needed to make the automatic index into - ** a covering index */ - for(i=0; iaiColumn[n] = i; - pIdx->azColl[n] = "BINARY"; - n++; + if( z ){ + cnt = 0; + while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){ + cnt++; } - } - if( pSrc->colUsed & MASKBIT(BMS-1) ){ - for(i=BMS-1; inCol; i++){ - pIdx->aiColumn[n] = i; - pIdx->azColl[n] = "BINARY"; - n++; + if( cnt!=0 && 255!=(u8)z[cnt-1] ){ + Expr *pPrefix; + *pisComplete = c==wc[0] && z[cnt+1]==0; + pPrefix = sqlite3Expr(db, TK_STRING, z); + if( pPrefix ) pPrefix->u.zToken[cnt] = 0; + *ppPrefix = pPrefix; + if( op==TK_VARIABLE ){ + Vdbe *v = pParse->pVdbe; + sqlite3VdbeSetVarmask(v, pRight->iColumn); + if( *pisComplete && pRight->u.zToken[1] ){ + /* If the rhs of the LIKE expression is a variable, and the current + ** value of the variable means there is no need to invoke the LIKE + ** function, then no OP_Variable will be added to the program. + ** This causes problems for the sqlite3_bind_parameter_name() + ** API. To work around them, add a dummy OP_Variable here. + */ + int r1 = sqlite3GetTempReg(pParse); + sqlite3ExprCodeTarget(pParse, pRight, r1); + sqlite3VdbeChangeP3(v, sqlite3VdbeCurrentAddr(v)-1, 0); + sqlite3ReleaseTempReg(pParse, r1); + } + } + }else{ + z = 0; } } - assert( n==nKeyCol ); - pIdx->aiColumn[n] = -1; - pIdx->azColl[n] = "BINARY"; - - /* Create the automatic index */ - assert( pLevel->iIdxCur>=0 ); - pLevel->iIdxCur = pParse->nTab++; - sqlite3VdbeAddOp2(v, OP_OpenAutoindex, pLevel->iIdxCur, nKeyCol+1); - sqlite3VdbeSetP4KeyInfo(pParse, pIdx); - VdbeComment((v, "for %s", pTable->zName)); - - /* Fill the automatic index with content */ - sqlite3ExprCachePush(pParse); - addrTop = sqlite3VdbeAddOp1(v, OP_Rewind, pLevel->iTabCur); VdbeCoverage(v); - if( pPartial ){ - iContinue = sqlite3VdbeMakeLabel(v); - sqlite3ExprIfFalse(pParse, pPartial, iContinue, SQLITE_JUMPIFNULL); - pLoop->wsFlags |= WHERE_PARTIALIDX; - } - regRecord = sqlite3GetTempReg(pParse); - sqlite3GenerateIndexKey(pParse, pIdx, pLevel->iTabCur, regRecord, 0, 0, 0, 0); - sqlite3VdbeAddOp2(v, OP_IdxInsert, pLevel->iIdxCur, regRecord); - sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); - if( pPartial ) sqlite3VdbeResolveLabel(v, iContinue); - sqlite3VdbeAddOp2(v, OP_Next, pLevel->iTabCur, addrTop+1); VdbeCoverage(v); - sqlite3VdbeChangeP5(v, SQLITE_STMTSTATUS_AUTOINDEX); - sqlite3VdbeJumpHere(v, addrTop); - sqlite3ReleaseTempReg(pParse, regRecord); - sqlite3ExprCachePop(pParse); - - /* Jump here when skipping the initialization */ - sqlite3VdbeJumpHere(v, addrInit); -end_auto_index_create: - sqlite3ExprDelete(pParse->db, pPartial); + sqlite3ValueFree(pVal); + return (z!=0); } -#endif /* SQLITE_OMIT_AUTOMATIC_INDEX */ +#endif /* SQLITE_OMIT_LIKE_OPTIMIZATION */ + #ifndef SQLITE_OMIT_VIRTUALTABLE /* -** Allocate and populate an sqlite3_index_info structure. It is the -** responsibility of the caller to eventually release the structure -** by passing the pointer returned by this function to sqlite3_free(). +** Check to see if the given expression is of the form +** +** column MATCH expr +** +** If it is then return TRUE. If not, return FALSE. */ -static sqlite3_index_info *allocateIndexInfo( - Parse *pParse, - WhereClause *pWC, - struct SrcList_item *pSrc, - ExprList *pOrderBy +static int isMatchOfColumn( + Expr *pExpr /* Test this expression */ ){ - int i, j; - int nTerm; - struct sqlite3_index_constraint *pIdxCons; - struct sqlite3_index_orderby *pIdxOrderBy; - struct sqlite3_index_constraint_usage *pUsage; - WhereTerm *pTerm; - int nOrderBy; - sqlite3_index_info *pIdxInfo; + ExprList *pList; - /* Count the number of possible WHERE clause constraints referring - ** to this virtual table */ - for(i=nTerm=0, pTerm=pWC->a; inTerm; i++, pTerm++){ - if( pTerm->leftCursor != pSrc->iCursor ) continue; - assert( IsPowerOfTwo(pTerm->eOperator & ~WO_EQUIV) ); - testcase( pTerm->eOperator & WO_IN ); - testcase( pTerm->eOperator & WO_ISNULL ); - testcase( pTerm->eOperator & WO_ALL ); - if( (pTerm->eOperator & ~(WO_ISNULL|WO_EQUIV))==0 ) continue; - if( pTerm->wtFlags & TERM_VNULL ) continue; - nTerm++; + if( pExpr->op!=TK_FUNCTION ){ + return 0; } - - /* If the ORDER BY clause contains only columns in the current - ** virtual table then allocate space for the aOrderBy part of - ** the sqlite3_index_info structure. - */ - nOrderBy = 0; - if( pOrderBy ){ - int n = pOrderBy->nExpr; - for(i=0; ia[i].pExpr; - if( pExpr->op!=TK_COLUMN || pExpr->iTable!=pSrc->iCursor ) break; - } - if( i==n){ - nOrderBy = n; - } + if( sqlite3StrICmp(pExpr->u.zToken,"match")!=0 ){ + return 0; } - - /* Allocate the sqlite3_index_info structure - */ - pIdxInfo = sqlite3DbMallocZero(pParse->db, sizeof(*pIdxInfo) - + (sizeof(*pIdxCons) + sizeof(*pUsage))*nTerm - + sizeof(*pIdxOrderBy)*nOrderBy ); - if( pIdxInfo==0 ){ - sqlite3ErrorMsg(pParse, "out of memory"); + pList = pExpr->x.pList; + if( pList->nExpr!=2 ){ return 0; } + if( pList->a[1].pExpr->op != TK_COLUMN ){ + return 0; + } + return 1; +} +#endif /* SQLITE_OMIT_VIRTUALTABLE */ - /* Initialize the structure. The sqlite3_index_info structure contains - ** many fields that are declared "const" to prevent xBestIndex from - ** changing them. We have to do some funky casting in order to - ** initialize those fields. - */ - pIdxCons = (struct sqlite3_index_constraint*)&pIdxInfo[1]; - pIdxOrderBy = (struct sqlite3_index_orderby*)&pIdxCons[nTerm]; - pUsage = (struct sqlite3_index_constraint_usage*)&pIdxOrderBy[nOrderBy]; - *(int*)&pIdxInfo->nConstraint = nTerm; - *(int*)&pIdxInfo->nOrderBy = nOrderBy; - *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint = pIdxCons; - *(struct sqlite3_index_orderby**)&pIdxInfo->aOrderBy = pIdxOrderBy; - *(struct sqlite3_index_constraint_usage**)&pIdxInfo->aConstraintUsage = - pUsage; +/* +** If the pBase expression originated in the ON or USING clause of +** a join, then transfer the appropriate markings over to derived. +*/ +static void transferJoinMarkings(Expr *pDerived, Expr *pBase){ + if( pDerived ){ + pDerived->flags |= pBase->flags & EP_FromJoin; + pDerived->iRightJoinTable = pBase->iRightJoinTable; + } +} - for(i=j=0, pTerm=pWC->a; inTerm; i++, pTerm++){ - u8 op; - if( pTerm->leftCursor != pSrc->iCursor ) continue; - assert( IsPowerOfTwo(pTerm->eOperator & ~WO_EQUIV) ); - testcase( pTerm->eOperator & WO_IN ); - testcase( pTerm->eOperator & WO_ISNULL ); - testcase( pTerm->eOperator & WO_ALL ); - if( (pTerm->eOperator & ~(WO_ISNULL|WO_EQUIV))==0 ) continue; - if( pTerm->wtFlags & TERM_VNULL ) continue; - pIdxCons[j].iColumn = pTerm->u.leftColumn; - pIdxCons[j].iTermOffset = i; - op = (u8)pTerm->eOperator & WO_ALL; - if( op==WO_IN ) op = WO_EQ; - pIdxCons[j].op = op; - /* The direct assignment in the previous line is possible only because - ** the WO_ and SQLITE_INDEX_CONSTRAINT_ codes are identical. The - ** following asserts verify this fact. */ - assert( WO_EQ==SQLITE_INDEX_CONSTRAINT_EQ ); - assert( WO_LT==SQLITE_INDEX_CONSTRAINT_LT ); - assert( WO_LE==SQLITE_INDEX_CONSTRAINT_LE ); - assert( WO_GT==SQLITE_INDEX_CONSTRAINT_GT ); - assert( WO_GE==SQLITE_INDEX_CONSTRAINT_GE ); - assert( WO_MATCH==SQLITE_INDEX_CONSTRAINT_MATCH ); - assert( pTerm->eOperator & (WO_IN|WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE|WO_MATCH) ); - j++; +/* +** Mark term iChild as being a child of term iParent +*/ +static void markTermAsChild(WhereClause *pWC, int iChild, int iParent){ + pWC->a[iChild].iParent = iParent; + pWC->a[iChild].truthProb = pWC->a[iParent].truthProb; + pWC->a[iParent].nChild++; +} + +/* +** Return the N-th AND-connected subterm of pTerm. Or if pTerm is not +** a conjunction, then return just pTerm when N==0. If N is exceeds +** the number of available subterms, return NULL. +*/ +static WhereTerm *whereNthSubterm(WhereTerm *pTerm, int N){ + if( pTerm->eOperator!=WO_AND ){ + return N==0 ? pTerm : 0; } - for(i=0; ia[i].pExpr; - pIdxOrderBy[i].iColumn = pExpr->iColumn; - pIdxOrderBy[i].desc = pOrderBy->a[i].sortOrder; + if( Nu.pAndInfo->wc.nTerm ){ + return &pTerm->u.pAndInfo->wc.a[N]; } - - return pIdxInfo; + return 0; } /* -** The table object reference passed as the second argument to this function -** must represent a virtual table. This function invokes the xBestIndex() -** method of the virtual table with the sqlite3_index_info object that -** comes in as the 3rd argument to this function. +** Subterms pOne and pTwo are contained within WHERE clause pWC. The +** two subterms are in disjunction - they are OR-ed together. ** -** If an error occurs, pParse is populated with an error message and a -** non-zero value is returned. Otherwise, 0 is returned and the output -** part of the sqlite3_index_info structure is left populated. +** If these two terms are both of the form: "A op B" with the same +** A and B values but different operators and if the operators are +** compatible (if one is = and the other is <, for example) then +** add a new virtual AND term to pWC that is the combination of the +** two. ** -** Whether or not an error is returned, it is the responsibility of the -** caller to eventually free p->idxStr if p->needToFreeIdxStr indicates -** that this is required. +** Some examples: +** +** x x<=y +** x=y OR x=y --> x=y +** x<=y OR x x<=y +** +** The following is NOT generated: +** +** xy --> x!=y */ -static int vtabBestIndex(Parse *pParse, Table *pTab, sqlite3_index_info *p){ - sqlite3_vtab *pVtab = sqlite3GetVTable(pParse->db, pTab)->pVtab; - int i; - int rc; - - TRACE_IDX_INPUTS(p); - rc = pVtab->pModule->xBestIndex(pVtab, p); - TRACE_IDX_OUTPUTS(p); +static void whereCombineDisjuncts( + SrcList *pSrc, /* the FROM clause */ + WhereClause *pWC, /* The complete WHERE clause */ + WhereTerm *pOne, /* First disjunct */ + WhereTerm *pTwo /* Second disjunct */ +){ + u16 eOp = pOne->eOperator | pTwo->eOperator; + sqlite3 *db; /* Database connection (for malloc) */ + Expr *pNew; /* New virtual expression */ + int op; /* Operator for the combined expression */ + int idxNew; /* Index in pWC of the next virtual term */ - if( rc!=SQLITE_OK ){ - if( rc==SQLITE_NOMEM ){ - pParse->db->mallocFailed = 1; - }else if( !pVtab->zErrMsg ){ - sqlite3ErrorMsg(pParse, "%s", sqlite3ErrStr(rc)); + if( (pOne->eOperator & (WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE))==0 ) return; + if( (pTwo->eOperator & (WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE))==0 ) return; + if( (eOp & (WO_EQ|WO_LT|WO_LE))!=eOp + && (eOp & (WO_EQ|WO_GT|WO_GE))!=eOp ) return; + assert( pOne->pExpr->pLeft!=0 && pOne->pExpr->pRight!=0 ); + assert( pTwo->pExpr->pLeft!=0 && pTwo->pExpr->pRight!=0 ); + if( sqlite3ExprCompare(pOne->pExpr->pLeft, pTwo->pExpr->pLeft, -1) ) return; + if( sqlite3ExprCompare(pOne->pExpr->pRight, pTwo->pExpr->pRight, -1) )return; + /* If we reach this point, it means the two subterms can be combined */ + if( (eOp & (eOp-1))!=0 ){ + if( eOp & (WO_LT|WO_LE) ){ + eOp = WO_LE; }else{ - sqlite3ErrorMsg(pParse, "%s", pVtab->zErrMsg); - } - } - sqlite3_free(pVtab->zErrMsg); - pVtab->zErrMsg = 0; - - for(i=0; inConstraint; i++){ - if( !p->aConstraint[i].usable && p->aConstraintUsage[i].argvIndex>0 ){ - sqlite3ErrorMsg(pParse, - "table %s: xBestIndex returned an invalid plan", pTab->zName); + assert( eOp & (WO_GT|WO_GE) ); + eOp = WO_GE; } } - - return pParse->nErr; + db = pWC->pWInfo->pParse->db; + pNew = sqlite3ExprDup(db, pOne->pExpr, 0); + if( pNew==0 ) return; + for(op=TK_EQ; eOp!=(WO_EQ<<(op-TK_EQ)); op++){ assert( opop = op; + idxNew = whereClauseInsert(pWC, pNew, TERM_VIRTUAL|TERM_DYNAMIC); + exprAnalyze(pSrc, pWC, idxNew); } -#endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */ -#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 +#if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY) /* -** Estimate the location of a particular key among all keys in an -** index. Store the results in aStat as follows: +** Analyze a term that consists of two or more OR-connected +** subterms. So in: ** -** aStat[0] Est. number of rows less than pRec -** aStat[1] Est. number of rows equal to pRec +** ... WHERE (a=5) AND (b=7 OR c=9 OR d=13) AND (d=13) +** ^^^^^^^^^^^^^^^^^^^^ ** -** Return the index of the sample that is the smallest sample that -** is greater than or equal to pRec. Note that this index is not an index -** into the aSample[] array - it is an index into a virtual set of samples -** based on the contents of aSample[] and the number of fields in record -** pRec. +** This routine analyzes terms such as the middle term in the above example. +** A WhereOrTerm object is computed and attached to the term under +** analysis, regardless of the outcome of the analysis. Hence: +** +** WhereTerm.wtFlags |= TERM_ORINFO +** WhereTerm.u.pOrInfo = a dynamically allocated WhereOrTerm object +** +** The term being analyzed must have two or more of OR-connected subterms. +** A single subterm might be a set of AND-connected sub-subterms. +** Examples of terms under analysis: +** +** (A) t1.x=t2.y OR t1.x=t2.z OR t1.y=15 OR t1.z=t3.a+5 +** (B) x=expr1 OR expr2=x OR x=expr3 +** (C) t1.x=t2.y OR (t1.x=t2.z AND t1.y=15) +** (D) x=expr1 OR (y>11 AND y<22 AND z LIKE '*hello*') +** (E) (p.a=1 AND q.b=2 AND r.c=3) OR (p.x=4 AND q.y=5 AND r.z=6) +** (F) x>A OR (x=A AND y>=B) +** +** CASE 1: +** +** If all subterms are of the form T.C=expr for some single column of C and +** a single table T (as shown in example B above) then create a new virtual +** term that is an equivalent IN expression. In other words, if the term +** being analyzed is: +** +** x = expr1 OR expr2 = x OR x = expr3 +** +** then create a new virtual term like this: +** +** x IN (expr1,expr2,expr3) +** +** CASE 2: +** +** If there are exactly two disjuncts and one side has x>A and the other side +** has x=A (for the same x and A) then add a new virtual conjunct term to the +** WHERE clause of the form "x>=A". Example: +** +** x>A OR (x=A AND y>B) adds: x>=A +** +** The added conjunct can sometimes be helpful in query planning. +** +** CASE 3: +** +** If all subterms are indexable by a single table T, then set +** +** WhereTerm.eOperator = WO_OR +** WhereTerm.u.pOrInfo->indexable |= the cursor number for table T +** +** A subterm is "indexable" if it is of the form +** "T.C " where C is any column of table T and +** is one of "=", "<", "<=", ">", ">=", "IS NULL", or "IN". +** A subterm is also indexable if it is an AND of two or more +** subsubterms at least one of which is indexable. Indexable AND +** subterms have their eOperator set to WO_AND and they have +** u.pAndInfo set to a dynamically allocated WhereAndTerm object. +** +** From another point of view, "indexable" means that the subterm could +** potentially be used with an index if an appropriate index exists. +** This analysis does not consider whether or not the index exists; that +** is decided elsewhere. This analysis only looks at whether subterms +** appropriate for indexing exist. +** +** All examples A through E above satisfy case 3. But if a term +** also satisfies case 1 (such as B) we know that the optimizer will +** always prefer case 1, so in that case we pretend that case 3 is not +** satisfied. +** +** It might be the case that multiple tables are indexable. For example, +** (E) above is indexable on tables P, Q, and R. +** +** Terms that satisfy case 3 are candidates for lookup by using +** separate indices to find rowids for each subterm and composing +** the union of all rowids using a RowSet object. This is similar +** to "bitmap indices" in other database engines. +** +** OTHERWISE: +** +** If none of cases 1, 2, or 3 apply, then leave the eOperator set to +** zero. This term is not useful for search. */ -static int whereKeyStats( - Parse *pParse, /* Database connection */ - Index *pIdx, /* Index to consider domain of */ - UnpackedRecord *pRec, /* Vector of values to consider */ - int roundUp, /* Round up if true. Round down if false */ - tRowcnt *aStat /* OUT: stats written here */ +static void exprAnalyzeOrTerm( + SrcList *pSrc, /* the FROM clause */ + WhereClause *pWC, /* the complete WHERE clause */ + int idxTerm /* Index of the OR-term to be analyzed */ ){ - IndexSample *aSample = pIdx->aSample; - int iCol; /* Index of required stats in anEq[] etc. */ - int i; /* Index of first sample >= pRec */ - int iSample; /* Smallest sample larger than or equal to pRec */ - int iMin = 0; /* Smallest sample not yet tested */ - int iTest; /* Next sample to test */ - int res; /* Result of comparison operation */ - int nField; /* Number of fields in pRec */ - tRowcnt iLower = 0; /* anLt[] + anEq[] of largest sample pRec is > */ - -#ifndef SQLITE_DEBUG - UNUSED_PARAMETER( pParse ); -#endif - assert( pRec!=0 ); - assert( pIdx->nSample>0 ); - assert( pRec->nField>0 && pRec->nField<=pIdx->nSampleCol ); + WhereInfo *pWInfo = pWC->pWInfo; /* WHERE clause processing context */ + Parse *pParse = pWInfo->pParse; /* Parser context */ + sqlite3 *db = pParse->db; /* Database connection */ + WhereTerm *pTerm = &pWC->a[idxTerm]; /* The term to be analyzed */ + Expr *pExpr = pTerm->pExpr; /* The expression of the term */ + int i; /* Loop counters */ + WhereClause *pOrWc; /* Breakup of pTerm into subterms */ + WhereTerm *pOrTerm; /* A Sub-term within the pOrWc */ + WhereOrInfo *pOrInfo; /* Additional information associated with pTerm */ + Bitmask chngToIN; /* Tables that might satisfy case 1 */ + Bitmask indexable; /* Tables that are indexable, satisfying case 2 */ - /* Do a binary search to find the first sample greater than or equal - ** to pRec. If pRec contains a single field, the set of samples to search - ** is simply the aSample[] array. If the samples in aSample[] contain more - ** than one fields, all fields following the first are ignored. - ** - ** If pRec contains N fields, where N is more than one, then as well as the - ** samples in aSample[] (truncated to N fields), the search also has to - ** consider prefixes of those samples. For example, if the set of samples - ** in aSample is: - ** - ** aSample[0] = (a, 5) - ** aSample[1] = (a, 10) - ** aSample[2] = (b, 5) - ** aSample[3] = (c, 100) - ** aSample[4] = (c, 105) - ** - ** Then the search space should ideally be the samples above and the - ** unique prefixes [a], [b] and [c]. But since that is hard to organize, - ** the code actually searches this set: - ** - ** 0: (a) - ** 1: (a, 5) - ** 2: (a, 10) - ** 3: (a, 10) - ** 4: (b) - ** 5: (b, 5) - ** 6: (c) - ** 7: (c, 100) - ** 8: (c, 105) - ** 9: (c, 105) - ** - ** For each sample in the aSample[] array, N samples are present in the - ** effective sample array. In the above, samples 0 and 1 are based on - ** sample aSample[0]. Samples 2 and 3 on aSample[1] etc. - ** - ** Often, sample i of each block of N effective samples has (i+1) fields. - ** Except, each sample may be extended to ensure that it is greater than or - ** equal to the previous sample in the array. For example, in the above, - ** sample 2 is the first sample of a block of N samples, so at first it - ** appears that it should be 1 field in size. However, that would make it - ** smaller than sample 1, so the binary search would not work. As a result, - ** it is extended to two fields. The duplicates that this creates do not - ** cause any problems. + /* + ** Break the OR clause into its separate subterms. The subterms are + ** stored in a WhereClause structure containing within the WhereOrInfo + ** object that is attached to the original OR clause term. */ - nField = pRec->nField; - iCol = 0; - iSample = pIdx->nSample * nField; - do{ - int iSamp; /* Index in aSample[] of test sample */ - int n; /* Number of fields in test sample */ + assert( (pTerm->wtFlags & (TERM_DYNAMIC|TERM_ORINFO|TERM_ANDINFO))==0 ); + assert( pExpr->op==TK_OR ); + pTerm->u.pOrInfo = pOrInfo = sqlite3DbMallocZero(db, sizeof(*pOrInfo)); + if( pOrInfo==0 ) return; + pTerm->wtFlags |= TERM_ORINFO; + pOrWc = &pOrInfo->wc; + sqlite3WhereClauseInit(pOrWc, pWInfo); + sqlite3WhereSplit(pOrWc, pExpr, TK_OR); + sqlite3WhereExprAnalyze(pSrc, pOrWc); + if( db->mallocFailed ) return; + assert( pOrWc->nTerm>=2 ); - iTest = (iMin+iSample)/2; - iSamp = iTest / nField; - if( iSamp>0 ){ - /* The proposed effective sample is a prefix of sample aSample[iSamp]. - ** Specifically, the shortest prefix of at least (1 + iTest%nField) - ** fields that is greater than the previous effective sample. */ - for(n=(iTest % nField) + 1; nnTerm-1, pOrTerm=pOrWc->a; i>=0 && indexable; i--, pOrTerm++){ + if( (pOrTerm->eOperator & WO_SINGLE)==0 ){ + WhereAndInfo *pAndInfo; + assert( (pOrTerm->wtFlags & (TERM_ANDINFO|TERM_ORINFO))==0 ); + chngToIN = 0; + pAndInfo = sqlite3DbMallocRaw(db, sizeof(*pAndInfo)); + if( pAndInfo ){ + WhereClause *pAndWC; + WhereTerm *pAndTerm; + int j; + Bitmask b = 0; + pOrTerm->u.pAndInfo = pAndInfo; + pOrTerm->wtFlags |= TERM_ANDINFO; + pOrTerm->eOperator = WO_AND; + pAndWC = &pAndInfo->wc; + sqlite3WhereClauseInit(pAndWC, pWC->pWInfo); + sqlite3WhereSplit(pAndWC, pOrTerm->pExpr, TK_AND); + sqlite3WhereExprAnalyze(pSrc, pAndWC); + pAndWC->pOuter = pWC; + testcase( db->mallocFailed ); + if( !db->mallocFailed ){ + for(j=0, pAndTerm=pAndWC->a; jnTerm; j++, pAndTerm++){ + assert( pAndTerm->pExpr ); + if( allowedOp(pAndTerm->pExpr->op) ){ + b |= sqlite3WhereGetMask(&pWInfo->sMaskSet, pAndTerm->leftCursor); + } + } + } + indexable &= b; } + }else if( pOrTerm->wtFlags & TERM_COPIED ){ + /* Skip this term for now. We revisit it when we process the + ** corresponding TERM_VIRTUAL term */ }else{ - n = iTest + 1; + Bitmask b; + b = sqlite3WhereGetMask(&pWInfo->sMaskSet, pOrTerm->leftCursor); + if( pOrTerm->wtFlags & TERM_VIRTUAL ){ + WhereTerm *pOther = &pOrWc->a[pOrTerm->iParent]; + b |= sqlite3WhereGetMask(&pWInfo->sMaskSet, pOther->leftCursor); + } + indexable &= b; + if( (pOrTerm->eOperator & WO_EQ)==0 ){ + chngToIN = 0; + }else{ + chngToIN &= b; + } } + } - pRec->nField = n; - res = sqlite3VdbeRecordCompare(aSample[iSamp].n, aSample[iSamp].p, pRec); - if( res<0 ){ - iLower = aSample[iSamp].anLt[n-1] + aSample[iSamp].anEq[n-1]; - iMin = iTest+1; - }else if( res==0 && nindexable = indexable; + pTerm->eOperator = indexable==0 ? 0 : WO_OR; + + /* For a two-way OR, attempt to implementation case 2. + */ + if( indexable && pOrWc->nTerm==2 ){ + int iOne = 0; + WhereTerm *pOne; + while( (pOne = whereNthSubterm(&pOrWc->a[0],iOne++))!=0 ){ + int iTwo = 0; + WhereTerm *pTwo; + while( (pTwo = whereNthSubterm(&pOrWc->a[1],iTwo++))!=0 ){ + whereCombineDisjuncts(pSrc, pWC, pOne, pTwo); + } } - }while( res && iMindb->mallocFailed==0 ){ - if( res==0 ){ - /* If (res==0) is true, then pRec must be equal to sample i. */ - assert( inSample ); - assert( iCol==nField-1 ); - pRec->nField = nField; - assert( 0==sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec) - || pParse->db->mallocFailed - ); - }else{ - /* Unless i==pIdx->nSample, indicating that pRec is larger than - ** all samples in the aSample[] array, pRec must be smaller than the - ** (iCol+1) field prefix of sample i. */ - assert( i<=pIdx->nSample && i>=0 ); - pRec->nField = iCol+1; - assert( i==pIdx->nSample - || sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)>0 - || pParse->db->mallocFailed ); + /* + ** chngToIN holds a set of tables that *might* satisfy case 1. But + ** we have to do some additional checking to see if case 1 really + ** is satisfied. + ** + ** chngToIN will hold either 0, 1, or 2 bits. The 0-bit case means + ** that there is no possibility of transforming the OR clause into an + ** IN operator because one or more terms in the OR clause contain + ** something other than == on a column in the single table. The 1-bit + ** case means that every term of the OR clause is of the form + ** "table.column=expr" for some single table. The one bit that is set + ** will correspond to the common table. We still need to check to make + ** sure the same column is used on all terms. The 2-bit case is when + ** the all terms are of the form "table1.column=table2.column". It + ** might be possible to form an IN operator with either table1.column + ** or table2.column as the LHS if either is common to every term of + ** the OR clause. + ** + ** Note that terms of the form "table.column1=table.column2" (the + ** same table on both sizes of the ==) cannot be optimized. + */ + if( chngToIN ){ + int okToChngToIN = 0; /* True if the conversion to IN is valid */ + int iColumn = -1; /* Column index on lhs of IN operator */ + int iCursor = -1; /* Table cursor common to all terms */ + int j = 0; /* Loop counter */ - /* if i==0 and iCol==0, then record pRec is smaller than all samples - ** in the aSample[] array. Otherwise, if (iCol>0) then pRec must - ** be greater than or equal to the (iCol) field prefix of sample i. - ** If (i>0), then pRec must also be greater than sample (i-1). */ - if( iCol>0 ){ - pRec->nField = iCol; - assert( sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)<=0 - || pParse->db->mallocFailed ); + /* Search for a table and column that appears on one side or the + ** other of the == operator in every subterm. That table and column + ** will be recorded in iCursor and iColumn. There might not be any + ** such table and column. Set okToChngToIN if an appropriate table + ** and column is found but leave okToChngToIN false if not found. + */ + for(j=0; j<2 && !okToChngToIN; j++){ + pOrTerm = pOrWc->a; + for(i=pOrWc->nTerm-1; i>=0; i--, pOrTerm++){ + assert( pOrTerm->eOperator & WO_EQ ); + pOrTerm->wtFlags &= ~TERM_OR_OK; + if( pOrTerm->leftCursor==iCursor ){ + /* This is the 2-bit case and we are on the second iteration and + ** current term is from the first iteration. So skip this term. */ + assert( j==1 ); + continue; + } + if( (chngToIN & sqlite3WhereGetMask(&pWInfo->sMaskSet, + pOrTerm->leftCursor))==0 ){ + /* This term must be of the form t1.a==t2.b where t2 is in the + ** chngToIN set but t1 is not. This term will be either preceded + ** or follwed by an inverted copy (t2.b==t1.a). Skip this term + ** and use its inversion. */ + testcase( pOrTerm->wtFlags & TERM_COPIED ); + testcase( pOrTerm->wtFlags & TERM_VIRTUAL ); + assert( pOrTerm->wtFlags & (TERM_COPIED|TERM_VIRTUAL) ); + continue; + } + iColumn = pOrTerm->u.leftColumn; + iCursor = pOrTerm->leftCursor; + break; } - if( i>0 ){ - pRec->nField = nField; - assert( sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0 - || pParse->db->mallocFailed ); + if( i<0 ){ + /* No candidate table+column was found. This can only occur + ** on the second iteration */ + assert( j==1 ); + assert( IsPowerOfTwo(chngToIN) ); + assert( chngToIN==sqlite3WhereGetMask(&pWInfo->sMaskSet, iCursor) ); + break; } - } - } -#endif /* ifdef SQLITE_DEBUG */ + testcase( j==1 ); - if( res==0 ){ - /* Record pRec is equal to sample i */ - assert( iCol==nField-1 ); - aStat[0] = aSample[i].anLt[iCol]; - aStat[1] = aSample[i].anEq[iCol]; - }else{ - /* At this point, the (iCol+1) field prefix of aSample[i] is the first - ** sample that is greater than pRec. Or, if i==pIdx->nSample then pRec - ** is larger than all samples in the array. */ - tRowcnt iUpper, iGap; - if( i>=pIdx->nSample ){ - iUpper = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]); - }else{ - iUpper = aSample[i].anLt[iCol]; + /* We have found a candidate table and column. Check to see if that + ** table and column is common to every term in the OR clause */ + okToChngToIN = 1; + for(; i>=0 && okToChngToIN; i--, pOrTerm++){ + assert( pOrTerm->eOperator & WO_EQ ); + if( pOrTerm->leftCursor!=iCursor ){ + pOrTerm->wtFlags &= ~TERM_OR_OK; + }else if( pOrTerm->u.leftColumn!=iColumn ){ + okToChngToIN = 0; + }else{ + int affLeft, affRight; + /* If the right-hand side is also a column, then the affinities + ** of both right and left sides must be such that no type + ** conversions are required on the right. (Ticket #2249) + */ + affRight = sqlite3ExprAffinity(pOrTerm->pExpr->pRight); + affLeft = sqlite3ExprAffinity(pOrTerm->pExpr->pLeft); + if( affRight!=0 && affRight!=affLeft ){ + okToChngToIN = 0; + }else{ + pOrTerm->wtFlags |= TERM_OR_OK; + } + } + } } - if( iLower>=iUpper ){ - iGap = 0; - }else{ - iGap = iUpper - iLower; - } - if( roundUp ){ - iGap = (iGap*2)/3; - }else{ - iGap = iGap/3; + /* At this point, okToChngToIN is true if original pTerm satisfies + ** case 1. In that case, construct a new virtual term that is + ** pTerm converted into an IN operator. + */ + if( okToChngToIN ){ + Expr *pDup; /* A transient duplicate expression */ + ExprList *pList = 0; /* The RHS of the IN operator */ + Expr *pLeft = 0; /* The LHS of the IN operator */ + Expr *pNew; /* The complete IN operator */ + + for(i=pOrWc->nTerm-1, pOrTerm=pOrWc->a; i>=0; i--, pOrTerm++){ + if( (pOrTerm->wtFlags & TERM_OR_OK)==0 ) continue; + assert( pOrTerm->eOperator & WO_EQ ); + assert( pOrTerm->leftCursor==iCursor ); + assert( pOrTerm->u.leftColumn==iColumn ); + pDup = sqlite3ExprDup(db, pOrTerm->pExpr->pRight, 0); + pList = sqlite3ExprListAppend(pWInfo->pParse, pList, pDup); + pLeft = pOrTerm->pExpr->pLeft; + } + assert( pLeft!=0 ); + pDup = sqlite3ExprDup(db, pLeft, 0); + pNew = sqlite3PExpr(pParse, TK_IN, pDup, 0, 0); + if( pNew ){ + int idxNew; + transferJoinMarkings(pNew, pExpr); + assert( !ExprHasProperty(pNew, EP_xIsSelect) ); + pNew->x.pList = pList; + idxNew = whereClauseInsert(pWC, pNew, TERM_VIRTUAL|TERM_DYNAMIC); + testcase( idxNew==0 ); + exprAnalyze(pSrc, pWC, idxNew); + pTerm = &pWC->a[idxTerm]; + markTermAsChild(pWC, idxNew, idxTerm); + }else{ + sqlite3ExprListDelete(db, pList); + } + pTerm->eOperator = WO_NOOP; /* case 1 trumps case 3 */ } - aStat[0] = iLower + iGap; - aStat[1] = pIdx->aAvgEq[iCol]; } +} +#endif /* !SQLITE_OMIT_OR_OPTIMIZATION && !SQLITE_OMIT_SUBQUERY */ - /* Restore the pRec->nField value before returning. */ - pRec->nField = nField; - return i; +/* +** We already know that pExpr is a binary operator where both operands are +** column references. This routine checks to see if pExpr is an equivalence +** relation: +** 1. The SQLITE_Transitive optimization must be enabled +** 2. Must be either an == or an IS operator +** 3. Not originating in the ON clause of an OUTER JOIN +** 4. The affinities of A and B must be compatible +** 5a. Both operands use the same collating sequence OR +** 5b. The overall collating sequence is BINARY +** If this routine returns TRUE, that means that the RHS can be substituted +** for the LHS anyplace else in the WHERE clause where the LHS column occurs. +** This is an optimization. No harm comes from returning 0. But if 1 is +** returned when it should not be, then incorrect answers might result. +*/ +static int termIsEquivalence(Parse *pParse, Expr *pExpr){ + char aff1, aff2; + CollSeq *pColl; + const char *zColl1, *zColl2; + if( !OptimizationEnabled(pParse->db, SQLITE_Transitive) ) return 0; + if( pExpr->op!=TK_EQ && pExpr->op!=TK_IS ) return 0; + if( ExprHasProperty(pExpr, EP_FromJoin) ) return 0; + aff1 = sqlite3ExprAffinity(pExpr->pLeft); + aff2 = sqlite3ExprAffinity(pExpr->pRight); + if( aff1!=aff2 + && (!sqlite3IsNumericAffinity(aff1) || !sqlite3IsNumericAffinity(aff2)) + ){ + return 0; + } + pColl = sqlite3BinaryCompareCollSeq(pParse, pExpr->pLeft, pExpr->pRight); + if( pColl==0 || sqlite3StrICmp(pColl->zName, "BINARY")==0 ) return 1; + pColl = sqlite3ExprCollSeq(pParse, pExpr->pLeft); + /* Since pLeft and pRight are both a column references, their collating + ** sequence should always be defined. */ + zColl1 = ALWAYS(pColl) ? pColl->zName : 0; + pColl = sqlite3ExprCollSeq(pParse, pExpr->pRight); + zColl2 = ALWAYS(pColl) ? pColl->zName : 0; + return sqlite3StrICmp(zColl1, zColl2)==0; } -#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ /* -** If it is not NULL, pTerm is a term that provides an upper or lower -** bound on a range scan. Without considering pTerm, it is estimated -** that the scan will visit nNew rows. This function returns the number -** estimated to be visited after taking pTerm into account. -** -** If the user explicitly specified a likelihood() value for this term, -** then the return value is the likelihood multiplied by the number of -** input rows. Otherwise, this function assumes that an "IS NOT NULL" term -** has a likelihood of 0.50, and any other term a likelihood of 0.25. +** Recursively walk the expressions of a SELECT statement and generate +** a bitmask indicating which tables are used in that expression +** tree. */ -static LogEst whereRangeAdjust(WhereTerm *pTerm, LogEst nNew){ - LogEst nRet = nNew; - if( pTerm ){ - if( pTerm->truthProb<=0 ){ - nRet += pTerm->truthProb; - }else if( (pTerm->wtFlags & TERM_VNULL)==0 ){ - nRet -= 20; assert( 20==sqlite3LogEst(4) ); +static Bitmask exprSelectUsage(WhereMaskSet *pMaskSet, Select *pS){ + Bitmask mask = 0; + while( pS ){ + SrcList *pSrc = pS->pSrc; + mask |= sqlite3WhereExprListUsage(pMaskSet, pS->pEList); + mask |= sqlite3WhereExprListUsage(pMaskSet, pS->pGroupBy); + mask |= sqlite3WhereExprListUsage(pMaskSet, pS->pOrderBy); + mask |= sqlite3WhereExprUsage(pMaskSet, pS->pWhere); + mask |= sqlite3WhereExprUsage(pMaskSet, pS->pHaving); + if( ALWAYS(pSrc!=0) ){ + int i; + for(i=0; inSrc; i++){ + mask |= exprSelectUsage(pMaskSet, pSrc->a[i].pSelect); + mask |= sqlite3WhereExprUsage(pMaskSet, pSrc->a[i].pOn); + } } + pS = pS->pPrior; } - return nRet; + return mask; } -#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 -/* -** This function is called to estimate the number of rows visited by a -** range-scan on a skip-scan index. For example: -** -** CREATE INDEX i1 ON t1(a, b, c); -** SELECT * FROM t1 WHERE a=? AND c BETWEEN ? AND ?; -** -** Value pLoop->nOut is currently set to the estimated number of rows -** visited for scanning (a=? AND b=?). This function reduces that estimate -** by some factor to account for the (c BETWEEN ? AND ?) expression based -** on the stat4 data for the index. this scan will be peformed multiple -** times (once for each (a,b) combination that matches a=?) is dealt with -** by the caller. -** -** It does this by scanning through all stat4 samples, comparing values -** extracted from pLower and pUpper with the corresponding column in each -** sample. If L and U are the number of samples found to be less than or -** equal to the values extracted from pLower and pUpper respectively, and -** N is the total number of samples, the pLoop->nOut value is adjusted -** as follows: -** -** nOut = nOut * ( min(U - L, 1) / N ) -** -** If pLower is NULL, or a value cannot be extracted from the term, L is -** set to zero. If pUpper is NULL, or a value cannot be extracted from it, -** U is set to N. +/* +** The input to this routine is an WhereTerm structure with only the +** "pExpr" field filled in. The job of this routine is to analyze the +** subexpression and populate all the other fields of the WhereTerm +** structure. ** -** Normally, this function sets *pbDone to 1 before returning. However, -** if no value can be extracted from either pLower or pUpper (and so the -** estimate of the number of rows delivered remains unchanged), *pbDone -** is left as is. +** If the expression is of the form " X" it gets commuted +** to the standard form of "X ". ** -** If an error occurs, an SQLite error code is returned. Otherwise, -** SQLITE_OK. +** If the expression is of the form "X Y" where both X and Y are +** columns, then the original expression is unchanged and a new virtual +** term of the form "Y X" is added to the WHERE clause and +** analyzed separately. The original term is marked with TERM_COPIED +** and the new term is marked with TERM_DYNAMIC (because it's pExpr +** needs to be freed with the WhereClause) and TERM_VIRTUAL (because it +** is a commuted copy of a prior term.) The original term has nChild=1 +** and the copy has idxParent set to the index of the original term. */ -static int whereRangeSkipScanEst( - Parse *pParse, /* Parsing & code generating context */ - WhereTerm *pLower, /* Lower bound on the range. ex: "x>123" Might be NULL */ - WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */ - WhereLoop *pLoop, /* Update the .nOut value of this loop */ - int *pbDone /* Set to true if at least one expr. value extracted */ +static void exprAnalyze( + SrcList *pSrc, /* the FROM clause */ + WhereClause *pWC, /* the WHERE clause */ + int idxTerm /* Index of the term to be analyzed */ ){ - Index *p = pLoop->u.btree.pIndex; - int nEq = pLoop->u.btree.nEq; - sqlite3 *db = pParse->db; - int nLower = -1; - int nUpper = p->nSample+1; - int rc = SQLITE_OK; - int iCol = p->aiColumn[nEq]; - u8 aff = iCol>=0 ? p->pTable->aCol[iCol].affinity : SQLITE_AFF_INTEGER; - CollSeq *pColl; - - sqlite3_value *p1 = 0; /* Value extracted from pLower */ - sqlite3_value *p2 = 0; /* Value extracted from pUpper */ - sqlite3_value *pVal = 0; /* Value extracted from record */ + WhereInfo *pWInfo = pWC->pWInfo; /* WHERE clause processing context */ + WhereTerm *pTerm; /* The term to be analyzed */ + WhereMaskSet *pMaskSet; /* Set of table index masks */ + Expr *pExpr; /* The expression to be analyzed */ + Bitmask prereqLeft; /* Prerequesites of the pExpr->pLeft */ + Bitmask prereqAll; /* Prerequesites of pExpr */ + Bitmask extraRight = 0; /* Extra dependencies on LEFT JOIN */ + Expr *pStr1 = 0; /* RHS of LIKE/GLOB operator */ + int isComplete = 0; /* RHS of LIKE/GLOB ends with wildcard */ + int noCase = 0; /* uppercase equivalent to lowercase */ + int op; /* Top-level operator. pExpr->op */ + Parse *pParse = pWInfo->pParse; /* Parsing context */ + sqlite3 *db = pParse->db; /* Database connection */ - pColl = sqlite3LocateCollSeq(pParse, p->azColl[nEq]); - if( pLower ){ - rc = sqlite3Stat4ValueFromExpr(pParse, pLower->pExpr->pRight, aff, &p1); - nLower = 0; - } - if( pUpper && rc==SQLITE_OK ){ - rc = sqlite3Stat4ValueFromExpr(pParse, pUpper->pExpr->pRight, aff, &p2); - nUpper = p2 ? 0 : p->nSample; + if( db->mallocFailed ){ + return; } - - if( p1 || p2 ){ - int i; - int nDiff; - for(i=0; rc==SQLITE_OK && inSample; i++){ - rc = sqlite3Stat4Column(db, p->aSample[i].p, p->aSample[i].n, nEq, &pVal); - if( rc==SQLITE_OK && p1 ){ - int res = sqlite3MemCompare(p1, pVal, pColl); - if( res>=0 ) nLower++; - } - if( rc==SQLITE_OK && p2 ){ - int res = sqlite3MemCompare(p2, pVal, pColl); - if( res>=0 ) nUpper++; - } + pTerm = &pWC->a[idxTerm]; + pMaskSet = &pWInfo->sMaskSet; + pExpr = pTerm->pExpr; + assert( pExpr->op!=TK_AS && pExpr->op!=TK_COLLATE ); + prereqLeft = sqlite3WhereExprUsage(pMaskSet, pExpr->pLeft); + op = pExpr->op; + if( op==TK_IN ){ + assert( pExpr->pRight==0 ); + if( ExprHasProperty(pExpr, EP_xIsSelect) ){ + pTerm->prereqRight = exprSelectUsage(pMaskSet, pExpr->x.pSelect); + }else{ + pTerm->prereqRight = sqlite3WhereExprListUsage(pMaskSet, pExpr->x.pList); } - nDiff = (nUpper - nLower); - if( nDiff<=0 ) nDiff = 1; - - /* If there is both an upper and lower bound specified, and the - ** comparisons indicate that they are close together, use the fallback - ** method (assume that the scan visits 1/64 of the rows) for estimating - ** the number of rows visited. Otherwise, estimate the number of rows - ** using the method described in the header comment for this function. */ - if( nDiff!=1 || pUpper==0 || pLower==0 ){ - int nAdjust = (sqlite3LogEst(p->nSample) - sqlite3LogEst(nDiff)); - pLoop->nOut -= nAdjust; - *pbDone = 1; - WHERETRACE(0x10, ("range skip-scan regions: %u..%u adjust=%d est=%d\n", - nLower, nUpper, nAdjust*-1, pLoop->nOut)); - } - + }else if( op==TK_ISNULL ){ + pTerm->prereqRight = 0; }else{ - assert( *pbDone==0 ); + pTerm->prereqRight = sqlite3WhereExprUsage(pMaskSet, pExpr->pRight); } + prereqAll = sqlite3WhereExprUsage(pMaskSet, pExpr); + if( ExprHasProperty(pExpr, EP_FromJoin) ){ + Bitmask x = sqlite3WhereGetMask(pMaskSet, pExpr->iRightJoinTable); + prereqAll |= x; + extraRight = x-1; /* ON clause terms may not be used with an index + ** on left table of a LEFT JOIN. Ticket #3015 */ + } + pTerm->prereqAll = prereqAll; + pTerm->leftCursor = -1; + pTerm->iParent = -1; + pTerm->eOperator = 0; + if( allowedOp(op) ){ + Expr *pLeft = sqlite3ExprSkipCollate(pExpr->pLeft); + Expr *pRight = sqlite3ExprSkipCollate(pExpr->pRight); + u16 opMask = (pTerm->prereqRight & prereqLeft)==0 ? WO_ALL : WO_EQUIV; + if( pLeft->op==TK_COLUMN ){ + pTerm->leftCursor = pLeft->iTable; + pTerm->u.leftColumn = pLeft->iColumn; + pTerm->eOperator = operatorMask(op) & opMask; + } + if( op==TK_IS ) pTerm->wtFlags |= TERM_IS; + if( pRight && pRight->op==TK_COLUMN ){ + WhereTerm *pNew; + Expr *pDup; + u16 eExtraOp = 0; /* Extra bits for pNew->eOperator */ + if( pTerm->leftCursor>=0 ){ + int idxNew; + pDup = sqlite3ExprDup(db, pExpr, 0); + if( db->mallocFailed ){ + sqlite3ExprDelete(db, pDup); + return; + } + idxNew = whereClauseInsert(pWC, pDup, TERM_VIRTUAL|TERM_DYNAMIC); + if( idxNew==0 ) return; + pNew = &pWC->a[idxNew]; + markTermAsChild(pWC, idxNew, idxTerm); + if( op==TK_IS ) pNew->wtFlags |= TERM_IS; + pTerm = &pWC->a[idxTerm]; + pTerm->wtFlags |= TERM_COPIED; - sqlite3ValueFree(p1); - sqlite3ValueFree(p2); - sqlite3ValueFree(pVal); - - return rc; -} -#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ - -/* -** This function is used to estimate the number of rows that will be visited -** by scanning an index for a range of values. The range may have an upper -** bound, a lower bound, or both. The WHERE clause terms that set the upper -** and lower bounds are represented by pLower and pUpper respectively. For -** example, assuming that index p is on t1(a): -** -** ... FROM t1 WHERE a > ? AND a < ? ... -** |_____| |_____| -** | | -** pLower pUpper -** -** If either of the upper or lower bound is not present, then NULL is passed in -** place of the corresponding WhereTerm. -** -** The value in (pBuilder->pNew->u.btree.nEq) is the number of the index -** column subject to the range constraint. Or, equivalently, the number of -** equality constraints optimized by the proposed index scan. For example, -** assuming index p is on t1(a, b), and the SQL query is: -** -** ... FROM t1 WHERE a = ? AND b > ? AND b < ? ... -** -** then nEq is set to 1 (as the range restricted column, b, is the second -** left-most column of the index). Or, if the query is: -** -** ... FROM t1 WHERE a > ? AND a < ? ... -** -** then nEq is set to 0. -** -** When this function is called, *pnOut is set to the sqlite3LogEst() of the -** number of rows that the index scan is expected to visit without -** considering the range constraints. If nEq is 0, then *pnOut is the number of -** rows in the index. Assuming no error occurs, *pnOut is adjusted (reduced) -** to account for the range constraints pLower and pUpper. -** -** In the absence of sqlite_stat4 ANALYZE data, or if such data cannot be -** used, a single range inequality reduces the search space by a factor of 4. -** and a pair of constraints (x>? AND x123" Might be NULL */ - WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */ - WhereLoop *pLoop /* Modify the .nOut and maybe .rRun fields */ -){ - int rc = SQLITE_OK; - int nOut = pLoop->nOut; - LogEst nNew; - -#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 - Index *p = pLoop->u.btree.pIndex; - int nEq = pLoop->u.btree.nEq; - - if( p->nSample>0 && nEqnSampleCol ){ - if( nEq==pBuilder->nRecValid ){ - UnpackedRecord *pRec = pBuilder->pRec; - tRowcnt a[2]; - u8 aff; - - /* Variable iLower will be set to the estimate of the number of rows in - ** the index that are less than the lower bound of the range query. The - ** lower bound being the concatenation of $P and $L, where $P is the - ** key-prefix formed by the nEq values matched against the nEq left-most - ** columns of the index, and $L is the value in pLower. - ** - ** Or, if pLower is NULL or $L cannot be extracted from it (because it - ** is not a simple variable or literal value), the lower bound of the - ** range is $P. Due to a quirk in the way whereKeyStats() works, even - ** if $L is available, whereKeyStats() is called for both ($P) and - ** ($P:$L) and the larger of the two returned values is used. - ** - ** Similarly, iUpper is to be set to the estimate of the number of rows - ** less than the upper bound of the range query. Where the upper bound - ** is either ($P) or ($P:$U). Again, even if $U is available, both values - ** of iUpper are requested of whereKeyStats() and the smaller used. - ** - ** The number of rows between the two bounds is then just iUpper-iLower. - */ - tRowcnt iLower; /* Rows less than the lower bound */ - tRowcnt iUpper; /* Rows less than the upper bound */ - int iLwrIdx = -2; /* aSample[] for the lower bound */ - int iUprIdx = -1; /* aSample[] for the upper bound */ - - if( pRec ){ - testcase( pRec->nField!=pBuilder->nRecValid ); - pRec->nField = pBuilder->nRecValid; - } - if( nEq==p->nKeyCol ){ - aff = SQLITE_AFF_INTEGER; - }else{ - aff = p->pTable->aCol[p->aiColumn[nEq]].affinity; - } - /* Determine iLower and iUpper using ($P) only. */ - if( nEq==0 ){ - iLower = 0; - iUpper = p->nRowEst0; + if( termIsEquivalence(pParse, pDup) ){ + pTerm->eOperator |= WO_EQUIV; + eExtraOp = WO_EQUIV; + } }else{ - /* Note: this call could be optimized away - since the same values must - ** have been requested when testing key $P in whereEqualScanEst(). */ - whereKeyStats(pParse, p, pRec, 0, a); - iLower = a[0]; - iUpper = a[0] + a[1]; + pDup = pExpr; + pNew = pTerm; } + exprCommute(pParse, pDup); + pLeft = sqlite3ExprSkipCollate(pDup->pLeft); + pNew->leftCursor = pLeft->iTable; + pNew->u.leftColumn = pLeft->iColumn; + testcase( (prereqLeft | extraRight) != prereqLeft ); + pNew->prereqRight = prereqLeft | extraRight; + pNew->prereqAll = prereqAll; + pNew->eOperator = (operatorMask(pDup->op) + eExtraOp) & opMask; + } + } - assert( pLower==0 || (pLower->eOperator & (WO_GT|WO_GE))!=0 ); - assert( pUpper==0 || (pUpper->eOperator & (WO_LT|WO_LE))!=0 ); - assert( p->aSortOrder!=0 ); - if( p->aSortOrder[nEq] ){ - /* The roles of pLower and pUpper are swapped for a DESC index */ - SWAP(WhereTerm*, pLower, pUpper); - } +#ifndef SQLITE_OMIT_BETWEEN_OPTIMIZATION + /* If a term is the BETWEEN operator, create two new virtual terms + ** that define the range that the BETWEEN implements. For example: + ** + ** a BETWEEN b AND c + ** + ** is converted into: + ** + ** (a BETWEEN b AND c) AND (a>=b) AND (a<=c) + ** + ** The two new terms are added onto the end of the WhereClause object. + ** The new terms are "dynamic" and are children of the original BETWEEN + ** term. That means that if the BETWEEN term is coded, the children are + ** skipped. Or, if the children are satisfied by an index, the original + ** BETWEEN term is skipped. + */ + else if( pExpr->op==TK_BETWEEN && pWC->op==TK_AND ){ + ExprList *pList = pExpr->x.pList; + int i; + static const u8 ops[] = {TK_GE, TK_LE}; + assert( pList!=0 ); + assert( pList->nExpr==2 ); + for(i=0; i<2; i++){ + Expr *pNewExpr; + int idxNew; + pNewExpr = sqlite3PExpr(pParse, ops[i], + sqlite3ExprDup(db, pExpr->pLeft, 0), + sqlite3ExprDup(db, pList->a[i].pExpr, 0), 0); + transferJoinMarkings(pNewExpr, pExpr); + idxNew = whereClauseInsert(pWC, pNewExpr, TERM_VIRTUAL|TERM_DYNAMIC); + testcase( idxNew==0 ); + exprAnalyze(pSrc, pWC, idxNew); + pTerm = &pWC->a[idxTerm]; + markTermAsChild(pWC, idxNew, idxTerm); + } + } +#endif /* SQLITE_OMIT_BETWEEN_OPTIMIZATION */ - /* If possible, improve on the iLower estimate using ($P:$L). */ - if( pLower ){ - int bOk; /* True if value is extracted from pExpr */ - Expr *pExpr = pLower->pExpr->pRight; - rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk); - if( rc==SQLITE_OK && bOk ){ - tRowcnt iNew; - iLwrIdx = whereKeyStats(pParse, p, pRec, 0, a); - iNew = a[0] + ((pLower->eOperator & (WO_GT|WO_LE)) ? a[1] : 0); - if( iNew>iLower ) iLower = iNew; - nOut--; - pLower = 0; - } - } +#if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY) + /* Analyze a term that is composed of two or more subterms connected by + ** an OR operator. + */ + else if( pExpr->op==TK_OR ){ + assert( pWC->op==TK_AND ); + exprAnalyzeOrTerm(pSrc, pWC, idxTerm); + pTerm = &pWC->a[idxTerm]; + } +#endif /* SQLITE_OMIT_OR_OPTIMIZATION */ - /* If possible, improve on the iUpper estimate using ($P:$U). */ - if( pUpper ){ - int bOk; /* True if value is extracted from pExpr */ - Expr *pExpr = pUpper->pExpr->pRight; - rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk); - if( rc==SQLITE_OK && bOk ){ - tRowcnt iNew; - iUprIdx = whereKeyStats(pParse, p, pRec, 1, a); - iNew = a[0] + ((pUpper->eOperator & (WO_GT|WO_LE)) ? a[1] : 0); - if( iNew='ABC' AND x<'abd' AND x LIKE 'aBc%' + ** + ** The last character of the prefix "abc" is incremented to form the + ** termination condition "abd". If case is not significant (the default + ** for LIKE) then the lower-bound is made all uppercase and the upper- + ** bound is made all lowercase so that the bounds also work when comparing + ** BLOBs. + */ + if( pWC->op==TK_AND + && isLikeOrGlob(pParse, pExpr, &pStr1, &isComplete, &noCase) + ){ + Expr *pLeft; /* LHS of LIKE/GLOB operator */ + Expr *pStr2; /* Copy of pStr1 - RHS of LIKE/GLOB operator */ + Expr *pNewExpr1; + Expr *pNewExpr2; + int idxNew1; + int idxNew2; + const char *zCollSeqName; /* Name of collating sequence */ + const u16 wtFlags = TERM_LIKEOPT | TERM_VIRTUAL | TERM_DYNAMIC; + + pLeft = pExpr->x.pList->a[1].pExpr; + pStr2 = sqlite3ExprDup(db, pStr1, 0); + + /* Convert the lower bound to upper-case and the upper bound to + ** lower-case (upper-case is less than lower-case in ASCII) so that + ** the range constraints also work for BLOBs + */ + if( noCase && !pParse->db->mallocFailed ){ + int i; + char c; + pTerm->wtFlags |= TERM_LIKE; + for(i=0; (c = pStr1->u.zToken[i])!=0; i++){ + pStr1->u.zToken[i] = sqlite3Toupper(c); + pStr2->u.zToken[i] = sqlite3Tolower(c); } + } - pBuilder->pRec = pRec; - if( rc==SQLITE_OK ){ - if( iUpper>iLower ){ - nNew = sqlite3LogEst(iUpper - iLower); - /* TUNING: If both iUpper and iLower are derived from the same - ** sample, then assume they are 4x more selective. This brings - ** the estimated selectivity more in line with what it would be - ** if estimated without the use of STAT3/4 tables. */ - if( iLwrIdx==iUprIdx ) nNew -= 20; assert( 20==sqlite3LogEst(4) ); - }else{ - nNew = 10; assert( 10==sqlite3LogEst(2) ); - } - if( nNewmallocFailed ){ + u8 c, *pC; /* Last character before the first wildcard */ + pC = (u8*)&pStr2->u.zToken[sqlite3Strlen30(pStr2->u.zToken)-1]; + c = *pC; + if( noCase ){ + /* The point is to increment the last character before the first + ** wildcard. But if we increment '@', that will push it into the + ** alphabetic range where case conversions will mess up the + ** inequality. To avoid this, make sure to also run the full + ** LIKE on all candidate expressions by clearing the isComplete flag + */ + if( c=='A'-1 ) isComplete = 0; + c = sqlite3UpperToLower[c]; } - }else{ - int bDone = 0; - rc = whereRangeSkipScanEst(pParse, pLower, pUpper, pLoop, &bDone); - if( bDone ) return rc; + *pC = c + 1; + } + zCollSeqName = noCase ? "NOCASE" : "BINARY"; + pNewExpr1 = sqlite3ExprDup(db, pLeft, 0); + pNewExpr1 = sqlite3PExpr(pParse, TK_GE, + sqlite3ExprAddCollateString(pParse,pNewExpr1,zCollSeqName), + pStr1, 0); + transferJoinMarkings(pNewExpr1, pExpr); + idxNew1 = whereClauseInsert(pWC, pNewExpr1, wtFlags); + testcase( idxNew1==0 ); + exprAnalyze(pSrc, pWC, idxNew1); + pNewExpr2 = sqlite3ExprDup(db, pLeft, 0); + pNewExpr2 = sqlite3PExpr(pParse, TK_LT, + sqlite3ExprAddCollateString(pParse,pNewExpr2,zCollSeqName), + pStr2, 0); + transferJoinMarkings(pNewExpr2, pExpr); + idxNew2 = whereClauseInsert(pWC, pNewExpr2, wtFlags); + testcase( idxNew2==0 ); + exprAnalyze(pSrc, pWC, idxNew2); + pTerm = &pWC->a[idxTerm]; + if( isComplete ){ + markTermAsChild(pWC, idxNew1, idxTerm); + markTermAsChild(pWC, idxNew2, idxTerm); } } -#else - UNUSED_PARAMETER(pParse); - UNUSED_PARAMETER(pBuilder); - assert( pLower || pUpper ); -#endif - assert( pUpper==0 || (pUpper->wtFlags & TERM_VNULL)==0 ); - nNew = whereRangeAdjust(pLower, nOut); - nNew = whereRangeAdjust(pUpper, nNew); +#endif /* SQLITE_OMIT_LIKE_OPTIMIZATION */ - /* TUNING: If there is both an upper and lower limit and neither limit - ** has an application-defined likelihood(), assume the range is - ** reduced by an additional 75%. This means that, by default, an open-ended - ** range query (e.g. col > ?) is assumed to match 1/4 of the rows in the - ** index. While a closed range (e.g. col BETWEEN ? AND ?) is estimated to - ** match 1/64 of the index. */ - if( pLower && pLower->truthProb>0 && pUpper && pUpper->truthProb>0 ){ - nNew -= 20; - } +#ifndef SQLITE_OMIT_VIRTUALTABLE + /* Add a WO_MATCH auxiliary term to the constraint set if the + ** current expression is of the form: column MATCH expr. + ** This information is used by the xBestIndex methods of + ** virtual tables. The native query optimizer does not attempt + ** to do anything with MATCH functions. + */ + if( isMatchOfColumn(pExpr) ){ + int idxNew; + Expr *pRight, *pLeft; + WhereTerm *pNewTerm; + Bitmask prereqColumn, prereqExpr; - nOut -= (pLower!=0) + (pUpper!=0); - if( nNew<10 ) nNew = 10; - if( nNewnOut>nOut ){ - WHERETRACE(0x10,("Range scan lowers nOut from %d to %d\n", - pLoop->nOut, nOut)); + pRight = pExpr->x.pList->a[0].pExpr; + pLeft = pExpr->x.pList->a[1].pExpr; + prereqExpr = sqlite3WhereExprUsage(pMaskSet, pRight); + prereqColumn = sqlite3WhereExprUsage(pMaskSet, pLeft); + if( (prereqExpr & prereqColumn)==0 ){ + Expr *pNewExpr; + pNewExpr = sqlite3PExpr(pParse, TK_MATCH, + 0, sqlite3ExprDup(db, pRight, 0), 0); + idxNew = whereClauseInsert(pWC, pNewExpr, TERM_VIRTUAL|TERM_DYNAMIC); + testcase( idxNew==0 ); + pNewTerm = &pWC->a[idxNew]; + pNewTerm->prereqRight = prereqExpr; + pNewTerm->leftCursor = pLeft->iTable; + pNewTerm->u.leftColumn = pLeft->iColumn; + pNewTerm->eOperator = WO_MATCH; + markTermAsChild(pWC, idxNew, idxTerm); + pTerm = &pWC->a[idxTerm]; + pTerm->wtFlags |= TERM_COPIED; + pNewTerm->prereqAll = pTerm->prereqAll; + } } -#endif - pLoop->nOut = (LogEst)nOut; - return rc; -} +#endif /* SQLITE_OMIT_VIRTUALTABLE */ #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 -/* -** Estimate the number of rows that will be returned based on -** an equality constraint x=VALUE and where that VALUE occurs in -** the histogram data. This only works when x is the left-most -** column of an index and sqlite_stat3 histogram data is available -** for that index. When pExpr==NULL that means the constraint is -** "x IS NULL" instead of "x=VALUE". -** -** Write the estimated row count into *pnRow and return SQLITE_OK. -** If unable to make an estimate, leave *pnRow unchanged and return -** non-zero. -** -** This routine can fail if it is unable to load a collating sequence -** required for string comparison, or if unable to allocate memory -** for a UTF conversion required for comparison. The error is stored -** in the pParse structure. -*/ -static int whereEqualScanEst( - Parse *pParse, /* Parsing & code generating context */ - WhereLoopBuilder *pBuilder, - Expr *pExpr, /* Expression for VALUE in the x=VALUE constraint */ - tRowcnt *pnRow /* Write the revised row estimate here */ -){ - Index *p = pBuilder->pNew->u.btree.pIndex; - int nEq = pBuilder->pNew->u.btree.nEq; - UnpackedRecord *pRec = pBuilder->pRec; - u8 aff; /* Column affinity */ - int rc; /* Subfunction return code */ - tRowcnt a[2]; /* Statistics */ - int bOk; - - assert( nEq>=1 ); - assert( nEq<=p->nColumn ); - assert( p->aSample!=0 ); - assert( p->nSample>0 ); - assert( pBuilder->nRecValidNULL" if x is not an INTEGER PRIMARY KEY. So construct a + ** virtual term of that form. + ** + ** Note that the virtual term must be tagged with TERM_VNULL. + */ + if( pExpr->op==TK_NOTNULL + && pExpr->pLeft->op==TK_COLUMN + && pExpr->pLeft->iColumn>=0 + && OptimizationEnabled(db, SQLITE_Stat34) + ){ + Expr *pNewExpr; + Expr *pLeft = pExpr->pLeft; + int idxNew; + WhereTerm *pNewTerm; - /* If values are not available for all fields of the index to the left - ** of this one, no estimate can be made. Return SQLITE_NOTFOUND. */ - if( pBuilder->nRecValid<(nEq-1) ){ - return SQLITE_NOTFOUND; - } + pNewExpr = sqlite3PExpr(pParse, TK_GT, + sqlite3ExprDup(db, pLeft, 0), + sqlite3PExpr(pParse, TK_NULL, 0, 0, 0), 0); - /* This is an optimization only. The call to sqlite3Stat4ProbeSetValue() - ** below would return the same value. */ - if( nEq>=p->nColumn ){ - *pnRow = 1; - return SQLITE_OK; + idxNew = whereClauseInsert(pWC, pNewExpr, + TERM_VIRTUAL|TERM_DYNAMIC|TERM_VNULL); + if( idxNew ){ + pNewTerm = &pWC->a[idxNew]; + pNewTerm->prereqRight = 0; + pNewTerm->leftCursor = pLeft->iTable; + pNewTerm->u.leftColumn = pLeft->iColumn; + pNewTerm->eOperator = WO_GT; + markTermAsChild(pWC, idxNew, idxTerm); + pTerm = &pWC->a[idxTerm]; + pTerm->wtFlags |= TERM_COPIED; + pNewTerm->prereqAll = pTerm->prereqAll; + } } +#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ - aff = p->pTable->aCol[p->aiColumn[nEq-1]].affinity; - rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq-1, &bOk); - pBuilder->pRec = pRec; - if( rc!=SQLITE_OK ) return rc; - if( bOk==0 ) return SQLITE_NOTFOUND; - pBuilder->nRecValid = nEq; - - whereKeyStats(pParse, p, pRec, 0, a); - WHERETRACE(0x10,("equality scan regions: %d\n", (int)a[1])); - *pnRow = a[1]; - - return rc; + /* Prevent ON clause terms of a LEFT JOIN from being used to drive + ** an index for tables to the left of the join. + */ + pTerm->prereqRight |= extraRight; } -#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ -#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 +/*************************************************************************** +** Routines with file scope above. Interface to the rest of the where.c +** subsystem follows. +***************************************************************************/ + /* -** Estimate the number of rows that will be returned based on -** an IN constraint where the right-hand side of the IN operator -** is a list of values. Example: +** This routine identifies subexpressions in the WHERE clause where +** each subexpression is separated by the AND operator or some other +** operator specified in the op parameter. The WhereClause structure +** is filled with pointers to subexpressions. For example: ** -** WHERE x IN (1,2,3,4) +** WHERE a=='hello' AND coalesce(b,11)<10 AND (c+12!=d OR c==22) +** \________/ \_______________/ \________________/ +** slot[0] slot[1] slot[2] ** -** Write the estimated row count into *pnRow and return SQLITE_OK. -** If unable to make an estimate, leave *pnRow unchanged and return -** non-zero. +** The original WHERE clause in pExpr is unaltered. All this routine +** does is make slot[] entries point to substructure within pExpr. ** -** This routine can fail if it is unable to load a collating sequence -** required for string comparison, or if unable to allocate memory -** for a UTF conversion required for comparison. The error is stored -** in the pParse structure. +** In the previous sentence and in the diagram, "slot[]" refers to +** the WhereClause.a[] array. The slot[] array grows as needed to contain +** all terms of the WHERE clause. */ -static int whereInScanEst( - Parse *pParse, /* Parsing & code generating context */ - WhereLoopBuilder *pBuilder, - ExprList *pList, /* The value list on the RHS of "x IN (v1,v2,v3,...)" */ - tRowcnt *pnRow /* Write the revised row estimate here */ -){ - Index *p = pBuilder->pNew->u.btree.pIndex; - i64 nRow0 = sqlite3LogEstToInt(p->aiRowLogEst[0]); - int nRecValid = pBuilder->nRecValid; - int rc = SQLITE_OK; /* Subfunction return code */ - tRowcnt nEst; /* Number of rows for a single term */ - tRowcnt nRowEst = 0; /* New estimate of the number of rows */ - int i; /* Loop counter */ - - assert( p->aSample!=0 ); - for(i=0; rc==SQLITE_OK && inExpr; i++){ - nEst = nRow0; - rc = whereEqualScanEst(pParse, pBuilder, pList->a[i].pExpr, &nEst); - nRowEst += nEst; - pBuilder->nRecValid = nRecValid; +SQLITE_PRIVATE void sqlite3WhereSplit(WhereClause *pWC, Expr *pExpr, u8 op){ + Expr *pE2 = sqlite3ExprSkipCollate(pExpr); + pWC->op = op; + if( pE2==0 ) return; + if( pE2->op!=op ){ + whereClauseInsert(pWC, pExpr, 0); + }else{ + sqlite3WhereSplit(pWC, pE2->pLeft, op); + sqlite3WhereSplit(pWC, pE2->pRight, op); } +} - if( rc==SQLITE_OK ){ - if( nRowEst > nRow0 ) nRowEst = nRow0; - *pnRow = nRowEst; - WHERETRACE(0x10,("IN row estimate: est=%d\n", nRowEst)); - } - assert( pBuilder->nRecValid==nRecValid ); - return rc; +/* +** Initialize a preallocated WhereClause structure. +*/ +SQLITE_PRIVATE void sqlite3WhereClauseInit( + WhereClause *pWC, /* The WhereClause to be initialized */ + WhereInfo *pWInfo /* The WHERE processing context */ +){ + pWC->pWInfo = pWInfo; + pWC->pOuter = 0; + pWC->nTerm = 0; + pWC->nSlot = ArraySize(pWC->aStatic); + pWC->a = pWC->aStatic; } -#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ /* -** Disable a term in the WHERE clause. Except, do not disable the term -** if it controls a LEFT OUTER JOIN and it did not originate in the ON -** or USING clause of that join. -** -** Consider the term t2.z='ok' in the following queries: -** -** (1) SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x WHERE t2.z='ok' -** (2) SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x AND t2.z='ok' -** (3) SELECT * FROM t1, t2 WHERE t1.a=t2.x AND t2.z='ok' -** -** The t2.z='ok' is disabled in the in (2) because it originates -** in the ON clause. The term is disabled in (3) because it is not part -** of a LEFT OUTER JOIN. In (1), the term is not disabled. -** -** Disabling a term causes that term to not be tested in the inner loop -** of the join. Disabling is an optimization. When terms are satisfied -** by indices, we disable them to prevent redundant tests in the inner -** loop. We would get the correct results if nothing were ever disabled, -** but joins might run a little slower. The trick is to disable as much -** as we can without disabling too much. If we disabled in (1), we'd get -** the wrong answer. See ticket #813. -** -** If all the children of a term are disabled, then that term is also -** automatically disabled. In this way, terms get disabled if derived -** virtual terms are tested first. For example: -** -** x GLOB 'abc*' AND x>='abc' AND x<'acd' -** \___________/ \______/ \_____/ -** parent child1 child2 -** -** Only the parent term was in the original WHERE clause. The child1 -** and child2 terms were added by the LIKE optimization. If both of -** the virtual child terms are valid, then testing of the parent can be -** skipped. -** -** Usually the parent term is marked as TERM_CODED. But if the parent -** term was originally TERM_LIKE, then the parent gets TERM_LIKECOND instead. -** The TERM_LIKECOND marking indicates that the term should be coded inside -** a conditional such that is only evaluated on the second pass of a -** LIKE-optimization loop, when scanning BLOBs instead of strings. +** Deallocate a WhereClause structure. The WhereClause structure +** itself is not freed. This routine is the inverse of sqlite3WhereClauseInit(). */ -static void disableTerm(WhereLevel *pLevel, WhereTerm *pTerm){ - int nLoop = 0; - while( pTerm - && (pTerm->wtFlags & TERM_CODED)==0 - && (pLevel->iLeftJoin==0 || ExprHasProperty(pTerm->pExpr, EP_FromJoin)) - && (pLevel->notReady & pTerm->prereqAll)==0 - ){ - if( nLoop && (pTerm->wtFlags & TERM_LIKE)!=0 ){ - pTerm->wtFlags |= TERM_LIKECOND; - }else{ - pTerm->wtFlags |= TERM_CODED; +SQLITE_PRIVATE void sqlite3WhereClauseClear(WhereClause *pWC){ + int i; + WhereTerm *a; + sqlite3 *db = pWC->pWInfo->pParse->db; + for(i=pWC->nTerm-1, a=pWC->a; i>=0; i--, a++){ + if( a->wtFlags & TERM_DYNAMIC ){ + sqlite3ExprDelete(db, a->pExpr); } - if( pTerm->iParent<0 ) break; - pTerm = &pTerm->pWC->a[pTerm->iParent]; - pTerm->nChild--; - if( pTerm->nChild!=0 ) break; - nLoop++; + if( a->wtFlags & TERM_ORINFO ){ + whereOrInfoDelete(db, a->u.pOrInfo); + }else if( a->wtFlags & TERM_ANDINFO ){ + whereAndInfoDelete(db, a->u.pAndInfo); + } + } + if( pWC->a!=pWC->aStatic ){ + sqlite3DbFree(db, pWC->a); } } + /* -** Code an OP_Affinity opcode to apply the column affinity string zAff -** to the n registers starting at base. -** -** As an optimization, SQLITE_AFF_NONE entries (which are no-ops) at the -** beginning and end of zAff are ignored. If all entries in zAff are -** SQLITE_AFF_NONE, then no code gets generated. -** -** This routine makes its own copy of zAff so that the caller is free -** to modify zAff after this routine returns. +** These routines walk (recursively) an expression tree and generate +** a bitmask indicating which tables are used in that expression +** tree. */ -static void codeApplyAffinity(Parse *pParse, int base, int n, char *zAff){ - Vdbe *v = pParse->pVdbe; - if( zAff==0 ){ - assert( pParse->db->mallocFailed ); - return; - } - assert( v!=0 ); - - /* Adjust base and n to skip over SQLITE_AFF_NONE entries at the beginning - ** and end of the affinity string. - */ - while( n>0 && zAff[0]==SQLITE_AFF_NONE ){ - n--; - base++; - zAff++; +SQLITE_PRIVATE Bitmask sqlite3WhereExprUsage(WhereMaskSet *pMaskSet, Expr *p){ + Bitmask mask = 0; + if( p==0 ) return 0; + if( p->op==TK_COLUMN ){ + mask = sqlite3WhereGetMask(pMaskSet, p->iTable); + return mask; } - while( n>1 && zAff[n-1]==SQLITE_AFF_NONE ){ - n--; + mask = sqlite3WhereExprUsage(pMaskSet, p->pRight); + mask |= sqlite3WhereExprUsage(pMaskSet, p->pLeft); + if( ExprHasProperty(p, EP_xIsSelect) ){ + mask |= exprSelectUsage(pMaskSet, p->x.pSelect); + }else{ + mask |= sqlite3WhereExprListUsage(pMaskSet, p->x.pList); } - - /* Code the OP_Affinity opcode if there is anything left to do. */ - if( n>0 ){ - sqlite3VdbeAddOp2(v, OP_Affinity, base, n); - sqlite3VdbeChangeP4(v, -1, zAff, n); - sqlite3ExprCacheAffinityChange(pParse, base, n); + return mask; +} +SQLITE_PRIVATE Bitmask sqlite3WhereExprListUsage(WhereMaskSet *pMaskSet, ExprList *pList){ + int i; + Bitmask mask = 0; + if( pList ){ + for(i=0; inExpr; i++){ + mask |= sqlite3WhereExprUsage(pMaskSet, pList->a[i].pExpr); + } } + return mask; } /* -** Generate code for a single equality term of the WHERE clause. An equality -** term can be either X=expr or X IN (...). pTerm is the term to be -** coded. -** -** The current value for the constraint is left in register iReg. +** Call exprAnalyze on all terms in a WHERE clause. ** -** For a constraint of the form X=expr, the expression is evaluated and its -** result is left on the stack. For constraints of the form X IN (...) -** this routine sets up a loop that will iterate over all values of X. +** Note that exprAnalyze() might add new virtual terms onto the +** end of the WHERE clause. We do not want to analyze these new +** virtual terms, so start analyzing at the end and work forward +** so that the added virtual terms are never processed. */ -static int codeEqualityTerm( - Parse *pParse, /* The parsing context */ - WhereTerm *pTerm, /* The term of the WHERE clause to be coded */ - WhereLevel *pLevel, /* The level of the FROM clause we are working on */ - int iEq, /* Index of the equality term within this level */ - int bRev, /* True for reverse-order IN operations */ - int iTarget /* Attempt to leave results in this register */ +SQLITE_PRIVATE void sqlite3WhereExprAnalyze( + SrcList *pTabList, /* the FROM clause */ + WhereClause *pWC /* the WHERE clause to be analyzed */ ){ - Expr *pX = pTerm->pExpr; - Vdbe *v = pParse->pVdbe; - int iReg; /* Register holding results */ - - assert( iTarget>0 ); - if( pX->op==TK_EQ ){ - iReg = sqlite3ExprCodeTarget(pParse, pX->pRight, iTarget); - }else if( pX->op==TK_ISNULL ){ - iReg = iTarget; - sqlite3VdbeAddOp2(v, OP_Null, 0, iReg); -#ifndef SQLITE_OMIT_SUBQUERY - }else{ - int eType; - int iTab; - struct InLoop *pIn; - WhereLoop *pLoop = pLevel->pWLoop; - - if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 - && pLoop->u.btree.pIndex!=0 - && pLoop->u.btree.pIndex->aSortOrder[iEq] - ){ - testcase( iEq==0 ); - testcase( bRev ); - bRev = !bRev; - } - assert( pX->op==TK_IN ); - iReg = iTarget; - eType = sqlite3FindInIndex(pParse, pX, IN_INDEX_LOOP, 0); - if( eType==IN_INDEX_INDEX_DESC ){ - testcase( bRev ); - bRev = !bRev; - } - iTab = pX->iTable; - sqlite3VdbeAddOp2(v, bRev ? OP_Last : OP_Rewind, iTab, 0); - VdbeCoverageIf(v, bRev); - VdbeCoverageIf(v, !bRev); - assert( (pLoop->wsFlags & WHERE_MULTI_OR)==0 ); - pLoop->wsFlags |= WHERE_IN_ABLE; - if( pLevel->u.in.nIn==0 ){ - pLevel->addrNxt = sqlite3VdbeMakeLabel(v); - } - pLevel->u.in.nIn++; - pLevel->u.in.aInLoop = - sqlite3DbReallocOrFree(pParse->db, pLevel->u.in.aInLoop, - sizeof(pLevel->u.in.aInLoop[0])*pLevel->u.in.nIn); - pIn = pLevel->u.in.aInLoop; - if( pIn ){ - pIn += pLevel->u.in.nIn - 1; - pIn->iCur = iTab; - if( eType==IN_INDEX_ROWID ){ - pIn->addrInTop = sqlite3VdbeAddOp2(v, OP_Rowid, iTab, iReg); - }else{ - pIn->addrInTop = sqlite3VdbeAddOp3(v, OP_Column, iTab, 0, iReg); - } - pIn->eEndLoopOp = bRev ? OP_PrevIfOpen : OP_NextIfOpen; - sqlite3VdbeAddOp1(v, OP_IsNull, iReg); VdbeCoverage(v); - }else{ - pLevel->u.in.nIn = 0; - } -#endif + int i; + for(i=pWC->nTerm-1; i>=0; i--){ + exprAnalyze(pTabList, pWC, i); } - disableTerm(pLevel, pTerm); - return iReg; } +/************** End of whereexpr.c *******************************************/ +/************** Begin file where.c *******************************************/ /* -** Generate code that will evaluate all == and IN constraints for an -** index scan. -** -** For example, consider table t1(a,b,c,d,e,f) with index i1(a,b,c). -** Suppose the WHERE clause is this: a==5 AND b IN (1,2,3) AND c>5 AND c<10 -** The index has as many as three equality constraints, but in this -** example, the third "c" value is an inequality. So only two -** constraints are coded. This routine will generate code to evaluate -** a==5 and b IN (1,2,3). The current values for a and b will be stored -** in consecutive registers and the index of the first register is returned. -** -** In the example above nEq==2. But this subroutine works for any value -** of nEq including 0. If nEq==0, this routine is nearly a no-op. -** The only thing it does is allocate the pLevel->iMem memory cell and -** compute the affinity string. -** -** The nExtraReg parameter is 0 or 1. It is 0 if all WHERE clause constraints -** are == or IN and are covered by the nEq. nExtraReg is 1 if there is -** an inequality constraint (such as the "c>=5 AND c<10" in the example) that -** occurs after the nEq quality constraints. -** -** This routine allocates a range of nEq+nExtraReg memory cells and returns -** the index of the first memory cell in that range. The code that -** calls this routine will use that memory range to store keys for -** start and termination conditions of the loop. -** key value of the loop. If one or more IN operators appear, then -** this routine allocates an additional nEq memory cells for internal -** use. +** 2001 September 15 ** -** Before returning, *pzAff is set to point to a buffer containing a -** copy of the column affinity string of the index allocated using -** sqlite3DbMalloc(). Except, entries in the copy of the string associated -** with equality constraints that use NONE affinity are set to -** SQLITE_AFF_NONE. This is to deal with SQL such as the following: +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: ** -** CREATE TABLE t1(a TEXT PRIMARY KEY, b); -** SELECT ... FROM t1 AS t2, t1 WHERE t1.a = t2.b; +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. ** -** In the example above, the index on t1(a) has TEXT affinity. But since -** the right hand side of the equality constraint (t2.b) has NONE affinity, -** no conversion should be attempted before using a t2.b value as part of -** a key to search the index. Hence the first byte in the returned affinity -** string in this example would be set to SQLITE_AFF_NONE. +************************************************************************* +** This module contains C code that generates VDBE code used to process +** the WHERE clause of SQL statements. This module is responsible for +** generating the code that loops through a table looking for applicable +** rows. Indices are selected and used to speed the search when doing +** so is applicable. Because this module is responsible for selecting +** indices, you might also think of this module as the "query optimizer". */ -static int codeAllEqualityTerms( - Parse *pParse, /* Parsing context */ - WhereLevel *pLevel, /* Which nested loop of the FROM we are coding */ - int bRev, /* Reverse the order of IN operators */ - int nExtraReg, /* Number of extra registers to allocate */ - char **pzAff /* OUT: Set to point to affinity string */ -){ - u16 nEq; /* The number of == or IN constraints to code */ - u16 nSkip; /* Number of left-most columns to skip */ - Vdbe *v = pParse->pVdbe; /* The vm under construction */ - Index *pIdx; /* The index being used for this loop */ - WhereTerm *pTerm; /* A single constraint term */ - WhereLoop *pLoop; /* The WhereLoop object */ - int j; /* Loop counter */ - int regBase; /* Base register */ - int nReg; /* Number of registers to allocate */ - char *zAff; /* Affinity string to return */ - - /* This module is only called on query plans that use an index. */ - pLoop = pLevel->pWLoop; - assert( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 ); - nEq = pLoop->u.btree.nEq; - nSkip = pLoop->nSkip; - pIdx = pLoop->u.btree.pIndex; - assert( pIdx!=0 ); +/* #include "sqliteInt.h" */ +/* #include "whereInt.h" */ - /* Figure out how many memory cells we will need then allocate them. - */ - regBase = pParse->nMem + 1; - nReg = pLoop->u.btree.nEq + nExtraReg; - pParse->nMem += nReg; +/* Forward declaration of methods */ +static int whereLoopResize(sqlite3*, WhereLoop*, int); - zAff = sqlite3DbStrDup(pParse->db, sqlite3IndexAffinityStr(v, pIdx)); - if( !zAff ){ - pParse->db->mallocFailed = 1; - } +/* Test variable that can be set to enable WHERE tracing */ +#if defined(SQLITE_TEST) || defined(SQLITE_DEBUG) +/***/ int sqlite3WhereTrace = 0; +#endif - if( nSkip ){ - int iIdxCur = pLevel->iIdxCur; - sqlite3VdbeAddOp1(v, (bRev?OP_Last:OP_Rewind), iIdxCur); - VdbeCoverageIf(v, bRev==0); - VdbeCoverageIf(v, bRev!=0); - VdbeComment((v, "begin skip-scan on %s", pIdx->zName)); - j = sqlite3VdbeAddOp0(v, OP_Goto); - pLevel->addrSkip = sqlite3VdbeAddOp4Int(v, (bRev?OP_SeekLT:OP_SeekGT), - iIdxCur, 0, regBase, nSkip); - VdbeCoverageIf(v, bRev==0); - VdbeCoverageIf(v, bRev!=0); - sqlite3VdbeJumpHere(v, j); - for(j=0; jaiColumn[j]>=0 ); - VdbeComment((v, "%s", pIdx->pTable->aCol[pIdx->aiColumn[j]].zName)); - } - } - /* Evaluate the equality constraints - */ - assert( zAff==0 || (int)strlen(zAff)>=nEq ); - for(j=nSkip; jaLTerm[j]; - assert( pTerm!=0 ); - /* The following testcase is true for indices with redundant columns. - ** Ex: CREATE INDEX i1 ON t1(a,b,a); SELECT * FROM t1 WHERE a=0 AND b=0; */ - testcase( (pTerm->wtFlags & TERM_CODED)!=0 ); - testcase( pTerm->wtFlags & TERM_VIRTUAL ); - r1 = codeEqualityTerm(pParse, pTerm, pLevel, j, bRev, regBase+j); - if( r1!=regBase+j ){ - if( nReg==1 ){ - sqlite3ReleaseTempReg(pParse, regBase); - regBase = r1; - }else{ - sqlite3VdbeAddOp2(v, OP_SCopy, r1, regBase+j); - } - } - testcase( pTerm->eOperator & WO_ISNULL ); - testcase( pTerm->eOperator & WO_IN ); - if( (pTerm->eOperator & (WO_ISNULL|WO_IN))==0 ){ - Expr *pRight = pTerm->pExpr->pRight; - if( sqlite3ExprCanBeNull(pRight) ){ - sqlite3VdbeAddOp2(v, OP_IsNull, regBase+j, pLevel->addrBrk); - VdbeCoverage(v); - } - if( zAff ){ - if( sqlite3CompareAffinity(pRight, zAff[j])==SQLITE_AFF_NONE ){ - zAff[j] = SQLITE_AFF_NONE; - } - if( sqlite3ExprNeedsNoAffinityChange(pRight, zAff[j]) ){ - zAff[j] = SQLITE_AFF_NONE; - } - } - } - } - *pzAff = zAff; - return regBase; +/* +** Return the estimated number of output rows from a WHERE clause +*/ +SQLITE_PRIVATE u64 sqlite3WhereOutputRowCount(WhereInfo *pWInfo){ + return sqlite3LogEstToInt(pWInfo->nRowOut); } -#ifndef SQLITE_OMIT_EXPLAIN /* -** This routine is a helper for explainIndexRange() below -** -** pStr holds the text of an expression that we are building up one term -** at a time. This routine adds a new term to the end of the expression. -** Terms are separated by AND so add the "AND" text for second and subsequent -** terms only. +** Return one of the WHERE_DISTINCT_xxxxx values to indicate how this +** WHERE clause returns outputs for DISTINCT processing. */ -static void explainAppendTerm( - StrAccum *pStr, /* The text expression being built */ - int iTerm, /* Index of this term. First is zero */ - const char *zColumn, /* Name of the column */ - const char *zOp /* Name of the operator */ -){ - if( iTerm ) sqlite3StrAccumAppend(pStr, " AND ", 5); - sqlite3StrAccumAppendAll(pStr, zColumn); - sqlite3StrAccumAppend(pStr, zOp, 1); - sqlite3StrAccumAppend(pStr, "?", 1); +SQLITE_PRIVATE int sqlite3WhereIsDistinct(WhereInfo *pWInfo){ + return pWInfo->eDistinct; } /* -** Argument pLevel describes a strategy for scanning table pTab. This -** function appends text to pStr that describes the subset of table -** rows scanned by the strategy in the form of an SQL expression. -** -** For example, if the query: -** -** SELECT * FROM t1 WHERE a=1 AND b>2; -** -** is run and there is an index on (a, b), then this function returns a -** string similar to: -** -** "a=? AND b>?" +** Return TRUE if the WHERE clause returns rows in ORDER BY order. +** Return FALSE if the output needs to be sorted. */ -static void explainIndexRange(StrAccum *pStr, WhereLoop *pLoop, Table *pTab){ - Index *pIndex = pLoop->u.btree.pIndex; - u16 nEq = pLoop->u.btree.nEq; - u16 nSkip = pLoop->nSkip; - int i, j; - Column *aCol = pTab->aCol; - i16 *aiColumn = pIndex->aiColumn; +SQLITE_PRIVATE int sqlite3WhereIsOrdered(WhereInfo *pWInfo){ + return pWInfo->nOBSat; +} - if( nEq==0 && (pLoop->wsFlags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))==0 ) return; - sqlite3StrAccumAppend(pStr, " (", 2); - for(i=0; i=nSkip ){ - explainAppendTerm(pStr, i, z, "="); - }else{ - if( i ) sqlite3StrAccumAppend(pStr, " AND ", 5); - sqlite3XPrintf(pStr, 0, "ANY(%s)", z); - } - } +/* +** Return the VDBE address or label to jump to in order to continue +** immediately with the next row of a WHERE clause. +*/ +SQLITE_PRIVATE int sqlite3WhereContinueLabel(WhereInfo *pWInfo){ + assert( pWInfo->iContinue!=0 ); + return pWInfo->iContinue; +} - j = i; - if( pLoop->wsFlags&WHERE_BTM_LIMIT ){ - char *z = aiColumn[j] < 0 ? "rowid" : aCol[aiColumn[j]].zName; - explainAppendTerm(pStr, i++, z, ">"); - } - if( pLoop->wsFlags&WHERE_TOP_LIMIT ){ - char *z = aiColumn[j] < 0 ? "rowid" : aCol[aiColumn[j]].zName; - explainAppendTerm(pStr, i, z, "<"); - } - sqlite3StrAccumAppend(pStr, ")", 1); +/* +** Return the VDBE address or label to jump to in order to break +** out of a WHERE loop. +*/ +SQLITE_PRIVATE int sqlite3WhereBreakLabel(WhereInfo *pWInfo){ + return pWInfo->iBreak; } /* -** This function is a no-op unless currently processing an EXPLAIN QUERY PLAN -** command, or if either SQLITE_DEBUG or SQLITE_ENABLE_STMT_SCANSTATUS was -** defined at compile-time. If it is not a no-op, a single OP_Explain opcode -** is added to the output to describe the table scan strategy in pLevel. +** Return TRUE if an UPDATE or DELETE statement can operate directly on +** the rowids returned by a WHERE clause. Return FALSE if doing an +** UPDATE or DELETE might change subsequent WHERE clause results. ** -** If an OP_Explain opcode is added to the VM, its address is returned. -** Otherwise, if no OP_Explain is coded, zero is returned. +** If the ONEPASS optimization is used (if this routine returns true) +** then also write the indices of open cursors used by ONEPASS +** into aiCur[0] and aiCur[1]. iaCur[0] gets the cursor of the data +** table and iaCur[1] gets the cursor used by an auxiliary index. +** Either value may be -1, indicating that cursor is not used. +** Any cursors returned will have been opened for writing. +** +** aiCur[0] and aiCur[1] both get -1 if the where-clause logic is +** unable to use the ONEPASS optimization. */ -static int explainOneScan( - Parse *pParse, /* Parse context */ - SrcList *pTabList, /* Table list this loop refers to */ - WhereLevel *pLevel, /* Scan to write OP_Explain opcode for */ - int iLevel, /* Value for "level" column of output */ - int iFrom, /* Value for "from" column of output */ - u16 wctrlFlags /* Flags passed to sqlite3WhereBegin() */ -){ - int ret = 0; -#if !defined(SQLITE_DEBUG) && !defined(SQLITE_ENABLE_STMT_SCANSTATUS) - if( pParse->explain==2 ) -#endif - { - struct SrcList_item *pItem = &pTabList->a[pLevel->iFrom]; - Vdbe *v = pParse->pVdbe; /* VM being constructed */ - sqlite3 *db = pParse->db; /* Database handle */ - int iId = pParse->iSelectId; /* Select id (left-most output column) */ - int isSearch; /* True for a SEARCH. False for SCAN. */ - WhereLoop *pLoop; /* The controlling WhereLoop object */ - u32 flags; /* Flags that describe this loop */ - char *zMsg; /* Text to add to EQP output */ - StrAccum str; /* EQP output string */ - char zBuf[100]; /* Initial space for EQP output string */ - - pLoop = pLevel->pWLoop; - flags = pLoop->wsFlags; - if( (flags&WHERE_MULTI_OR) || (wctrlFlags&WHERE_ONETABLE_ONLY) ) return 0; +SQLITE_PRIVATE int sqlite3WhereOkOnePass(WhereInfo *pWInfo, int *aiCur){ + memcpy(aiCur, pWInfo->aiCurOnePass, sizeof(int)*2); + return pWInfo->okOnePass; +} - isSearch = (flags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))!=0 - || ((flags&WHERE_VIRTUALTABLE)==0 && (pLoop->u.btree.nEq>0)) - || (wctrlFlags&(WHERE_ORDERBY_MIN|WHERE_ORDERBY_MAX)); +/* +** Move the content of pSrc into pDest +*/ +static void whereOrMove(WhereOrSet *pDest, WhereOrSet *pSrc){ + pDest->n = pSrc->n; + memcpy(pDest->a, pSrc->a, pDest->n*sizeof(pDest->a[0])); +} - sqlite3StrAccumInit(&str, zBuf, sizeof(zBuf), SQLITE_MAX_LENGTH); - str.db = db; - sqlite3StrAccumAppendAll(&str, isSearch ? "SEARCH" : "SCAN"); - if( pItem->pSelect ){ - sqlite3XPrintf(&str, 0, " SUBQUERY %d", pItem->iSelectId); - }else{ - sqlite3XPrintf(&str, 0, " TABLE %s", pItem->zName); +/* +** Try to insert a new prerequisite/cost entry into the WhereOrSet pSet. +** +** The new entry might overwrite an existing entry, or it might be +** appended, or it might be discarded. Do whatever is the right thing +** so that pSet keeps the N_OR_COST best entries seen so far. +*/ +static int whereOrInsert( + WhereOrSet *pSet, /* The WhereOrSet to be updated */ + Bitmask prereq, /* Prerequisites of the new entry */ + LogEst rRun, /* Run-cost of the new entry */ + LogEst nOut /* Number of outputs for the new entry */ +){ + u16 i; + WhereOrCost *p; + for(i=pSet->n, p=pSet->a; i>0; i--, p++){ + if( rRun<=p->rRun && (prereq & p->prereq)==prereq ){ + goto whereOrInsert_done; + } + if( p->rRun<=rRun && (p->prereq & prereq)==p->prereq ){ + return 0; + } + } + if( pSet->na[pSet->n++]; + p->nOut = nOut; + }else{ + p = pSet->a; + for(i=1; in; i++){ + if( p->rRun>pSet->a[i].rRun ) p = pSet->a + i; } + if( p->rRun<=rRun ) return 0; + } +whereOrInsert_done: + p->prereq = prereq; + p->rRun = rRun; + if( p->nOut>nOut ) p->nOut = nOut; + return 1; +} - if( pItem->zAlias ){ - sqlite3XPrintf(&str, 0, " AS %s", pItem->zAlias); +/* +** Return the bitmask for the given cursor number. Return 0 if +** iCursor is not in the set. +*/ +SQLITE_PRIVATE Bitmask sqlite3WhereGetMask(WhereMaskSet *pMaskSet, int iCursor){ + int i; + assert( pMaskSet->n<=(int)sizeof(Bitmask)*8 ); + for(i=0; in; i++){ + if( pMaskSet->ix[i]==iCursor ){ + return MASKBIT(i); } - if( (flags & (WHERE_IPK|WHERE_VIRTUALTABLE))==0 ){ - const char *zFmt = 0; - Index *pIdx; + } + return 0; +} - assert( pLoop->u.btree.pIndex!=0 ); - pIdx = pLoop->u.btree.pIndex; - assert( !(flags&WHERE_AUTO_INDEX) || (flags&WHERE_IDX_ONLY) ); - if( !HasRowid(pItem->pTab) && IsPrimaryKeyIndex(pIdx) ){ - if( isSearch ){ - zFmt = "PRIMARY KEY"; +/* +** Create a new mask for cursor iCursor. +** +** There is one cursor per table in the FROM clause. The number of +** tables in the FROM clause is limited by a test early in the +** sqlite3WhereBegin() routine. So we know that the pMaskSet->ix[] +** array will never overflow. +*/ +static void createMask(WhereMaskSet *pMaskSet, int iCursor){ + assert( pMaskSet->n < ArraySize(pMaskSet->ix) ); + pMaskSet->ix[pMaskSet->n++] = iCursor; +} + +/* +** Advance to the next WhereTerm that matches according to the criteria +** established when the pScan object was initialized by whereScanInit(). +** Return NULL if there are no more matching WhereTerms. +*/ +static WhereTerm *whereScanNext(WhereScan *pScan){ + int iCur; /* The cursor on the LHS of the term */ + int iColumn; /* The column on the LHS of the term. -1 for IPK */ + Expr *pX; /* An expression being tested */ + WhereClause *pWC; /* Shorthand for pScan->pWC */ + WhereTerm *pTerm; /* The term being tested */ + int k = pScan->k; /* Where to start scanning */ + + while( pScan->iEquiv<=pScan->nEquiv ){ + iCur = pScan->aEquiv[pScan->iEquiv-2]; + iColumn = pScan->aEquiv[pScan->iEquiv-1]; + while( (pWC = pScan->pWC)!=0 ){ + for(pTerm=pWC->a+k; knTerm; k++, pTerm++){ + if( pTerm->leftCursor==iCur + && pTerm->u.leftColumn==iColumn + && (pScan->iEquiv<=2 || !ExprHasProperty(pTerm->pExpr, EP_FromJoin)) + ){ + if( (pTerm->eOperator & WO_EQUIV)!=0 + && pScan->nEquivaEquiv) + ){ + int j; + pX = sqlite3ExprSkipCollate(pTerm->pExpr->pRight); + assert( pX->op==TK_COLUMN ); + for(j=0; jnEquiv; j+=2){ + if( pScan->aEquiv[j]==pX->iTable + && pScan->aEquiv[j+1]==pX->iColumn ){ + break; + } + } + if( j==pScan->nEquiv ){ + pScan->aEquiv[j] = pX->iTable; + pScan->aEquiv[j+1] = pX->iColumn; + pScan->nEquiv += 2; + } + } + if( (pTerm->eOperator & pScan->opMask)!=0 ){ + /* Verify the affinity and collating sequence match */ + if( pScan->zCollName && (pTerm->eOperator & WO_ISNULL)==0 ){ + CollSeq *pColl; + Parse *pParse = pWC->pWInfo->pParse; + pX = pTerm->pExpr; + if( !sqlite3IndexAffinityOk(pX, pScan->idxaff) ){ + continue; + } + assert(pX->pLeft); + pColl = sqlite3BinaryCompareCollSeq(pParse, + pX->pLeft, pX->pRight); + if( pColl==0 ) pColl = pParse->db->pDfltColl; + if( sqlite3StrICmp(pColl->zName, pScan->zCollName) ){ + continue; + } + } + if( (pTerm->eOperator & (WO_EQ|WO_IS))!=0 + && (pX = pTerm->pExpr->pRight)->op==TK_COLUMN + && pX->iTable==pScan->aEquiv[0] + && pX->iColumn==pScan->aEquiv[1] + ){ + testcase( pTerm->eOperator & WO_IS ); + continue; + } + pScan->k = k+1; + return pTerm; + } } - }else if( flags & WHERE_PARTIALIDX ){ - zFmt = "AUTOMATIC PARTIAL COVERING INDEX"; - }else if( flags & WHERE_AUTO_INDEX ){ - zFmt = "AUTOMATIC COVERING INDEX"; - }else if( flags & WHERE_IDX_ONLY ){ - zFmt = "COVERING INDEX %s"; - }else{ - zFmt = "INDEX %s"; } - if( zFmt ){ - sqlite3StrAccumAppend(&str, " USING ", 7); - sqlite3XPrintf(&str, 0, zFmt, pIdx->zName); - explainIndexRange(&str, pLoop, pItem->pTab); + pScan->pWC = pScan->pWC->pOuter; + k = 0; + } + pScan->pWC = pScan->pOrigWC; + k = 0; + pScan->iEquiv += 2; + } + return 0; +} + +/* +** Initialize a WHERE clause scanner object. Return a pointer to the +** first match. Return NULL if there are no matches. +** +** The scanner will be searching the WHERE clause pWC. It will look +** for terms of the form "X " where X is column iColumn of table +** iCur. The must be one of the operators described by opMask. +** +** If the search is for X and the WHERE clause contains terms of the +** form X=Y then this routine might also return terms of the form +** "Y ". The number of levels of transitivity is limited, +** but is enough to handle most commonly occurring SQL statements. +** +** If X is not the INTEGER PRIMARY KEY then X must be compatible with +** index pIdx. +*/ +static WhereTerm *whereScanInit( + WhereScan *pScan, /* The WhereScan object being initialized */ + WhereClause *pWC, /* The WHERE clause to be scanned */ + int iCur, /* Cursor to scan for */ + int iColumn, /* Column to scan for */ + u32 opMask, /* Operator(s) to scan for */ + Index *pIdx /* Must be compatible with this index */ +){ + int j; + + /* memset(pScan, 0, sizeof(*pScan)); */ + pScan->pOrigWC = pWC; + pScan->pWC = pWC; + if( pIdx && iColumn>=0 ){ + pScan->idxaff = pIdx->pTable->aCol[iColumn].affinity; + for(j=0; pIdx->aiColumn[j]!=iColumn; j++){ + if( NEVER(j>pIdx->nColumn) ) return 0; + } + pScan->zCollName = pIdx->azColl[j]; + }else{ + pScan->idxaff = 0; + pScan->zCollName = 0; + } + pScan->opMask = opMask; + pScan->k = 0; + pScan->aEquiv[0] = iCur; + pScan->aEquiv[1] = iColumn; + pScan->nEquiv = 2; + pScan->iEquiv = 2; + return whereScanNext(pScan); +} + +/* +** Search for a term in the WHERE clause that is of the form "X " +** where X is a reference to the iColumn of table iCur and is one of +** the WO_xx operator codes specified by the op parameter. +** Return a pointer to the term. Return 0 if not found. +** +** The term returned might by Y= if there is another constraint in +** the WHERE clause that specifies that X=Y. Any such constraints will be +** identified by the WO_EQUIV bit in the pTerm->eOperator field. The +** aEquiv[] array holds X and all its equivalents, with each SQL variable +** taking up two slots in aEquiv[]. The first slot is for the cursor number +** and the second is for the column number. There are 22 slots in aEquiv[] +** so that means we can look for X plus up to 10 other equivalent values. +** Hence a search for X will return if X=A1 and A1=A2 and A2=A3 +** and ... and A9=A10 and A10=. +** +** If there are multiple terms in the WHERE clause of the form "X " +** then try for the one with no dependencies on - in other words where +** is a constant expression of some kind. Only return entries of +** the form "X Y" where Y is a column in another table if no terms of +** the form "X " exist. If no terms with a constant RHS +** exist, try to return a term that does not use WO_EQUIV. +*/ +SQLITE_PRIVATE WhereTerm *sqlite3WhereFindTerm( + WhereClause *pWC, /* The WHERE clause to be searched */ + int iCur, /* Cursor number of LHS */ + int iColumn, /* Column number of LHS */ + Bitmask notReady, /* RHS must not overlap with this mask */ + u32 op, /* Mask of WO_xx values describing operator */ + Index *pIdx /* Must be compatible with this index, if not NULL */ +){ + WhereTerm *pResult = 0; + WhereTerm *p; + WhereScan scan; + + p = whereScanInit(&scan, pWC, iCur, iColumn, op, pIdx); + op &= WO_EQ|WO_IS; + while( p ){ + if( (p->prereqRight & notReady)==0 ){ + if( p->prereqRight==0 && (p->eOperator&op)!=0 ){ + testcase( p->eOperator & WO_IS ); + return p; } - }else if( (flags & WHERE_IPK)!=0 && (flags & WHERE_CONSTRAINT)!=0 ){ - const char *zRange; - if( flags&(WHERE_COLUMN_EQ|WHERE_COLUMN_IN) ){ - zRange = "(rowid=?)"; - }else if( (flags&WHERE_BOTH_LIMIT)==WHERE_BOTH_LIMIT ){ - zRange = "(rowid>? AND rowid?)"; - }else{ - assert( flags&WHERE_TOP_LIMIT); - zRange = "(rowida[] is returned. If +** no expression is found, -1 is returned. +*/ +static int findIndexCol( + Parse *pParse, /* Parse context */ + ExprList *pList, /* Expression list to search */ + int iBase, /* Cursor for table associated with pIdx */ + Index *pIdx, /* Index to match column of */ + int iCol /* Column of index to match */ +){ + int i; + const char *zColl = pIdx->azColl[iCol]; + + for(i=0; inExpr; i++){ + Expr *p = sqlite3ExprSkipCollate(pList->a[i].pExpr); + if( p->op==TK_COLUMN + && p->iColumn==pIdx->aiColumn[iCol] + && p->iTable==iBase + ){ + CollSeq *pColl = sqlite3ExprCollSeq(pParse, pList->a[i].pExpr); + if( pColl && 0==sqlite3StrICmp(pColl->zName, zColl) ){ + return i; } - sqlite3StrAccumAppendAll(&str, " USING INTEGER PRIMARY KEY "); - sqlite3StrAccumAppendAll(&str, zRange); } -#ifndef SQLITE_OMIT_VIRTUALTABLE - else if( (flags & WHERE_VIRTUALTABLE)!=0 ){ - sqlite3XPrintf(&str, 0, " VIRTUAL TABLE INDEX %d:%s", - pLoop->u.vtab.idxNum, pLoop->u.vtab.idxStr); + } + + return -1; +} + +/* +** Return true if the DISTINCT expression-list passed as the third argument +** is redundant. +** +** A DISTINCT list is redundant if any subset of the columns in the +** DISTINCT list are collectively unique and individually non-null. +*/ +static int isDistinctRedundant( + Parse *pParse, /* Parsing context */ + SrcList *pTabList, /* The FROM clause */ + WhereClause *pWC, /* The WHERE clause */ + ExprList *pDistinct /* The result set that needs to be DISTINCT */ +){ + Table *pTab; + Index *pIdx; + int i; + int iBase; + + /* If there is more than one table or sub-select in the FROM clause of + ** this query, then it will not be possible to show that the DISTINCT + ** clause is redundant. */ + if( pTabList->nSrc!=1 ) return 0; + iBase = pTabList->a[0].iCursor; + pTab = pTabList->a[0].pTab; + + /* If any of the expressions is an IPK column on table iBase, then return + ** true. Note: The (p->iTable==iBase) part of this test may be false if the + ** current SELECT is a correlated sub-query. + */ + for(i=0; inExpr; i++){ + Expr *p = sqlite3ExprSkipCollate(pDistinct->a[i].pExpr); + if( p->op==TK_COLUMN && p->iTable==iBase && p->iColumn<0 ) return 1; + } + + /* Loop through all indices on the table, checking each to see if it makes + ** the DISTINCT qualifier redundant. It does so if: + ** + ** 1. The index is itself UNIQUE, and + ** + ** 2. All of the columns in the index are either part of the pDistinct + ** list, or else the WHERE clause contains a term of the form "col=X", + ** where X is a constant value. The collation sequences of the + ** comparison and select-list expressions must match those of the index. + ** + ** 3. All of those index columns for which the WHERE clause does not + ** contain a "col=X" term are subject to a NOT NULL constraint. + */ + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + if( !IsUniqueIndex(pIdx) ) continue; + for(i=0; inKeyCol; i++){ + i16 iCol = pIdx->aiColumn[i]; + if( 0==sqlite3WhereFindTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){ + int iIdxCol = findIndexCol(pParse, pDistinct, iBase, pIdx, i); + if( iIdxCol<0 || pTab->aCol[iCol].notNull==0 ){ + break; + } + } } -#endif -#ifdef SQLITE_EXPLAIN_ESTIMATED_ROWS - if( pLoop->nOut>=10 ){ - sqlite3XPrintf(&str, 0, " (~%llu rows)", sqlite3LogEstToInt(pLoop->nOut)); - }else{ - sqlite3StrAccumAppend(&str, " (~1 row)", 9); + if( i==pIdx->nKeyCol ){ + /* This index implies that the DISTINCT qualifier is redundant. */ + return 1; } -#endif - zMsg = sqlite3StrAccumFinish(&str); - ret = sqlite3VdbeAddOp4(v, OP_Explain, iId, iLevel, iFrom, zMsg,P4_DYNAMIC); } - return ret; + + return 0; } -#else -# define explainOneScan(u,v,w,x,y,z) 0 -#endif /* SQLITE_OMIT_EXPLAIN */ -#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + /* -** Configure the VM passed as the first argument with an -** sqlite3_stmt_scanstatus() entry corresponding to the scan used to -** implement level pLvl. Argument pSrclist is a pointer to the FROM -** clause that the scan reads data from. +** Estimate the logarithm of the input value to base 2. +*/ +static LogEst estLog(LogEst N){ + return N<=10 ? 0 : sqlite3LogEst(N) - 33; +} + +/* +** Convert OP_Column opcodes to OP_Copy in previously generated code. ** -** If argument addrExplain is not 0, it must be the address of an -** OP_Explain instruction that describes the same loop. +** This routine runs over generated VDBE code and translates OP_Column +** opcodes into OP_Copy, and OP_Rowid into OP_Null, when the table is being +** accessed via co-routine instead of via table lookup. */ -static void addScanStatus( - Vdbe *v, /* Vdbe to add scanstatus entry to */ - SrcList *pSrclist, /* FROM clause pLvl reads data from */ - WhereLevel *pLvl, /* Level to add scanstatus() entry for */ - int addrExplain /* Address of OP_Explain (or 0) */ +static void translateColumnToCopy( + Vdbe *v, /* The VDBE containing code to translate */ + int iStart, /* Translate from this opcode to the end */ + int iTabCur, /* OP_Column/OP_Rowid references to this table */ + int iRegister /* The first column is in this register */ ){ - const char *zObj = 0; - WhereLoop *pLoop = pLvl->pWLoop; - if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 && pLoop->u.btree.pIndex!=0 ){ - zObj = pLoop->u.btree.pIndex->zName; - }else{ - zObj = pSrclist->a[pLvl->iFrom].zName; + VdbeOp *pOp = sqlite3VdbeGetOp(v, iStart); + int iEnd = sqlite3VdbeCurrentAddr(v); + for(; iStartp1!=iTabCur ) continue; + if( pOp->opcode==OP_Column ){ + pOp->opcode = OP_Copy; + pOp->p1 = pOp->p2 + iRegister; + pOp->p2 = pOp->p3; + pOp->p3 = 0; + }else if( pOp->opcode==OP_Rowid ){ + pOp->opcode = OP_Null; + pOp->p1 = 0; + pOp->p3 = 0; + } } - sqlite3VdbeScanStatus( - v, addrExplain, pLvl->addrBody, pLvl->addrVisit, pLoop->nOut, zObj - ); +} + +/* +** Two routines for printing the content of an sqlite3_index_info +** structure. Used for testing and debugging only. If neither +** SQLITE_TEST or SQLITE_DEBUG are defined, then these routines +** are no-ops. +*/ +#if !defined(SQLITE_OMIT_VIRTUALTABLE) && defined(WHERETRACE_ENABLED) +static void TRACE_IDX_INPUTS(sqlite3_index_info *p){ + int i; + if( !sqlite3WhereTrace ) return; + for(i=0; inConstraint; i++){ + sqlite3DebugPrintf(" constraint[%d]: col=%d termid=%d op=%d usabled=%d\n", + i, + p->aConstraint[i].iColumn, + p->aConstraint[i].iTermOffset, + p->aConstraint[i].op, + p->aConstraint[i].usable); + } + for(i=0; inOrderBy; i++){ + sqlite3DebugPrintf(" orderby[%d]: col=%d desc=%d\n", + i, + p->aOrderBy[i].iColumn, + p->aOrderBy[i].desc); + } +} +static void TRACE_IDX_OUTPUTS(sqlite3_index_info *p){ + int i; + if( !sqlite3WhereTrace ) return; + for(i=0; inConstraint; i++){ + sqlite3DebugPrintf(" usage[%d]: argvIdx=%d omit=%d\n", + i, + p->aConstraintUsage[i].argvIndex, + p->aConstraintUsage[i].omit); + } + sqlite3DebugPrintf(" idxNum=%d\n", p->idxNum); + sqlite3DebugPrintf(" idxStr=%s\n", p->idxStr); + sqlite3DebugPrintf(" orderByConsumed=%d\n", p->orderByConsumed); + sqlite3DebugPrintf(" estimatedCost=%g\n", p->estimatedCost); + sqlite3DebugPrintf(" estimatedRows=%lld\n", p->estimatedRows); } #else -# define addScanStatus(a, b, c, d) ((void)d) +#define TRACE_IDX_INPUTS(A) +#define TRACE_IDX_OUTPUTS(A) #endif +#ifndef SQLITE_OMIT_AUTOMATIC_INDEX /* -** If the most recently coded instruction is a constant range contraint -** that originated from the LIKE optimization, then change the P3 to be -** pLoop->iLikeRepCntr and set P5. -** -** The LIKE optimization trys to evaluate "x LIKE 'abc%'" as a range -** expression: "x>='ABC' AND x<'abd'". But this requires that the range -** scan loop run twice, once for strings and a second time for BLOBs. -** The OP_String opcodes on the second pass convert the upper and lower -** bound string contants to blobs. This routine makes the necessary changes -** to the OP_String opcodes for that to happen. +** Return TRUE if the WHERE clause term pTerm is of a form where it +** could be used with an index to access pSrc, assuming an appropriate +** index existed. */ -static void whereLikeOptimizationStringFixup( - Vdbe *v, /* prepared statement under construction */ - WhereLevel *pLevel, /* The loop that contains the LIKE operator */ - WhereTerm *pTerm /* The upper or lower bound just coded */ +static int termCanDriveIndex( + WhereTerm *pTerm, /* WHERE clause term to check */ + struct SrcList_item *pSrc, /* Table we are trying to access */ + Bitmask notReady /* Tables in outer loops of the join */ ){ - if( pTerm->wtFlags & TERM_LIKEOPT ){ - VdbeOp *pOp; - assert( pLevel->iLikeRepCntr>0 ); - pOp = sqlite3VdbeGetOp(v, -1); - assert( pOp!=0 ); - assert( pOp->opcode==OP_String8 - || pTerm->pWC->pWInfo->pParse->db->mallocFailed ); - pOp->p3 = pLevel->iLikeRepCntr; - pOp->p5 = 1; - } + char aff; + if( pTerm->leftCursor!=pSrc->iCursor ) return 0; + if( (pTerm->eOperator & (WO_EQ|WO_IS))==0 ) return 0; + if( (pTerm->prereqRight & notReady)!=0 ) return 0; + if( pTerm->u.leftColumn<0 ) return 0; + aff = pSrc->pTab->aCol[pTerm->u.leftColumn].affinity; + if( !sqlite3IndexAffinityOk(pTerm->pExpr, aff) ) return 0; + testcase( pTerm->pExpr->op==TK_IS ); + return 1; } +#endif + +#ifndef SQLITE_OMIT_AUTOMATIC_INDEX /* -** Generate code for the start of the iLevel-th loop in the WHERE clause -** implementation described by pWInfo. +** Generate code to construct the Index object for an automatic index +** and to set up the WhereLevel object pLevel so that the code generator +** makes use of the automatic index. */ -static Bitmask codeOneLoopStart( - WhereInfo *pWInfo, /* Complete information about the WHERE clause */ - int iLevel, /* Which level of pWInfo->a[] should be coded */ - Bitmask notReady /* Which tables are currently available */ +static void constructAutomaticIndex( + Parse *pParse, /* The parsing context */ + WhereClause *pWC, /* The WHERE clause */ + struct SrcList_item *pSrc, /* The FROM clause term to get the next index */ + Bitmask notReady, /* Mask of cursors that are not available */ + WhereLevel *pLevel /* Write new index here */ ){ - int j, k; /* Loop counters */ - int iCur; /* The VDBE cursor for the table */ - int addrNxt; /* Where to jump to continue with the next IN case */ - int omitTable; /* True if we use the index only */ - int bRev; /* True if we need to scan in reverse order */ - WhereLevel *pLevel; /* The where level to be coded */ - WhereLoop *pLoop; /* The WhereLoop object being coded */ - WhereClause *pWC; /* Decomposition of the entire WHERE clause */ - WhereTerm *pTerm; /* A WHERE clause term */ - Parse *pParse; /* Parsing context */ - sqlite3 *db; /* Database connection */ - Vdbe *v; /* The prepared stmt under constructions */ - struct SrcList_item *pTabItem; /* FROM clause term being coded */ - int addrBrk; /* Jump here to break out of the loop */ - int addrCont; /* Jump here to continue with next cycle */ - int iRowidReg = 0; /* Rowid is stored in this register, if not zero */ - int iReleaseReg = 0; /* Temp register to free before returning */ + int nKeyCol; /* Number of columns in the constructed index */ + WhereTerm *pTerm; /* A single term of the WHERE clause */ + WhereTerm *pWCEnd; /* End of pWC->a[] */ + Index *pIdx; /* Object describing the transient index */ + Vdbe *v; /* Prepared statement under construction */ + int addrInit; /* Address of the initialization bypass jump */ + Table *pTable; /* The table being indexed */ + int addrTop; /* Top of the index fill loop */ + int regRecord; /* Register holding an index record */ + int n; /* Column counter */ + int i; /* Loop counter */ + int mxBitCol; /* Maximum column in pSrc->colUsed */ + CollSeq *pColl; /* Collating sequence to on a column */ + WhereLoop *pLoop; /* The Loop object */ + char *zNotUsed; /* Extra space on the end of pIdx */ + Bitmask idxCols; /* Bitmap of columns used for indexing */ + Bitmask extraCols; /* Bitmap of additional columns */ + u8 sentWarning = 0; /* True if a warnning has been issued */ + Expr *pPartial = 0; /* Partial Index Expression */ + int iContinue = 0; /* Jump here to skip excluded rows */ + struct SrcList_item *pTabItem; /* FROM clause term being indexed */ - pParse = pWInfo->pParse; + /* Generate code to skip over the creation and initialization of the + ** transient index on 2nd and subsequent iterations of the loop. */ v = pParse->pVdbe; - pWC = &pWInfo->sWC; - db = pParse->db; - pLevel = &pWInfo->a[iLevel]; + assert( v!=0 ); + addrInit = sqlite3CodeOnce(pParse); VdbeCoverage(v); + + /* Count the number of columns that will be added to the index + ** and used to match WHERE clause constraints */ + nKeyCol = 0; + pTable = pSrc->pTab; + pWCEnd = &pWC->a[pWC->nTerm]; pLoop = pLevel->pWLoop; - pTabItem = &pWInfo->pTabList->a[pLevel->iFrom]; - iCur = pTabItem->iCursor; - pLevel->notReady = notReady & ~getMask(&pWInfo->sMaskSet, iCur); - bRev = (pWInfo->revMask>>iLevel)&1; - omitTable = (pLoop->wsFlags & WHERE_IDX_ONLY)!=0 - && (pWInfo->wctrlFlags & WHERE_FORCE_TABLE)==0; - VdbeModuleComment((v, "Begin WHERE-loop%d: %s",iLevel,pTabItem->pTab->zName)); + idxCols = 0; + for(pTerm=pWC->a; pTermpExpr; + assert( !ExprHasProperty(pExpr, EP_FromJoin) /* prereq always non-zero */ + || pExpr->iRightJoinTable!=pSrc->iCursor /* for the right-hand */ + || pLoop->prereq!=0 ); /* table of a LEFT JOIN */ + if( pLoop->prereq==0 + && (pTerm->wtFlags & TERM_VIRTUAL)==0 + && !ExprHasProperty(pExpr, EP_FromJoin) + && sqlite3ExprIsTableConstant(pExpr, pSrc->iCursor) ){ + pPartial = sqlite3ExprAnd(pParse->db, pPartial, + sqlite3ExprDup(pParse->db, pExpr, 0)); + } + if( termCanDriveIndex(pTerm, pSrc, notReady) ){ + int iCol = pTerm->u.leftColumn; + Bitmask cMask = iCol>=BMS ? MASKBIT(BMS-1) : MASKBIT(iCol); + testcase( iCol==BMS ); + testcase( iCol==BMS-1 ); + if( !sentWarning ){ + sqlite3_log(SQLITE_WARNING_AUTOINDEX, + "automatic index on %s(%s)", pTable->zName, + pTable->aCol[iCol].zName); + sentWarning = 1; + } + if( (idxCols & cMask)==0 ){ + if( whereLoopResize(pParse->db, pLoop, nKeyCol+1) ){ + goto end_auto_index_create; + } + pLoop->aLTerm[nKeyCol++] = pTerm; + idxCols |= cMask; + } + } + } + assert( nKeyCol>0 ); + pLoop->u.btree.nEq = pLoop->nLTerm = nKeyCol; + pLoop->wsFlags = WHERE_COLUMN_EQ | WHERE_IDX_ONLY | WHERE_INDEXED + | WHERE_AUTO_INDEX; - /* Create labels for the "break" and "continue" instructions - ** for the current loop. Jump to addrBrk to break out of a loop. - ** Jump to cont to go immediately to the next iteration of the - ** loop. - ** - ** When there is an IN operator, we also have a "addrNxt" label that - ** means to continue with the next IN value combination. When - ** there are no IN operators in the constraints, the "addrNxt" label - ** is the same as "addrBrk". + /* Count the number of additional columns needed to create a + ** covering index. A "covering index" is an index that contains all + ** columns that are needed by the query. With a covering index, the + ** original table never needs to be accessed. Automatic indices must + ** be a covering index because the index will not be updated if the + ** original table changes and the index and table cannot both be used + ** if they go out of sync. */ - addrBrk = pLevel->addrBrk = pLevel->addrNxt = sqlite3VdbeMakeLabel(v); - addrCont = pLevel->addrCont = sqlite3VdbeMakeLabel(v); + extraCols = pSrc->colUsed & (~idxCols | MASKBIT(BMS-1)); + mxBitCol = MIN(BMS-1,pTable->nCol); + testcase( pTable->nCol==BMS-1 ); + testcase( pTable->nCol==BMS-2 ); + for(i=0; icolUsed & MASKBIT(BMS-1) ){ + nKeyCol += pTable->nCol - BMS + 1; + } - /* If this is the right table of a LEFT OUTER JOIN, allocate and - ** initialize a memory cell that records if this table matches any - ** row of the left table of the join. - */ - if( pLevel->iFrom>0 && (pTabItem[0].jointype & JT_LEFT)!=0 ){ - pLevel->iLeftJoin = ++pParse->nMem; - sqlite3VdbeAddOp2(v, OP_Integer, 0, pLevel->iLeftJoin); - VdbeComment((v, "init LEFT JOIN no-match flag")); + /* Construct the Index object to describe this index */ + pIdx = sqlite3AllocateIndexObject(pParse->db, nKeyCol+1, 0, &zNotUsed); + if( pIdx==0 ) goto end_auto_index_create; + pLoop->u.btree.pIndex = pIdx; + pIdx->zName = "auto-index"; + pIdx->pTable = pTable; + n = 0; + idxCols = 0; + for(pTerm=pWC->a; pTermu.leftColumn; + Bitmask cMask = iCol>=BMS ? MASKBIT(BMS-1) : MASKBIT(iCol); + testcase( iCol==BMS-1 ); + testcase( iCol==BMS ); + if( (idxCols & cMask)==0 ){ + Expr *pX = pTerm->pExpr; + idxCols |= cMask; + pIdx->aiColumn[n] = pTerm->u.leftColumn; + pColl = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pX->pRight); + pIdx->azColl[n] = pColl ? pColl->zName : "BINARY"; + n++; + } + } } + assert( (u32)n==pLoop->u.btree.nEq ); - /* Special case of a FROM clause subquery implemented as a co-routine */ + /* Add additional columns needed to make the automatic index into + ** a covering index */ + for(i=0; iaiColumn[n] = i; + pIdx->azColl[n] = "BINARY"; + n++; + } + } + if( pSrc->colUsed & MASKBIT(BMS-1) ){ + for(i=BMS-1; inCol; i++){ + pIdx->aiColumn[n] = i; + pIdx->azColl[n] = "BINARY"; + n++; + } + } + assert( n==nKeyCol ); + pIdx->aiColumn[n] = -1; + pIdx->azColl[n] = "BINARY"; + + /* Create the automatic index */ + assert( pLevel->iIdxCur>=0 ); + pLevel->iIdxCur = pParse->nTab++; + sqlite3VdbeAddOp2(v, OP_OpenAutoindex, pLevel->iIdxCur, nKeyCol+1); + sqlite3VdbeSetP4KeyInfo(pParse, pIdx); + VdbeComment((v, "for %s", pTable->zName)); + + /* Fill the automatic index with content */ + sqlite3ExprCachePush(pParse); + pTabItem = &pWC->pWInfo->pTabList->a[pLevel->iFrom]; if( pTabItem->viaCoroutine ){ int regYield = pTabItem->regReturn; sqlite3VdbeAddOp3(v, OP_InitCoroutine, regYield, 0, pTabItem->addrFillSub); - pLevel->p2 = sqlite3VdbeAddOp2(v, OP_Yield, regYield, addrBrk); + addrTop = sqlite3VdbeAddOp1(v, OP_Yield, regYield); VdbeCoverage(v); VdbeComment((v, "next row of \"%s\"", pTabItem->pTab->zName)); - pLevel->op = OP_Goto; - }else + }else{ + addrTop = sqlite3VdbeAddOp1(v, OP_Rewind, pLevel->iTabCur); VdbeCoverage(v); + } + if( pPartial ){ + iContinue = sqlite3VdbeMakeLabel(v); + sqlite3ExprIfFalse(pParse, pPartial, iContinue, SQLITE_JUMPIFNULL); + pLoop->wsFlags |= WHERE_PARTIALIDX; + } + regRecord = sqlite3GetTempReg(pParse); + sqlite3GenerateIndexKey(pParse, pIdx, pLevel->iTabCur, regRecord, 0, 0, 0, 0); + sqlite3VdbeAddOp2(v, OP_IdxInsert, pLevel->iIdxCur, regRecord); + sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); + if( pPartial ) sqlite3VdbeResolveLabel(v, iContinue); + if( pTabItem->viaCoroutine ){ + translateColumnToCopy(v, addrTop, pLevel->iTabCur, pTabItem->regResult); + sqlite3VdbeAddOp2(v, OP_Goto, 0, addrTop); + pTabItem->viaCoroutine = 0; + }else{ + sqlite3VdbeAddOp2(v, OP_Next, pLevel->iTabCur, addrTop+1); VdbeCoverage(v); + } + sqlite3VdbeChangeP5(v, SQLITE_STMTSTATUS_AUTOINDEX); + sqlite3VdbeJumpHere(v, addrTop); + sqlite3ReleaseTempReg(pParse, regRecord); + sqlite3ExprCachePop(pParse); + + /* Jump here when skipping the initialization */ + sqlite3VdbeJumpHere(v, addrInit); + +end_auto_index_create: + sqlite3ExprDelete(pParse->db, pPartial); +} +#endif /* SQLITE_OMIT_AUTOMATIC_INDEX */ #ifndef SQLITE_OMIT_VIRTUALTABLE - if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)!=0 ){ - /* Case 1: The table is a virtual-table. Use the VFilter and VNext - ** to access the data. - */ - int iReg; /* P3 Value for OP_VFilter */ - int addrNotFound; - int nConstraint = pLoop->nLTerm; +/* +** Allocate and populate an sqlite3_index_info structure. It is the +** responsibility of the caller to eventually release the structure +** by passing the pointer returned by this function to sqlite3_free(). +*/ +static sqlite3_index_info *allocateIndexInfo( + Parse *pParse, + WhereClause *pWC, + Bitmask mUnusable, /* Ignore terms with these prereqs */ + struct SrcList_item *pSrc, + ExprList *pOrderBy +){ + int i, j; + int nTerm; + struct sqlite3_index_constraint *pIdxCons; + struct sqlite3_index_orderby *pIdxOrderBy; + struct sqlite3_index_constraint_usage *pUsage; + WhereTerm *pTerm; + int nOrderBy; + sqlite3_index_info *pIdxInfo; - sqlite3ExprCachePush(pParse); - iReg = sqlite3GetTempRange(pParse, nConstraint+2); - addrNotFound = pLevel->addrBrk; - for(j=0; jaLTerm[j]; - if( pTerm==0 ) continue; - if( pTerm->eOperator & WO_IN ){ - codeEqualityTerm(pParse, pTerm, pLevel, j, bRev, iTarget); - addrNotFound = pLevel->addrNxt; - }else{ - sqlite3ExprCode(pParse, pTerm->pExpr->pRight, iTarget); - } + /* Count the number of possible WHERE clause constraints referring + ** to this virtual table */ + for(i=nTerm=0, pTerm=pWC->a; inTerm; i++, pTerm++){ + if( pTerm->leftCursor != pSrc->iCursor ) continue; + if( pTerm->prereqRight & mUnusable ) continue; + assert( IsPowerOfTwo(pTerm->eOperator & ~WO_EQUIV) ); + testcase( pTerm->eOperator & WO_IN ); + testcase( pTerm->eOperator & WO_ISNULL ); + testcase( pTerm->eOperator & WO_IS ); + testcase( pTerm->eOperator & WO_ALL ); + if( (pTerm->eOperator & ~(WO_ISNULL|WO_EQUIV|WO_IS))==0 ) continue; + if( pTerm->wtFlags & TERM_VNULL ) continue; + nTerm++; + } + + /* If the ORDER BY clause contains only columns in the current + ** virtual table then allocate space for the aOrderBy part of + ** the sqlite3_index_info structure. + */ + nOrderBy = 0; + if( pOrderBy ){ + int n = pOrderBy->nExpr; + for(i=0; ia[i].pExpr; + if( pExpr->op!=TK_COLUMN || pExpr->iTable!=pSrc->iCursor ) break; } - sqlite3VdbeAddOp2(v, OP_Integer, pLoop->u.vtab.idxNum, iReg); - sqlite3VdbeAddOp2(v, OP_Integer, nConstraint, iReg+1); - sqlite3VdbeAddOp4(v, OP_VFilter, iCur, addrNotFound, iReg, - pLoop->u.vtab.idxStr, - pLoop->u.vtab.needFree ? P4_MPRINTF : P4_STATIC); - VdbeCoverage(v); - pLoop->u.vtab.needFree = 0; - for(j=0; ju.vtab.omitMask>>j)&1 ){ - disableTerm(pLevel, pLoop->aLTerm[j]); - } + if( i==n){ + nOrderBy = n; } - pLevel->op = OP_VNext; - pLevel->p1 = iCur; - pLevel->p2 = sqlite3VdbeCurrentAddr(v); - sqlite3ReleaseTempRange(pParse, iReg, nConstraint+2); - sqlite3ExprCachePop(pParse); - }else -#endif /* SQLITE_OMIT_VIRTUALTABLE */ + } - if( (pLoop->wsFlags & WHERE_IPK)!=0 - && (pLoop->wsFlags & (WHERE_COLUMN_IN|WHERE_COLUMN_EQ))!=0 - ){ - /* Case 2: We can directly reference a single row using an - ** equality comparison against the ROWID field. Or - ** we reference multiple rows using a "rowid IN (...)" - ** construct. - */ - assert( pLoop->u.btree.nEq==1 ); - pTerm = pLoop->aLTerm[0]; - assert( pTerm!=0 ); - assert( pTerm->pExpr!=0 ); - assert( omitTable==0 ); - testcase( pTerm->wtFlags & TERM_VIRTUAL ); - iReleaseReg = ++pParse->nMem; - iRowidReg = codeEqualityTerm(pParse, pTerm, pLevel, 0, bRev, iReleaseReg); - if( iRowidReg!=iReleaseReg ) sqlite3ReleaseTempReg(pParse, iReleaseReg); - addrNxt = pLevel->addrNxt; - sqlite3VdbeAddOp2(v, OP_MustBeInt, iRowidReg, addrNxt); VdbeCoverage(v); - sqlite3VdbeAddOp3(v, OP_NotExists, iCur, addrNxt, iRowidReg); - VdbeCoverage(v); - sqlite3ExprCacheAffinityChange(pParse, iRowidReg, 1); - sqlite3ExprCacheStore(pParse, iCur, -1, iRowidReg); - VdbeComment((v, "pk")); - pLevel->op = OP_Noop; - }else if( (pLoop->wsFlags & WHERE_IPK)!=0 - && (pLoop->wsFlags & WHERE_COLUMN_RANGE)!=0 - ){ - /* Case 3: We have an inequality comparison against the ROWID field. - */ - int testOp = OP_Noop; - int start; - int memEndValue = 0; - WhereTerm *pStart, *pEnd; + /* Allocate the sqlite3_index_info structure + */ + pIdxInfo = sqlite3DbMallocZero(pParse->db, sizeof(*pIdxInfo) + + (sizeof(*pIdxCons) + sizeof(*pUsage))*nTerm + + sizeof(*pIdxOrderBy)*nOrderBy ); + if( pIdxInfo==0 ){ + sqlite3ErrorMsg(pParse, "out of memory"); + return 0; + } - assert( omitTable==0 ); - j = 0; - pStart = pEnd = 0; - if( pLoop->wsFlags & WHERE_BTM_LIMIT ) pStart = pLoop->aLTerm[j++]; - if( pLoop->wsFlags & WHERE_TOP_LIMIT ) pEnd = pLoop->aLTerm[j++]; - assert( pStart!=0 || pEnd!=0 ); - if( bRev ){ - pTerm = pStart; - pStart = pEnd; - pEnd = pTerm; - } - if( pStart ){ - Expr *pX; /* The expression that defines the start bound */ - int r1, rTemp; /* Registers for holding the start boundary */ + /* Initialize the structure. The sqlite3_index_info structure contains + ** many fields that are declared "const" to prevent xBestIndex from + ** changing them. We have to do some funky casting in order to + ** initialize those fields. + */ + pIdxCons = (struct sqlite3_index_constraint*)&pIdxInfo[1]; + pIdxOrderBy = (struct sqlite3_index_orderby*)&pIdxCons[nTerm]; + pUsage = (struct sqlite3_index_constraint_usage*)&pIdxOrderBy[nOrderBy]; + *(int*)&pIdxInfo->nConstraint = nTerm; + *(int*)&pIdxInfo->nOrderBy = nOrderBy; + *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint = pIdxCons; + *(struct sqlite3_index_orderby**)&pIdxInfo->aOrderBy = pIdxOrderBy; + *(struct sqlite3_index_constraint_usage**)&pIdxInfo->aConstraintUsage = + pUsage; - /* The following constant maps TK_xx codes into corresponding - ** seek opcodes. It depends on a particular ordering of TK_xx - */ - const u8 aMoveOp[] = { - /* TK_GT */ OP_SeekGT, - /* TK_LE */ OP_SeekLE, - /* TK_LT */ OP_SeekLT, - /* TK_GE */ OP_SeekGE - }; - assert( TK_LE==TK_GT+1 ); /* Make sure the ordering.. */ - assert( TK_LT==TK_GT+2 ); /* ... of the TK_xx values... */ - assert( TK_GE==TK_GT+3 ); /* ... is correcct. */ + for(i=j=0, pTerm=pWC->a; inTerm; i++, pTerm++){ + u8 op; + if( pTerm->leftCursor != pSrc->iCursor ) continue; + if( pTerm->prereqRight & mUnusable ) continue; + assert( IsPowerOfTwo(pTerm->eOperator & ~WO_EQUIV) ); + testcase( pTerm->eOperator & WO_IN ); + testcase( pTerm->eOperator & WO_IS ); + testcase( pTerm->eOperator & WO_ISNULL ); + testcase( pTerm->eOperator & WO_ALL ); + if( (pTerm->eOperator & ~(WO_ISNULL|WO_EQUIV|WO_IS))==0 ) continue; + if( pTerm->wtFlags & TERM_VNULL ) continue; + pIdxCons[j].iColumn = pTerm->u.leftColumn; + pIdxCons[j].iTermOffset = i; + op = (u8)pTerm->eOperator & WO_ALL; + if( op==WO_IN ) op = WO_EQ; + pIdxCons[j].op = op; + /* The direct assignment in the previous line is possible only because + ** the WO_ and SQLITE_INDEX_CONSTRAINT_ codes are identical. The + ** following asserts verify this fact. */ + assert( WO_EQ==SQLITE_INDEX_CONSTRAINT_EQ ); + assert( WO_LT==SQLITE_INDEX_CONSTRAINT_LT ); + assert( WO_LE==SQLITE_INDEX_CONSTRAINT_LE ); + assert( WO_GT==SQLITE_INDEX_CONSTRAINT_GT ); + assert( WO_GE==SQLITE_INDEX_CONSTRAINT_GE ); + assert( WO_MATCH==SQLITE_INDEX_CONSTRAINT_MATCH ); + assert( pTerm->eOperator & (WO_IN|WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE|WO_MATCH) ); + j++; + } + for(i=0; ia[i].pExpr; + pIdxOrderBy[i].iColumn = pExpr->iColumn; + pIdxOrderBy[i].desc = pOrderBy->a[i].sortOrder; + } - assert( (pStart->wtFlags & TERM_VNULL)==0 ); - testcase( pStart->wtFlags & TERM_VIRTUAL ); - pX = pStart->pExpr; - assert( pX!=0 ); - testcase( pStart->leftCursor!=iCur ); /* transitive constraints */ - r1 = sqlite3ExprCodeTemp(pParse, pX->pRight, &rTemp); - sqlite3VdbeAddOp3(v, aMoveOp[pX->op-TK_GT], iCur, addrBrk, r1); - VdbeComment((v, "pk")); - VdbeCoverageIf(v, pX->op==TK_GT); - VdbeCoverageIf(v, pX->op==TK_LE); - VdbeCoverageIf(v, pX->op==TK_LT); - VdbeCoverageIf(v, pX->op==TK_GE); - sqlite3ExprCacheAffinityChange(pParse, r1, 1); - sqlite3ReleaseTempReg(pParse, rTemp); - disableTerm(pLevel, pStart); + return pIdxInfo; +} + +/* +** The table object reference passed as the second argument to this function +** must represent a virtual table. This function invokes the xBestIndex() +** method of the virtual table with the sqlite3_index_info object that +** comes in as the 3rd argument to this function. +** +** If an error occurs, pParse is populated with an error message and a +** non-zero value is returned. Otherwise, 0 is returned and the output +** part of the sqlite3_index_info structure is left populated. +** +** Whether or not an error is returned, it is the responsibility of the +** caller to eventually free p->idxStr if p->needToFreeIdxStr indicates +** that this is required. +*/ +static int vtabBestIndex(Parse *pParse, Table *pTab, sqlite3_index_info *p){ + sqlite3_vtab *pVtab = sqlite3GetVTable(pParse->db, pTab)->pVtab; + int i; + int rc; + + TRACE_IDX_INPUTS(p); + rc = pVtab->pModule->xBestIndex(pVtab, p); + TRACE_IDX_OUTPUTS(p); + + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_NOMEM ){ + pParse->db->mallocFailed = 1; + }else if( !pVtab->zErrMsg ){ + sqlite3ErrorMsg(pParse, "%s", sqlite3ErrStr(rc)); }else{ - sqlite3VdbeAddOp2(v, bRev ? OP_Last : OP_Rewind, iCur, addrBrk); - VdbeCoverageIf(v, bRev==0); - VdbeCoverageIf(v, bRev!=0); + sqlite3ErrorMsg(pParse, "%s", pVtab->zErrMsg); } - if( pEnd ){ - Expr *pX; - pX = pEnd->pExpr; - assert( pX!=0 ); - assert( (pEnd->wtFlags & TERM_VNULL)==0 ); - testcase( pEnd->leftCursor!=iCur ); /* Transitive constraints */ - testcase( pEnd->wtFlags & TERM_VIRTUAL ); - memEndValue = ++pParse->nMem; - sqlite3ExprCode(pParse, pX->pRight, memEndValue); - if( pX->op==TK_LT || pX->op==TK_GT ){ - testOp = bRev ? OP_Le : OP_Ge; - }else{ - testOp = bRev ? OP_Lt : OP_Gt; - } - disableTerm(pLevel, pEnd); - } - start = sqlite3VdbeCurrentAddr(v); - pLevel->op = bRev ? OP_Prev : OP_Next; - pLevel->p1 = iCur; - pLevel->p2 = start; - assert( pLevel->p5==0 ); - if( testOp!=OP_Noop ){ - iRowidReg = ++pParse->nMem; - sqlite3VdbeAddOp2(v, OP_Rowid, iCur, iRowidReg); - sqlite3ExprCacheStore(pParse, iCur, -1, iRowidReg); - sqlite3VdbeAddOp3(v, testOp, memEndValue, addrBrk, iRowidReg); - VdbeCoverageIf(v, testOp==OP_Le); - VdbeCoverageIf(v, testOp==OP_Lt); - VdbeCoverageIf(v, testOp==OP_Ge); - VdbeCoverageIf(v, testOp==OP_Gt); - sqlite3VdbeChangeP5(v, SQLITE_AFF_NUMERIC | SQLITE_JUMPIFNULL); - } - }else if( pLoop->wsFlags & WHERE_INDEXED ){ - /* Case 4: A scan using an index. - ** - ** The WHERE clause may contain zero or more equality - ** terms ("==" or "IN" operators) that refer to the N - ** left-most columns of the index. It may also contain - ** inequality constraints (>, <, >= or <=) on the indexed - ** column that immediately follows the N equalities. Only - ** the right-most column can be an inequality - the rest must - ** use the "==" and "IN" operators. For example, if the - ** index is on (x,y,z), then the following clauses are all - ** optimized: - ** - ** x=5 - ** x=5 AND y=10 - ** x=5 AND y<10 - ** x=5 AND y>5 AND y<10 - ** x=5 AND y=5 AND z<=10 - ** - ** The z<10 term of the following cannot be used, only - ** the x=5 term: - ** - ** x=5 AND z<10 - ** - ** N may be zero if there are inequality constraints. - ** If there are no inequality constraints, then N is at - ** least one. - ** - ** This case is also used when there are no WHERE clause - ** constraints but an index is selected anyway, in order - ** to force the output order to conform to an ORDER BY. - */ - static const u8 aStartOp[] = { - 0, - 0, - OP_Rewind, /* 2: (!start_constraints && startEq && !bRev) */ - OP_Last, /* 3: (!start_constraints && startEq && bRev) */ - OP_SeekGT, /* 4: (start_constraints && !startEq && !bRev) */ - OP_SeekLT, /* 5: (start_constraints && !startEq && bRev) */ - OP_SeekGE, /* 6: (start_constraints && startEq && !bRev) */ - OP_SeekLE /* 7: (start_constraints && startEq && bRev) */ - }; - static const u8 aEndOp[] = { - OP_IdxGE, /* 0: (end_constraints && !bRev && !endEq) */ - OP_IdxGT, /* 1: (end_constraints && !bRev && endEq) */ - OP_IdxLE, /* 2: (end_constraints && bRev && !endEq) */ - OP_IdxLT, /* 3: (end_constraints && bRev && endEq) */ - }; - u16 nEq = pLoop->u.btree.nEq; /* Number of == or IN terms */ - int regBase; /* Base register holding constraint values */ - WhereTerm *pRangeStart = 0; /* Inequality constraint at range start */ - WhereTerm *pRangeEnd = 0; /* Inequality constraint at range end */ - int startEq; /* True if range start uses ==, >= or <= */ - int endEq; /* True if range end uses ==, >= or <= */ - int start_constraints; /* Start of range is constrained */ - int nConstraint; /* Number of constraint terms */ - Index *pIdx; /* The index we will be using */ - int iIdxCur; /* The VDBE cursor for the index */ - int nExtraReg = 0; /* Number of extra registers needed */ - int op; /* Instruction opcode */ - char *zStartAff; /* Affinity for start of range constraint */ - char cEndAff = 0; /* Affinity for end of range constraint */ - u8 bSeekPastNull = 0; /* True to seek past initial nulls */ - u8 bStopAtNull = 0; /* Add condition to terminate at NULLs */ - - pIdx = pLoop->u.btree.pIndex; - iIdxCur = pLevel->iIdxCur; - assert( nEq>=pLoop->nSkip ); - - /* If this loop satisfies a sort order (pOrderBy) request that - ** was passed to this function to implement a "SELECT min(x) ..." - ** query, then the caller will only allow the loop to run for - ** a single iteration. This means that the first row returned - ** should not have a NULL value stored in 'x'. If column 'x' is - ** the first one after the nEq equality constraints in the index, - ** this requires some special handling. - */ - assert( pWInfo->pOrderBy==0 - || pWInfo->pOrderBy->nExpr==1 - || (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)==0 ); - if( (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)!=0 - && pWInfo->nOBSat>0 - && (pIdx->nKeyCol>nEq) - ){ - assert( pLoop->nSkip==0 ); - bSeekPastNull = 1; - nExtraReg = 1; + } + sqlite3_free(pVtab->zErrMsg); + pVtab->zErrMsg = 0; + + for(i=0; inConstraint; i++){ + if( !p->aConstraint[i].usable && p->aConstraintUsage[i].argvIndex>0 ){ + sqlite3ErrorMsg(pParse, + "table %s: xBestIndex returned an invalid plan", pTab->zName); } + } - /* Find any inequality constraint terms for the start and end - ** of the range. - */ - j = nEq; - if( pLoop->wsFlags & WHERE_BTM_LIMIT ){ - pRangeStart = pLoop->aLTerm[j++]; - nExtraReg = 1; - /* Like optimization range constraints always occur in pairs */ - assert( (pRangeStart->wtFlags & TERM_LIKEOPT)==0 || - (pLoop->wsFlags & WHERE_TOP_LIMIT)!=0 ); - } - if( pLoop->wsFlags & WHERE_TOP_LIMIT ){ - pRangeEnd = pLoop->aLTerm[j++]; - nExtraReg = 1; - if( (pRangeEnd->wtFlags & TERM_LIKEOPT)!=0 ){ - assert( pRangeStart!=0 ); /* LIKE opt constraints */ - assert( pRangeStart->wtFlags & TERM_LIKEOPT ); /* occur in pairs */ - pLevel->iLikeRepCntr = ++pParse->nMem; - testcase( bRev ); - testcase( pIdx->aSortOrder[nEq]==SQLITE_SO_DESC ); - sqlite3VdbeAddOp2(v, OP_Integer, - bRev ^ (pIdx->aSortOrder[nEq]==SQLITE_SO_DESC), - pLevel->iLikeRepCntr); - VdbeComment((v, "LIKE loop counter")); - pLevel->addrLikeRep = sqlite3VdbeCurrentAddr(v); - } - if( pRangeStart==0 - && (j = pIdx->aiColumn[nEq])>=0 - && pIdx->pTable->aCol[j].notNull==0 - ){ - bSeekPastNull = 1; - } - } - assert( pRangeEnd==0 || (pRangeEnd->wtFlags & TERM_VNULL)==0 ); + return pParse->nErr; +} +#endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */ - /* Generate code to evaluate all constraint terms using == or IN - ** and store the values of those terms in an array of registers - ** starting at regBase. - */ - regBase = codeAllEqualityTerms(pParse,pLevel,bRev,nExtraReg,&zStartAff); - assert( zStartAff==0 || sqlite3Strlen30(zStartAff)>=nEq ); - if( zStartAff ) cEndAff = zStartAff[nEq]; - addrNxt = pLevel->addrNxt; +#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 +/* +** Estimate the location of a particular key among all keys in an +** index. Store the results in aStat as follows: +** +** aStat[0] Est. number of rows less than pRec +** aStat[1] Est. number of rows equal to pRec +** +** Return the index of the sample that is the smallest sample that +** is greater than or equal to pRec. Note that this index is not an index +** into the aSample[] array - it is an index into a virtual set of samples +** based on the contents of aSample[] and the number of fields in record +** pRec. +*/ +static int whereKeyStats( + Parse *pParse, /* Database connection */ + Index *pIdx, /* Index to consider domain of */ + UnpackedRecord *pRec, /* Vector of values to consider */ + int roundUp, /* Round up if true. Round down if false */ + tRowcnt *aStat /* OUT: stats written here */ +){ + IndexSample *aSample = pIdx->aSample; + int iCol; /* Index of required stats in anEq[] etc. */ + int i; /* Index of first sample >= pRec */ + int iSample; /* Smallest sample larger than or equal to pRec */ + int iMin = 0; /* Smallest sample not yet tested */ + int iTest; /* Next sample to test */ + int res; /* Result of comparison operation */ + int nField; /* Number of fields in pRec */ + tRowcnt iLower = 0; /* anLt[] + anEq[] of largest sample pRec is > */ - /* If we are doing a reverse order scan on an ascending index, or - ** a forward order scan on a descending index, interchange the - ** start and end terms (pRangeStart and pRangeEnd). - */ - if( (nEqnKeyCol && bRev==(pIdx->aSortOrder[nEq]==SQLITE_SO_ASC)) - || (bRev && pIdx->nKeyCol==nEq) - ){ - SWAP(WhereTerm *, pRangeEnd, pRangeStart); - SWAP(u8, bSeekPastNull, bStopAtNull); - } +#ifndef SQLITE_DEBUG + UNUSED_PARAMETER( pParse ); +#endif + assert( pRec!=0 ); + assert( pIdx->nSample>0 ); + assert( pRec->nField>0 && pRec->nField<=pIdx->nSampleCol ); - testcase( pRangeStart && (pRangeStart->eOperator & WO_LE)!=0 ); - testcase( pRangeStart && (pRangeStart->eOperator & WO_GE)!=0 ); - testcase( pRangeEnd && (pRangeEnd->eOperator & WO_LE)!=0 ); - testcase( pRangeEnd && (pRangeEnd->eOperator & WO_GE)!=0 ); - startEq = !pRangeStart || pRangeStart->eOperator & (WO_LE|WO_GE); - endEq = !pRangeEnd || pRangeEnd->eOperator & (WO_LE|WO_GE); - start_constraints = pRangeStart || nEq>0; + /* Do a binary search to find the first sample greater than or equal + ** to pRec. If pRec contains a single field, the set of samples to search + ** is simply the aSample[] array. If the samples in aSample[] contain more + ** than one fields, all fields following the first are ignored. + ** + ** If pRec contains N fields, where N is more than one, then as well as the + ** samples in aSample[] (truncated to N fields), the search also has to + ** consider prefixes of those samples. For example, if the set of samples + ** in aSample is: + ** + ** aSample[0] = (a, 5) + ** aSample[1] = (a, 10) + ** aSample[2] = (b, 5) + ** aSample[3] = (c, 100) + ** aSample[4] = (c, 105) + ** + ** Then the search space should ideally be the samples above and the + ** unique prefixes [a], [b] and [c]. But since that is hard to organize, + ** the code actually searches this set: + ** + ** 0: (a) + ** 1: (a, 5) + ** 2: (a, 10) + ** 3: (a, 10) + ** 4: (b) + ** 5: (b, 5) + ** 6: (c) + ** 7: (c, 100) + ** 8: (c, 105) + ** 9: (c, 105) + ** + ** For each sample in the aSample[] array, N samples are present in the + ** effective sample array. In the above, samples 0 and 1 are based on + ** sample aSample[0]. Samples 2 and 3 on aSample[1] etc. + ** + ** Often, sample i of each block of N effective samples has (i+1) fields. + ** Except, each sample may be extended to ensure that it is greater than or + ** equal to the previous sample in the array. For example, in the above, + ** sample 2 is the first sample of a block of N samples, so at first it + ** appears that it should be 1 field in size. However, that would make it + ** smaller than sample 1, so the binary search would not work. As a result, + ** it is extended to two fields. The duplicates that this creates do not + ** cause any problems. + */ + nField = pRec->nField; + iCol = 0; + iSample = pIdx->nSample * nField; + do{ + int iSamp; /* Index in aSample[] of test sample */ + int n; /* Number of fields in test sample */ - /* Seek the index cursor to the start of the range. */ - nConstraint = nEq; - if( pRangeStart ){ - Expr *pRight = pRangeStart->pExpr->pRight; - sqlite3ExprCode(pParse, pRight, regBase+nEq); - whereLikeOptimizationStringFixup(v, pLevel, pRangeStart); - if( (pRangeStart->wtFlags & TERM_VNULL)==0 - && sqlite3ExprCanBeNull(pRight) - ){ - sqlite3VdbeAddOp2(v, OP_IsNull, regBase+nEq, addrNxt); - VdbeCoverage(v); + iTest = (iMin+iSample)/2; + iSamp = iTest / nField; + if( iSamp>0 ){ + /* The proposed effective sample is a prefix of sample aSample[iSamp]. + ** Specifically, the shortest prefix of at least (1 + iTest%nField) + ** fields that is greater than the previous effective sample. */ + for(n=(iTest % nField) + 1; nwtFlags & TERM_VIRTUAL ); - }else if( bSeekPastNull ){ - sqlite3VdbeAddOp2(v, OP_Null, 0, regBase+nEq); - nConstraint++; - startEq = 0; - start_constraints = 1; + }else{ + n = iTest + 1; } - codeApplyAffinity(pParse, regBase, nConstraint - bSeekPastNull, zStartAff); - op = aStartOp[(start_constraints<<2) + (startEq<<1) + bRev]; - assert( op!=0 ); - sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, nConstraint); - VdbeCoverage(v); - VdbeCoverageIf(v, op==OP_Rewind); testcase( op==OP_Rewind ); - VdbeCoverageIf(v, op==OP_Last); testcase( op==OP_Last ); - VdbeCoverageIf(v, op==OP_SeekGT); testcase( op==OP_SeekGT ); - VdbeCoverageIf(v, op==OP_SeekGE); testcase( op==OP_SeekGE ); - VdbeCoverageIf(v, op==OP_SeekLE); testcase( op==OP_SeekLE ); - VdbeCoverageIf(v, op==OP_SeekLT); testcase( op==OP_SeekLT ); - /* Load the value for the inequality constraint at the end of the - ** range (if any). - */ - nConstraint = nEq; - if( pRangeEnd ){ - Expr *pRight = pRangeEnd->pExpr->pRight; - sqlite3ExprCacheRemove(pParse, regBase+nEq, 1); - sqlite3ExprCode(pParse, pRight, regBase+nEq); - whereLikeOptimizationStringFixup(v, pLevel, pRangeEnd); - if( (pRangeEnd->wtFlags & TERM_VNULL)==0 - && sqlite3ExprCanBeNull(pRight) - ){ - sqlite3VdbeAddOp2(v, OP_IsNull, regBase+nEq, addrNxt); - VdbeCoverage(v); - } - if( sqlite3CompareAffinity(pRight, cEndAff)!=SQLITE_AFF_NONE - && !sqlite3ExprNeedsNoAffinityChange(pRight, cEndAff) - ){ - codeApplyAffinity(pParse, regBase+nEq, 1, &cEndAff); - } - nConstraint++; - testcase( pRangeEnd->wtFlags & TERM_VIRTUAL ); - }else if( bStopAtNull ){ - sqlite3VdbeAddOp2(v, OP_Null, 0, regBase+nEq); - endEq = 0; - nConstraint++; + pRec->nField = n; + res = sqlite3VdbeRecordCompare(aSample[iSamp].n, aSample[iSamp].p, pRec); + if( res<0 ){ + iLower = aSample[iSamp].anLt[n-1] + aSample[iSamp].anEq[n-1]; + iMin = iTest+1; + }else if( res==0 && np2 = sqlite3VdbeCurrentAddr(v); +#ifdef SQLITE_DEBUG + /* The following assert statements check that the binary search code + ** above found the right answer. This block serves no purpose other + ** than to invoke the asserts. */ + if( pParse->db->mallocFailed==0 ){ + if( res==0 ){ + /* If (res==0) is true, then pRec must be equal to sample i. */ + assert( inSample ); + assert( iCol==nField-1 ); + pRec->nField = nField; + assert( 0==sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec) + || pParse->db->mallocFailed + ); + }else{ + /* Unless i==pIdx->nSample, indicating that pRec is larger than + ** all samples in the aSample[] array, pRec must be smaller than the + ** (iCol+1) field prefix of sample i. */ + assert( i<=pIdx->nSample && i>=0 ); + pRec->nField = iCol+1; + assert( i==pIdx->nSample + || sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)>0 + || pParse->db->mallocFailed ); - /* Check if the index cursor is past the end of the range. */ - if( nConstraint ){ - op = aEndOp[bRev*2 + endEq]; - sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, nConstraint); - testcase( op==OP_IdxGT ); VdbeCoverageIf(v, op==OP_IdxGT ); - testcase( op==OP_IdxGE ); VdbeCoverageIf(v, op==OP_IdxGE ); - testcase( op==OP_IdxLT ); VdbeCoverageIf(v, op==OP_IdxLT ); - testcase( op==OP_IdxLE ); VdbeCoverageIf(v, op==OP_IdxLE ); + /* if i==0 and iCol==0, then record pRec is smaller than all samples + ** in the aSample[] array. Otherwise, if (iCol>0) then pRec must + ** be greater than or equal to the (iCol) field prefix of sample i. + ** If (i>0), then pRec must also be greater than sample (i-1). */ + if( iCol>0 ){ + pRec->nField = iCol; + assert( sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)<=0 + || pParse->db->mallocFailed ); + } + if( i>0 ){ + pRec->nField = nField; + assert( sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0 + || pParse->db->mallocFailed ); + } } + } +#endif /* ifdef SQLITE_DEBUG */ - /* Seek the table cursor, if required */ - disableTerm(pLevel, pRangeStart); - disableTerm(pLevel, pRangeEnd); - if( omitTable ){ - /* pIdx is a covering index. No need to access the main table. */ - }else if( HasRowid(pIdx->pTable) ){ - iRowidReg = ++pParse->nMem; - sqlite3VdbeAddOp2(v, OP_IdxRowid, iIdxCur, iRowidReg); - sqlite3ExprCacheStore(pParse, iCur, -1, iRowidReg); - sqlite3VdbeAddOp2(v, OP_Seek, iCur, iRowidReg); /* Deferred seek */ - }else if( iCur!=iIdxCur ){ - Index *pPk = sqlite3PrimaryKeyIndex(pIdx->pTable); - iRowidReg = sqlite3GetTempRange(pParse, pPk->nKeyCol); - for(j=0; jnKeyCol; j++){ - k = sqlite3ColumnOfIndex(pIdx, pPk->aiColumn[j]); - sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, k, iRowidReg+j); - } - sqlite3VdbeAddOp4Int(v, OP_NotFound, iCur, addrCont, - iRowidReg, pPk->nKeyCol); VdbeCoverage(v); + if( res==0 ){ + /* Record pRec is equal to sample i */ + assert( iCol==nField-1 ); + aStat[0] = aSample[i].anLt[iCol]; + aStat[1] = aSample[i].anEq[iCol]; + }else{ + /* At this point, the (iCol+1) field prefix of aSample[i] is the first + ** sample that is greater than pRec. Or, if i==pIdx->nSample then pRec + ** is larger than all samples in the array. */ + tRowcnt iUpper, iGap; + if( i>=pIdx->nSample ){ + iUpper = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]); + }else{ + iUpper = aSample[i].anLt[iCol]; } - /* Record the instruction used to terminate the loop. Disable - ** WHERE clause terms made redundant by the index range scan. - */ - if( pLoop->wsFlags & WHERE_ONEROW ){ - pLevel->op = OP_Noop; - }else if( bRev ){ - pLevel->op = OP_Prev; + if( iLower>=iUpper ){ + iGap = 0; }else{ - pLevel->op = OP_Next; + iGap = iUpper - iLower; } - pLevel->p1 = iIdxCur; - pLevel->p3 = (pLoop->wsFlags&WHERE_UNQ_WANTED)!=0 ? 1:0; - if( (pLoop->wsFlags & WHERE_CONSTRAINT)==0 ){ - pLevel->p5 = SQLITE_STMTSTATUS_FULLSCAN_STEP; + if( roundUp ){ + iGap = (iGap*2)/3; }else{ - assert( pLevel->p5==0 ); + iGap = iGap/3; } - }else - -#ifndef SQLITE_OMIT_OR_OPTIMIZATION - if( pLoop->wsFlags & WHERE_MULTI_OR ){ - /* Case 5: Two or more separately indexed terms connected by OR - ** - ** Example: - ** - ** CREATE TABLE t1(a,b,c,d); - ** CREATE INDEX i1 ON t1(a); - ** CREATE INDEX i2 ON t1(b); - ** CREATE INDEX i3 ON t1(c); - ** - ** SELECT * FROM t1 WHERE a=5 OR b=7 OR (c=11 AND d=13) - ** - ** In the example, there are three indexed terms connected by OR. - ** The top of the loop looks like this: - ** - ** Null 1 # Zero the rowset in reg 1 - ** - ** Then, for each indexed term, the following. The arguments to - ** RowSetTest are such that the rowid of the current row is inserted - ** into the RowSet. If it is already present, control skips the - ** Gosub opcode and jumps straight to the code generated by WhereEnd(). - ** - ** sqlite3WhereBegin() - ** RowSetTest # Insert rowid into rowset - ** Gosub 2 A - ** sqlite3WhereEnd() - ** - ** Following the above, code to terminate the loop. Label A, the target - ** of the Gosub above, jumps to the instruction right after the Goto. - ** - ** Null 1 # Zero the rowset in reg 1 - ** Goto B # The loop is finished. - ** - ** A: # Return data, whatever. - ** - ** Return 2 # Jump back to the Gosub - ** - ** B: - ** - ** Added 2014-05-26: If the table is a WITHOUT ROWID table, then - ** use an ephemeral index instead of a RowSet to record the primary - ** keys of the rows we have already seen. - ** - */ - WhereClause *pOrWc; /* The OR-clause broken out into subterms */ - SrcList *pOrTab; /* Shortened table list or OR-clause generation */ - Index *pCov = 0; /* Potential covering index (or NULL) */ - int iCovCur = pParse->nTab++; /* Cursor used for index scans (if any) */ + aStat[0] = iLower + iGap; + aStat[1] = pIdx->aAvgEq[iCol]; + } - int regReturn = ++pParse->nMem; /* Register used with OP_Gosub */ - int regRowset = 0; /* Register for RowSet object */ - int regRowid = 0; /* Register holding rowid */ - int iLoopBody = sqlite3VdbeMakeLabel(v); /* Start of loop body */ - int iRetInit; /* Address of regReturn init */ - int untestedTerms = 0; /* Some terms not completely tested */ - int ii; /* Loop counter */ - u16 wctrlFlags; /* Flags for sub-WHERE clause */ - Expr *pAndExpr = 0; /* An ".. AND (...)" expression */ - Table *pTab = pTabItem->pTab; - - pTerm = pLoop->aLTerm[0]; - assert( pTerm!=0 ); - assert( pTerm->eOperator & WO_OR ); - assert( (pTerm->wtFlags & TERM_ORINFO)!=0 ); - pOrWc = &pTerm->u.pOrInfo->wc; - pLevel->op = OP_Return; - pLevel->p1 = regReturn; + /* Restore the pRec->nField value before returning. */ + pRec->nField = nField; + return i; +} +#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ - /* Set up a new SrcList in pOrTab containing the table being scanned - ** by this loop in the a[0] slot and all notReady tables in a[1..] slots. - ** This becomes the SrcList in the recursive call to sqlite3WhereBegin(). - */ - if( pWInfo->nLevel>1 ){ - int nNotReady; /* The number of notReady tables */ - struct SrcList_item *origSrc; /* Original list of tables */ - nNotReady = pWInfo->nLevel - iLevel - 1; - pOrTab = sqlite3StackAllocRaw(db, - sizeof(*pOrTab)+ nNotReady*sizeof(pOrTab->a[0])); - if( pOrTab==0 ) return notReady; - pOrTab->nAlloc = (u8)(nNotReady + 1); - pOrTab->nSrc = pOrTab->nAlloc; - memcpy(pOrTab->a, pTabItem, sizeof(*pTabItem)); - origSrc = pWInfo->pTabList->a; - for(k=1; k<=nNotReady; k++){ - memcpy(&pOrTab->a[k], &origSrc[pLevel[k].iFrom], sizeof(pOrTab->a[k])); - } - }else{ - pOrTab = pWInfo->pTabList; +/* +** If it is not NULL, pTerm is a term that provides an upper or lower +** bound on a range scan. Without considering pTerm, it is estimated +** that the scan will visit nNew rows. This function returns the number +** estimated to be visited after taking pTerm into account. +** +** If the user explicitly specified a likelihood() value for this term, +** then the return value is the likelihood multiplied by the number of +** input rows. Otherwise, this function assumes that an "IS NOT NULL" term +** has a likelihood of 0.50, and any other term a likelihood of 0.25. +*/ +static LogEst whereRangeAdjust(WhereTerm *pTerm, LogEst nNew){ + LogEst nRet = nNew; + if( pTerm ){ + if( pTerm->truthProb<=0 ){ + nRet += pTerm->truthProb; + }else if( (pTerm->wtFlags & TERM_VNULL)==0 ){ + nRet -= 20; assert( 20==sqlite3LogEst(4) ); } + } + return nRet; +} - /* Initialize the rowset register to contain NULL. An SQL NULL is - ** equivalent to an empty rowset. Or, create an ephemeral index - ** capable of holding primary keys in the case of a WITHOUT ROWID. - ** - ** Also initialize regReturn to contain the address of the instruction - ** immediately following the OP_Return at the bottom of the loop. This - ** is required in a few obscure LEFT JOIN cases where control jumps - ** over the top of the loop into the body of it. In this case the - ** correct response for the end-of-loop code (the OP_Return) is to - ** fall through to the next instruction, just as an OP_Next does if - ** called on an uninitialized cursor. - */ - if( (pWInfo->wctrlFlags & WHERE_DUPLICATES_OK)==0 ){ - if( HasRowid(pTab) ){ - regRowset = ++pParse->nMem; - sqlite3VdbeAddOp2(v, OP_Null, 0, regRowset); - }else{ - Index *pPk = sqlite3PrimaryKeyIndex(pTab); - regRowset = pParse->nTab++; - sqlite3VdbeAddOp2(v, OP_OpenEphemeral, regRowset, pPk->nKeyCol); - sqlite3VdbeSetP4KeyInfo(pParse, pPk); - } - regRowid = ++pParse->nMem; - } - iRetInit = sqlite3VdbeAddOp2(v, OP_Integer, 0, regReturn); +#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 +/* +** This function is called to estimate the number of rows visited by a +** range-scan on a skip-scan index. For example: +** +** CREATE INDEX i1 ON t1(a, b, c); +** SELECT * FROM t1 WHERE a=? AND c BETWEEN ? AND ?; +** +** Value pLoop->nOut is currently set to the estimated number of rows +** visited for scanning (a=? AND b=?). This function reduces that estimate +** by some factor to account for the (c BETWEEN ? AND ?) expression based +** on the stat4 data for the index. this scan will be peformed multiple +** times (once for each (a,b) combination that matches a=?) is dealt with +** by the caller. +** +** It does this by scanning through all stat4 samples, comparing values +** extracted from pLower and pUpper with the corresponding column in each +** sample. If L and U are the number of samples found to be less than or +** equal to the values extracted from pLower and pUpper respectively, and +** N is the total number of samples, the pLoop->nOut value is adjusted +** as follows: +** +** nOut = nOut * ( min(U - L, 1) / N ) +** +** If pLower is NULL, or a value cannot be extracted from the term, L is +** set to zero. If pUpper is NULL, or a value cannot be extracted from it, +** U is set to N. +** +** Normally, this function sets *pbDone to 1 before returning. However, +** if no value can be extracted from either pLower or pUpper (and so the +** estimate of the number of rows delivered remains unchanged), *pbDone +** is left as is. +** +** If an error occurs, an SQLite error code is returned. Otherwise, +** SQLITE_OK. +*/ +static int whereRangeSkipScanEst( + Parse *pParse, /* Parsing & code generating context */ + WhereTerm *pLower, /* Lower bound on the range. ex: "x>123" Might be NULL */ + WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */ + WhereLoop *pLoop, /* Update the .nOut value of this loop */ + int *pbDone /* Set to true if at least one expr. value extracted */ +){ + Index *p = pLoop->u.btree.pIndex; + int nEq = pLoop->u.btree.nEq; + sqlite3 *db = pParse->db; + int nLower = -1; + int nUpper = p->nSample+1; + int rc = SQLITE_OK; + int iCol = p->aiColumn[nEq]; + u8 aff = iCol>=0 ? p->pTable->aCol[iCol].affinity : SQLITE_AFF_INTEGER; + CollSeq *pColl; + + sqlite3_value *p1 = 0; /* Value extracted from pLower */ + sqlite3_value *p2 = 0; /* Value extracted from pUpper */ + sqlite3_value *pVal = 0; /* Value extracted from record */ - /* If the original WHERE clause is z of the form: (x1 OR x2 OR ...) AND y - ** Then for every term xN, evaluate as the subexpression: xN AND z - ** That way, terms in y that are factored into the disjunction will - ** be picked up by the recursive calls to sqlite3WhereBegin() below. - ** - ** Actually, each subexpression is converted to "xN AND w" where w is - ** the "interesting" terms of z - terms that did not originate in the - ** ON or USING clause of a LEFT JOIN, and terms that are usable as - ** indices. - ** - ** This optimization also only applies if the (x1 OR x2 OR ...) term - ** is not contained in the ON clause of a LEFT JOIN. - ** See ticket http://www.sqlite.org/src/info/f2369304e4 - */ - if( pWC->nTerm>1 ){ - int iTerm; - for(iTerm=0; iTermnTerm; iTerm++){ - Expr *pExpr = pWC->a[iTerm].pExpr; - if( &pWC->a[iTerm] == pTerm ) continue; - if( ExprHasProperty(pExpr, EP_FromJoin) ) continue; - if( (pWC->a[iTerm].wtFlags & TERM_VIRTUAL)!=0 ) continue; - if( (pWC->a[iTerm].eOperator & WO_ALL)==0 ) continue; - testcase( pWC->a[iTerm].wtFlags & TERM_ORINFO ); - pExpr = sqlite3ExprDup(db, pExpr, 0); - pAndExpr = sqlite3ExprAnd(db, pAndExpr, pExpr); + pColl = sqlite3LocateCollSeq(pParse, p->azColl[nEq]); + if( pLower ){ + rc = sqlite3Stat4ValueFromExpr(pParse, pLower->pExpr->pRight, aff, &p1); + nLower = 0; + } + if( pUpper && rc==SQLITE_OK ){ + rc = sqlite3Stat4ValueFromExpr(pParse, pUpper->pExpr->pRight, aff, &p2); + nUpper = p2 ? 0 : p->nSample; + } + + if( p1 || p2 ){ + int i; + int nDiff; + for(i=0; rc==SQLITE_OK && inSample; i++){ + rc = sqlite3Stat4Column(db, p->aSample[i].p, p->aSample[i].n, nEq, &pVal); + if( rc==SQLITE_OK && p1 ){ + int res = sqlite3MemCompare(p1, pVal, pColl); + if( res>=0 ) nLower++; } - if( pAndExpr ){ - pAndExpr = sqlite3PExpr(pParse, TK_AND, 0, pAndExpr, 0); + if( rc==SQLITE_OK && p2 ){ + int res = sqlite3MemCompare(p2, pVal, pColl); + if( res>=0 ) nUpper++; } } + nDiff = (nUpper - nLower); + if( nDiff<=0 ) nDiff = 1; - /* Run a separate WHERE clause for each term of the OR clause. After - ** eliminating duplicates from other WHERE clauses, the action for each - ** sub-WHERE clause is to to invoke the main loop body as a subroutine. - */ - wctrlFlags = WHERE_OMIT_OPEN_CLOSE - | WHERE_FORCE_TABLE - | WHERE_ONETABLE_ONLY - | WHERE_NO_AUTOINDEX; - for(ii=0; iinTerm; ii++){ - WhereTerm *pOrTerm = &pOrWc->a[ii]; - if( pOrTerm->leftCursor==iCur || (pOrTerm->eOperator & WO_AND)!=0 ){ - WhereInfo *pSubWInfo; /* Info for single OR-term scan */ - Expr *pOrExpr = pOrTerm->pExpr; /* Current OR clause term */ - int j1 = 0; /* Address of jump operation */ - if( pAndExpr && !ExprHasProperty(pOrExpr, EP_FromJoin) ){ - pAndExpr->pLeft = pOrExpr; - pOrExpr = pAndExpr; - } - /* Loop through table entries that match term pOrTerm. */ - WHERETRACE(0xffff, ("Subplan for OR-clause:\n")); - pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrExpr, 0, 0, - wctrlFlags, iCovCur); - assert( pSubWInfo || pParse->nErr || db->mallocFailed ); - if( pSubWInfo ){ - WhereLoop *pSubLoop; - int addrExplain = explainOneScan( - pParse, pOrTab, &pSubWInfo->a[0], iLevel, pLevel->iFrom, 0 - ); - addScanStatus(v, pOrTab, &pSubWInfo->a[0], addrExplain); + /* If there is both an upper and lower bound specified, and the + ** comparisons indicate that they are close together, use the fallback + ** method (assume that the scan visits 1/64 of the rows) for estimating + ** the number of rows visited. Otherwise, estimate the number of rows + ** using the method described in the header comment for this function. */ + if( nDiff!=1 || pUpper==0 || pLower==0 ){ + int nAdjust = (sqlite3LogEst(p->nSample) - sqlite3LogEst(nDiff)); + pLoop->nOut -= nAdjust; + *pbDone = 1; + WHERETRACE(0x10, ("range skip-scan regions: %u..%u adjust=%d est=%d\n", + nLower, nUpper, nAdjust*-1, pLoop->nOut)); + } - /* This is the sub-WHERE clause body. First skip over - ** duplicate rows from prior sub-WHERE clauses, and record the - ** rowid (or PRIMARY KEY) for the current row so that the same - ** row will be skipped in subsequent sub-WHERE clauses. - */ - if( (pWInfo->wctrlFlags & WHERE_DUPLICATES_OK)==0 ){ - int r; - int iSet = ((ii==pOrWc->nTerm-1)?-1:ii); - if( HasRowid(pTab) ){ - r = sqlite3ExprCodeGetColumn(pParse, pTab, -1, iCur, regRowid, 0); - j1 = sqlite3VdbeAddOp4Int(v, OP_RowSetTest, regRowset, 0, r,iSet); - VdbeCoverage(v); - }else{ - Index *pPk = sqlite3PrimaryKeyIndex(pTab); - int nPk = pPk->nKeyCol; - int iPk; + }else{ + assert( *pbDone==0 ); + } - /* Read the PK into an array of temp registers. */ - r = sqlite3GetTempRange(pParse, nPk); - for(iPk=0; iPkaiColumn[iPk]; - sqlite3ExprCodeGetColumn(pParse, pTab, iCol, iCur, r+iPk, 0); - } + sqlite3ValueFree(p1); + sqlite3ValueFree(p2); + sqlite3ValueFree(pVal); - /* Check if the temp table already contains this key. If so, - ** the row has already been included in the result set and - ** can be ignored (by jumping past the Gosub below). Otherwise, - ** insert the key into the temp table and proceed with processing - ** the row. - ** - ** Use some of the same optimizations as OP_RowSetTest: If iSet - ** is zero, assume that the key cannot already be present in - ** the temp table. And if iSet is -1, assume that there is no - ** need to insert the key into the temp table, as it will never - ** be tested for. */ - if( iSet ){ - j1 = sqlite3VdbeAddOp4Int(v, OP_Found, regRowset, 0, r, nPk); - VdbeCoverage(v); - } - if( iSet>=0 ){ - sqlite3VdbeAddOp3(v, OP_MakeRecord, r, nPk, regRowid); - sqlite3VdbeAddOp3(v, OP_IdxInsert, regRowset, regRowid, 0); - if( iSet ) sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); - } + return rc; +} +#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ - /* Release the array of temp registers */ - sqlite3ReleaseTempRange(pParse, r, nPk); - } - } +/* +** This function is used to estimate the number of rows that will be visited +** by scanning an index for a range of values. The range may have an upper +** bound, a lower bound, or both. The WHERE clause terms that set the upper +** and lower bounds are represented by pLower and pUpper respectively. For +** example, assuming that index p is on t1(a): +** +** ... FROM t1 WHERE a > ? AND a < ? ... +** |_____| |_____| +** | | +** pLower pUpper +** +** If either of the upper or lower bound is not present, then NULL is passed in +** place of the corresponding WhereTerm. +** +** The value in (pBuilder->pNew->u.btree.nEq) is the number of the index +** column subject to the range constraint. Or, equivalently, the number of +** equality constraints optimized by the proposed index scan. For example, +** assuming index p is on t1(a, b), and the SQL query is: +** +** ... FROM t1 WHERE a = ? AND b > ? AND b < ? ... +** +** then nEq is set to 1 (as the range restricted column, b, is the second +** left-most column of the index). Or, if the query is: +** +** ... FROM t1 WHERE a > ? AND a < ? ... +** +** then nEq is set to 0. +** +** When this function is called, *pnOut is set to the sqlite3LogEst() of the +** number of rows that the index scan is expected to visit without +** considering the range constraints. If nEq is 0, then *pnOut is the number of +** rows in the index. Assuming no error occurs, *pnOut is adjusted (reduced) +** to account for the range constraints pLower and pUpper. +** +** In the absence of sqlite_stat4 ANALYZE data, or if such data cannot be +** used, a single range inequality reduces the search space by a factor of 4. +** and a pair of constraints (x>? AND x123" Might be NULL */ + WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */ + WhereLoop *pLoop /* Modify the .nOut and maybe .rRun fields */ +){ + int rc = SQLITE_OK; + int nOut = pLoop->nOut; + LogEst nNew; - /* Invoke the main loop body as a subroutine */ - sqlite3VdbeAddOp2(v, OP_Gosub, regReturn, iLoopBody); +#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 + Index *p = pLoop->u.btree.pIndex; + int nEq = pLoop->u.btree.nEq; - /* Jump here (skipping the main loop body subroutine) if the - ** current sub-WHERE row is a duplicate from prior sub-WHEREs. */ - if( j1 ) sqlite3VdbeJumpHere(v, j1); + if( p->nSample>0 && nEqnSampleCol ){ + if( nEq==pBuilder->nRecValid ){ + UnpackedRecord *pRec = pBuilder->pRec; + tRowcnt a[2]; + u8 aff; - /* The pSubWInfo->untestedTerms flag means that this OR term - ** contained one or more AND term from a notReady table. The - ** terms from the notReady table could not be tested and will - ** need to be tested later. - */ - if( pSubWInfo->untestedTerms ) untestedTerms = 1; + /* Variable iLower will be set to the estimate of the number of rows in + ** the index that are less than the lower bound of the range query. The + ** lower bound being the concatenation of $P and $L, where $P is the + ** key-prefix formed by the nEq values matched against the nEq left-most + ** columns of the index, and $L is the value in pLower. + ** + ** Or, if pLower is NULL or $L cannot be extracted from it (because it + ** is not a simple variable or literal value), the lower bound of the + ** range is $P. Due to a quirk in the way whereKeyStats() works, even + ** if $L is available, whereKeyStats() is called for both ($P) and + ** ($P:$L) and the larger of the two returned values is used. + ** + ** Similarly, iUpper is to be set to the estimate of the number of rows + ** less than the upper bound of the range query. Where the upper bound + ** is either ($P) or ($P:$U). Again, even if $U is available, both values + ** of iUpper are requested of whereKeyStats() and the smaller used. + ** + ** The number of rows between the two bounds is then just iUpper-iLower. + */ + tRowcnt iLower; /* Rows less than the lower bound */ + tRowcnt iUpper; /* Rows less than the upper bound */ + int iLwrIdx = -2; /* aSample[] for the lower bound */ + int iUprIdx = -1; /* aSample[] for the upper bound */ - /* If all of the OR-connected terms are optimized using the same - ** index, and the index is opened using the same cursor number - ** by each call to sqlite3WhereBegin() made by this loop, it may - ** be possible to use that index as a covering index. - ** - ** If the call to sqlite3WhereBegin() above resulted in a scan that - ** uses an index, and this is either the first OR-connected term - ** processed or the index is the same as that used by all previous - ** terms, set pCov to the candidate covering index. Otherwise, set - ** pCov to NULL to indicate that no candidate covering index will - ** be available. - */ - pSubLoop = pSubWInfo->a[0].pWLoop; - assert( (pSubLoop->wsFlags & WHERE_AUTO_INDEX)==0 ); - if( (pSubLoop->wsFlags & WHERE_INDEXED)!=0 - && (ii==0 || pSubLoop->u.btree.pIndex==pCov) - && (HasRowid(pTab) || !IsPrimaryKeyIndex(pSubLoop->u.btree.pIndex)) - ){ - assert( pSubWInfo->a[0].iIdxCur==iCovCur ); - pCov = pSubLoop->u.btree.pIndex; - wctrlFlags |= WHERE_REOPEN_IDX; - }else{ - pCov = 0; - } + if( pRec ){ + testcase( pRec->nField!=pBuilder->nRecValid ); + pRec->nField = pBuilder->nRecValid; + } + if( nEq==p->nKeyCol ){ + aff = SQLITE_AFF_INTEGER; + }else{ + aff = p->pTable->aCol[p->aiColumn[nEq]].affinity; + } + /* Determine iLower and iUpper using ($P) only. */ + if( nEq==0 ){ + iLower = 0; + iUpper = p->nRowEst0; + }else{ + /* Note: this call could be optimized away - since the same values must + ** have been requested when testing key $P in whereEqualScanEst(). */ + whereKeyStats(pParse, p, pRec, 0, a); + iLower = a[0]; + iUpper = a[0] + a[1]; + } - /* Finish the loop through table entries that match term pOrTerm. */ - sqlite3WhereEnd(pSubWInfo); + assert( pLower==0 || (pLower->eOperator & (WO_GT|WO_GE))!=0 ); + assert( pUpper==0 || (pUpper->eOperator & (WO_LT|WO_LE))!=0 ); + assert( p->aSortOrder!=0 ); + if( p->aSortOrder[nEq] ){ + /* The roles of pLower and pUpper are swapped for a DESC index */ + SWAP(WhereTerm*, pLower, pUpper); + } + + /* If possible, improve on the iLower estimate using ($P:$L). */ + if( pLower ){ + int bOk; /* True if value is extracted from pExpr */ + Expr *pExpr = pLower->pExpr->pRight; + rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk); + if( rc==SQLITE_OK && bOk ){ + tRowcnt iNew; + iLwrIdx = whereKeyStats(pParse, p, pRec, 0, a); + iNew = a[0] + ((pLower->eOperator & (WO_GT|WO_LE)) ? a[1] : 0); + if( iNew>iLower ) iLower = iNew; + nOut--; + pLower = 0; } } - } - pLevel->u.pCovidx = pCov; - if( pCov ) pLevel->iIdxCur = iCovCur; - if( pAndExpr ){ - pAndExpr->pLeft = 0; - sqlite3ExprDelete(db, pAndExpr); - } - sqlite3VdbeChangeP1(v, iRetInit, sqlite3VdbeCurrentAddr(v)); - sqlite3VdbeAddOp2(v, OP_Goto, 0, pLevel->addrBrk); - sqlite3VdbeResolveLabel(v, iLoopBody); - if( pWInfo->nLevel>1 ) sqlite3StackFree(db, pOrTab); - if( !untestedTerms ) disableTerm(pLevel, pTerm); - }else -#endif /* SQLITE_OMIT_OR_OPTIMIZATION */ + /* If possible, improve on the iUpper estimate using ($P:$U). */ + if( pUpper ){ + int bOk; /* True if value is extracted from pExpr */ + Expr *pExpr = pUpper->pExpr->pRight; + rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk); + if( rc==SQLITE_OK && bOk ){ + tRowcnt iNew; + iUprIdx = whereKeyStats(pParse, p, pRec, 1, a); + iNew = a[0] + ((pUpper->eOperator & (WO_GT|WO_LE)) ? a[1] : 0); + if( iNewisRecursive ){ - /* Tables marked isRecursive have only a single row that is stored in - ** a pseudo-cursor. No need to Rewind or Next such cursors. */ - pLevel->op = OP_Noop; + pBuilder->pRec = pRec; + if( rc==SQLITE_OK ){ + if( iUpper>iLower ){ + nNew = sqlite3LogEst(iUpper - iLower); + /* TUNING: If both iUpper and iLower are derived from the same + ** sample, then assume they are 4x more selective. This brings + ** the estimated selectivity more in line with what it would be + ** if estimated without the use of STAT3/4 tables. */ + if( iLwrIdx==iUprIdx ) nNew -= 20; assert( 20==sqlite3LogEst(4) ); + }else{ + nNew = 10; assert( 10==sqlite3LogEst(2) ); + } + if( nNewop = aStep[bRev]; - pLevel->p1 = iCur; - pLevel->p2 = 1 + sqlite3VdbeAddOp2(v, aStart[bRev], iCur, addrBrk); - VdbeCoverageIf(v, bRev==0); - VdbeCoverageIf(v, bRev!=0); - pLevel->p5 = SQLITE_STMTSTATUS_FULLSCAN_STEP; + int bDone = 0; + rc = whereRangeSkipScanEst(pParse, pLower, pUpper, pLoop, &bDone); + if( bDone ) return rc; } } - -#ifdef SQLITE_ENABLE_STMT_SCANSTATUS - pLevel->addrVisit = sqlite3VdbeCurrentAddr(v); +#else + UNUSED_PARAMETER(pParse); + UNUSED_PARAMETER(pBuilder); + assert( pLower || pUpper ); #endif + assert( pUpper==0 || (pUpper->wtFlags & TERM_VNULL)==0 ); + nNew = whereRangeAdjust(pLower, nOut); + nNew = whereRangeAdjust(pUpper, nNew); - /* Insert code to test every subexpression that can be completely - ** computed using the current set of tables. - */ - for(pTerm=pWC->a, j=pWC->nTerm; j>0; j--, pTerm++){ - Expr *pE; - int skipLikeAddr = 0; - testcase( pTerm->wtFlags & TERM_VIRTUAL ); - testcase( pTerm->wtFlags & TERM_CODED ); - if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; - if( (pTerm->prereqAll & pLevel->notReady)!=0 ){ - testcase( pWInfo->untestedTerms==0 - && (pWInfo->wctrlFlags & WHERE_ONETABLE_ONLY)!=0 ); - pWInfo->untestedTerms = 1; - continue; - } - pE = pTerm->pExpr; - assert( pE!=0 ); - if( pLevel->iLeftJoin && !ExprHasProperty(pE, EP_FromJoin) ){ - continue; - } - if( pTerm->wtFlags & TERM_LIKECOND ){ - assert( pLevel->iLikeRepCntr>0 ); - skipLikeAddr = sqlite3VdbeAddOp1(v, OP_IfNot, pLevel->iLikeRepCntr); - VdbeCoverage(v); - } - sqlite3ExprIfFalse(pParse, pE, addrCont, SQLITE_JUMPIFNULL); - if( skipLikeAddr ) sqlite3VdbeJumpHere(v, skipLikeAddr); - pTerm->wtFlags |= TERM_CODED; + /* TUNING: If there is both an upper and lower limit and neither limit + ** has an application-defined likelihood(), assume the range is + ** reduced by an additional 75%. This means that, by default, an open-ended + ** range query (e.g. col > ?) is assumed to match 1/4 of the rows in the + ** index. While a closed range (e.g. col BETWEEN ? AND ?) is estimated to + ** match 1/64 of the index. */ + if( pLower && pLower->truthProb>0 && pUpper && pUpper->truthProb>0 ){ + nNew -= 20; } - /* Insert code to test for implied constraints based on transitivity - ** of the "==" operator. - ** - ** Example: If the WHERE clause contains "t1.a=t2.b" and "t2.b=123" - ** and we are coding the t1 loop and the t2 loop has not yet coded, - ** then we cannot use the "t1.a=t2.b" constraint, but we can code - ** the implied "t1.a=123" constraint. - */ - for(pTerm=pWC->a, j=pWC->nTerm; j>0; j--, pTerm++){ - Expr *pE, *pEAlt; - WhereTerm *pAlt; - if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; - if( pTerm->eOperator!=(WO_EQUIV|WO_EQ) ) continue; - if( pTerm->leftCursor!=iCur ) continue; - if( pLevel->iLeftJoin ) continue; - pE = pTerm->pExpr; - assert( !ExprHasProperty(pE, EP_FromJoin) ); - assert( (pTerm->prereqRight & pLevel->notReady)!=0 ); - pAlt = findTerm(pWC, iCur, pTerm->u.leftColumn, notReady, WO_EQ|WO_IN, 0); - if( pAlt==0 ) continue; - if( pAlt->wtFlags & (TERM_CODED) ) continue; - testcase( pAlt->eOperator & WO_EQ ); - testcase( pAlt->eOperator & WO_IN ); - VdbeModuleComment((v, "begin transitive constraint")); - pEAlt = sqlite3StackAllocRaw(db, sizeof(*pEAlt)); - if( pEAlt ){ - *pEAlt = *pAlt->pExpr; - pEAlt->pLeft = pE->pLeft; - sqlite3ExprIfFalse(pParse, pEAlt, addrCont, SQLITE_JUMPIFNULL); - sqlite3StackFree(db, pEAlt); - } + nOut -= (pLower!=0) + (pUpper!=0); + if( nNew<10 ) nNew = 10; + if( nNewnOut>nOut ){ + WHERETRACE(0x10,("Range scan lowers nOut from %d to %d\n", + pLoop->nOut, nOut)); } +#endif + pLoop->nOut = (LogEst)nOut; + return rc; +} - /* For a LEFT OUTER JOIN, generate code that will record the fact that - ** at least one row of the right table has matched the left table. - */ - if( pLevel->iLeftJoin ){ - pLevel->addrFirst = sqlite3VdbeCurrentAddr(v); - sqlite3VdbeAddOp2(v, OP_Integer, 1, pLevel->iLeftJoin); - VdbeComment((v, "record LEFT JOIN hit")); - sqlite3ExprCacheClear(pParse); - for(pTerm=pWC->a, j=0; jnTerm; j++, pTerm++){ - testcase( pTerm->wtFlags & TERM_VIRTUAL ); - testcase( pTerm->wtFlags & TERM_CODED ); - if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; - if( (pTerm->prereqAll & pLevel->notReady)!=0 ){ - assert( pWInfo->untestedTerms ); - continue; - } - assert( pTerm->pExpr ); - sqlite3ExprIfFalse(pParse, pTerm->pExpr, addrCont, SQLITE_JUMPIFNULL); - pTerm->wtFlags |= TERM_CODED; - } +#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 +/* +** Estimate the number of rows that will be returned based on +** an equality constraint x=VALUE and where that VALUE occurs in +** the histogram data. This only works when x is the left-most +** column of an index and sqlite_stat3 histogram data is available +** for that index. When pExpr==NULL that means the constraint is +** "x IS NULL" instead of "x=VALUE". +** +** Write the estimated row count into *pnRow and return SQLITE_OK. +** If unable to make an estimate, leave *pnRow unchanged and return +** non-zero. +** +** This routine can fail if it is unable to load a collating sequence +** required for string comparison, or if unable to allocate memory +** for a UTF conversion required for comparison. The error is stored +** in the pParse structure. +*/ +static int whereEqualScanEst( + Parse *pParse, /* Parsing & code generating context */ + WhereLoopBuilder *pBuilder, + Expr *pExpr, /* Expression for VALUE in the x=VALUE constraint */ + tRowcnt *pnRow /* Write the revised row estimate here */ +){ + Index *p = pBuilder->pNew->u.btree.pIndex; + int nEq = pBuilder->pNew->u.btree.nEq; + UnpackedRecord *pRec = pBuilder->pRec; + u8 aff; /* Column affinity */ + int rc; /* Subfunction return code */ + tRowcnt a[2]; /* Statistics */ + int bOk; + + assert( nEq>=1 ); + assert( nEq<=p->nColumn ); + assert( p->aSample!=0 ); + assert( p->nSample>0 ); + assert( pBuilder->nRecValidnRecValid<(nEq-1) ){ + return SQLITE_NOTFOUND; } - return pLevel->notReady; + /* This is an optimization only. The call to sqlite3Stat4ProbeSetValue() + ** below would return the same value. */ + if( nEq>=p->nColumn ){ + *pnRow = 1; + return SQLITE_OK; + } + + aff = p->pTable->aCol[p->aiColumn[nEq-1]].affinity; + rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq-1, &bOk); + pBuilder->pRec = pRec; + if( rc!=SQLITE_OK ) return rc; + if( bOk==0 ) return SQLITE_NOTFOUND; + pBuilder->nRecValid = nEq; + + whereKeyStats(pParse, p, pRec, 0, a); + WHERETRACE(0x10,("equality scan regions: %d\n", (int)a[1])); + *pnRow = a[1]; + + return rc; +} +#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ + +#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 +/* +** Estimate the number of rows that will be returned based on +** an IN constraint where the right-hand side of the IN operator +** is a list of values. Example: +** +** WHERE x IN (1,2,3,4) +** +** Write the estimated row count into *pnRow and return SQLITE_OK. +** If unable to make an estimate, leave *pnRow unchanged and return +** non-zero. +** +** This routine can fail if it is unable to load a collating sequence +** required for string comparison, or if unable to allocate memory +** for a UTF conversion required for comparison. The error is stored +** in the pParse structure. +*/ +static int whereInScanEst( + Parse *pParse, /* Parsing & code generating context */ + WhereLoopBuilder *pBuilder, + ExprList *pList, /* The value list on the RHS of "x IN (v1,v2,v3,...)" */ + tRowcnt *pnRow /* Write the revised row estimate here */ +){ + Index *p = pBuilder->pNew->u.btree.pIndex; + i64 nRow0 = sqlite3LogEstToInt(p->aiRowLogEst[0]); + int nRecValid = pBuilder->nRecValid; + int rc = SQLITE_OK; /* Subfunction return code */ + tRowcnt nEst; /* Number of rows for a single term */ + tRowcnt nRowEst = 0; /* New estimate of the number of rows */ + int i; /* Loop counter */ + + assert( p->aSample!=0 ); + for(i=0; rc==SQLITE_OK && inExpr; i++){ + nEst = nRow0; + rc = whereEqualScanEst(pParse, pBuilder, pList->a[i].pExpr, &nEst); + nRowEst += nEst; + pBuilder->nRecValid = nRecValid; + } + + if( rc==SQLITE_OK ){ + if( nRowEst > nRow0 ) nRowEst = nRow0; + *pnRow = nRowEst; + WHERETRACE(0x10,("IN row estimate: est=%d\n", nRowEst)); + } + assert( pBuilder->nRecValid==nRecValid ); + return rc; } +#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ + #ifdef WHERETRACE_ENABLED /* @@ -119900,9 +121724,10 @@ static void whereTermPrint(WhereTerm *pTerm, int iTerm){ if( pTerm->wtFlags & TERM_VIRTUAL ) zType[0] = 'V'; if( pTerm->eOperator & WO_EQUIV ) zType[1] = 'E'; if( ExprHasProperty(pTerm->pExpr, EP_FromJoin) ) zType[2] = 'L'; - sqlite3DebugPrintf("TERM-%-3d %p %s cursor=%-3d prob=%-3d op=0x%03x\n", - iTerm, pTerm, zType, pTerm->leftCursor, pTerm->truthProb, - pTerm->eOperator); + sqlite3DebugPrintf( + "TERM-%-3d %p %s cursor=%-3d prob=%-3d op=0x%03x wtFlags=0x%04x\n", + iTerm, pTerm, zType, pTerm->leftCursor, pTerm->truthProb, + pTerm->eOperator, pTerm->wtFlags); sqlite3TreeViewExpr(0, pTerm->pExpr, 0); } } @@ -120044,7 +121869,14 @@ static void whereLoopDelete(sqlite3 *db, WhereLoop *p){ */ static void whereInfoFree(sqlite3 *db, WhereInfo *pWInfo){ if( ALWAYS(pWInfo) ){ - whereClauseClear(&pWInfo->sWC); + int i; + for(i=0; inLevel; i++){ + WhereLevel *pLevel = &pWInfo->a[i]; + if( pLevel->pWLoop && (pLevel->pWLoop->wsFlags & WHERE_IN_ABLE) ){ + sqlite3DbFree(db, pLevel->u.in.aInLoop); + } + } + sqlite3WhereClauseClear(&pWInfo->sWC); while( pWInfo->pLoops ){ WhereLoop *p = pWInfo->pLoops; pWInfo->pLoops = p->pNextLoop; @@ -120385,8 +122217,9 @@ static void whereLoopOutputAdjust( /* In the absence of explicit truth probabilities, use heuristics to ** guess a reasonable truth probability. */ pLoop->nOut--; - if( pTerm->eOperator&WO_EQ ){ + if( pTerm->eOperator&(WO_EQ|WO_IS) ){ Expr *pRight = pTerm->pExpr->pRight; + testcase( pTerm->pExpr->op==TK_IS ); if( sqlite3ExprIsInteger(pRight, &k) && k>=(-1) && k<=1 ){ k = 10; }else{ @@ -120454,10 +122287,10 @@ static int whereLoopAddBtreeIndex( assert( (pNew->wsFlags & WHERE_TOP_LIMIT)==0 ); if( pNew->wsFlags & WHERE_BTM_LIMIT ){ opMask = WO_LT|WO_LE; - }else if( pProbe->tnum<=0 || (pSrc->jointype & JT_LEFT)!=0 ){ + }else if( /*pProbe->tnum<=0 ||*/ (pSrc->jointype & JT_LEFT)!=0 ){ opMask = WO_EQ|WO_IN|WO_GT|WO_GE|WO_LT|WO_LE; }else{ - opMask = WO_EQ|WO_IN|WO_ISNULL|WO_GT|WO_GE|WO_LT|WO_LE; + opMask = WO_EQ|WO_IN|WO_GT|WO_GE|WO_LT|WO_LE|WO_ISNULL|WO_IS; } if( pProbe->bUnordered ) opMask &= ~(WO_GT|WO_GE|WO_LT|WO_LE); @@ -120520,10 +122353,10 @@ static int whereLoopAddBtreeIndex( assert( nIn>0 ); /* RHS always has 2 or more terms... The parser ** changes "x IN (?)" into "x=?". */ - }else if( eOp & (WO_EQ) ){ + }else if( eOp & (WO_EQ|WO_IS) ){ pNew->wsFlags |= WHERE_COLUMN_EQ; if( iCol<0 || (nInMul==0 && pNew->u.btree.nEq==pProbe->nKeyCol-1) ){ - if( iCol>=0 && !IsUniqueIndex(pProbe) ){ + if( iCol>=0 && pProbe->uniqNotNull==0 ){ pNew->wsFlags |= WHERE_UNQ_WANTED; }else{ pNew->wsFlags |= WHERE_ONEROW; @@ -120570,7 +122403,7 @@ static int whereLoopAddBtreeIndex( whereRangeScanEst(pParse, pBuilder, pBtm, pTop, pNew); }else{ int nEq = ++pNew->u.btree.nEq; - assert( eOp & (WO_ISNULL|WO_EQ|WO_IN) ); + assert( eOp & (WO_ISNULL|WO_EQ|WO_IN|WO_IS) ); assert( pNew->nOut==saved_nOut ); if( pTerm->truthProb<=0 && iCol>=0 ){ @@ -120587,8 +122420,9 @@ static int whereLoopAddBtreeIndex( && ((eOp & WO_IN)==0 || !ExprHasProperty(pTerm->pExpr, EP_xIsSelect)) ){ Expr *pExpr = pTerm->pExpr; - if( (eOp & (WO_EQ|WO_ISNULL))!=0 ){ + if( (eOp & (WO_EQ|WO_ISNULL|WO_IS))!=0 ){ testcase( eOp & WO_EQ ); + testcase( eOp & WO_IS ); testcase( eOp & WO_ISNULL ); rc = whereEqualScanEst(pParse, pBuilder, pExpr->pRight, &nOut); }else{ @@ -120857,15 +122691,14 @@ static int whereLoopAddBtree( #ifndef SQLITE_OMIT_AUTOMATIC_INDEX /* Automatic indexes */ - if( !pBuilder->pOrSet + if( !pBuilder->pOrSet /* Not part of an OR optimization */ && (pWInfo->wctrlFlags & WHERE_NO_AUTOINDEX)==0 && (pWInfo->pParse->db->flags & SQLITE_AutoIndex)!=0 - && pSrc->pIndex==0 - && !pSrc->viaCoroutine - && !pSrc->notIndexed - && HasRowid(pTab) - && !pSrc->isCorrelated - && !pSrc->isRecursive + && pSrc->pIndex==0 /* Has no INDEXED BY clause */ + && !pSrc->notIndexed /* Has no NOT INDEXED clause */ + && HasRowid(pTab) /* Is not a WITHOUT ROWID table. (FIXME: Why not?) */ + && !pSrc->isCorrelated /* Not a correlated subquery */ + && !pSrc->isRecursive /* Not a recursive common table expression. */ ){ /* Generate auto-index WhereLoops */ WhereTerm *pTerm; @@ -120995,10 +122828,32 @@ static int whereLoopAddBtree( /* ** Add all WhereLoop objects for a table of the join identified by ** pBuilder->pNew->iTab. That table is guaranteed to be a virtual table. +** +** If there are no LEFT or CROSS JOIN joins in the query, both mExtra and +** mUnusable are set to 0. Otherwise, mExtra is a mask of all FROM clause +** entries that occur before the virtual table in the FROM clause and are +** separated from it by at least one LEFT or CROSS JOIN. Similarly, the +** mUnusable mask contains all FROM clause entries that occur after the +** virtual table and are separated from it by at least one LEFT or +** CROSS JOIN. +** +** For example, if the query were: +** +** ... FROM t1, t2 LEFT JOIN t3, t4, vt CROSS JOIN t5, t6; +** +** then mExtra corresponds to (t1, t2) and mUnusable to (t5, t6). +** +** All the tables in mExtra must be scanned before the current virtual +** table. So any terms for which all prerequisites are satisfied by +** mExtra may be specified as "usable" in all calls to xBestIndex. +** Conversely, all tables in mUnusable must be scanned after the current +** virtual table, so any terms for which the prerequisites overlap with +** mUnusable should always be configured as "not-usable" for xBestIndex. */ static int whereLoopAddVirtual( WhereLoopBuilder *pBuilder, /* WHERE clause information */ - Bitmask mExtra + Bitmask mExtra, /* Tables that must be scanned before this one */ + Bitmask mUnusable /* Tables that must be scanned after this one */ ){ WhereInfo *pWInfo; /* WHERE analysis context */ Parse *pParse; /* The parsing context */ @@ -121019,6 +122874,7 @@ static int whereLoopAddVirtual( WhereLoop *pNew; int rc = SQLITE_OK; + assert( (mExtra & mUnusable)==0 ); pWInfo = pBuilder->pWInfo; pParse = pWInfo->pParse; db = pParse->db; @@ -121027,7 +122883,7 @@ static int whereLoopAddVirtual( pSrc = &pWInfo->pTabList->a[pNew->iTab]; pTab = pSrc->pTab; assert( IsVirtual(pTab) ); - pIdxInfo = allocateIndexInfo(pParse, pWC, pSrc, pBuilder->pOrderBy); + pIdxInfo = allocateIndexInfo(pParse, pWC, mUnusable, pSrc,pBuilder->pOrderBy); if( pIdxInfo==0 ) return SQLITE_NOMEM; pNew->prereq = 0; pNew->rSetup = 0; @@ -121057,7 +122913,7 @@ static int whereLoopAddVirtual( if( (pTerm->eOperator & WO_IN)!=0 ){ seenIn = 1; } - if( pTerm->prereqRight!=0 ){ + if( (pTerm->prereqRight & ~mExtra)!=0 ){ seenVar = 1; }else if( (pTerm->eOperator & WO_IN)==0 ){ pIdxCons->usable = 1; @@ -121065,7 +122921,7 @@ static int whereLoopAddVirtual( break; case 1: /* Constants with IN operators */ assert( seenIn ); - pIdxCons->usable = (pTerm->prereqRight==0); + pIdxCons->usable = (pTerm->prereqRight & ~mExtra)==0; break; case 2: /* Variables without IN */ assert( seenVar ); @@ -121164,7 +123020,11 @@ whereLoopAddVtab_exit: ** Add WhereLoop entries to handle OR terms. This works for either ** btrees or virtual tables. */ -static int whereLoopAddOr(WhereLoopBuilder *pBuilder, Bitmask mExtra){ +static int whereLoopAddOr( + WhereLoopBuilder *pBuilder, + Bitmask mExtra, + Bitmask mUnusable +){ WhereInfo *pWInfo = pBuilder->pWInfo; WhereClause *pWC; WhereLoop *pNew; @@ -121223,14 +123083,14 @@ static int whereLoopAddOr(WhereLoopBuilder *pBuilder, Bitmask mExtra){ #endif #ifndef SQLITE_OMIT_VIRTUALTABLE if( IsVirtual(pItem->pTab) ){ - rc = whereLoopAddVirtual(&sSubBuild, mExtra); + rc = whereLoopAddVirtual(&sSubBuild, mExtra, mUnusable); }else #endif { rc = whereLoopAddBtree(&sSubBuild, mExtra); } if( rc==SQLITE_OK ){ - rc = whereLoopAddOr(&sSubBuild, mExtra); + rc = whereLoopAddOr(&sSubBuild, mExtra, mUnusable); } assert( rc==SQLITE_OK || sCur.n==0 ); if( sCur.n==0 ){ @@ -121292,33 +123152,43 @@ static int whereLoopAddAll(WhereLoopBuilder *pBuilder){ int iTab; SrcList *pTabList = pWInfo->pTabList; struct SrcList_item *pItem; + struct SrcList_item *pEnd = &pTabList->a[pWInfo->nLevel]; sqlite3 *db = pWInfo->pParse->db; - int nTabList = pWInfo->nLevel; int rc = SQLITE_OK; - u8 priorJoinType = 0; WhereLoop *pNew; + u8 priorJointype = 0; /* Loop over the tables in the join, from left to right */ pNew = pBuilder->pNew; whereLoopInit(pNew); - for(iTab=0, pItem=pTabList->a; iTaba; pItemiTab = iTab; - pNew->maskSelf = getMask(&pWInfo->sMaskSet, pItem->iCursor); - if( ((pItem->jointype|priorJoinType) & (JT_LEFT|JT_CROSS))!=0 ){ + pNew->maskSelf = sqlite3WhereGetMask(&pWInfo->sMaskSet, pItem->iCursor); + if( ((pItem->jointype|priorJointype) & (JT_LEFT|JT_CROSS))!=0 ){ + /* This condition is true when pItem is the FROM clause term on the + ** right-hand-side of a LEFT or CROSS JOIN. */ mExtra = mPrior; } - priorJoinType = pItem->jointype; + priorJointype = pItem->jointype; if( IsVirtual(pItem->pTab) ){ - rc = whereLoopAddVirtual(pBuilder, mExtra); + struct SrcList_item *p; + for(p=&pItem[1]; pjointype & (JT_LEFT|JT_CROSS)) ){ + mUnusable |= sqlite3WhereGetMask(&pWInfo->sMaskSet, p->iCursor); + } + } + rc = whereLoopAddVirtual(pBuilder, mExtra, mUnusable); }else{ rc = whereLoopAddBtree(pBuilder, mExtra); } if( rc==SQLITE_OK ){ - rc = whereLoopAddOr(pBuilder, mExtra); + rc = whereLoopAddOr(pBuilder, mExtra, mUnusable); } mPrior |= pNew->maskSelf; if( rc || db->mallocFailed ) break; } + whereLoopClear(db, pNew); return rc; } @@ -121424,10 +123294,10 @@ static i8 wherePathSatisfiesOrderBy( pOBExpr = sqlite3ExprSkipCollate(pOrderBy->a[i].pExpr); if( pOBExpr->op!=TK_COLUMN ) continue; if( pOBExpr->iTable!=iCur ) continue; - pTerm = findTerm(&pWInfo->sWC, iCur, pOBExpr->iColumn, - ~ready, WO_EQ|WO_ISNULL, 0); + pTerm = sqlite3WhereFindTerm(&pWInfo->sWC, iCur, pOBExpr->iColumn, + ~ready, WO_EQ|WO_ISNULL|WO_IS, 0); if( pTerm==0 ) continue; - if( (pTerm->eOperator&WO_EQ)!=0 && pOBExpr->iColumn>=0 ){ + if( (pTerm->eOperator&(WO_EQ|WO_IS))!=0 && pOBExpr->iColumn>=0 ){ const char *z1, *z2; pColl = sqlite3ExprCollSeq(pWInfo->pParse, pOrderBy->a[i].pExpr); if( !pColl ) pColl = db->pDfltColl; @@ -121436,6 +123306,7 @@ static i8 wherePathSatisfiesOrderBy( if( !pColl ) pColl = db->pDfltColl; z2 = pColl->zName; if( sqlite3StrICmp(z1, z2)!=0 ) continue; + testcase( pTerm->pExpr->op==TK_IS ); } obSat |= MASKBIT(i); } @@ -121466,7 +123337,7 @@ static i8 wherePathSatisfiesOrderBy( /* Skip over == and IS NULL terms */ if( ju.btree.nEq && pLoop->nSkip==0 - && ((i = pLoop->aLTerm[j]->eOperator) & (WO_EQ|WO_ISNULL))!=0 + && ((i = pLoop->aLTerm[j]->eOperator) & (WO_EQ|WO_ISNULL|WO_IS))!=0 ){ if( i & WO_ISNULL ){ testcase( isOrderDistinct ); @@ -121560,7 +123431,7 @@ static i8 wherePathSatisfiesOrderBy( Bitmask mTerm; if( MASKBIT(i) & obSat ) continue; p = pOrderBy->a[i].pExpr; - mTerm = exprTableUsage(&pWInfo->sMaskSet,p); + mTerm = sqlite3WhereExprUsage(&pWInfo->sMaskSet,p); if( mTerm==0 && !sqlite3ExprIsConstant(p) ) continue; if( (mTerm&~orderDistinctMask)==0 ){ obSat |= MASKBIT(i); @@ -121983,7 +123854,7 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ pWInfo->revMask = pFrom->revLoop; } if( (pWInfo->wctrlFlags & WHERE_SORTBYGROUP) - && pWInfo->nOBSat==pWInfo->pOrderBy->nExpr + && pWInfo->nOBSat==pWInfo->pOrderBy->nExpr && nLoop>0 ){ Bitmask revMask = 0; int nOrder = wherePathSatisfiesOrderBy(pWInfo, pWInfo->pOrderBy, @@ -122033,14 +123904,15 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){ pItem = pWInfo->pTabList->a; pTab = pItem->pTab; if( IsVirtual(pTab) ) return 0; - if( pItem->zIndex ) return 0; + if( pItem->zIndexedBy ) return 0; iCur = pItem->iCursor; pWC = &pWInfo->sWC; pLoop = pBuilder->pNew; pLoop->wsFlags = 0; pLoop->nSkip = 0; - pTerm = findTerm(pWC, iCur, -1, 0, WO_EQ, 0); + pTerm = sqlite3WhereFindTerm(pWC, iCur, -1, 0, WO_EQ|WO_IS, 0); if( pTerm ){ + testcase( pTerm->eOperator & WO_IS ); pLoop->wsFlags = WHERE_COLUMN_EQ|WHERE_IPK|WHERE_ONEROW; pLoop->aLTerm[0] = pTerm; pLoop->nLTerm = 1; @@ -122049,14 +123921,17 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){ pLoop->rRun = 33; /* 33==sqlite3LogEst(10) */ }else{ for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + int opMask; assert( pLoop->aLTermSpace==pLoop->aLTerm ); if( !IsUniqueIndex(pIdx) || pIdx->pPartIdxWhere!=0 || pIdx->nKeyCol>ArraySize(pLoop->aLTermSpace) ) continue; + opMask = pIdx->uniqNotNull ? (WO_EQ|WO_IS) : WO_EQ; for(j=0; jnKeyCol; j++){ - pTerm = findTerm(pWC, iCur, pIdx->aiColumn[j], 0, WO_EQ, pIdx); + pTerm = sqlite3WhereFindTerm(pWC, iCur, pIdx->aiColumn[j], 0, opMask, pIdx); if( pTerm==0 ) break; + testcase( pTerm->eOperator & WO_IS ); pLoop->aLTerm[j] = pTerm; } if( j!=pIdx->nKeyCol ) continue; @@ -122075,7 +123950,7 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){ if( pLoop->wsFlags ){ pLoop->nOut = (LogEst)1; pWInfo->a[0].pWLoop = pLoop; - pLoop->maskSelf = getMask(&pWInfo->sMaskSet, iCur); + pLoop->maskSelf = sqlite3WhereGetMask(&pWInfo->sMaskSet, iCur); pWInfo->a[0].iTabCur = iCur; pWInfo->nRowOut = 1; if( pWInfo->pOrderBy ) pWInfo->nOBSat = pWInfo->pOrderBy->nExpr; @@ -122269,8 +124144,8 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( ** subexpression is separated by an AND operator. */ initMaskSet(pMaskSet); - whereClauseInit(&pWInfo->sWC, pWInfo); - whereSplit(&pWInfo->sWC, pWhere, TK_AND); + sqlite3WhereClauseInit(&pWInfo->sWC, pWInfo); + sqlite3WhereSplit(&pWInfo->sWC, pWhere, TK_AND); /* Special case: a WHERE clause that is constant. Evaluate the ** expression and either jump over all of the code or fall thru. @@ -122315,22 +124190,16 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( { Bitmask toTheLeft = 0; for(ii=0; iinSrc; ii++){ - Bitmask m = getMask(pMaskSet, pTabList->a[ii].iCursor); + Bitmask m = sqlite3WhereGetMask(pMaskSet, pTabList->a[ii].iCursor); assert( (m-1)==toTheLeft ); toTheLeft |= m; } } #endif - /* Analyze all of the subexpressions. Note that exprAnalyze() might - ** add new virtual terms onto the end of the WHERE clause. We do not - ** want to analyze these virtual terms, so start analyzing at the end - ** and work forward so that the added virtual terms are never processed. - */ - exprAnalyzeAll(pTabList, &pWInfo->sWC); - if( db->mallocFailed ){ - goto whereBeginError; - } + /* Analyze all of the subexpressions. */ + sqlite3WhereExprAnalyze(pTabList, &pWInfo->sWC); + if( db->mallocFailed ) goto whereBeginError; if( wctrlFlags & WHERE_WANT_DISTINCT ){ if( isDistinctRedundant(pParse, pTabList, &pWInfo->sWC, pResultSet) ){ @@ -122346,8 +124215,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( /* Construct the WhereLoop objects */ WHERETRACE(0xffff,("*** Optimizer Start ***\n")); #if defined(WHERETRACE_ENABLED) - /* Display all terms of the WHERE clause */ - if( sqlite3WhereTrace & 0x100 ){ + if( sqlite3WhereTrace & 0x100 ){ /* Display all terms of the WHERE clause */ int i; for(i=0; inTerm; i++){ whereTermPrint(&sWLB.pWC->a[i], i); @@ -122359,13 +124227,12 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( rc = whereLoopAddAll(&sWLB); if( rc ) goto whereBeginError; - /* Display all of the WhereLoop objects if wheretrace is enabled */ -#ifdef WHERETRACE_ENABLED /* !=0 */ - if( sqlite3WhereTrace ){ +#ifdef WHERETRACE_ENABLED + if( sqlite3WhereTrace ){ /* Display all of the WhereLoop objects */ WhereLoop *p; int i; - static char zLabel[] = "0123456789abcdefghijklmnopqrstuvwyxz" - "ABCDEFGHIJKLMNOPQRSTUVWYXZ"; + static const char zLabel[] = "0123456789abcdefghijklmnopqrstuvwyxz" + "ABCDEFGHIJKLMNOPQRSTUVWYXZ"; for(p=pWInfo->pLoops, i=0; p; p=p->pNextLoop, i++){ p->cId = zLabel[i%sizeof(zLabel)]; whereLoopPrint(p, sWLB.pWC); @@ -122386,9 +124253,8 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( if( pParse->nErr || NEVER(db->mallocFailed) ){ goto whereBeginError; } -#ifdef WHERETRACE_ENABLED /* !=0 */ +#ifdef WHERETRACE_ENABLED if( sqlite3WhereTrace ){ - int ii; sqlite3DebugPrintf("---- Solution nRow=%d", pWInfo->nRowOut); if( pWInfo->nOBSat>0 ){ sqlite3DebugPrintf(" ORDERBY=%d,0x%llx", pWInfo->nOBSat, pWInfo->revMask); @@ -122418,8 +124284,10 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( && pResultSet!=0 && OptimizationEnabled(db, SQLITE_OmitNoopJoin) ){ - Bitmask tabUsed = exprListTableUsage(pMaskSet, pResultSet); - if( sWLB.pOrderBy ) tabUsed |= exprListTableUsage(pMaskSet, sWLB.pOrderBy); + Bitmask tabUsed = sqlite3WhereExprListUsage(pMaskSet, pResultSet); + if( sWLB.pOrderBy ){ + tabUsed |= sqlite3WhereExprListUsage(pMaskSet, sWLB.pOrderBy); + } while( pWInfo->nLevel>=2 ){ WhereTerm *pTerm, *pEnd; pLoop = pWInfo->a[pWInfo->nLevel-1].pWLoop; @@ -122450,7 +124318,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( /* If the caller is an UPDATE or DELETE statement that is requesting ** to use a one-pass algorithm, determine if this is appropriate. ** The one-pass algorithm only works if the WHERE clause constrains - ** the statement to update a single row. + ** the statement to update or delete a single row. */ assert( (wctrlFlags & WHERE_ONEPASS_DESIRED)==0 || pWInfo->nLevel==1 ); if( (wctrlFlags & WHERE_ONEPASS_DESIRED)!=0 @@ -122464,7 +124332,6 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( /* Open all tables in the pTabList and any indices selected for ** searching those tables. */ - notReady = ~(Bitmask)0; for(ii=0, pLevel=pWInfo->a; iinCol ); } +#ifdef SQLITE_ENABLE_COLUMN_USED_MASK + sqlite3VdbeAddOp4Dup8(v, OP_ColumnsUsed, pTabItem->iCursor, 0, 0, + (const u8*)&pTabItem->colUsed, P4_INT64); +#endif }else{ sqlite3TableLock(pParse, iDb, pTab->tnum, 0, pTab->zName); } @@ -122550,10 +124421,24 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( sqlite3VdbeChangeP5(v, OPFLAG_SEEKEQ); /* Hint to COMDB2 */ } VdbeComment((v, "%s", pIx->zName)); +#ifdef SQLITE_ENABLE_COLUMN_USED_MASK + { + u64 colUsed = 0; + int ii, jj; + for(ii=0; iinColumn; ii++){ + jj = pIx->aiColumn[ii]; + if( jj<0 ) continue; + if( jj>63 ) jj = 63; + if( (pTabItem->colUsed & MASKBIT(jj))==0 ) continue; + colUsed |= ((u64)1)<<(ii<63 ? ii : 63); + } + sqlite3VdbeAddOp4Dup8(v, OP_ColumnsUsed, iIndexCur, 0, 0, + (u8*)&colUsed, P4_INT64); + } +#endif /* SQLITE_ENABLE_COLUMN_USED_MASK */ } } if( iDb>=0 ) sqlite3CodeVerifySchema(pParse, iDb); - notReady &= ~getMask(&pWInfo->sMaskSet, pTabItem->iCursor); } pWInfo->iTop = sqlite3VdbeCurrentAddr(v); if( db->mallocFailed ) goto whereBeginError; @@ -122575,14 +124460,14 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( if( db->mallocFailed ) goto whereBeginError; } #endif - addrExplain = explainOneScan( + addrExplain = sqlite3WhereExplainOneScan( pParse, pTabList, pLevel, ii, pLevel->iFrom, wctrlFlags ); pLevel->addrBody = sqlite3VdbeCurrentAddr(v); - notReady = codeOneLoopStart(pWInfo, ii, notReady); + notReady = sqlite3WhereCodeOneLoopStart(pWInfo, ii, notReady); pWInfo->iContinue = pLevel->addrCont; if( (wsFlags&WHERE_MULTI_OR)==0 && (wctrlFlags&WHERE_ONETABLE_ONLY)==0 ){ - addScanStatus(v, pTabList, pLevel, addrExplain); + sqlite3WhereAddScanStatus(v, pTabList, pLevel, addrExplain); } } @@ -122641,7 +124526,6 @@ SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo *pWInfo){ VdbeCoverageIf(v, pIn->eEndLoopOp==OP_NextIfOpen); sqlite3VdbeJumpHere(v, pIn->addrInTop-1); } - sqlite3DbFree(db, pLevel->u.in.aInLoop); } sqlite3VdbeResolveLabel(v, pLevel->addrBrk); if( pLevel->addrSkip ){ @@ -122697,26 +124581,12 @@ SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo *pWInfo){ pLoop = pLevel->pWLoop; /* For a co-routine, change all OP_Column references to the table of - ** the co-routine into OP_SCopy of result contained in a register. + ** the co-routine into OP_Copy of result contained in a register. ** OP_Rowid becomes OP_Null. */ if( pTabItem->viaCoroutine && !db->mallocFailed ){ - last = sqlite3VdbeCurrentAddr(v); - k = pLevel->addrBody; - pOp = sqlite3VdbeGetOp(v, k); - for(; kp1!=pLevel->iTabCur ) continue; - if( pOp->opcode==OP_Column ){ - pOp->opcode = OP_Copy; - pOp->p1 = pOp->p2 + pTabItem->regResult; - pOp->p2 = pOp->p3; - pOp->p3 = 0; - }else if( pOp->opcode==OP_Rowid ){ - pOp->opcode = OP_Null; - pOp->p1 = 0; - pOp->p3 = 0; - } - } + translateColumnToCopy(v, pLevel->addrBody, pLevel->iTabCur, + pTabItem->regResult); continue; } @@ -122806,6 +124676,7 @@ SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo *pWInfo){ ** in the input grammar file. */ /* #include */ +/* #include "sqliteInt.h" */ /* ** Disable all error recovery processing in the parser push-down @@ -122853,6 +124724,28 @@ struct TrigEvent { int a; IdList * b; }; struct AttachKey { int type; Token key; }; + /* + ** For a compound SELECT statement, make sure p->pPrior->pNext==p for + ** all elements in the list. And make sure list length does not exceed + ** SQLITE_LIMIT_COMPOUND_SELECT. + */ + static void parserDoubleLinkSelect(Parse *pParse, Select *p){ + if( p->pPrior ){ + Select *pNext = 0, *pLoop; + int mxSelect, cnt = 0; + for(pLoop=p; pLoop; pNext=pLoop, pLoop=pLoop->pPrior, cnt++){ + pLoop->pNext = pNext; + pLoop->selFlags |= SF_Compound; + } + if( (p->selFlags & SF_MultiValue)==0 && + (mxSelect = pParse->db->aLimit[SQLITE_LIMIT_COMPOUND_SELECT])>0 && + cnt>mxSelect + ){ + sqlite3ErrorMsg(pParse, "too many terms in compound SELECT"); + } + } + } + /* This is a utility routine used to set the ExprSpan.zStart and ** ExprSpan.zEnd values of pOut so that the span covers the complete ** range of text beginning with pStart and going to the end of pEnd. @@ -124261,7 +126154,7 @@ static int yy_pop_parser_stack(yyParser *pParser){ /* There is no mechanism by which the parser stack can be popped below ** empty in SQLite. */ - if( NEVER(pParser->yyidx<0) ) return 0; + assert( pParser->yyidx>=0 ); #ifndef NDEBUG if( yyTraceFILE && pParser->yyidx>=0 ){ fprintf(yyTraceFILE,"%sPopping %s\n", @@ -124962,7 +126855,7 @@ static void yy_reduce( case 35: /* table_options ::= WITHOUT nm */ { if( yymsp[0].minor.yy0.n==5 && sqlite3_strnicmp(yymsp[0].minor.yy0.z,"rowid",5)==0 ){ - yygotominor.yy186 = TF_WithoutRowid; + yygotominor.yy186 = TF_WithoutRowid | TF_NoVisibleRowid; }else{ yygotominor.yy186 = 0; sqlite3ErrorMsg(pParse, "unknown table option: %.*s", yymsp[0].minor.yy0.n, yymsp[0].minor.yy0.z); @@ -125169,27 +127062,10 @@ static void yy_reduce( break; case 112: /* select ::= with selectnowith */ { - Select *p = yymsp[0].minor.yy3, *pNext, *pLoop; + Select *p = yymsp[0].minor.yy3; if( p ){ - int cnt = 0, mxSelect; p->pWith = yymsp[-1].minor.yy59; - if( p->pPrior ){ - u16 allValues = SF_Values; - pNext = 0; - for(pLoop=p; pLoop; pNext=pLoop, pLoop=pLoop->pPrior, cnt++){ - pLoop->pNext = pNext; - pLoop->selFlags |= SF_Compound; - allValues &= pLoop->selFlags; - } - if( allValues ){ - p->selFlags |= SF_AllValues; - }else if( - (mxSelect = pParse->db->aLimit[SQLITE_LIMIT_COMPOUND_SELECT])>0 - && cnt>mxSelect - ){ - sqlite3ErrorMsg(pParse, "too many terms in compound SELECT"); - } - } + parserDoubleLinkSelect(pParse, p); }else{ sqlite3WithDelete(pParse->db, yymsp[-1].minor.yy59); } @@ -125203,19 +127079,23 @@ static void yy_reduce( case 114: /* selectnowith ::= selectnowith multiselect_op oneselect */ { Select *pRhs = yymsp[0].minor.yy3; + Select *pLhs = yymsp[-2].minor.yy3; if( pRhs && pRhs->pPrior ){ SrcList *pFrom; Token x; x.n = 0; + parserDoubleLinkSelect(pParse, pRhs); pFrom = sqlite3SrcListAppendFromTerm(pParse,0,0,0,&x,pRhs,0,0); pRhs = sqlite3SelectNew(pParse,0,pFrom,0,0,0,0,0,0,0); } if( pRhs ){ pRhs->op = (u8)yymsp[-1].minor.yy328; - pRhs->pPrior = yymsp[-2].minor.yy3; + pRhs->pPrior = pLhs; + if( ALWAYS(pLhs) ) pLhs->selFlags &= ~SF_MultiValue; + pRhs->selFlags &= ~SF_MultiValue; if( yymsp[-1].minor.yy328!=TK_ALL ) pParse->hasCompound = 1; }else{ - sqlite3SelectDelete(pParse->db, yymsp[-2].minor.yy3); + sqlite3SelectDelete(pParse->db, pLhs); } yygotominor.yy3 = pRhs; } @@ -125259,13 +127139,16 @@ static void yy_reduce( break; case 121: /* values ::= values COMMA LP exprlist RP */ { - Select *pRight = sqlite3SelectNew(pParse,yymsp[-1].minor.yy14,0,0,0,0,0,SF_Values,0,0); + Select *pRight, *pLeft = yymsp[-4].minor.yy3; + pRight = sqlite3SelectNew(pParse,yymsp[-1].minor.yy14,0,0,0,0,0,SF_Values|SF_MultiValue,0,0); + if( ALWAYS(pLeft) ) pLeft->selFlags &= ~SF_MultiValue; if( pRight ){ pRight->op = TK_ALL; - pRight->pPrior = yymsp[-4].minor.yy3; + pLeft = yymsp[-4].minor.yy3; + pRight->pPrior = pLeft; yygotominor.yy3 = pRight; }else{ - yygotominor.yy3 = yymsp[-4].minor.yy3; + yygotominor.yy3 = pLeft; } } break; @@ -125273,7 +127156,9 @@ static void yy_reduce( {yygotominor.yy381 = SF_Distinct;} break; case 123: /* distinct ::= ALL */ - case 124: /* distinct ::= */ yytestcase(yyruleno==124); +{yygotominor.yy381 = SF_All;} + break; + case 124: /* distinct ::= */ {yygotominor.yy381 = 0;} break; case 125: /* sclp ::= selcollist COMMA */ @@ -125568,7 +127453,7 @@ static void yy_reduce( } yygotominor.yy346.pExpr = sqlite3ExprFunction(pParse, yymsp[-1].minor.yy14, &yymsp[-4].minor.yy0); spanSet(&yygotominor.yy346,&yymsp[-4].minor.yy0,&yymsp[0].minor.yy0); - if( yymsp[-2].minor.yy381 && yygotominor.yy346.pExpr ){ + if( yymsp[-2].minor.yy381==SF_Distinct && yygotominor.yy346.pExpr ){ yygotominor.yy346.pExpr->flags |= EP_Distinct; } } @@ -126124,12253 +128009,14594 @@ static void yy_reduce( { yy_shift(yypParser,yyact,yygoto,&yygotominor); } - }else{ - assert( yyact == YYNSTATE + YYNRULE + 1 ); - yy_accept(yypParser); + }else{ + assert( yyact == YYNSTATE + YYNRULE + 1 ); + yy_accept(yypParser); + } +} + +/* +** The following code executes when the parse fails +*/ +#ifndef YYNOERRORRECOVERY +static void yy_parse_failed( + yyParser *yypParser /* The parser */ +){ + sqlite3ParserARG_FETCH; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sFail!\n",yyTracePrompt); + } +#endif + while( yypParser->yyidx>=0 ) yy_pop_parser_stack(yypParser); + /* Here code is inserted which will be executed whenever the + ** parser fails */ + sqlite3ParserARG_STORE; /* Suppress warning about unused %extra_argument variable */ +} +#endif /* YYNOERRORRECOVERY */ + +/* +** The following code executes when a syntax error first occurs. +*/ +static void yy_syntax_error( + yyParser *yypParser, /* The parser */ + int yymajor, /* The major type of the error token */ + YYMINORTYPE yyminor /* The minor type of the error token */ +){ + sqlite3ParserARG_FETCH; +#define TOKEN (yyminor.yy0) + + UNUSED_PARAMETER(yymajor); /* Silence some compiler warnings */ + assert( TOKEN.z[0] ); /* The tokenizer always gives us a token */ + sqlite3ErrorMsg(pParse, "near \"%T\": syntax error", &TOKEN); + sqlite3ParserARG_STORE; /* Suppress warning about unused %extra_argument variable */ +} + +/* +** The following is executed when the parser accepts +*/ +static void yy_accept( + yyParser *yypParser /* The parser */ +){ + sqlite3ParserARG_FETCH; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sAccept!\n",yyTracePrompt); + } +#endif + while( yypParser->yyidx>=0 ) yy_pop_parser_stack(yypParser); + /* Here code is inserted which will be executed whenever the + ** parser accepts */ + sqlite3ParserARG_STORE; /* Suppress warning about unused %extra_argument variable */ +} + +/* The main parser program. +** The first argument is a pointer to a structure obtained from +** "sqlite3ParserAlloc" which describes the current state of the parser. +** The second argument is the major token number. The third is +** the minor token. The fourth optional argument is whatever the +** user wants (and specified in the grammar) and is available for +** use by the action routines. +** +** Inputs: +**
      +**
    • A pointer to the parser (an opaque structure.) +**
    • The major token number. +**
    • The minor token number. +**
    • An option argument of a grammar-specified type. +**
    +** +** Outputs: +** None. +*/ +SQLITE_PRIVATE void sqlite3Parser( + void *yyp, /* The parser */ + int yymajor, /* The major token code number */ + sqlite3ParserTOKENTYPE yyminor /* The value for the token */ + sqlite3ParserARG_PDECL /* Optional %extra_argument parameter */ +){ + YYMINORTYPE yyminorunion; + int yyact; /* The parser action. */ +#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY) + int yyendofinput; /* True if we are at the end of input */ +#endif +#ifdef YYERRORSYMBOL + int yyerrorhit = 0; /* True if yymajor has invoked an error */ +#endif + yyParser *yypParser; /* The parser */ + + /* (re)initialize the parser, if necessary */ + yypParser = (yyParser*)yyp; + if( yypParser->yyidx<0 ){ +#if YYSTACKDEPTH<=0 + if( yypParser->yystksz <=0 ){ + /*memset(&yyminorunion, 0, sizeof(yyminorunion));*/ + yyminorunion = yyzerominor; + yyStackOverflow(yypParser, &yyminorunion); + return; + } +#endif + yypParser->yyidx = 0; + yypParser->yyerrcnt = -1; + yypParser->yystack[0].stateno = 0; + yypParser->yystack[0].major = 0; + } + yyminorunion.yy0 = yyminor; +#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY) + yyendofinput = (yymajor==0); +#endif + sqlite3ParserARG_STORE; + +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sInput %s\n",yyTracePrompt,yyTokenName[yymajor]); + } +#endif + + do{ + yyact = yy_find_shift_action(yypParser,(YYCODETYPE)yymajor); + if( yyactyyerrcnt--; + yymajor = YYNOCODE; + }else if( yyact < YYNSTATE + YYNRULE ){ + yy_reduce(yypParser,yyact-YYNSTATE); + }else{ + assert( yyact == YY_ERROR_ACTION ); +#ifdef YYERRORSYMBOL + int yymx; +#endif +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sSyntax Error!\n",yyTracePrompt); + } +#endif +#ifdef YYERRORSYMBOL + /* A syntax error has occurred. + ** The response to an error depends upon whether or not the + ** grammar defines an error token "ERROR". + ** + ** This is what we do if the grammar does define ERROR: + ** + ** * Call the %syntax_error function. + ** + ** * Begin popping the stack until we enter a state where + ** it is legal to shift the error symbol, then shift + ** the error symbol. + ** + ** * Set the error count to three. + ** + ** * Begin accepting and shifting new tokens. No new error + ** processing will occur until three tokens have been + ** shifted successfully. + ** + */ + if( yypParser->yyerrcnt<0 ){ + yy_syntax_error(yypParser,yymajor,yyminorunion); + } + yymx = yypParser->yystack[yypParser->yyidx].major; + if( yymx==YYERRORSYMBOL || yyerrorhit ){ +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sDiscard input token %s\n", + yyTracePrompt,yyTokenName[yymajor]); + } +#endif + yy_destructor(yypParser, (YYCODETYPE)yymajor,&yyminorunion); + yymajor = YYNOCODE; + }else{ + while( + yypParser->yyidx >= 0 && + yymx != YYERRORSYMBOL && + (yyact = yy_find_reduce_action( + yypParser->yystack[yypParser->yyidx].stateno, + YYERRORSYMBOL)) >= YYNSTATE + ){ + yy_pop_parser_stack(yypParser); + } + if( yypParser->yyidx < 0 || yymajor==0 ){ + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + yy_parse_failed(yypParser); + yymajor = YYNOCODE; + }else if( yymx!=YYERRORSYMBOL ){ + YYMINORTYPE u2; + u2.YYERRSYMDT = 0; + yy_shift(yypParser,yyact,YYERRORSYMBOL,&u2); + } + } + yypParser->yyerrcnt = 3; + yyerrorhit = 1; +#elif defined(YYNOERRORRECOVERY) + /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to + ** do any kind of error recovery. Instead, simply invoke the syntax + ** error routine and continue going as if nothing had happened. + ** + ** Applications can set this macro (for example inside %include) if + ** they intend to abandon the parse upon the first syntax error seen. + */ + yy_syntax_error(yypParser,yymajor,yyminorunion); + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + yymajor = YYNOCODE; + +#else /* YYERRORSYMBOL is not defined */ + /* This is what we do if the grammar does not define ERROR: + ** + ** * Report an error message, and throw away the input token. + ** + ** * If the input token is $, then fail the parse. + ** + ** As before, subsequent error messages are suppressed until + ** three input tokens have been successfully shifted. + */ + if( yypParser->yyerrcnt<=0 ){ + yy_syntax_error(yypParser,yymajor,yyminorunion); + } + yypParser->yyerrcnt = 3; + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + if( yyendofinput ){ + yy_parse_failed(yypParser); + } + yymajor = YYNOCODE; +#endif + } + }while( yymajor!=YYNOCODE && yypParser->yyidx>=0 ); + return; +} + +/************** End of parse.c ***********************************************/ +/************** Begin file tokenize.c ****************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** An tokenizer for SQL +** +** This file contains C code that splits an SQL input string up into +** individual tokens and sends those tokens one-by-one over to the +** parser for analysis. +*/ +/* #include "sqliteInt.h" */ +/* #include */ + +/* +** The charMap() macro maps alphabetic characters into their +** lower-case ASCII equivalent. On ASCII machines, this is just +** an upper-to-lower case map. On EBCDIC machines we also need +** to adjust the encoding. Only alphabetic characters and underscores +** need to be translated. +*/ +#ifdef SQLITE_ASCII +# define charMap(X) sqlite3UpperToLower[(unsigned char)X] +#endif +#ifdef SQLITE_EBCDIC +# define charMap(X) ebcdicToAscii[(unsigned char)X] +const unsigned char ebcdicToAscii[] = { +/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, /* 6x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7x */ + 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* 8x */ + 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* 9x */ + 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ax */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ + 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* Cx */ + 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* Dx */ + 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ex */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Fx */ +}; +#endif + +/* +** The sqlite3KeywordCode function looks up an identifier to determine if +** it is a keyword. If it is a keyword, the token code of that keyword is +** returned. If the input is not a keyword, TK_ID is returned. +** +** The implementation of this routine was generated by a program, +** mkkeywordhash.h, located in the tool subdirectory of the distribution. +** The output of the mkkeywordhash.c program is written into a file +** named keywordhash.h and then included into this source file by +** the #include below. +*/ +/************** Include keywordhash.h in the middle of tokenize.c ************/ +/************** Begin file keywordhash.h *************************************/ +/***** This file contains automatically generated code ****** +** +** The code in this file has been automatically generated by +** +** sqlite/tool/mkkeywordhash.c +** +** The code in this file implements a function that determines whether +** or not a given identifier is really an SQL keyword. The same thing +** might be implemented more directly using a hand-written hash table. +** But by using this automatically generated code, the size of the code +** is substantially reduced. This is important for embedded applications +** on platforms with limited memory. +*/ +/* Hash score: 182 */ +static int keywordCode(const char *z, int n){ + /* zText[] encodes 834 bytes of keywords in 554 bytes */ + /* REINDEXEDESCAPEACHECKEYBEFOREIGNOREGEXPLAINSTEADDATABASELECT */ + /* ABLEFTHENDEFERRABLELSEXCEPTRANSACTIONATURALTERAISEXCLUSIVE */ + /* XISTSAVEPOINTERSECTRIGGEREFERENCESCONSTRAINTOFFSETEMPORARY */ + /* UNIQUERYWITHOUTERELEASEATTACHAVINGROUPDATEBEGINNERECURSIVE */ + /* BETWEENOTNULLIKECASCADELETECASECOLLATECREATECURRENT_DATEDETACH */ + /* IMMEDIATEJOINSERTMATCHPLANALYZEPRAGMABORTVALUESVIRTUALIMITWHEN */ + /* WHERENAMEAFTEREPLACEANDEFAULTAUTOINCREMENTCASTCOLUMNCOMMIT */ + /* CONFLICTCROSSCURRENT_TIMESTAMPRIMARYDEFERREDISTINCTDROPFAIL */ + /* FROMFULLGLOBYIFISNULLORDERESTRICTRIGHTROLLBACKROWUNIONUSING */ + /* VACUUMVIEWINITIALLY */ + static const char zText[553] = { + 'R','E','I','N','D','E','X','E','D','E','S','C','A','P','E','A','C','H', + 'E','C','K','E','Y','B','E','F','O','R','E','I','G','N','O','R','E','G', + 'E','X','P','L','A','I','N','S','T','E','A','D','D','A','T','A','B','A', + 'S','E','L','E','C','T','A','B','L','E','F','T','H','E','N','D','E','F', + 'E','R','R','A','B','L','E','L','S','E','X','C','E','P','T','R','A','N', + 'S','A','C','T','I','O','N','A','T','U','R','A','L','T','E','R','A','I', + 'S','E','X','C','L','U','S','I','V','E','X','I','S','T','S','A','V','E', + 'P','O','I','N','T','E','R','S','E','C','T','R','I','G','G','E','R','E', + 'F','E','R','E','N','C','E','S','C','O','N','S','T','R','A','I','N','T', + 'O','F','F','S','E','T','E','M','P','O','R','A','R','Y','U','N','I','Q', + 'U','E','R','Y','W','I','T','H','O','U','T','E','R','E','L','E','A','S', + 'E','A','T','T','A','C','H','A','V','I','N','G','R','O','U','P','D','A', + 'T','E','B','E','G','I','N','N','E','R','E','C','U','R','S','I','V','E', + 'B','E','T','W','E','E','N','O','T','N','U','L','L','I','K','E','C','A', + 'S','C','A','D','E','L','E','T','E','C','A','S','E','C','O','L','L','A', + 'T','E','C','R','E','A','T','E','C','U','R','R','E','N','T','_','D','A', + 'T','E','D','E','T','A','C','H','I','M','M','E','D','I','A','T','E','J', + 'O','I','N','S','E','R','T','M','A','T','C','H','P','L','A','N','A','L', + 'Y','Z','E','P','R','A','G','M','A','B','O','R','T','V','A','L','U','E', + 'S','V','I','R','T','U','A','L','I','M','I','T','W','H','E','N','W','H', + 'E','R','E','N','A','M','E','A','F','T','E','R','E','P','L','A','C','E', + 'A','N','D','E','F','A','U','L','T','A','U','T','O','I','N','C','R','E', + 'M','E','N','T','C','A','S','T','C','O','L','U','M','N','C','O','M','M', + 'I','T','C','O','N','F','L','I','C','T','C','R','O','S','S','C','U','R', + 'R','E','N','T','_','T','I','M','E','S','T','A','M','P','R','I','M','A', + 'R','Y','D','E','F','E','R','R','E','D','I','S','T','I','N','C','T','D', + 'R','O','P','F','A','I','L','F','R','O','M','F','U','L','L','G','L','O', + 'B','Y','I','F','I','S','N','U','L','L','O','R','D','E','R','E','S','T', + 'R','I','C','T','R','I','G','H','T','R','O','L','L','B','A','C','K','R', + 'O','W','U','N','I','O','N','U','S','I','N','G','V','A','C','U','U','M', + 'V','I','E','W','I','N','I','T','I','A','L','L','Y', + }; + static const unsigned char aHash[127] = { + 76, 105, 117, 74, 0, 45, 0, 0, 82, 0, 77, 0, 0, + 42, 12, 78, 15, 0, 116, 85, 54, 112, 0, 19, 0, 0, + 121, 0, 119, 115, 0, 22, 93, 0, 9, 0, 0, 70, 71, + 0, 69, 6, 0, 48, 90, 102, 0, 118, 101, 0, 0, 44, + 0, 103, 24, 0, 17, 0, 122, 53, 23, 0, 5, 110, 25, + 96, 0, 0, 124, 106, 60, 123, 57, 28, 55, 0, 91, 0, + 100, 26, 0, 99, 0, 0, 0, 95, 92, 97, 88, 109, 14, + 39, 108, 0, 81, 0, 18, 89, 111, 32, 0, 120, 80, 113, + 62, 46, 84, 0, 0, 94, 40, 59, 114, 0, 36, 0, 0, + 29, 0, 86, 63, 64, 0, 20, 61, 0, 56, + }; + static const unsigned char aNext[124] = { + 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, + 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 33, 0, 21, 0, 0, 0, 0, 0, 50, + 0, 43, 3, 47, 0, 0, 0, 0, 30, 0, 58, 0, 38, + 0, 0, 0, 1, 66, 0, 0, 67, 0, 41, 0, 0, 0, + 0, 0, 0, 49, 65, 0, 0, 0, 0, 31, 52, 16, 34, + 10, 0, 0, 0, 0, 0, 0, 0, 11, 72, 79, 0, 8, + 0, 104, 98, 0, 107, 0, 87, 0, 75, 51, 0, 27, 37, + 73, 83, 0, 35, 68, 0, 0, + }; + static const unsigned char aLen[124] = { + 7, 7, 5, 4, 6, 4, 5, 3, 6, 7, 3, 6, 6, + 7, 7, 3, 8, 2, 6, 5, 4, 4, 3, 10, 4, 6, + 11, 6, 2, 7, 5, 5, 9, 6, 9, 9, 7, 10, 10, + 4, 6, 2, 3, 9, 4, 2, 6, 5, 7, 4, 5, 7, + 6, 6, 5, 6, 5, 5, 9, 7, 7, 3, 2, 4, 4, + 7, 3, 6, 4, 7, 6, 12, 6, 9, 4, 6, 5, 4, + 7, 6, 5, 6, 7, 5, 4, 5, 6, 5, 7, 3, 7, + 13, 2, 2, 4, 6, 6, 8, 5, 17, 12, 7, 8, 8, + 2, 4, 4, 4, 4, 4, 2, 2, 6, 5, 8, 5, 8, + 3, 5, 5, 6, 4, 9, 3, + }; + static const unsigned short int aOffset[124] = { + 0, 2, 2, 8, 9, 14, 16, 20, 23, 25, 25, 29, 33, + 36, 41, 46, 48, 53, 54, 59, 62, 65, 67, 69, 78, 81, + 86, 91, 95, 96, 101, 105, 109, 117, 122, 128, 136, 142, 152, + 159, 162, 162, 165, 167, 167, 171, 176, 179, 184, 184, 188, 192, + 199, 204, 209, 212, 218, 221, 225, 234, 240, 240, 240, 243, 246, + 250, 251, 255, 261, 265, 272, 278, 290, 296, 305, 307, 313, 318, + 320, 327, 332, 337, 343, 349, 354, 358, 361, 367, 371, 378, 380, + 387, 389, 391, 400, 404, 410, 416, 424, 429, 429, 445, 452, 459, + 460, 467, 471, 475, 479, 483, 486, 488, 490, 496, 500, 508, 513, + 521, 524, 529, 534, 540, 544, 549, + }; + static const unsigned char aCode[124] = { + TK_REINDEX, TK_INDEXED, TK_INDEX, TK_DESC, TK_ESCAPE, + TK_EACH, TK_CHECK, TK_KEY, TK_BEFORE, TK_FOREIGN, + TK_FOR, TK_IGNORE, TK_LIKE_KW, TK_EXPLAIN, TK_INSTEAD, + TK_ADD, TK_DATABASE, TK_AS, TK_SELECT, TK_TABLE, + TK_JOIN_KW, TK_THEN, TK_END, TK_DEFERRABLE, TK_ELSE, + TK_EXCEPT, TK_TRANSACTION,TK_ACTION, TK_ON, TK_JOIN_KW, + TK_ALTER, TK_RAISE, TK_EXCLUSIVE, TK_EXISTS, TK_SAVEPOINT, + TK_INTERSECT, TK_TRIGGER, TK_REFERENCES, TK_CONSTRAINT, TK_INTO, + TK_OFFSET, TK_OF, TK_SET, TK_TEMP, TK_TEMP, + TK_OR, TK_UNIQUE, TK_QUERY, TK_WITHOUT, TK_WITH, + TK_JOIN_KW, TK_RELEASE, TK_ATTACH, TK_HAVING, TK_GROUP, + TK_UPDATE, TK_BEGIN, TK_JOIN_KW, TK_RECURSIVE, TK_BETWEEN, + TK_NOTNULL, TK_NOT, TK_NO, TK_NULL, TK_LIKE_KW, + TK_CASCADE, TK_ASC, TK_DELETE, TK_CASE, TK_COLLATE, + TK_CREATE, TK_CTIME_KW, TK_DETACH, TK_IMMEDIATE, TK_JOIN, + TK_INSERT, TK_MATCH, TK_PLAN, TK_ANALYZE, TK_PRAGMA, + TK_ABORT, TK_VALUES, TK_VIRTUAL, TK_LIMIT, TK_WHEN, + TK_WHERE, TK_RENAME, TK_AFTER, TK_REPLACE, TK_AND, + TK_DEFAULT, TK_AUTOINCR, TK_TO, TK_IN, TK_CAST, + TK_COLUMNKW, TK_COMMIT, TK_CONFLICT, TK_JOIN_KW, TK_CTIME_KW, + TK_CTIME_KW, TK_PRIMARY, TK_DEFERRED, TK_DISTINCT, TK_IS, + TK_DROP, TK_FAIL, TK_FROM, TK_JOIN_KW, TK_LIKE_KW, + TK_BY, TK_IF, TK_ISNULL, TK_ORDER, TK_RESTRICT, + TK_JOIN_KW, TK_ROLLBACK, TK_ROW, TK_UNION, TK_USING, + TK_VACUUM, TK_VIEW, TK_INITIALLY, TK_ALL, + }; + int h, i; + if( n<2 ) return TK_ID; + h = ((charMap(z[0])*4) ^ + (charMap(z[n-1])*3) ^ + n) % 127; + for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){ + if( aLen[i]==n && sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){ + testcase( i==0 ); /* REINDEX */ + testcase( i==1 ); /* INDEXED */ + testcase( i==2 ); /* INDEX */ + testcase( i==3 ); /* DESC */ + testcase( i==4 ); /* ESCAPE */ + testcase( i==5 ); /* EACH */ + testcase( i==6 ); /* CHECK */ + testcase( i==7 ); /* KEY */ + testcase( i==8 ); /* BEFORE */ + testcase( i==9 ); /* FOREIGN */ + testcase( i==10 ); /* FOR */ + testcase( i==11 ); /* IGNORE */ + testcase( i==12 ); /* REGEXP */ + testcase( i==13 ); /* EXPLAIN */ + testcase( i==14 ); /* INSTEAD */ + testcase( i==15 ); /* ADD */ + testcase( i==16 ); /* DATABASE */ + testcase( i==17 ); /* AS */ + testcase( i==18 ); /* SELECT */ + testcase( i==19 ); /* TABLE */ + testcase( i==20 ); /* LEFT */ + testcase( i==21 ); /* THEN */ + testcase( i==22 ); /* END */ + testcase( i==23 ); /* DEFERRABLE */ + testcase( i==24 ); /* ELSE */ + testcase( i==25 ); /* EXCEPT */ + testcase( i==26 ); /* TRANSACTION */ + testcase( i==27 ); /* ACTION */ + testcase( i==28 ); /* ON */ + testcase( i==29 ); /* NATURAL */ + testcase( i==30 ); /* ALTER */ + testcase( i==31 ); /* RAISE */ + testcase( i==32 ); /* EXCLUSIVE */ + testcase( i==33 ); /* EXISTS */ + testcase( i==34 ); /* SAVEPOINT */ + testcase( i==35 ); /* INTERSECT */ + testcase( i==36 ); /* TRIGGER */ + testcase( i==37 ); /* REFERENCES */ + testcase( i==38 ); /* CONSTRAINT */ + testcase( i==39 ); /* INTO */ + testcase( i==40 ); /* OFFSET */ + testcase( i==41 ); /* OF */ + testcase( i==42 ); /* SET */ + testcase( i==43 ); /* TEMPORARY */ + testcase( i==44 ); /* TEMP */ + testcase( i==45 ); /* OR */ + testcase( i==46 ); /* UNIQUE */ + testcase( i==47 ); /* QUERY */ + testcase( i==48 ); /* WITHOUT */ + testcase( i==49 ); /* WITH */ + testcase( i==50 ); /* OUTER */ + testcase( i==51 ); /* RELEASE */ + testcase( i==52 ); /* ATTACH */ + testcase( i==53 ); /* HAVING */ + testcase( i==54 ); /* GROUP */ + testcase( i==55 ); /* UPDATE */ + testcase( i==56 ); /* BEGIN */ + testcase( i==57 ); /* INNER */ + testcase( i==58 ); /* RECURSIVE */ + testcase( i==59 ); /* BETWEEN */ + testcase( i==60 ); /* NOTNULL */ + testcase( i==61 ); /* NOT */ + testcase( i==62 ); /* NO */ + testcase( i==63 ); /* NULL */ + testcase( i==64 ); /* LIKE */ + testcase( i==65 ); /* CASCADE */ + testcase( i==66 ); /* ASC */ + testcase( i==67 ); /* DELETE */ + testcase( i==68 ); /* CASE */ + testcase( i==69 ); /* COLLATE */ + testcase( i==70 ); /* CREATE */ + testcase( i==71 ); /* CURRENT_DATE */ + testcase( i==72 ); /* DETACH */ + testcase( i==73 ); /* IMMEDIATE */ + testcase( i==74 ); /* JOIN */ + testcase( i==75 ); /* INSERT */ + testcase( i==76 ); /* MATCH */ + testcase( i==77 ); /* PLAN */ + testcase( i==78 ); /* ANALYZE */ + testcase( i==79 ); /* PRAGMA */ + testcase( i==80 ); /* ABORT */ + testcase( i==81 ); /* VALUES */ + testcase( i==82 ); /* VIRTUAL */ + testcase( i==83 ); /* LIMIT */ + testcase( i==84 ); /* WHEN */ + testcase( i==85 ); /* WHERE */ + testcase( i==86 ); /* RENAME */ + testcase( i==87 ); /* AFTER */ + testcase( i==88 ); /* REPLACE */ + testcase( i==89 ); /* AND */ + testcase( i==90 ); /* DEFAULT */ + testcase( i==91 ); /* AUTOINCREMENT */ + testcase( i==92 ); /* TO */ + testcase( i==93 ); /* IN */ + testcase( i==94 ); /* CAST */ + testcase( i==95 ); /* COLUMN */ + testcase( i==96 ); /* COMMIT */ + testcase( i==97 ); /* CONFLICT */ + testcase( i==98 ); /* CROSS */ + testcase( i==99 ); /* CURRENT_TIMESTAMP */ + testcase( i==100 ); /* CURRENT_TIME */ + testcase( i==101 ); /* PRIMARY */ + testcase( i==102 ); /* DEFERRED */ + testcase( i==103 ); /* DISTINCT */ + testcase( i==104 ); /* IS */ + testcase( i==105 ); /* DROP */ + testcase( i==106 ); /* FAIL */ + testcase( i==107 ); /* FROM */ + testcase( i==108 ); /* FULL */ + testcase( i==109 ); /* GLOB */ + testcase( i==110 ); /* BY */ + testcase( i==111 ); /* IF */ + testcase( i==112 ); /* ISNULL */ + testcase( i==113 ); /* ORDER */ + testcase( i==114 ); /* RESTRICT */ + testcase( i==115 ); /* RIGHT */ + testcase( i==116 ); /* ROLLBACK */ + testcase( i==117 ); /* ROW */ + testcase( i==118 ); /* UNION */ + testcase( i==119 ); /* USING */ + testcase( i==120 ); /* VACUUM */ + testcase( i==121 ); /* VIEW */ + testcase( i==122 ); /* INITIALLY */ + testcase( i==123 ); /* ALL */ + return aCode[i]; + } + } + return TK_ID; +} +SQLITE_PRIVATE int sqlite3KeywordCode(const unsigned char *z, int n){ + return keywordCode((char*)z, n); +} +#define SQLITE_N_KEYWORD 124 + +/************** End of keywordhash.h *****************************************/ +/************** Continuing where we left off in tokenize.c *******************/ + + +/* +** If X is a character that can be used in an identifier then +** IdChar(X) will be true. Otherwise it is false. +** +** For ASCII, any character with the high-order bit set is +** allowed in an identifier. For 7-bit characters, +** sqlite3IsIdChar[X] must be 1. +** +** For EBCDIC, the rules are more complex but have the same +** end result. +** +** Ticket #1066. the SQL standard does not allow '$' in the +** middle of identifiers. But many SQL implementations do. +** SQLite will allow '$' in identifiers for compatibility. +** But the feature is undocumented. +*/ +#ifdef SQLITE_ASCII +#define IdChar(C) ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0) +#endif +#ifdef SQLITE_EBCDIC +SQLITE_PRIVATE const char sqlite3IsEbcdicIdChar[] = { +/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 4x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, /* 5x */ + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, /* 6x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, /* 7x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, /* 8x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, /* 9x */ + 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, /* Ax */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Cx */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Dx */ + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Ex */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, /* Fx */ +}; +#define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40])) +#endif + +/* Make the IdChar function accessible from ctime.c */ +#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS +SQLITE_PRIVATE int sqlite3IsIdChar(u8 c){ return IdChar(c); } +#endif + + +/* +** Return the length of the token that begins at z[0]. +** Store the token type in *tokenType before returning. +*/ +SQLITE_PRIVATE int sqlite3GetToken(const unsigned char *z, int *tokenType){ + int i, c; + switch( *z ){ + case ' ': case '\t': case '\n': case '\f': case '\r': { + testcase( z[0]==' ' ); + testcase( z[0]=='\t' ); + testcase( z[0]=='\n' ); + testcase( z[0]=='\f' ); + testcase( z[0]=='\r' ); + for(i=1; sqlite3Isspace(z[i]); i++){} + *tokenType = TK_SPACE; + return i; + } + case '-': { + if( z[1]=='-' ){ + for(i=2; (c=z[i])!=0 && c!='\n'; i++){} + *tokenType = TK_SPACE; /* IMP: R-22934-25134 */ + return i; + } + *tokenType = TK_MINUS; + return 1; + } + case '(': { + *tokenType = TK_LP; + return 1; + } + case ')': { + *tokenType = TK_RP; + return 1; + } + case ';': { + *tokenType = TK_SEMI; + return 1; + } + case '+': { + *tokenType = TK_PLUS; + return 1; + } + case '*': { + *tokenType = TK_STAR; + return 1; + } + case '/': { + if( z[1]!='*' || z[2]==0 ){ + *tokenType = TK_SLASH; + return 1; + } + for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} + if( c ) i++; + *tokenType = TK_SPACE; /* IMP: R-22934-25134 */ + return i; + } + case '%': { + *tokenType = TK_REM; + return 1; + } + case '=': { + *tokenType = TK_EQ; + return 1 + (z[1]=='='); + } + case '<': { + if( (c=z[1])=='=' ){ + *tokenType = TK_LE; + return 2; + }else if( c=='>' ){ + *tokenType = TK_NE; + return 2; + }else if( c=='<' ){ + *tokenType = TK_LSHIFT; + return 2; + }else{ + *tokenType = TK_LT; + return 1; + } + } + case '>': { + if( (c=z[1])=='=' ){ + *tokenType = TK_GE; + return 2; + }else if( c=='>' ){ + *tokenType = TK_RSHIFT; + return 2; + }else{ + *tokenType = TK_GT; + return 1; + } + } + case '!': { + if( z[1]!='=' ){ + *tokenType = TK_ILLEGAL; + return 2; + }else{ + *tokenType = TK_NE; + return 2; + } + } + case '|': { + if( z[1]!='|' ){ + *tokenType = TK_BITOR; + return 1; + }else{ + *tokenType = TK_CONCAT; + return 2; + } + } + case ',': { + *tokenType = TK_COMMA; + return 1; + } + case '&': { + *tokenType = TK_BITAND; + return 1; + } + case '~': { + *tokenType = TK_BITNOT; + return 1; + } + case '`': + case '\'': + case '"': { + int delim = z[0]; + testcase( delim=='`' ); + testcase( delim=='\'' ); + testcase( delim=='"' ); + for(i=1; (c=z[i])!=0; i++){ + if( c==delim ){ + if( z[i+1]==delim ){ + i++; + }else{ + break; + } + } + } + if( c=='\'' ){ + *tokenType = TK_STRING; + return i+1; + }else if( c!=0 ){ + *tokenType = TK_ID; + return i+1; + }else{ + *tokenType = TK_ILLEGAL; + return i; + } + } + case '.': { +#ifndef SQLITE_OMIT_FLOATING_POINT + if( !sqlite3Isdigit(z[1]) ) +#endif + { + *tokenType = TK_DOT; + return 1; + } + /* If the next character is a digit, this is a floating point + ** number that begins with ".". Fall thru into the next case */ + } + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + testcase( z[0]=='0' ); testcase( z[0]=='1' ); testcase( z[0]=='2' ); + testcase( z[0]=='3' ); testcase( z[0]=='4' ); testcase( z[0]=='5' ); + testcase( z[0]=='6' ); testcase( z[0]=='7' ); testcase( z[0]=='8' ); + testcase( z[0]=='9' ); + *tokenType = TK_INTEGER; +#ifndef SQLITE_OMIT_HEX_INTEGER + if( z[0]=='0' && (z[1]=='x' || z[1]=='X') && sqlite3Isxdigit(z[2]) ){ + for(i=3; sqlite3Isxdigit(z[i]); i++){} + return i; + } +#endif + for(i=0; sqlite3Isdigit(z[i]); i++){} +#ifndef SQLITE_OMIT_FLOATING_POINT + if( z[i]=='.' ){ + i++; + while( sqlite3Isdigit(z[i]) ){ i++; } + *tokenType = TK_FLOAT; + } + if( (z[i]=='e' || z[i]=='E') && + ( sqlite3Isdigit(z[i+1]) + || ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2])) + ) + ){ + i += 2; + while( sqlite3Isdigit(z[i]) ){ i++; } + *tokenType = TK_FLOAT; + } +#endif + while( IdChar(z[i]) ){ + *tokenType = TK_ILLEGAL; + i++; + } + return i; + } + case '[': { + for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} + *tokenType = c==']' ? TK_ID : TK_ILLEGAL; + return i; + } + case '?': { + *tokenType = TK_VARIABLE; + for(i=1; sqlite3Isdigit(z[i]); i++){} + return i; + } +#ifndef SQLITE_OMIT_TCL_VARIABLE + case '$': +#endif + case '@': /* For compatibility with MS SQL Server */ + case '#': + case ':': { + int n = 0; + testcase( z[0]=='$' ); testcase( z[0]=='@' ); + testcase( z[0]==':' ); testcase( z[0]=='#' ); + *tokenType = TK_VARIABLE; + for(i=1; (c=z[i])!=0; i++){ + if( IdChar(c) ){ + n++; +#ifndef SQLITE_OMIT_TCL_VARIABLE + }else if( c=='(' && n>0 ){ + do{ + i++; + }while( (c=z[i])!=0 && !sqlite3Isspace(c) && c!=')' ); + if( c==')' ){ + i++; + }else{ + *tokenType = TK_ILLEGAL; + } + break; + }else if( c==':' && z[i+1]==':' ){ + i++; +#endif + }else{ + break; + } + } + if( n==0 ) *tokenType = TK_ILLEGAL; + return i; + } +#ifndef SQLITE_OMIT_BLOB_LITERAL + case 'x': case 'X': { + testcase( z[0]=='x' ); testcase( z[0]=='X' ); + if( z[1]=='\'' ){ + *tokenType = TK_BLOB; + for(i=2; sqlite3Isxdigit(z[i]); i++){} + if( z[i]!='\'' || i%2 ){ + *tokenType = TK_ILLEGAL; + while( z[i] && z[i]!='\'' ){ i++; } + } + if( z[i] ) i++; + return i; + } + /* Otherwise fall through to the next case */ + } +#endif + default: { + if( !IdChar(*z) ){ + break; + } + for(i=1; IdChar(z[i]); i++){} + *tokenType = keywordCode((char*)z, i); + return i; + } + } + *tokenType = TK_ILLEGAL; + return 1; +} + +/* +** Run the parser on the given SQL string. The parser structure is +** passed in. An SQLITE_ status code is returned. If an error occurs +** then an and attempt is made to write an error message into +** memory obtained from sqlite3_malloc() and to make *pzErrMsg point to that +** error message. +*/ +SQLITE_PRIVATE int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ + int nErr = 0; /* Number of errors encountered */ + int i; /* Loop counter */ + void *pEngine; /* The LEMON-generated LALR(1) parser */ + int tokenType; /* type of the next token */ + int lastTokenParsed = -1; /* type of the previous token */ + u8 enableLookaside; /* Saved value of db->lookaside.bEnabled */ + sqlite3 *db = pParse->db; /* The database connection */ + int mxSqlLen; /* Max length of an SQL string */ + + assert( zSql!=0 ); + mxSqlLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH]; + if( db->nVdbeActive==0 ){ + db->u1.isInterrupted = 0; + } + pParse->rc = SQLITE_OK; + pParse->zTail = zSql; + i = 0; + assert( pzErrMsg!=0 ); + pEngine = sqlite3ParserAlloc(sqlite3Malloc); + if( pEngine==0 ){ + db->mallocFailed = 1; + return SQLITE_NOMEM; + } + assert( pParse->pNewTable==0 ); + assert( pParse->pNewTrigger==0 ); + assert( pParse->nVar==0 ); + assert( pParse->nzVar==0 ); + assert( pParse->azVar==0 ); + enableLookaside = db->lookaside.bEnabled; + if( db->lookaside.pStart ) db->lookaside.bEnabled = 1; + while( !db->mallocFailed && zSql[i]!=0 ){ + assert( i>=0 ); + pParse->sLastToken.z = &zSql[i]; + pParse->sLastToken.n = sqlite3GetToken((unsigned char*)&zSql[i],&tokenType); + i += pParse->sLastToken.n; + if( i>mxSqlLen ){ + pParse->rc = SQLITE_TOOBIG; + break; + } + switch( tokenType ){ + case TK_SPACE: { + if( db->u1.isInterrupted ){ + sqlite3ErrorMsg(pParse, "interrupt"); + pParse->rc = SQLITE_INTERRUPT; + goto abort_parse; + } + break; + } + case TK_ILLEGAL: { + sqlite3ErrorMsg(pParse, "unrecognized token: \"%T\"", + &pParse->sLastToken); + goto abort_parse; + } + case TK_SEMI: { + pParse->zTail = &zSql[i]; + /* Fall thru into the default case */ + } + default: { + sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse); + lastTokenParsed = tokenType; + if( pParse->rc!=SQLITE_OK ){ + goto abort_parse; + } + break; + } + } + } +abort_parse: + assert( nErr==0 ); + if( pParse->rc==SQLITE_OK && db->mallocFailed==0 ){ + assert( zSql[i]==0 ); + if( lastTokenParsed!=TK_SEMI ){ + sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); + pParse->zTail = &zSql[i]; + } + if( pParse->rc==SQLITE_OK && db->mallocFailed==0 ){ + sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse); + } + } +#ifdef YYTRACKMAXSTACKDEPTH + sqlite3_mutex_enter(sqlite3MallocMutex()); + sqlite3StatusSet(SQLITE_STATUS_PARSER_STACK, + sqlite3ParserStackPeak(pEngine) + ); + sqlite3_mutex_leave(sqlite3MallocMutex()); +#endif /* YYDEBUG */ + sqlite3ParserFree(pEngine, sqlite3_free); + db->lookaside.bEnabled = enableLookaside; + if( db->mallocFailed ){ + pParse->rc = SQLITE_NOMEM; + } + if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ + pParse->zErrMsg = sqlite3MPrintf(db, "%s", sqlite3ErrStr(pParse->rc)); + } + assert( pzErrMsg!=0 ); + if( pParse->zErrMsg ){ + *pzErrMsg = pParse->zErrMsg; + sqlite3_log(pParse->rc, "%s", *pzErrMsg); + pParse->zErrMsg = 0; + nErr++; + } + if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){ + sqlite3VdbeDelete(pParse->pVdbe); + pParse->pVdbe = 0; + } +#ifndef SQLITE_OMIT_SHARED_CACHE + if( pParse->nested==0 ){ + sqlite3DbFree(db, pParse->aTableLock); + pParse->aTableLock = 0; + pParse->nTableLock = 0; + } +#endif +#ifndef SQLITE_OMIT_VIRTUALTABLE + sqlite3_free(pParse->apVtabLock); +#endif + + if( !IN_DECLARE_VTAB ){ + /* If the pParse->declareVtab flag is set, do not delete any table + ** structure built up in pParse->pNewTable. The calling code (see vtab.c) + ** will take responsibility for freeing the Table structure. + */ + sqlite3DeleteTable(db, pParse->pNewTable); + } + + if( pParse->bFreeWith ) sqlite3WithDelete(db, pParse->pWith); + sqlite3DeleteTrigger(db, pParse->pNewTrigger); + for(i=pParse->nzVar-1; i>=0; i--) sqlite3DbFree(db, pParse->azVar[i]); + sqlite3DbFree(db, pParse->azVar); + while( pParse->pAinc ){ + AutoincInfo *p = pParse->pAinc; + pParse->pAinc = p->pNext; + sqlite3DbFree(db, p); + } + while( pParse->pZombieTab ){ + Table *p = pParse->pZombieTab; + pParse->pZombieTab = p->pNextZombie; + sqlite3DeleteTable(db, p); + } + assert( nErr==0 || pParse->rc!=SQLITE_OK ); + return nErr; +} + +/************** End of tokenize.c ********************************************/ +/************** Begin file complete.c ****************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** An tokenizer for SQL +** +** This file contains C code that implements the sqlite3_complete() API. +** This code used to be part of the tokenizer.c source file. But by +** separating it out, the code will be automatically omitted from +** static links that do not use it. +*/ +/* #include "sqliteInt.h" */ +#ifndef SQLITE_OMIT_COMPLETE + +/* +** This is defined in tokenize.c. We just have to import the definition. +*/ +#ifndef SQLITE_AMALGAMATION +#ifdef SQLITE_ASCII +#define IdChar(C) ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0) +#endif +#ifdef SQLITE_EBCDIC +SQLITE_PRIVATE const char sqlite3IsEbcdicIdChar[]; +#define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40])) +#endif +#endif /* SQLITE_AMALGAMATION */ + + +/* +** Token types used by the sqlite3_complete() routine. See the header +** comments on that procedure for additional information. +*/ +#define tkSEMI 0 +#define tkWS 1 +#define tkOTHER 2 +#ifndef SQLITE_OMIT_TRIGGER +#define tkEXPLAIN 3 +#define tkCREATE 4 +#define tkTEMP 5 +#define tkTRIGGER 6 +#define tkEND 7 +#endif + +/* +** Return TRUE if the given SQL string ends in a semicolon. +** +** Special handling is require for CREATE TRIGGER statements. +** Whenever the CREATE TRIGGER keywords are seen, the statement +** must end with ";END;". +** +** This implementation uses a state machine with 8 states: +** +** (0) INVALID We have not yet seen a non-whitespace character. +** +** (1) START At the beginning or end of an SQL statement. This routine +** returns 1 if it ends in the START state and 0 if it ends +** in any other state. +** +** (2) NORMAL We are in the middle of statement which ends with a single +** semicolon. +** +** (3) EXPLAIN The keyword EXPLAIN has been seen at the beginning of +** a statement. +** +** (4) CREATE The keyword CREATE has been seen at the beginning of a +** statement, possibly preceded by EXPLAIN and/or followed by +** TEMP or TEMPORARY +** +** (5) TRIGGER We are in the middle of a trigger definition that must be +** ended by a semicolon, the keyword END, and another semicolon. +** +** (6) SEMI We've seen the first semicolon in the ";END;" that occurs at +** the end of a trigger definition. +** +** (7) END We've seen the ";END" of the ";END;" that occurs at the end +** of a trigger definition. +** +** Transitions between states above are determined by tokens extracted +** from the input. The following tokens are significant: +** +** (0) tkSEMI A semicolon. +** (1) tkWS Whitespace. +** (2) tkOTHER Any other SQL token. +** (3) tkEXPLAIN The "explain" keyword. +** (4) tkCREATE The "create" keyword. +** (5) tkTEMP The "temp" or "temporary" keyword. +** (6) tkTRIGGER The "trigger" keyword. +** (7) tkEND The "end" keyword. +** +** Whitespace never causes a state transition and is always ignored. +** This means that a SQL string of all whitespace is invalid. +** +** If we compile with SQLITE_OMIT_TRIGGER, all of the computation needed +** to recognize the end of a trigger can be omitted. All we have to do +** is look for a semicolon that is not part of an string or comment. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_complete(const char *zSql){ + u8 state = 0; /* Current state, using numbers defined in header comment */ + u8 token; /* Value of the next token */ + +#ifndef SQLITE_OMIT_TRIGGER + /* A complex statement machine used to detect the end of a CREATE TRIGGER + ** statement. This is the normal case. + */ + static const u8 trans[8][8] = { + /* Token: */ + /* State: ** SEMI WS OTHER EXPLAIN CREATE TEMP TRIGGER END */ + /* 0 INVALID: */ { 1, 0, 2, 3, 4, 2, 2, 2, }, + /* 1 START: */ { 1, 1, 2, 3, 4, 2, 2, 2, }, + /* 2 NORMAL: */ { 1, 2, 2, 2, 2, 2, 2, 2, }, + /* 3 EXPLAIN: */ { 1, 3, 3, 2, 4, 2, 2, 2, }, + /* 4 CREATE: */ { 1, 4, 2, 2, 2, 4, 5, 2, }, + /* 5 TRIGGER: */ { 6, 5, 5, 5, 5, 5, 5, 5, }, + /* 6 SEMI: */ { 6, 6, 5, 5, 5, 5, 5, 7, }, + /* 7 END: */ { 1, 7, 5, 5, 5, 5, 5, 5, }, + }; +#else + /* If triggers are not supported by this compile then the statement machine + ** used to detect the end of a statement is much simpler + */ + static const u8 trans[3][3] = { + /* Token: */ + /* State: ** SEMI WS OTHER */ + /* 0 INVALID: */ { 1, 0, 2, }, + /* 1 START: */ { 1, 1, 2, }, + /* 2 NORMAL: */ { 1, 2, 2, }, + }; +#endif /* SQLITE_OMIT_TRIGGER */ + +#ifdef SQLITE_ENABLE_API_ARMOR + if( zSql==0 ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + + while( *zSql ){ + switch( *zSql ){ + case ';': { /* A semicolon */ + token = tkSEMI; + break; + } + case ' ': + case '\r': + case '\t': + case '\n': + case '\f': { /* White space is ignored */ + token = tkWS; + break; + } + case '/': { /* C-style comments */ + if( zSql[1]!='*' ){ + token = tkOTHER; + break; + } + zSql += 2; + while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; } + if( zSql[0]==0 ) return 0; + zSql++; + token = tkWS; + break; + } + case '-': { /* SQL-style comments from "--" to end of line */ + if( zSql[1]!='-' ){ + token = tkOTHER; + break; + } + while( *zSql && *zSql!='\n' ){ zSql++; } + if( *zSql==0 ) return state==1; + token = tkWS; + break; + } + case '[': { /* Microsoft-style identifiers in [...] */ + zSql++; + while( *zSql && *zSql!=']' ){ zSql++; } + if( *zSql==0 ) return 0; + token = tkOTHER; + break; + } + case '`': /* Grave-accent quoted symbols used by MySQL */ + case '"': /* single- and double-quoted strings */ + case '\'': { + int c = *zSql; + zSql++; + while( *zSql && *zSql!=c ){ zSql++; } + if( *zSql==0 ) return 0; + token = tkOTHER; + break; + } + default: { +#ifdef SQLITE_EBCDIC + unsigned char c; +#endif + if( IdChar((u8)*zSql) ){ + /* Keywords and unquoted identifiers */ + int nId; + for(nId=1; IdChar(zSql[nId]); nId++){} +#ifdef SQLITE_OMIT_TRIGGER + token = tkOTHER; +#else + switch( *zSql ){ + case 'c': case 'C': { + if( nId==6 && sqlite3StrNICmp(zSql, "create", 6)==0 ){ + token = tkCREATE; + }else{ + token = tkOTHER; + } + break; + } + case 't': case 'T': { + if( nId==7 && sqlite3StrNICmp(zSql, "trigger", 7)==0 ){ + token = tkTRIGGER; + }else if( nId==4 && sqlite3StrNICmp(zSql, "temp", 4)==0 ){ + token = tkTEMP; + }else if( nId==9 && sqlite3StrNICmp(zSql, "temporary", 9)==0 ){ + token = tkTEMP; + }else{ + token = tkOTHER; + } + break; + } + case 'e': case 'E': { + if( nId==3 && sqlite3StrNICmp(zSql, "end", 3)==0 ){ + token = tkEND; + }else +#ifndef SQLITE_OMIT_EXPLAIN + if( nId==7 && sqlite3StrNICmp(zSql, "explain", 7)==0 ){ + token = tkEXPLAIN; + }else +#endif + { + token = tkOTHER; + } + break; + } + default: { + token = tkOTHER; + break; + } + } +#endif /* SQLITE_OMIT_TRIGGER */ + zSql += nId-1; + }else{ + /* Operators and special symbols */ + token = tkOTHER; + } + break; + } + } + state = trans[state][token]; + zSql++; + } + return state==1; +} + +#ifndef SQLITE_OMIT_UTF16 +/* +** This routine is the same as the sqlite3_complete() routine described +** above, except that the parameter is required to be UTF-16 encoded, not +** UTF-8. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_complete16(const void *zSql){ + sqlite3_value *pVal; + char const *zSql8; + int rc; + +#ifndef SQLITE_OMIT_AUTOINIT + rc = sqlite3_initialize(); + if( rc ) return rc; +#endif + pVal = sqlite3ValueNew(0); + sqlite3ValueSetStr(pVal, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC); + zSql8 = sqlite3ValueText(pVal, SQLITE_UTF8); + if( zSql8 ){ + rc = sqlite3_complete(zSql8); + }else{ + rc = SQLITE_NOMEM; + } + sqlite3ValueFree(pVal); + return rc & 0xff; +} +#endif /* SQLITE_OMIT_UTF16 */ +#endif /* SQLITE_OMIT_COMPLETE */ + +/************** End of complete.c ********************************************/ +/************** Begin file main.c ********************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** Main file for the SQLite library. The routines in this file +** implement the programmer interface to the library. Routines in +** other files are for internal use by SQLite and should not be +** accessed by users of the library. +*/ +/* #include "sqliteInt.h" */ + +#ifdef SQLITE_ENABLE_FTS3 +/************** Include fts3.h in the middle of main.c ***********************/ +/************** Begin file fts3.h ********************************************/ +/* +** 2006 Oct 10 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This header file is used by programs that want to link against the +** FTS3 library. All it does is declare the sqlite3Fts3Init() interface. +*/ +/* #include "sqlite3.h" */ + +#if 0 +extern "C" { +#endif /* __cplusplus */ + +SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db); + +#if 0 +} /* extern "C" */ +#endif /* __cplusplus */ + +/************** End of fts3.h ************************************************/ +/************** Continuing where we left off in main.c ***********************/ +#endif +#ifdef SQLITE_ENABLE_RTREE +/************** Include rtree.h in the middle of main.c **********************/ +/************** Begin file rtree.h *******************************************/ +/* +** 2008 May 26 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This header file is used by programs that want to link against the +** RTREE library. All it does is declare the sqlite3RtreeInit() interface. +*/ +/* #include "sqlite3.h" */ + +#if 0 +extern "C" { +#endif /* __cplusplus */ + +SQLITE_PRIVATE int sqlite3RtreeInit(sqlite3 *db); + +#if 0 +} /* extern "C" */ +#endif /* __cplusplus */ + +/************** End of rtree.h ***********************************************/ +/************** Continuing where we left off in main.c ***********************/ +#endif +#ifdef SQLITE_ENABLE_ICU +/************** Include sqliteicu.h in the middle of main.c ******************/ +/************** Begin file sqliteicu.h ***************************************/ +/* +** 2008 May 26 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This header file is used by programs that want to link against the +** ICU extension. All it does is declare the sqlite3IcuInit() interface. +*/ +/* #include "sqlite3.h" */ + +#if 0 +extern "C" { +#endif /* __cplusplus */ + +SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db); + +#if 0 +} /* extern "C" */ +#endif /* __cplusplus */ + + +/************** End of sqliteicu.h *******************************************/ +/************** Continuing where we left off in main.c ***********************/ +#endif + +#ifndef SQLITE_AMALGAMATION +/* IMPLEMENTATION-OF: R-46656-45156 The sqlite3_version[] string constant +** contains the text of SQLITE_VERSION macro. +*/ +SQLITE_API const char sqlite3_version[] = SQLITE_VERSION; +#endif + +/* IMPLEMENTATION-OF: R-53536-42575 The sqlite3_libversion() function returns +** a pointer to the to the sqlite3_version[] string constant. +*/ +SQLITE_API const char *SQLITE_STDCALL sqlite3_libversion(void){ return sqlite3_version; } + +/* IMPLEMENTATION-OF: R-63124-39300 The sqlite3_sourceid() function returns a +** pointer to a string constant whose value is the same as the +** SQLITE_SOURCE_ID C preprocessor macro. +*/ +SQLITE_API const char *SQLITE_STDCALL sqlite3_sourceid(void){ return SQLITE_SOURCE_ID; } + +/* IMPLEMENTATION-OF: R-35210-63508 The sqlite3_libversion_number() function +** returns an integer equal to SQLITE_VERSION_NUMBER. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_libversion_number(void){ return SQLITE_VERSION_NUMBER; } + +/* IMPLEMENTATION-OF: R-20790-14025 The sqlite3_threadsafe() function returns +** zero if and only if SQLite was compiled with mutexing code omitted due to +** the SQLITE_THREADSAFE compile-time option being set to 0. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_threadsafe(void){ return SQLITE_THREADSAFE; } + +/* +** When compiling the test fixture or with debugging enabled (on Win32), +** this variable being set to non-zero will cause OSTRACE macros to emit +** extra diagnostic information. +*/ +#ifdef SQLITE_HAVE_OS_TRACE +# ifndef SQLITE_DEBUG_OS_TRACE +# define SQLITE_DEBUG_OS_TRACE 0 +# endif + int sqlite3OSTrace = SQLITE_DEBUG_OS_TRACE; +#endif + +#if !defined(SQLITE_OMIT_TRACE) && defined(SQLITE_ENABLE_IOTRACE) +/* +** If the following function pointer is not NULL and if +** SQLITE_ENABLE_IOTRACE is enabled, then messages describing +** I/O active are written using this function. These messages +** are intended for debugging activity only. +*/ +SQLITE_API void (SQLITE_CDECL *sqlite3IoTrace)(const char*, ...) = 0; +#endif + +/* +** If the following global variable points to a string which is the +** name of a directory, then that directory will be used to store +** temporary files. +** +** See also the "PRAGMA temp_store_directory" SQL command. +*/ +SQLITE_API char *sqlite3_temp_directory = 0; + +/* +** If the following global variable points to a string which is the +** name of a directory, then that directory will be used to store +** all database files specified with a relative pathname. +** +** See also the "PRAGMA data_store_directory" SQL command. +*/ +SQLITE_API char *sqlite3_data_directory = 0; + +/* +** Initialize SQLite. +** +** This routine must be called to initialize the memory allocation, +** VFS, and mutex subsystems prior to doing any serious work with +** SQLite. But as long as you do not compile with SQLITE_OMIT_AUTOINIT +** this routine will be called automatically by key routines such as +** sqlite3_open(). +** +** This routine is a no-op except on its very first call for the process, +** or for the first call after a call to sqlite3_shutdown. +** +** The first thread to call this routine runs the initialization to +** completion. If subsequent threads call this routine before the first +** thread has finished the initialization process, then the subsequent +** threads must block until the first thread finishes with the initialization. +** +** The first thread might call this routine recursively. Recursive +** calls to this routine should not block, of course. Otherwise the +** initialization process would never complete. +** +** Let X be the first thread to enter this routine. Let Y be some other +** thread. Then while the initial invocation of this routine by X is +** incomplete, it is required that: +** +** * Calls to this routine from Y must block until the outer-most +** call by X completes. +** +** * Recursive calls to this routine from thread X return immediately +** without blocking. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_initialize(void){ + MUTEX_LOGIC( sqlite3_mutex *pMaster; ) /* The main static mutex */ + int rc; /* Result code */ +#ifdef SQLITE_EXTRA_INIT + int bRunExtraInit = 0; /* Extra initialization needed */ +#endif + +#ifdef SQLITE_OMIT_WSD + rc = sqlite3_wsd_init(4096, 24); + if( rc!=SQLITE_OK ){ + return rc; + } +#endif + + /* If the following assert() fails on some obscure processor/compiler + ** combination, the work-around is to set the correct pointer + ** size at compile-time using -DSQLITE_PTRSIZE=n compile-time option */ + assert( SQLITE_PTRSIZE==sizeof(char*) ); + + /* If SQLite is already completely initialized, then this call + ** to sqlite3_initialize() should be a no-op. But the initialization + ** must be complete. So isInit must not be set until the very end + ** of this routine. + */ + if( sqlite3GlobalConfig.isInit ) return SQLITE_OK; + + /* Make sure the mutex subsystem is initialized. If unable to + ** initialize the mutex subsystem, return early with the error. + ** If the system is so sick that we are unable to allocate a mutex, + ** there is not much SQLite is going to be able to do. + ** + ** The mutex subsystem must take care of serializing its own + ** initialization. + */ + rc = sqlite3MutexInit(); + if( rc ) return rc; + + /* Initialize the malloc() system and the recursive pInitMutex mutex. + ** This operation is protected by the STATIC_MASTER mutex. Note that + ** MutexAlloc() is called for a static mutex prior to initializing the + ** malloc subsystem - this implies that the allocation of a static + ** mutex must not require support from the malloc subsystem. + */ + MUTEX_LOGIC( pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); ) + sqlite3_mutex_enter(pMaster); + sqlite3GlobalConfig.isMutexInit = 1; + if( !sqlite3GlobalConfig.isMallocInit ){ + rc = sqlite3MallocInit(); + } + if( rc==SQLITE_OK ){ + sqlite3GlobalConfig.isMallocInit = 1; + if( !sqlite3GlobalConfig.pInitMutex ){ + sqlite3GlobalConfig.pInitMutex = + sqlite3MutexAlloc(SQLITE_MUTEX_RECURSIVE); + if( sqlite3GlobalConfig.bCoreMutex && !sqlite3GlobalConfig.pInitMutex ){ + rc = SQLITE_NOMEM; + } + } + } + if( rc==SQLITE_OK ){ + sqlite3GlobalConfig.nRefInitMutex++; + } + sqlite3_mutex_leave(pMaster); + + /* If rc is not SQLITE_OK at this point, then either the malloc + ** subsystem could not be initialized or the system failed to allocate + ** the pInitMutex mutex. Return an error in either case. */ + if( rc!=SQLITE_OK ){ + return rc; + } + + /* Do the rest of the initialization under the recursive mutex so + ** that we will be able to handle recursive calls into + ** sqlite3_initialize(). The recursive calls normally come through + ** sqlite3_os_init() when it invokes sqlite3_vfs_register(), but other + ** recursive calls might also be possible. + ** + ** IMPLEMENTATION-OF: R-00140-37445 SQLite automatically serializes calls + ** to the xInit method, so the xInit method need not be threadsafe. + ** + ** The following mutex is what serializes access to the appdef pcache xInit + ** methods. The sqlite3_pcache_methods.xInit() all is embedded in the + ** call to sqlite3PcacheInitialize(). + */ + sqlite3_mutex_enter(sqlite3GlobalConfig.pInitMutex); + if( sqlite3GlobalConfig.isInit==0 && sqlite3GlobalConfig.inProgress==0 ){ + FuncDefHash *pHash = &GLOBAL(FuncDefHash, sqlite3GlobalFunctions); + sqlite3GlobalConfig.inProgress = 1; + memset(pHash, 0, sizeof(sqlite3GlobalFunctions)); + sqlite3RegisterGlobalFunctions(); + if( sqlite3GlobalConfig.isPCacheInit==0 ){ + rc = sqlite3PcacheInitialize(); + } + if( rc==SQLITE_OK ){ + sqlite3GlobalConfig.isPCacheInit = 1; + rc = sqlite3OsInit(); + } + if( rc==SQLITE_OK ){ + sqlite3PCacheBufferSetup( sqlite3GlobalConfig.pPage, + sqlite3GlobalConfig.szPage, sqlite3GlobalConfig.nPage); + sqlite3GlobalConfig.isInit = 1; +#ifdef SQLITE_EXTRA_INIT + bRunExtraInit = 1; +#endif + } + sqlite3GlobalConfig.inProgress = 0; + } + sqlite3_mutex_leave(sqlite3GlobalConfig.pInitMutex); + + /* Go back under the static mutex and clean up the recursive + ** mutex to prevent a resource leak. + */ + sqlite3_mutex_enter(pMaster); + sqlite3GlobalConfig.nRefInitMutex--; + if( sqlite3GlobalConfig.nRefInitMutex<=0 ){ + assert( sqlite3GlobalConfig.nRefInitMutex==0 ); + sqlite3_mutex_free(sqlite3GlobalConfig.pInitMutex); + sqlite3GlobalConfig.pInitMutex = 0; + } + sqlite3_mutex_leave(pMaster); + + /* The following is just a sanity check to make sure SQLite has + ** been compiled correctly. It is important to run this code, but + ** we don't want to run it too often and soak up CPU cycles for no + ** reason. So we run it once during initialization. + */ +#ifndef NDEBUG +#ifndef SQLITE_OMIT_FLOATING_POINT + /* This section of code's only "output" is via assert() statements. */ + if ( rc==SQLITE_OK ){ + u64 x = (((u64)1)<<63)-1; + double y; + assert(sizeof(x)==8); + assert(sizeof(x)==sizeof(y)); + memcpy(&y, &x, 8); + assert( sqlite3IsNaN(y) ); + } +#endif +#endif + + /* Do extra initialization steps requested by the SQLITE_EXTRA_INIT + ** compile-time option. + */ +#ifdef SQLITE_EXTRA_INIT + if( bRunExtraInit ){ + int SQLITE_EXTRA_INIT(const char*); + rc = SQLITE_EXTRA_INIT(0); + } +#endif + + return rc; +} + +/* +** Undo the effects of sqlite3_initialize(). Must not be called while +** there are outstanding database connections or memory allocations or +** while any part of SQLite is otherwise in use in any thread. This +** routine is not threadsafe. But it is safe to invoke this routine +** on when SQLite is already shut down. If SQLite is already shut down +** when this routine is invoked, then this routine is a harmless no-op. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_shutdown(void){ +#ifdef SQLITE_OMIT_WSD + int rc = sqlite3_wsd_init(4096, 24); + if( rc!=SQLITE_OK ){ + return rc; + } +#endif + + if( sqlite3GlobalConfig.isInit ){ +#ifdef SQLITE_EXTRA_SHUTDOWN + void SQLITE_EXTRA_SHUTDOWN(void); + SQLITE_EXTRA_SHUTDOWN(); +#endif + sqlite3_os_end(); + sqlite3_reset_auto_extension(); + sqlite3GlobalConfig.isInit = 0; + } + if( sqlite3GlobalConfig.isPCacheInit ){ + sqlite3PcacheShutdown(); + sqlite3GlobalConfig.isPCacheInit = 0; + } + if( sqlite3GlobalConfig.isMallocInit ){ + sqlite3MallocEnd(); + sqlite3GlobalConfig.isMallocInit = 0; + +#ifndef SQLITE_OMIT_SHUTDOWN_DIRECTORIES + /* The heap subsystem has now been shutdown and these values are supposed + ** to be NULL or point to memory that was obtained from sqlite3_malloc(), + ** which would rely on that heap subsystem; therefore, make sure these + ** values cannot refer to heap memory that was just invalidated when the + ** heap subsystem was shutdown. This is only done if the current call to + ** this function resulted in the heap subsystem actually being shutdown. + */ + sqlite3_data_directory = 0; + sqlite3_temp_directory = 0; +#endif + } + if( sqlite3GlobalConfig.isMutexInit ){ + sqlite3MutexEnd(); + sqlite3GlobalConfig.isMutexInit = 0; + } + + return SQLITE_OK; +} + +/* +** This API allows applications to modify the global configuration of +** the SQLite library at run-time. +** +** This routine should only be called when there are no outstanding +** database connections or memory allocations. This routine is not +** threadsafe. Failure to heed these warnings can lead to unpredictable +** behavior. +*/ +SQLITE_API int SQLITE_CDECL sqlite3_config(int op, ...){ + va_list ap; + int rc = SQLITE_OK; + + /* sqlite3_config() shall return SQLITE_MISUSE if it is invoked while + ** the SQLite library is in use. */ + if( sqlite3GlobalConfig.isInit ) return SQLITE_MISUSE_BKPT; + + va_start(ap, op); + switch( op ){ + + /* Mutex configuration options are only available in a threadsafe + ** compile. + */ +#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-54466-46756 */ + case SQLITE_CONFIG_SINGLETHREAD: { + /* EVIDENCE-OF: R-02748-19096 This option sets the threading mode to + ** Single-thread. */ + sqlite3GlobalConfig.bCoreMutex = 0; /* Disable mutex on core */ + sqlite3GlobalConfig.bFullMutex = 0; /* Disable mutex on connections */ + break; + } +#endif +#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-20520-54086 */ + case SQLITE_CONFIG_MULTITHREAD: { + /* EVIDENCE-OF: R-14374-42468 This option sets the threading mode to + ** Multi-thread. */ + sqlite3GlobalConfig.bCoreMutex = 1; /* Enable mutex on core */ + sqlite3GlobalConfig.bFullMutex = 0; /* Disable mutex on connections */ + break; + } +#endif +#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-59593-21810 */ + case SQLITE_CONFIG_SERIALIZED: { + /* EVIDENCE-OF: R-41220-51800 This option sets the threading mode to + ** Serialized. */ + sqlite3GlobalConfig.bCoreMutex = 1; /* Enable mutex on core */ + sqlite3GlobalConfig.bFullMutex = 1; /* Enable mutex on connections */ + break; + } +#endif +#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-63666-48755 */ + case SQLITE_CONFIG_MUTEX: { + /* Specify an alternative mutex implementation */ + sqlite3GlobalConfig.mutex = *va_arg(ap, sqlite3_mutex_methods*); + break; + } +#endif +#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-14450-37597 */ + case SQLITE_CONFIG_GETMUTEX: { + /* Retrieve the current mutex implementation */ + *va_arg(ap, sqlite3_mutex_methods*) = sqlite3GlobalConfig.mutex; + break; + } +#endif + + case SQLITE_CONFIG_MALLOC: { + /* EVIDENCE-OF: R-55594-21030 The SQLITE_CONFIG_MALLOC option takes a + ** single argument which is a pointer to an instance of the + ** sqlite3_mem_methods structure. The argument specifies alternative + ** low-level memory allocation routines to be used in place of the memory + ** allocation routines built into SQLite. */ + sqlite3GlobalConfig.m = *va_arg(ap, sqlite3_mem_methods*); + break; + } + case SQLITE_CONFIG_GETMALLOC: { + /* EVIDENCE-OF: R-51213-46414 The SQLITE_CONFIG_GETMALLOC option takes a + ** single argument which is a pointer to an instance of the + ** sqlite3_mem_methods structure. The sqlite3_mem_methods structure is + ** filled with the currently defined memory allocation routines. */ + if( sqlite3GlobalConfig.m.xMalloc==0 ) sqlite3MemSetDefault(); + *va_arg(ap, sqlite3_mem_methods*) = sqlite3GlobalConfig.m; + break; + } + case SQLITE_CONFIG_MEMSTATUS: { + /* EVIDENCE-OF: R-61275-35157 The SQLITE_CONFIG_MEMSTATUS option takes + ** single argument of type int, interpreted as a boolean, which enables + ** or disables the collection of memory allocation statistics. */ + sqlite3GlobalConfig.bMemstat = va_arg(ap, int); + break; + } + case SQLITE_CONFIG_SCRATCH: { + /* EVIDENCE-OF: R-08404-60887 There are three arguments to + ** SQLITE_CONFIG_SCRATCH: A pointer an 8-byte aligned memory buffer from + ** which the scratch allocations will be drawn, the size of each scratch + ** allocation (sz), and the maximum number of scratch allocations (N). */ + sqlite3GlobalConfig.pScratch = va_arg(ap, void*); + sqlite3GlobalConfig.szScratch = va_arg(ap, int); + sqlite3GlobalConfig.nScratch = va_arg(ap, int); + break; + } + case SQLITE_CONFIG_PAGECACHE: { + /* EVIDENCE-OF: R-31408-40510 There are three arguments to + ** SQLITE_CONFIG_PAGECACHE: A pointer to 8-byte aligned memory, the size + ** of each page buffer (sz), and the number of pages (N). */ + sqlite3GlobalConfig.pPage = va_arg(ap, void*); + sqlite3GlobalConfig.szPage = va_arg(ap, int); + sqlite3GlobalConfig.nPage = va_arg(ap, int); + break; + } + case SQLITE_CONFIG_PCACHE_HDRSZ: { + /* EVIDENCE-OF: R-39100-27317 The SQLITE_CONFIG_PCACHE_HDRSZ option takes + ** a single parameter which is a pointer to an integer and writes into + ** that integer the number of extra bytes per page required for each page + ** in SQLITE_CONFIG_PAGECACHE. */ + *va_arg(ap, int*) = + sqlite3HeaderSizeBtree() + + sqlite3HeaderSizePcache() + + sqlite3HeaderSizePcache1(); + break; + } + + case SQLITE_CONFIG_PCACHE: { + /* no-op */ + break; + } + case SQLITE_CONFIG_GETPCACHE: { + /* now an error */ + rc = SQLITE_ERROR; + break; + } + + case SQLITE_CONFIG_PCACHE2: { + /* EVIDENCE-OF: R-63325-48378 The SQLITE_CONFIG_PCACHE2 option takes a + ** single argument which is a pointer to an sqlite3_pcache_methods2 + ** object. This object specifies the interface to a custom page cache + ** implementation. */ + sqlite3GlobalConfig.pcache2 = *va_arg(ap, sqlite3_pcache_methods2*); + break; + } + case SQLITE_CONFIG_GETPCACHE2: { + /* EVIDENCE-OF: R-22035-46182 The SQLITE_CONFIG_GETPCACHE2 option takes a + ** single argument which is a pointer to an sqlite3_pcache_methods2 + ** object. SQLite copies of the current page cache implementation into + ** that object. */ + if( sqlite3GlobalConfig.pcache2.xInit==0 ){ + sqlite3PCacheSetDefault(); + } + *va_arg(ap, sqlite3_pcache_methods2*) = sqlite3GlobalConfig.pcache2; + break; + } + +/* EVIDENCE-OF: R-06626-12911 The SQLITE_CONFIG_HEAP option is only +** available if SQLite is compiled with either SQLITE_ENABLE_MEMSYS3 or +** SQLITE_ENABLE_MEMSYS5 and returns SQLITE_ERROR if invoked otherwise. */ +#if defined(SQLITE_ENABLE_MEMSYS3) || defined(SQLITE_ENABLE_MEMSYS5) + case SQLITE_CONFIG_HEAP: { + /* EVIDENCE-OF: R-19854-42126 There are three arguments to + ** SQLITE_CONFIG_HEAP: An 8-byte aligned pointer to the memory, the + ** number of bytes in the memory buffer, and the minimum allocation size. + */ + sqlite3GlobalConfig.pHeap = va_arg(ap, void*); + sqlite3GlobalConfig.nHeap = va_arg(ap, int); + sqlite3GlobalConfig.mnReq = va_arg(ap, int); + + if( sqlite3GlobalConfig.mnReq<1 ){ + sqlite3GlobalConfig.mnReq = 1; + }else if( sqlite3GlobalConfig.mnReq>(1<<12) ){ + /* cap min request size at 2^12 */ + sqlite3GlobalConfig.mnReq = (1<<12); + } + + if( sqlite3GlobalConfig.pHeap==0 ){ + /* EVIDENCE-OF: R-49920-60189 If the first pointer (the memory pointer) + ** is NULL, then SQLite reverts to using its default memory allocator + ** (the system malloc() implementation), undoing any prior invocation of + ** SQLITE_CONFIG_MALLOC. + ** + ** Setting sqlite3GlobalConfig.m to all zeros will cause malloc to + ** revert to its default implementation when sqlite3_initialize() is run + */ + memset(&sqlite3GlobalConfig.m, 0, sizeof(sqlite3GlobalConfig.m)); + }else{ + /* EVIDENCE-OF: R-61006-08918 If the memory pointer is not NULL then the + ** alternative memory allocator is engaged to handle all of SQLites + ** memory allocation needs. */ +#ifdef SQLITE_ENABLE_MEMSYS3 + sqlite3GlobalConfig.m = *sqlite3MemGetMemsys3(); +#endif +#ifdef SQLITE_ENABLE_MEMSYS5 + sqlite3GlobalConfig.m = *sqlite3MemGetMemsys5(); +#endif + } + break; + } +#endif + + case SQLITE_CONFIG_LOOKASIDE: { + sqlite3GlobalConfig.szLookaside = va_arg(ap, int); + sqlite3GlobalConfig.nLookaside = va_arg(ap, int); + break; + } + + /* Record a pointer to the logger function and its first argument. + ** The default is NULL. Logging is disabled if the function pointer is + ** NULL. + */ + case SQLITE_CONFIG_LOG: { + /* MSVC is picky about pulling func ptrs from va lists. + ** http://support.microsoft.com/kb/47961 + ** sqlite3GlobalConfig.xLog = va_arg(ap, void(*)(void*,int,const char*)); + */ + typedef void(*LOGFUNC_t)(void*,int,const char*); + sqlite3GlobalConfig.xLog = va_arg(ap, LOGFUNC_t); + sqlite3GlobalConfig.pLogArg = va_arg(ap, void*); + break; + } + + /* EVIDENCE-OF: R-55548-33817 The compile-time setting for URI filenames + ** can be changed at start-time using the + ** sqlite3_config(SQLITE_CONFIG_URI,1) or + ** sqlite3_config(SQLITE_CONFIG_URI,0) configuration calls. + */ + case SQLITE_CONFIG_URI: { + /* EVIDENCE-OF: R-25451-61125 The SQLITE_CONFIG_URI option takes a single + ** argument of type int. If non-zero, then URI handling is globally + ** enabled. If the parameter is zero, then URI handling is globally + ** disabled. */ + sqlite3GlobalConfig.bOpenUri = va_arg(ap, int); + break; + } + + case SQLITE_CONFIG_COVERING_INDEX_SCAN: { + /* EVIDENCE-OF: R-36592-02772 The SQLITE_CONFIG_COVERING_INDEX_SCAN + ** option takes a single integer argument which is interpreted as a + ** boolean in order to enable or disable the use of covering indices for + ** full table scans in the query optimizer. */ + sqlite3GlobalConfig.bUseCis = va_arg(ap, int); + break; + } + +#ifdef SQLITE_ENABLE_SQLLOG + case SQLITE_CONFIG_SQLLOG: { + typedef void(*SQLLOGFUNC_t)(void*, sqlite3*, const char*, int); + sqlite3GlobalConfig.xSqllog = va_arg(ap, SQLLOGFUNC_t); + sqlite3GlobalConfig.pSqllogArg = va_arg(ap, void *); + break; + } +#endif + + case SQLITE_CONFIG_MMAP_SIZE: { + /* EVIDENCE-OF: R-58063-38258 SQLITE_CONFIG_MMAP_SIZE takes two 64-bit + ** integer (sqlite3_int64) values that are the default mmap size limit + ** (the default setting for PRAGMA mmap_size) and the maximum allowed + ** mmap size limit. */ + sqlite3_int64 szMmap = va_arg(ap, sqlite3_int64); + sqlite3_int64 mxMmap = va_arg(ap, sqlite3_int64); + /* EVIDENCE-OF: R-53367-43190 If either argument to this option is + ** negative, then that argument is changed to its compile-time default. + ** + ** EVIDENCE-OF: R-34993-45031 The maximum allowed mmap size will be + ** silently truncated if necessary so that it does not exceed the + ** compile-time maximum mmap size set by the SQLITE_MAX_MMAP_SIZE + ** compile-time option. + */ + if( mxMmap<0 || mxMmap>SQLITE_MAX_MMAP_SIZE ){ + mxMmap = SQLITE_MAX_MMAP_SIZE; + } + if( szMmap<0 ) szMmap = SQLITE_DEFAULT_MMAP_SIZE; + if( szMmap>mxMmap) szMmap = mxMmap; + sqlite3GlobalConfig.mxMmap = mxMmap; + sqlite3GlobalConfig.szMmap = szMmap; + break; + } + +#if SQLITE_OS_WIN && defined(SQLITE_WIN32_MALLOC) /* IMP: R-04780-55815 */ + case SQLITE_CONFIG_WIN32_HEAPSIZE: { + /* EVIDENCE-OF: R-34926-03360 SQLITE_CONFIG_WIN32_HEAPSIZE takes a 32-bit + ** unsigned integer value that specifies the maximum size of the created + ** heap. */ + sqlite3GlobalConfig.nHeap = va_arg(ap, int); + break; + } +#endif + + case SQLITE_CONFIG_PMASZ: { + sqlite3GlobalConfig.szPma = va_arg(ap, unsigned int); + break; + } + + default: { + rc = SQLITE_ERROR; + break; + } + } + va_end(ap); + return rc; +} + +/* +** Set up the lookaside buffers for a database connection. +** Return SQLITE_OK on success. +** If lookaside is already active, return SQLITE_BUSY. +** +** The sz parameter is the number of bytes in each lookaside slot. +** The cnt parameter is the number of slots. If pStart is NULL the +** space for the lookaside memory is obtained from sqlite3_malloc(). +** If pStart is not NULL then it is sz*cnt bytes of memory to use for +** the lookaside memory. +*/ +static int setupLookaside(sqlite3 *db, void *pBuf, int sz, int cnt){ +#ifndef SQLITE_OMIT_LOOKASIDE + void *pStart; + if( db->lookaside.nOut ){ + return SQLITE_BUSY; + } + /* Free any existing lookaside buffer for this handle before + ** allocating a new one so we don't have to have space for + ** both at the same time. + */ + if( db->lookaside.bMalloced ){ + sqlite3_free(db->lookaside.pStart); + } + /* The size of a lookaside slot after ROUNDDOWN8 needs to be larger + ** than a pointer to be useful. + */ + sz = ROUNDDOWN8(sz); /* IMP: R-33038-09382 */ + if( sz<=(int)sizeof(LookasideSlot*) ) sz = 0; + if( cnt<0 ) cnt = 0; + if( sz==0 || cnt==0 ){ + sz = 0; + pStart = 0; + }else if( pBuf==0 ){ + sqlite3BeginBenignMalloc(); + pStart = sqlite3Malloc( sz*cnt ); /* IMP: R-61949-35727 */ + sqlite3EndBenignMalloc(); + if( pStart ) cnt = sqlite3MallocSize(pStart)/sz; + }else{ + pStart = pBuf; + } + db->lookaside.pStart = pStart; + db->lookaside.pFree = 0; + db->lookaside.sz = (u16)sz; + if( pStart ){ + int i; + LookasideSlot *p; + assert( sz > (int)sizeof(LookasideSlot*) ); + p = (LookasideSlot*)pStart; + for(i=cnt-1; i>=0; i--){ + p->pNext = db->lookaside.pFree; + db->lookaside.pFree = p; + p = (LookasideSlot*)&((u8*)p)[sz]; + } + db->lookaside.pEnd = p; + db->lookaside.bEnabled = 1; + db->lookaside.bMalloced = pBuf==0 ?1:0; + }else{ + db->lookaside.pStart = db; + db->lookaside.pEnd = db; + db->lookaside.bEnabled = 0; + db->lookaside.bMalloced = 0; + } +#endif /* SQLITE_OMIT_LOOKASIDE */ + return SQLITE_OK; +} + +/* +** Return the mutex associated with a database connection. +*/ +SQLITE_API sqlite3_mutex *SQLITE_STDCALL sqlite3_db_mutex(sqlite3 *db){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + return db->mutex; +} + +/* +** Free up as much memory as we can from the given database +** connection. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_db_release_memory(sqlite3 *db){ + int i; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + sqlite3BtreeEnterAll(db); + for(i=0; inDb; i++){ + Btree *pBt = db->aDb[i].pBt; + if( pBt ){ + Pager *pPager = sqlite3BtreePager(pBt); + sqlite3PagerShrink(pPager); + } + } + sqlite3BtreeLeaveAll(db); + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; +} + +/* +** Configuration settings for an individual database connection +*/ +SQLITE_API int SQLITE_CDECL sqlite3_db_config(sqlite3 *db, int op, ...){ + va_list ap; + int rc; + va_start(ap, op); + switch( op ){ + case SQLITE_DBCONFIG_LOOKASIDE: { + void *pBuf = va_arg(ap, void*); /* IMP: R-26835-10964 */ + int sz = va_arg(ap, int); /* IMP: R-47871-25994 */ + int cnt = va_arg(ap, int); /* IMP: R-04460-53386 */ + rc = setupLookaside(db, pBuf, sz, cnt); + break; + } + default: { + static const struct { + int op; /* The opcode */ + u32 mask; /* Mask of the bit in sqlite3.flags to set/clear */ + } aFlagOp[] = { + { SQLITE_DBCONFIG_ENABLE_FKEY, SQLITE_ForeignKeys }, + { SQLITE_DBCONFIG_ENABLE_TRIGGER, SQLITE_EnableTrigger }, + }; + unsigned int i; + rc = SQLITE_ERROR; /* IMP: R-42790-23372 */ + for(i=0; iflags; + if( onoff>0 ){ + db->flags |= aFlagOp[i].mask; + }else if( onoff==0 ){ + db->flags &= ~aFlagOp[i].mask; + } + if( oldFlags!=db->flags ){ + sqlite3ExpirePreparedStatements(db); + } + if( pRes ){ + *pRes = (db->flags & aFlagOp[i].mask)!=0; + } + rc = SQLITE_OK; + break; + } + } + break; + } } + va_end(ap); + return rc; } + /* -** The following code executes when the parse fails +** Return true if the buffer z[0..n-1] contains all spaces. */ -#ifndef YYNOERRORRECOVERY -static void yy_parse_failed( - yyParser *yypParser /* The parser */ +static int allSpaces(const char *z, int n){ + while( n>0 && z[n-1]==' ' ){ n--; } + return n==0; +} + +/* +** This is the default collating function named "BINARY" which is always +** available. +** +** If the padFlag argument is not NULL then space padding at the end +** of strings is ignored. This implements the RTRIM collation. +*/ +static int binCollFunc( + void *padFlag, + int nKey1, const void *pKey1, + int nKey2, const void *pKey2 ){ - sqlite3ParserARG_FETCH; -#ifndef NDEBUG - if( yyTraceFILE ){ - fprintf(yyTraceFILE,"%sFail!\n",yyTracePrompt); + int rc, n; + n = nKey1yyidx>=0 ) yy_pop_parser_stack(yypParser); - /* Here code is inserted which will be executed whenever the - ** parser fails */ - sqlite3ParserARG_STORE; /* Suppress warning about unused %extra_argument variable */ + return rc; } -#endif /* YYNOERRORRECOVERY */ /* -** The following code executes when a syntax error first occurs. +** Another built-in collating sequence: NOCASE. +** +** This collating sequence is intended to be used for "case independent +** comparison". SQLite's knowledge of upper and lower case equivalents +** extends only to the 26 characters used in the English language. +** +** At the moment there is only a UTF-8 implementation. */ -static void yy_syntax_error( - yyParser *yypParser, /* The parser */ - int yymajor, /* The major type of the error token */ - YYMINORTYPE yyminor /* The minor type of the error token */ +static int nocaseCollatingFunc( + void *NotUsed, + int nKey1, const void *pKey1, + int nKey2, const void *pKey2 ){ - sqlite3ParserARG_FETCH; -#define TOKEN (yyminor.yy0) - - UNUSED_PARAMETER(yymajor); /* Silence some compiler warnings */ - assert( TOKEN.z[0] ); /* The tokenizer always gives us a token */ - sqlite3ErrorMsg(pParse, "near \"%T\": syntax error", &TOKEN); - sqlite3ParserARG_STORE; /* Suppress warning about unused %extra_argument variable */ + int r = sqlite3StrNICmp( + (const char *)pKey1, (const char *)pKey2, (nKey1yyidx>=0 ) yy_pop_parser_stack(yypParser); - /* Here code is inserted which will be executed whenever the - ** parser accepts */ - sqlite3ParserARG_STORE; /* Suppress warning about unused %extra_argument variable */ + return db->lastRowid; } -/* The main parser program. -** The first argument is a pointer to a structure obtained from -** "sqlite3ParserAlloc" which describes the current state of the parser. -** The second argument is the major token number. The third is -** the minor token. The fourth optional argument is whatever the -** user wants (and specified in the grammar) and is available for -** use by the action routines. -** -** Inputs: -**
      -**
    • A pointer to the parser (an opaque structure.) -**
    • The major token number. -**
    • The minor token number. -**
    • An option argument of a grammar-specified type. -**
    -** -** Outputs: -** None. +/* +** Return the number of changes in the most recent call to sqlite3_exec(). */ -SQLITE_PRIVATE void sqlite3Parser( - void *yyp, /* The parser */ - int yymajor, /* The major token code number */ - sqlite3ParserTOKENTYPE yyminor /* The value for the token */ - sqlite3ParserARG_PDECL /* Optional %extra_argument parameter */ -){ - YYMINORTYPE yyminorunion; - int yyact; /* The parser action. */ -#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY) - int yyendofinput; /* True if we are at the end of input */ -#endif -#ifdef YYERRORSYMBOL - int yyerrorhit = 0; /* True if yymajor has invoked an error */ +SQLITE_API int SQLITE_STDCALL sqlite3_changes(sqlite3 *db){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } #endif - yyParser *yypParser; /* The parser */ + return db->nChange; +} - /* (re)initialize the parser, if necessary */ - yypParser = (yyParser*)yyp; - if( yypParser->yyidx<0 ){ -#if YYSTACKDEPTH<=0 - if( yypParser->yystksz <=0 ){ - /*memset(&yyminorunion, 0, sizeof(yyminorunion));*/ - yyminorunion = yyzerominor; - yyStackOverflow(yypParser, &yyminorunion); - return; - } -#endif - yypParser->yyidx = 0; - yypParser->yyerrcnt = -1; - yypParser->yystack[0].stateno = 0; - yypParser->yystack[0].major = 0; +/* +** Return the number of changes since the database handle was opened. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_total_changes(sqlite3 *db){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; } - yyminorunion.yy0 = yyminor; -#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY) - yyendofinput = (yymajor==0); #endif - sqlite3ParserARG_STORE; + return db->nTotalChange; +} -#ifndef NDEBUG - if( yyTraceFILE ){ - fprintf(yyTraceFILE,"%sInput %s\n",yyTracePrompt,yyTokenName[yymajor]); +/* +** Close all open savepoints. This function only manipulates fields of the +** database handle object, it does not close any savepoints that may be open +** at the b-tree/pager level. +*/ +SQLITE_PRIVATE void sqlite3CloseSavepoints(sqlite3 *db){ + while( db->pSavepoint ){ + Savepoint *pTmp = db->pSavepoint; + db->pSavepoint = pTmp->pNext; + sqlite3DbFree(db, pTmp); } -#endif + db->nSavepoint = 0; + db->nStatement = 0; + db->isTransactionSavepoint = 0; +} - do{ - yyact = yy_find_shift_action(yypParser,(YYCODETYPE)yymajor); - if( yyactyyerrcnt--; - yymajor = YYNOCODE; - }else if( yyact < YYNSTATE + YYNRULE ){ - yy_reduce(yypParser,yyact-YYNSTATE); - }else{ - assert( yyact == YY_ERROR_ACTION ); -#ifdef YYERRORSYMBOL - int yymx; -#endif -#ifndef NDEBUG - if( yyTraceFILE ){ - fprintf(yyTraceFILE,"%sSyntax Error!\n",yyTracePrompt); +/* +** Invoke the destructor function associated with FuncDef p, if any. Except, +** if this is not the last copy of the function, do not invoke it. Multiple +** copies of a single function are created when create_function() is called +** with SQLITE_ANY as the encoding. +*/ +static void functionDestroy(sqlite3 *db, FuncDef *p){ + FuncDestructor *pDestructor = p->pDestructor; + if( pDestructor ){ + pDestructor->nRef--; + if( pDestructor->nRef==0 ){ + pDestructor->xDestroy(pDestructor->pUserData); + sqlite3DbFree(db, pDestructor); + } + } +} + +/* +** Disconnect all sqlite3_vtab objects that belong to database connection +** db. This is called when db is being closed. +*/ +static void disconnectAllVtab(sqlite3 *db){ +#ifndef SQLITE_OMIT_VIRTUALTABLE + int i; + sqlite3BtreeEnterAll(db); + for(i=0; inDb; i++){ + Schema *pSchema = db->aDb[i].pSchema; + if( db->aDb[i].pSchema ){ + HashElem *p; + for(p=sqliteHashFirst(&pSchema->tblHash); p; p=sqliteHashNext(p)){ + Table *pTab = (Table *)sqliteHashData(p); + if( IsVirtual(pTab) ) sqlite3VtabDisconnect(db, pTab); } + } + } + sqlite3VtabUnlockList(db); + sqlite3BtreeLeaveAll(db); +#else + UNUSED_PARAMETER(db); #endif -#ifdef YYERRORSYMBOL - /* A syntax error has occurred. - ** The response to an error depends upon whether or not the - ** grammar defines an error token "ERROR". - ** - ** This is what we do if the grammar does define ERROR: - ** - ** * Call the %syntax_error function. - ** - ** * Begin popping the stack until we enter a state where - ** it is legal to shift the error symbol, then shift - ** the error symbol. - ** - ** * Set the error count to three. - ** - ** * Begin accepting and shifting new tokens. No new error - ** processing will occur until three tokens have been - ** shifted successfully. - ** - */ - if( yypParser->yyerrcnt<0 ){ - yy_syntax_error(yypParser,yymajor,yyminorunion); - } - yymx = yypParser->yystack[yypParser->yyidx].major; - if( yymx==YYERRORSYMBOL || yyerrorhit ){ -#ifndef NDEBUG - if( yyTraceFILE ){ - fprintf(yyTraceFILE,"%sDiscard input token %s\n", - yyTracePrompt,yyTokenName[yymajor]); - } +} + +/* +** Return TRUE if database connection db has unfinalized prepared +** statements or unfinished sqlite3_backup objects. +*/ +static int connectionIsBusy(sqlite3 *db){ + int j; + assert( sqlite3_mutex_held(db->mutex) ); + if( db->pVdbe ) return 1; + for(j=0; jnDb; j++){ + Btree *pBt = db->aDb[j].pBt; + if( pBt && sqlite3BtreeIsInBackup(pBt) ) return 1; + } + return 0; +} + +/* +** Close an existing SQLite database +*/ +static int sqlite3Close(sqlite3 *db, int forceZombie){ + if( !db ){ + /* EVIDENCE-OF: R-63257-11740 Calling sqlite3_close() or + ** sqlite3_close_v2() with a NULL pointer argument is a harmless no-op. */ + return SQLITE_OK; + } + if( !sqlite3SafetyCheckSickOrOk(db) ){ + return SQLITE_MISUSE_BKPT; + } + sqlite3_mutex_enter(db->mutex); + + /* Force xDisconnect calls on all virtual tables */ + disconnectAllVtab(db); + + /* If a transaction is open, the disconnectAllVtab() call above + ** will not have called the xDisconnect() method on any virtual + ** tables in the db->aVTrans[] array. The following sqlite3VtabRollback() + ** call will do so. We need to do this before the check for active + ** SQL statements below, as the v-table implementation may be storing + ** some prepared statements internally. + */ + sqlite3VtabRollback(db); + + /* Legacy behavior (sqlite3_close() behavior) is to return + ** SQLITE_BUSY if the connection can not be closed immediately. + */ + if( !forceZombie && connectionIsBusy(db) ){ + sqlite3ErrorWithMsg(db, SQLITE_BUSY, "unable to close due to unfinalized " + "statements or unfinished backups"); + sqlite3_mutex_leave(db->mutex); + return SQLITE_BUSY; + } + +#ifdef SQLITE_ENABLE_SQLLOG + if( sqlite3GlobalConfig.xSqllog ){ + /* Closing the handle. Fourth parameter is passed the value 2. */ + sqlite3GlobalConfig.xSqllog(sqlite3GlobalConfig.pSqllogArg, db, 0, 2); + } #endif - yy_destructor(yypParser, (YYCODETYPE)yymajor,&yyminorunion); - yymajor = YYNOCODE; - }else{ - while( - yypParser->yyidx >= 0 && - yymx != YYERRORSYMBOL && - (yyact = yy_find_reduce_action( - yypParser->yystack[yypParser->yyidx].stateno, - YYERRORSYMBOL)) >= YYNSTATE - ){ - yy_pop_parser_stack(yypParser); - } - if( yypParser->yyidx < 0 || yymajor==0 ){ - yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); - yy_parse_failed(yypParser); - yymajor = YYNOCODE; - }else if( yymx!=YYERRORSYMBOL ){ - YYMINORTYPE u2; - u2.YYERRSYMDT = 0; - yy_shift(yypParser,yyact,YYERRORSYMBOL,&u2); - } + + /* Convert the connection into a zombie and then close it. + */ + db->magic = SQLITE_MAGIC_ZOMBIE; + sqlite3LeaveMutexAndCloseZombie(db); + return SQLITE_OK; +} + +/* +** Two variations on the public interface for closing a database +** connection. The sqlite3_close() version returns SQLITE_BUSY and +** leaves the connection option if there are unfinalized prepared +** statements or unfinished sqlite3_backups. The sqlite3_close_v2() +** version forces the connection to become a zombie if there are +** unclosed resources, and arranges for deallocation when the last +** prepare statement or sqlite3_backup closes. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_close(sqlite3 *db){ return sqlite3Close(db,0); } +SQLITE_API int SQLITE_STDCALL sqlite3_close_v2(sqlite3 *db){ return sqlite3Close(db,1); } + + +/* +** Close the mutex on database connection db. +** +** Furthermore, if database connection db is a zombie (meaning that there +** has been a prior call to sqlite3_close(db) or sqlite3_close_v2(db)) and +** every sqlite3_stmt has now been finalized and every sqlite3_backup has +** finished, then free all resources. +*/ +SQLITE_PRIVATE void sqlite3LeaveMutexAndCloseZombie(sqlite3 *db){ + HashElem *i; /* Hash table iterator */ + int j; + + /* If there are outstanding sqlite3_stmt or sqlite3_backup objects + ** or if the connection has not yet been closed by sqlite3_close_v2(), + ** then just leave the mutex and return. + */ + if( db->magic!=SQLITE_MAGIC_ZOMBIE || connectionIsBusy(db) ){ + sqlite3_mutex_leave(db->mutex); + return; + } + + /* If we reach this point, it means that the database connection has + ** closed all sqlite3_stmt and sqlite3_backup objects and has been + ** passed to sqlite3_close (meaning that it is a zombie). Therefore, + ** go ahead and free all resources. + */ + + /* If a transaction is open, roll it back. This also ensures that if + ** any database schemas have been modified by an uncommitted transaction + ** they are reset. And that the required b-tree mutex is held to make + ** the pager rollback and schema reset an atomic operation. */ + sqlite3RollbackAll(db, SQLITE_OK); + + /* Free any outstanding Savepoint structures. */ + sqlite3CloseSavepoints(db); + + /* Close all database connections */ + for(j=0; jnDb; j++){ + struct Db *pDb = &db->aDb[j]; + if( pDb->pBt ){ + sqlite3BtreeClose(pDb->pBt); + pDb->pBt = 0; + if( j!=1 ){ + pDb->pSchema = 0; } - yypParser->yyerrcnt = 3; - yyerrorhit = 1; -#elif defined(YYNOERRORRECOVERY) - /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to - ** do any kind of error recovery. Instead, simply invoke the syntax - ** error routine and continue going as if nothing had happened. - ** - ** Applications can set this macro (for example inside %include) if - ** they intend to abandon the parse upon the first syntax error seen. - */ - yy_syntax_error(yypParser,yymajor,yyminorunion); - yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); - yymajor = YYNOCODE; - -#else /* YYERRORSYMBOL is not defined */ - /* This is what we do if the grammar does not define ERROR: - ** - ** * Report an error message, and throw away the input token. - ** - ** * If the input token is $, then fail the parse. - ** - ** As before, subsequent error messages are suppressed until - ** three input tokens have been successfully shifted. - */ - if( yypParser->yyerrcnt<=0 ){ - yy_syntax_error(yypParser,yymajor,yyminorunion); + } + } + /* Clear the TEMP schema separately and last */ + if( db->aDb[1].pSchema ){ + sqlite3SchemaClear(db->aDb[1].pSchema); + } + sqlite3VtabUnlockList(db); + + /* Free up the array of auxiliary databases */ + sqlite3CollapseDatabaseArray(db); + assert( db->nDb<=2 ); + assert( db->aDb==db->aDbStatic ); + + /* Tell the code in notify.c that the connection no longer holds any + ** locks and does not require any further unlock-notify callbacks. + */ + sqlite3ConnectionClosed(db); + + for(j=0; jaFunc.a); j++){ + FuncDef *pNext, *pHash, *p; + for(p=db->aFunc.a[j]; p; p=pHash){ + pHash = p->pHash; + while( p ){ + functionDestroy(db, p); + pNext = p->pNext; + sqlite3DbFree(db, p); + p = pNext; } - yypParser->yyerrcnt = 3; - yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); - if( yyendofinput ){ - yy_parse_failed(yypParser); + } + } + for(i=sqliteHashFirst(&db->aCollSeq); i; i=sqliteHashNext(i)){ + CollSeq *pColl = (CollSeq *)sqliteHashData(i); + /* Invoke any destructors registered for collation sequence user data. */ + for(j=0; j<3; j++){ + if( pColl[j].xDel ){ + pColl[j].xDel(pColl[j].pUser); } - yymajor = YYNOCODE; -#endif } - }while( yymajor!=YYNOCODE && yypParser->yyidx>=0 ); - return; + sqlite3DbFree(db, pColl); + } + sqlite3HashClear(&db->aCollSeq); +#ifndef SQLITE_OMIT_VIRTUALTABLE + for(i=sqliteHashFirst(&db->aModule); i; i=sqliteHashNext(i)){ + Module *pMod = (Module *)sqliteHashData(i); + if( pMod->xDestroy ){ + pMod->xDestroy(pMod->pAux); + } + sqlite3DbFree(db, pMod); + } + sqlite3HashClear(&db->aModule); +#endif + + sqlite3Error(db, SQLITE_OK); /* Deallocates any cached error strings. */ + sqlite3ValueFree(db->pErr); + sqlite3CloseExtensions(db); +#if SQLITE_USER_AUTHENTICATION + sqlite3_free(db->auth.zAuthUser); + sqlite3_free(db->auth.zAuthPW); +#endif + + db->magic = SQLITE_MAGIC_ERROR; + + /* The temp-database schema is allocated differently from the other schema + ** objects (using sqliteMalloc() directly, instead of sqlite3BtreeSchema()). + ** So it needs to be freed here. Todo: Why not roll the temp schema into + ** the same sqliteMalloc() as the one that allocates the database + ** structure? + */ + sqlite3DbFree(db, db->aDb[1].pSchema); + sqlite3_mutex_leave(db->mutex); + db->magic = SQLITE_MAGIC_CLOSED; + sqlite3_mutex_free(db->mutex); + assert( db->lookaside.nOut==0 ); /* Fails on a lookaside memory leak */ + if( db->lookaside.bMalloced ){ + sqlite3_free(db->lookaside.pStart); + } + sqlite3_free(db); } -/************** End of parse.c ***********************************************/ -/************** Begin file tokenize.c ****************************************/ /* -** 2001 September 15 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** An tokenizer for SQL -** -** This file contains C code that splits an SQL input string up into -** individual tokens and sends those tokens one-by-one over to the -** parser for analysis. +** Rollback all database files. If tripCode is not SQLITE_OK, then +** any write cursors are invalidated ("tripped" - as in "tripping a circuit +** breaker") and made to return tripCode if there are any further +** attempts to use that cursor. Read cursors remain open and valid +** but are "saved" in case the table pages are moved around. */ -/* #include */ +SQLITE_PRIVATE void sqlite3RollbackAll(sqlite3 *db, int tripCode){ + int i; + int inTrans = 0; + int schemaChange; + assert( sqlite3_mutex_held(db->mutex) ); + sqlite3BeginBenignMalloc(); + + /* Obtain all b-tree mutexes before making any calls to BtreeRollback(). + ** This is important in case the transaction being rolled back has + ** modified the database schema. If the b-tree mutexes are not taken + ** here, then another shared-cache connection might sneak in between + ** the database rollback and schema reset, which can cause false + ** corruption reports in some cases. */ + sqlite3BtreeEnterAll(db); + schemaChange = (db->flags & SQLITE_InternChanges)!=0 && db->init.busy==0; + + for(i=0; inDb; i++){ + Btree *p = db->aDb[i].pBt; + if( p ){ + if( sqlite3BtreeIsInTrans(p) ){ + inTrans = 1; + } + sqlite3BtreeRollback(p, tripCode, !schemaChange); + } + } + sqlite3VtabRollback(db); + sqlite3EndBenignMalloc(); + + if( (db->flags&SQLITE_InternChanges)!=0 && db->init.busy==0 ){ + sqlite3ExpirePreparedStatements(db); + sqlite3ResetAllSchemasOfConnection(db); + } + sqlite3BtreeLeaveAll(db); + + /* Any deferred constraint violations have now been resolved. */ + db->nDeferredCons = 0; + db->nDeferredImmCons = 0; + db->flags &= ~SQLITE_DeferFKs; + + /* If one has been configured, invoke the rollback-hook callback */ + if( db->xRollbackCallback && (inTrans || !db->autoCommit) ){ + db->xRollbackCallback(db->pRollbackArg); + } +} /* -** The charMap() macro maps alphabetic characters into their -** lower-case ASCII equivalent. On ASCII machines, this is just -** an upper-to-lower case map. On EBCDIC machines we also need -** to adjust the encoding. Only alphabetic characters and underscores -** need to be translated. +** Return a static string containing the name corresponding to the error code +** specified in the argument. */ -#ifdef SQLITE_ASCII -# define charMap(X) sqlite3UpperToLower[(unsigned char)X] -#endif -#ifdef SQLITE_EBCDIC -# define charMap(X) ebcdicToAscii[(unsigned char)X] -const unsigned char ebcdicToAscii[] = { -/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5x */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, /* 6x */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7x */ - 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* 8x */ - 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* 9x */ - 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ax */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ - 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* Cx */ - 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* Dx */ - 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ex */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Fx */ -}; +#if defined(SQLITE_NEED_ERR_NAME) +SQLITE_PRIVATE const char *sqlite3ErrName(int rc){ + const char *zName = 0; + int i, origRc = rc; + for(i=0; i<2 && zName==0; i++, rc &= 0xff){ + switch( rc ){ + case SQLITE_OK: zName = "SQLITE_OK"; break; + case SQLITE_ERROR: zName = "SQLITE_ERROR"; break; + case SQLITE_INTERNAL: zName = "SQLITE_INTERNAL"; break; + case SQLITE_PERM: zName = "SQLITE_PERM"; break; + case SQLITE_ABORT: zName = "SQLITE_ABORT"; break; + case SQLITE_ABORT_ROLLBACK: zName = "SQLITE_ABORT_ROLLBACK"; break; + case SQLITE_BUSY: zName = "SQLITE_BUSY"; break; + case SQLITE_BUSY_RECOVERY: zName = "SQLITE_BUSY_RECOVERY"; break; + case SQLITE_BUSY_SNAPSHOT: zName = "SQLITE_BUSY_SNAPSHOT"; break; + case SQLITE_LOCKED: zName = "SQLITE_LOCKED"; break; + case SQLITE_LOCKED_SHAREDCACHE: zName = "SQLITE_LOCKED_SHAREDCACHE";break; + case SQLITE_NOMEM: zName = "SQLITE_NOMEM"; break; + case SQLITE_READONLY: zName = "SQLITE_READONLY"; break; + case SQLITE_READONLY_RECOVERY: zName = "SQLITE_READONLY_RECOVERY"; break; + case SQLITE_READONLY_CANTLOCK: zName = "SQLITE_READONLY_CANTLOCK"; break; + case SQLITE_READONLY_ROLLBACK: zName = "SQLITE_READONLY_ROLLBACK"; break; + case SQLITE_READONLY_DBMOVED: zName = "SQLITE_READONLY_DBMOVED"; break; + case SQLITE_INTERRUPT: zName = "SQLITE_INTERRUPT"; break; + case SQLITE_IOERR: zName = "SQLITE_IOERR"; break; + case SQLITE_IOERR_READ: zName = "SQLITE_IOERR_READ"; break; + case SQLITE_IOERR_SHORT_READ: zName = "SQLITE_IOERR_SHORT_READ"; break; + case SQLITE_IOERR_WRITE: zName = "SQLITE_IOERR_WRITE"; break; + case SQLITE_IOERR_FSYNC: zName = "SQLITE_IOERR_FSYNC"; break; + case SQLITE_IOERR_DIR_FSYNC: zName = "SQLITE_IOERR_DIR_FSYNC"; break; + case SQLITE_IOERR_TRUNCATE: zName = "SQLITE_IOERR_TRUNCATE"; break; + case SQLITE_IOERR_FSTAT: zName = "SQLITE_IOERR_FSTAT"; break; + case SQLITE_IOERR_UNLOCK: zName = "SQLITE_IOERR_UNLOCK"; break; + case SQLITE_IOERR_RDLOCK: zName = "SQLITE_IOERR_RDLOCK"; break; + case SQLITE_IOERR_DELETE: zName = "SQLITE_IOERR_DELETE"; break; + case SQLITE_IOERR_NOMEM: zName = "SQLITE_IOERR_NOMEM"; break; + case SQLITE_IOERR_ACCESS: zName = "SQLITE_IOERR_ACCESS"; break; + case SQLITE_IOERR_CHECKRESERVEDLOCK: + zName = "SQLITE_IOERR_CHECKRESERVEDLOCK"; break; + case SQLITE_IOERR_LOCK: zName = "SQLITE_IOERR_LOCK"; break; + case SQLITE_IOERR_CLOSE: zName = "SQLITE_IOERR_CLOSE"; break; + case SQLITE_IOERR_DIR_CLOSE: zName = "SQLITE_IOERR_DIR_CLOSE"; break; + case SQLITE_IOERR_SHMOPEN: zName = "SQLITE_IOERR_SHMOPEN"; break; + case SQLITE_IOERR_SHMSIZE: zName = "SQLITE_IOERR_SHMSIZE"; break; + case SQLITE_IOERR_SHMLOCK: zName = "SQLITE_IOERR_SHMLOCK"; break; + case SQLITE_IOERR_SHMMAP: zName = "SQLITE_IOERR_SHMMAP"; break; + case SQLITE_IOERR_SEEK: zName = "SQLITE_IOERR_SEEK"; break; + case SQLITE_IOERR_DELETE_NOENT: zName = "SQLITE_IOERR_DELETE_NOENT";break; + case SQLITE_IOERR_MMAP: zName = "SQLITE_IOERR_MMAP"; break; + case SQLITE_IOERR_GETTEMPPATH: zName = "SQLITE_IOERR_GETTEMPPATH"; break; + case SQLITE_IOERR_CONVPATH: zName = "SQLITE_IOERR_CONVPATH"; break; + case SQLITE_CORRUPT: zName = "SQLITE_CORRUPT"; break; + case SQLITE_CORRUPT_VTAB: zName = "SQLITE_CORRUPT_VTAB"; break; + case SQLITE_NOTFOUND: zName = "SQLITE_NOTFOUND"; break; + case SQLITE_FULL: zName = "SQLITE_FULL"; break; + case SQLITE_CANTOPEN: zName = "SQLITE_CANTOPEN"; break; + case SQLITE_CANTOPEN_NOTEMPDIR: zName = "SQLITE_CANTOPEN_NOTEMPDIR";break; + case SQLITE_CANTOPEN_ISDIR: zName = "SQLITE_CANTOPEN_ISDIR"; break; + case SQLITE_CANTOPEN_FULLPATH: zName = "SQLITE_CANTOPEN_FULLPATH"; break; + case SQLITE_CANTOPEN_CONVPATH: zName = "SQLITE_CANTOPEN_CONVPATH"; break; + case SQLITE_PROTOCOL: zName = "SQLITE_PROTOCOL"; break; + case SQLITE_EMPTY: zName = "SQLITE_EMPTY"; break; + case SQLITE_SCHEMA: zName = "SQLITE_SCHEMA"; break; + case SQLITE_TOOBIG: zName = "SQLITE_TOOBIG"; break; + case SQLITE_CONSTRAINT: zName = "SQLITE_CONSTRAINT"; break; + case SQLITE_CONSTRAINT_UNIQUE: zName = "SQLITE_CONSTRAINT_UNIQUE"; break; + case SQLITE_CONSTRAINT_TRIGGER: zName = "SQLITE_CONSTRAINT_TRIGGER";break; + case SQLITE_CONSTRAINT_FOREIGNKEY: + zName = "SQLITE_CONSTRAINT_FOREIGNKEY"; break; + case SQLITE_CONSTRAINT_CHECK: zName = "SQLITE_CONSTRAINT_CHECK"; break; + case SQLITE_CONSTRAINT_PRIMARYKEY: + zName = "SQLITE_CONSTRAINT_PRIMARYKEY"; break; + case SQLITE_CONSTRAINT_NOTNULL: zName = "SQLITE_CONSTRAINT_NOTNULL";break; + case SQLITE_CONSTRAINT_COMMITHOOK: + zName = "SQLITE_CONSTRAINT_COMMITHOOK"; break; + case SQLITE_CONSTRAINT_VTAB: zName = "SQLITE_CONSTRAINT_VTAB"; break; + case SQLITE_CONSTRAINT_FUNCTION: + zName = "SQLITE_CONSTRAINT_FUNCTION"; break; + case SQLITE_CONSTRAINT_ROWID: zName = "SQLITE_CONSTRAINT_ROWID"; break; + case SQLITE_MISMATCH: zName = "SQLITE_MISMATCH"; break; + case SQLITE_MISUSE: zName = "SQLITE_MISUSE"; break; + case SQLITE_NOLFS: zName = "SQLITE_NOLFS"; break; + case SQLITE_AUTH: zName = "SQLITE_AUTH"; break; + case SQLITE_FORMAT: zName = "SQLITE_FORMAT"; break; + case SQLITE_RANGE: zName = "SQLITE_RANGE"; break; + case SQLITE_NOTADB: zName = "SQLITE_NOTADB"; break; + case SQLITE_ROW: zName = "SQLITE_ROW"; break; + case SQLITE_NOTICE: zName = "SQLITE_NOTICE"; break; + case SQLITE_NOTICE_RECOVER_WAL: zName = "SQLITE_NOTICE_RECOVER_WAL";break; + case SQLITE_NOTICE_RECOVER_ROLLBACK: + zName = "SQLITE_NOTICE_RECOVER_ROLLBACK"; break; + case SQLITE_WARNING: zName = "SQLITE_WARNING"; break; + case SQLITE_WARNING_AUTOINDEX: zName = "SQLITE_WARNING_AUTOINDEX"; break; + case SQLITE_DONE: zName = "SQLITE_DONE"; break; + } + } + if( zName==0 ){ + static char zBuf[50]; + sqlite3_snprintf(sizeof(zBuf), zBuf, "SQLITE_UNKNOWN(%d)", origRc); + zName = zBuf; + } + return zName; +} #endif /* -** The sqlite3KeywordCode function looks up an identifier to determine if -** it is a keyword. If it is a keyword, the token code of that keyword is -** returned. If the input is not a keyword, TK_ID is returned. -** -** The implementation of this routine was generated by a program, -** mkkeywordhash.h, located in the tool subdirectory of the distribution. -** The output of the mkkeywordhash.c program is written into a file -** named keywordhash.h and then included into this source file by -** the #include below. -*/ -/************** Include keywordhash.h in the middle of tokenize.c ************/ -/************** Begin file keywordhash.h *************************************/ -/***** This file contains automatically generated code ****** -** -** The code in this file has been automatically generated by -** -** sqlite/tool/mkkeywordhash.c -** -** The code in this file implements a function that determines whether -** or not a given identifier is really an SQL keyword. The same thing -** might be implemented more directly using a hand-written hash table. -** But by using this automatically generated code, the size of the code -** is substantially reduced. This is important for embedded applications -** on platforms with limited memory. +** Return a static string that describes the kind of error specified in the +** argument. */ -/* Hash score: 182 */ -static int keywordCode(const char *z, int n){ - /* zText[] encodes 834 bytes of keywords in 554 bytes */ - /* REINDEXEDESCAPEACHECKEYBEFOREIGNOREGEXPLAINSTEADDATABASELECT */ - /* ABLEFTHENDEFERRABLELSEXCEPTRANSACTIONATURALTERAISEXCLUSIVE */ - /* XISTSAVEPOINTERSECTRIGGEREFERENCESCONSTRAINTOFFSETEMPORARY */ - /* UNIQUERYWITHOUTERELEASEATTACHAVINGROUPDATEBEGINNERECURSIVE */ - /* BETWEENOTNULLIKECASCADELETECASECOLLATECREATECURRENT_DATEDETACH */ - /* IMMEDIATEJOINSERTMATCHPLANALYZEPRAGMABORTVALUESVIRTUALIMITWHEN */ - /* WHERENAMEAFTEREPLACEANDEFAULTAUTOINCREMENTCASTCOLUMNCOMMIT */ - /* CONFLICTCROSSCURRENT_TIMESTAMPRIMARYDEFERREDISTINCTDROPFAIL */ - /* FROMFULLGLOBYIFISNULLORDERESTRICTRIGHTROLLBACKROWUNIONUSING */ - /* VACUUMVIEWINITIALLY */ - static const char zText[553] = { - 'R','E','I','N','D','E','X','E','D','E','S','C','A','P','E','A','C','H', - 'E','C','K','E','Y','B','E','F','O','R','E','I','G','N','O','R','E','G', - 'E','X','P','L','A','I','N','S','T','E','A','D','D','A','T','A','B','A', - 'S','E','L','E','C','T','A','B','L','E','F','T','H','E','N','D','E','F', - 'E','R','R','A','B','L','E','L','S','E','X','C','E','P','T','R','A','N', - 'S','A','C','T','I','O','N','A','T','U','R','A','L','T','E','R','A','I', - 'S','E','X','C','L','U','S','I','V','E','X','I','S','T','S','A','V','E', - 'P','O','I','N','T','E','R','S','E','C','T','R','I','G','G','E','R','E', - 'F','E','R','E','N','C','E','S','C','O','N','S','T','R','A','I','N','T', - 'O','F','F','S','E','T','E','M','P','O','R','A','R','Y','U','N','I','Q', - 'U','E','R','Y','W','I','T','H','O','U','T','E','R','E','L','E','A','S', - 'E','A','T','T','A','C','H','A','V','I','N','G','R','O','U','P','D','A', - 'T','E','B','E','G','I','N','N','E','R','E','C','U','R','S','I','V','E', - 'B','E','T','W','E','E','N','O','T','N','U','L','L','I','K','E','C','A', - 'S','C','A','D','E','L','E','T','E','C','A','S','E','C','O','L','L','A', - 'T','E','C','R','E','A','T','E','C','U','R','R','E','N','T','_','D','A', - 'T','E','D','E','T','A','C','H','I','M','M','E','D','I','A','T','E','J', - 'O','I','N','S','E','R','T','M','A','T','C','H','P','L','A','N','A','L', - 'Y','Z','E','P','R','A','G','M','A','B','O','R','T','V','A','L','U','E', - 'S','V','I','R','T','U','A','L','I','M','I','T','W','H','E','N','W','H', - 'E','R','E','N','A','M','E','A','F','T','E','R','E','P','L','A','C','E', - 'A','N','D','E','F','A','U','L','T','A','U','T','O','I','N','C','R','E', - 'M','E','N','T','C','A','S','T','C','O','L','U','M','N','C','O','M','M', - 'I','T','C','O','N','F','L','I','C','T','C','R','O','S','S','C','U','R', - 'R','E','N','T','_','T','I','M','E','S','T','A','M','P','R','I','M','A', - 'R','Y','D','E','F','E','R','R','E','D','I','S','T','I','N','C','T','D', - 'R','O','P','F','A','I','L','F','R','O','M','F','U','L','L','G','L','O', - 'B','Y','I','F','I','S','N','U','L','L','O','R','D','E','R','E','S','T', - 'R','I','C','T','R','I','G','H','T','R','O','L','L','B','A','C','K','R', - 'O','W','U','N','I','O','N','U','S','I','N','G','V','A','C','U','U','M', - 'V','I','E','W','I','N','I','T','I','A','L','L','Y', - }; - static const unsigned char aHash[127] = { - 76, 105, 117, 74, 0, 45, 0, 0, 82, 0, 77, 0, 0, - 42, 12, 78, 15, 0, 116, 85, 54, 112, 0, 19, 0, 0, - 121, 0, 119, 115, 0, 22, 93, 0, 9, 0, 0, 70, 71, - 0, 69, 6, 0, 48, 90, 102, 0, 118, 101, 0, 0, 44, - 0, 103, 24, 0, 17, 0, 122, 53, 23, 0, 5, 110, 25, - 96, 0, 0, 124, 106, 60, 123, 57, 28, 55, 0, 91, 0, - 100, 26, 0, 99, 0, 0, 0, 95, 92, 97, 88, 109, 14, - 39, 108, 0, 81, 0, 18, 89, 111, 32, 0, 120, 80, 113, - 62, 46, 84, 0, 0, 94, 40, 59, 114, 0, 36, 0, 0, - 29, 0, 86, 63, 64, 0, 20, 61, 0, 56, - }; - static const unsigned char aNext[124] = { - 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 2, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, - 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 33, 0, 21, 0, 0, 0, 0, 0, 50, - 0, 43, 3, 47, 0, 0, 0, 0, 30, 0, 58, 0, 38, - 0, 0, 0, 1, 66, 0, 0, 67, 0, 41, 0, 0, 0, - 0, 0, 0, 49, 65, 0, 0, 0, 0, 31, 52, 16, 34, - 10, 0, 0, 0, 0, 0, 0, 0, 11, 72, 79, 0, 8, - 0, 104, 98, 0, 107, 0, 87, 0, 75, 51, 0, 27, 37, - 73, 83, 0, 35, 68, 0, 0, - }; - static const unsigned char aLen[124] = { - 7, 7, 5, 4, 6, 4, 5, 3, 6, 7, 3, 6, 6, - 7, 7, 3, 8, 2, 6, 5, 4, 4, 3, 10, 4, 6, - 11, 6, 2, 7, 5, 5, 9, 6, 9, 9, 7, 10, 10, - 4, 6, 2, 3, 9, 4, 2, 6, 5, 7, 4, 5, 7, - 6, 6, 5, 6, 5, 5, 9, 7, 7, 3, 2, 4, 4, - 7, 3, 6, 4, 7, 6, 12, 6, 9, 4, 6, 5, 4, - 7, 6, 5, 6, 7, 5, 4, 5, 6, 5, 7, 3, 7, - 13, 2, 2, 4, 6, 6, 8, 5, 17, 12, 7, 8, 8, - 2, 4, 4, 4, 4, 4, 2, 2, 6, 5, 8, 5, 8, - 3, 5, 5, 6, 4, 9, 3, - }; - static const unsigned short int aOffset[124] = { - 0, 2, 2, 8, 9, 14, 16, 20, 23, 25, 25, 29, 33, - 36, 41, 46, 48, 53, 54, 59, 62, 65, 67, 69, 78, 81, - 86, 91, 95, 96, 101, 105, 109, 117, 122, 128, 136, 142, 152, - 159, 162, 162, 165, 167, 167, 171, 176, 179, 184, 184, 188, 192, - 199, 204, 209, 212, 218, 221, 225, 234, 240, 240, 240, 243, 246, - 250, 251, 255, 261, 265, 272, 278, 290, 296, 305, 307, 313, 318, - 320, 327, 332, 337, 343, 349, 354, 358, 361, 367, 371, 378, 380, - 387, 389, 391, 400, 404, 410, 416, 424, 429, 429, 445, 452, 459, - 460, 467, 471, 475, 479, 483, 486, 488, 490, 496, 500, 508, 513, - 521, 524, 529, 534, 540, 544, 549, - }; - static const unsigned char aCode[124] = { - TK_REINDEX, TK_INDEXED, TK_INDEX, TK_DESC, TK_ESCAPE, - TK_EACH, TK_CHECK, TK_KEY, TK_BEFORE, TK_FOREIGN, - TK_FOR, TK_IGNORE, TK_LIKE_KW, TK_EXPLAIN, TK_INSTEAD, - TK_ADD, TK_DATABASE, TK_AS, TK_SELECT, TK_TABLE, - TK_JOIN_KW, TK_THEN, TK_END, TK_DEFERRABLE, TK_ELSE, - TK_EXCEPT, TK_TRANSACTION,TK_ACTION, TK_ON, TK_JOIN_KW, - TK_ALTER, TK_RAISE, TK_EXCLUSIVE, TK_EXISTS, TK_SAVEPOINT, - TK_INTERSECT, TK_TRIGGER, TK_REFERENCES, TK_CONSTRAINT, TK_INTO, - TK_OFFSET, TK_OF, TK_SET, TK_TEMP, TK_TEMP, - TK_OR, TK_UNIQUE, TK_QUERY, TK_WITHOUT, TK_WITH, - TK_JOIN_KW, TK_RELEASE, TK_ATTACH, TK_HAVING, TK_GROUP, - TK_UPDATE, TK_BEGIN, TK_JOIN_KW, TK_RECURSIVE, TK_BETWEEN, - TK_NOTNULL, TK_NOT, TK_NO, TK_NULL, TK_LIKE_KW, - TK_CASCADE, TK_ASC, TK_DELETE, TK_CASE, TK_COLLATE, - TK_CREATE, TK_CTIME_KW, TK_DETACH, TK_IMMEDIATE, TK_JOIN, - TK_INSERT, TK_MATCH, TK_PLAN, TK_ANALYZE, TK_PRAGMA, - TK_ABORT, TK_VALUES, TK_VIRTUAL, TK_LIMIT, TK_WHEN, - TK_WHERE, TK_RENAME, TK_AFTER, TK_REPLACE, TK_AND, - TK_DEFAULT, TK_AUTOINCR, TK_TO, TK_IN, TK_CAST, - TK_COLUMNKW, TK_COMMIT, TK_CONFLICT, TK_JOIN_KW, TK_CTIME_KW, - TK_CTIME_KW, TK_PRIMARY, TK_DEFERRED, TK_DISTINCT, TK_IS, - TK_DROP, TK_FAIL, TK_FROM, TK_JOIN_KW, TK_LIKE_KW, - TK_BY, TK_IF, TK_ISNULL, TK_ORDER, TK_RESTRICT, - TK_JOIN_KW, TK_ROLLBACK, TK_ROW, TK_UNION, TK_USING, - TK_VACUUM, TK_VIEW, TK_INITIALLY, TK_ALL, - }; - int h, i; - if( n<2 ) return TK_ID; - h = ((charMap(z[0])*4) ^ - (charMap(z[n-1])*3) ^ - n) % 127; - for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){ - if( aLen[i]==n && sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){ - testcase( i==0 ); /* REINDEX */ - testcase( i==1 ); /* INDEXED */ - testcase( i==2 ); /* INDEX */ - testcase( i==3 ); /* DESC */ - testcase( i==4 ); /* ESCAPE */ - testcase( i==5 ); /* EACH */ - testcase( i==6 ); /* CHECK */ - testcase( i==7 ); /* KEY */ - testcase( i==8 ); /* BEFORE */ - testcase( i==9 ); /* FOREIGN */ - testcase( i==10 ); /* FOR */ - testcase( i==11 ); /* IGNORE */ - testcase( i==12 ); /* REGEXP */ - testcase( i==13 ); /* EXPLAIN */ - testcase( i==14 ); /* INSTEAD */ - testcase( i==15 ); /* ADD */ - testcase( i==16 ); /* DATABASE */ - testcase( i==17 ); /* AS */ - testcase( i==18 ); /* SELECT */ - testcase( i==19 ); /* TABLE */ - testcase( i==20 ); /* LEFT */ - testcase( i==21 ); /* THEN */ - testcase( i==22 ); /* END */ - testcase( i==23 ); /* DEFERRABLE */ - testcase( i==24 ); /* ELSE */ - testcase( i==25 ); /* EXCEPT */ - testcase( i==26 ); /* TRANSACTION */ - testcase( i==27 ); /* ACTION */ - testcase( i==28 ); /* ON */ - testcase( i==29 ); /* NATURAL */ - testcase( i==30 ); /* ALTER */ - testcase( i==31 ); /* RAISE */ - testcase( i==32 ); /* EXCLUSIVE */ - testcase( i==33 ); /* EXISTS */ - testcase( i==34 ); /* SAVEPOINT */ - testcase( i==35 ); /* INTERSECT */ - testcase( i==36 ); /* TRIGGER */ - testcase( i==37 ); /* REFERENCES */ - testcase( i==38 ); /* CONSTRAINT */ - testcase( i==39 ); /* INTO */ - testcase( i==40 ); /* OFFSET */ - testcase( i==41 ); /* OF */ - testcase( i==42 ); /* SET */ - testcase( i==43 ); /* TEMPORARY */ - testcase( i==44 ); /* TEMP */ - testcase( i==45 ); /* OR */ - testcase( i==46 ); /* UNIQUE */ - testcase( i==47 ); /* QUERY */ - testcase( i==48 ); /* WITHOUT */ - testcase( i==49 ); /* WITH */ - testcase( i==50 ); /* OUTER */ - testcase( i==51 ); /* RELEASE */ - testcase( i==52 ); /* ATTACH */ - testcase( i==53 ); /* HAVING */ - testcase( i==54 ); /* GROUP */ - testcase( i==55 ); /* UPDATE */ - testcase( i==56 ); /* BEGIN */ - testcase( i==57 ); /* INNER */ - testcase( i==58 ); /* RECURSIVE */ - testcase( i==59 ); /* BETWEEN */ - testcase( i==60 ); /* NOTNULL */ - testcase( i==61 ); /* NOT */ - testcase( i==62 ); /* NO */ - testcase( i==63 ); /* NULL */ - testcase( i==64 ); /* LIKE */ - testcase( i==65 ); /* CASCADE */ - testcase( i==66 ); /* ASC */ - testcase( i==67 ); /* DELETE */ - testcase( i==68 ); /* CASE */ - testcase( i==69 ); /* COLLATE */ - testcase( i==70 ); /* CREATE */ - testcase( i==71 ); /* CURRENT_DATE */ - testcase( i==72 ); /* DETACH */ - testcase( i==73 ); /* IMMEDIATE */ - testcase( i==74 ); /* JOIN */ - testcase( i==75 ); /* INSERT */ - testcase( i==76 ); /* MATCH */ - testcase( i==77 ); /* PLAN */ - testcase( i==78 ); /* ANALYZE */ - testcase( i==79 ); /* PRAGMA */ - testcase( i==80 ); /* ABORT */ - testcase( i==81 ); /* VALUES */ - testcase( i==82 ); /* VIRTUAL */ - testcase( i==83 ); /* LIMIT */ - testcase( i==84 ); /* WHEN */ - testcase( i==85 ); /* WHERE */ - testcase( i==86 ); /* RENAME */ - testcase( i==87 ); /* AFTER */ - testcase( i==88 ); /* REPLACE */ - testcase( i==89 ); /* AND */ - testcase( i==90 ); /* DEFAULT */ - testcase( i==91 ); /* AUTOINCREMENT */ - testcase( i==92 ); /* TO */ - testcase( i==93 ); /* IN */ - testcase( i==94 ); /* CAST */ - testcase( i==95 ); /* COLUMN */ - testcase( i==96 ); /* COMMIT */ - testcase( i==97 ); /* CONFLICT */ - testcase( i==98 ); /* CROSS */ - testcase( i==99 ); /* CURRENT_TIMESTAMP */ - testcase( i==100 ); /* CURRENT_TIME */ - testcase( i==101 ); /* PRIMARY */ - testcase( i==102 ); /* DEFERRED */ - testcase( i==103 ); /* DISTINCT */ - testcase( i==104 ); /* IS */ - testcase( i==105 ); /* DROP */ - testcase( i==106 ); /* FAIL */ - testcase( i==107 ); /* FROM */ - testcase( i==108 ); /* FULL */ - testcase( i==109 ); /* GLOB */ - testcase( i==110 ); /* BY */ - testcase( i==111 ); /* IF */ - testcase( i==112 ); /* ISNULL */ - testcase( i==113 ); /* ORDER */ - testcase( i==114 ); /* RESTRICT */ - testcase( i==115 ); /* RIGHT */ - testcase( i==116 ); /* ROLLBACK */ - testcase( i==117 ); /* ROW */ - testcase( i==118 ); /* UNION */ - testcase( i==119 ); /* USING */ - testcase( i==120 ); /* VACUUM */ - testcase( i==121 ); /* VIEW */ - testcase( i==122 ); /* INITIALLY */ - testcase( i==123 ); /* ALL */ - return aCode[i]; +SQLITE_PRIVATE const char *sqlite3ErrStr(int rc){ + static const char* const aMsg[] = { + /* SQLITE_OK */ "not an error", + /* SQLITE_ERROR */ "SQL logic error or missing database", + /* SQLITE_INTERNAL */ 0, + /* SQLITE_PERM */ "access permission denied", + /* SQLITE_ABORT */ "callback requested query abort", + /* SQLITE_BUSY */ "database is locked", + /* SQLITE_LOCKED */ "database table is locked", + /* SQLITE_NOMEM */ "out of memory", + /* SQLITE_READONLY */ "attempt to write a readonly database", + /* SQLITE_INTERRUPT */ "interrupted", + /* SQLITE_IOERR */ "disk I/O error", + /* SQLITE_CORRUPT */ "database disk image is malformed", + /* SQLITE_NOTFOUND */ "unknown operation", + /* SQLITE_FULL */ "database or disk is full", + /* SQLITE_CANTOPEN */ "unable to open database file", + /* SQLITE_PROTOCOL */ "locking protocol", + /* SQLITE_EMPTY */ "table contains no data", + /* SQLITE_SCHEMA */ "database schema has changed", + /* SQLITE_TOOBIG */ "string or blob too big", + /* SQLITE_CONSTRAINT */ "constraint failed", + /* SQLITE_MISMATCH */ "datatype mismatch", + /* SQLITE_MISUSE */ "library routine called out of sequence", + /* SQLITE_NOLFS */ "large file support is disabled", + /* SQLITE_AUTH */ "authorization denied", + /* SQLITE_FORMAT */ "auxiliary database format error", + /* SQLITE_RANGE */ "bind or column index out of range", + /* SQLITE_NOTADB */ "file is encrypted or is not a database", + }; + const char *zErr = "unknown error"; + switch( rc ){ + case SQLITE_ABORT_ROLLBACK: { + zErr = "abort due to ROLLBACK"; + break; + } + default: { + rc &= 0xff; + if( ALWAYS(rc>=0) && rcbusyTimeout; + int delay, prior; + assert( count>=0 ); + if( count < NDELAY ){ + delay = delays[count]; + prior = totals[count]; + }else{ + delay = delays[NDELAY-1]; + prior = totals[NDELAY-1] + delay*(count-(NDELAY-1)); + } + if( prior + delay > timeout ){ + delay = timeout - prior; + if( delay<=0 ) return 0; + } + sqlite3OsSleep(db->pVfs, delay*1000); + return 1; +#else + sqlite3 *db = (sqlite3 *)ptr; + int timeout = ((sqlite3 *)ptr)->busyTimeout; + if( (count+1)*1000 > timeout ){ + return 0; + } + sqlite3OsSleep(db->pVfs, 1000000); + return 1; +#endif +} /* -** If X is a character that can be used in an identifier then -** IdChar(X) will be true. Otherwise it is false. -** -** For ASCII, any character with the high-order bit set is -** allowed in an identifier. For 7-bit characters, -** sqlite3IsIdChar[X] must be 1. -** -** For EBCDIC, the rules are more complex but have the same -** end result. +** Invoke the given busy handler. ** -** Ticket #1066. the SQL standard does not allow '$' in the -** middle of identifiers. But many SQL implementations do. -** SQLite will allow '$' in identifiers for compatibility. -** But the feature is undocumented. +** This routine is called when an operation failed with a lock. +** If this routine returns non-zero, the lock is retried. If it +** returns 0, the operation aborts with an SQLITE_BUSY error. */ -#ifdef SQLITE_ASCII -#define IdChar(C) ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0) -#endif -#ifdef SQLITE_EBCDIC -SQLITE_PRIVATE const char sqlite3IsEbcdicIdChar[] = { -/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ - 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 4x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, /* 5x */ - 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, /* 6x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, /* 7x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, /* 8x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, /* 9x */ - 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, /* Ax */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Cx */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Dx */ - 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Ex */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, /* Fx */ -}; -#define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40])) -#endif -SQLITE_PRIVATE int sqlite3IsIdChar(u8 c){ return IdChar(c); } - +SQLITE_PRIVATE int sqlite3InvokeBusyHandler(BusyHandler *p){ + int rc; + if( NEVER(p==0) || p->xFunc==0 || p->nBusy<0 ) return 0; + rc = p->xFunc(p->pArg, p->nBusy); + if( rc==0 ){ + p->nBusy = -1; + }else{ + p->nBusy++; + } + return rc; +} /* -** Return the length of the token that begins at z[0]. -** Store the token type in *tokenType before returning. +** This routine sets the busy callback for an Sqlite database to the +** given callback function with the given argument. */ -SQLITE_PRIVATE int sqlite3GetToken(const unsigned char *z, int *tokenType){ - int i, c; - switch( *z ){ - case ' ': case '\t': case '\n': case '\f': case '\r': { - testcase( z[0]==' ' ); - testcase( z[0]=='\t' ); - testcase( z[0]=='\n' ); - testcase( z[0]=='\f' ); - testcase( z[0]=='\r' ); - for(i=1; sqlite3Isspace(z[i]); i++){} - *tokenType = TK_SPACE; - return i; - } - case '-': { - if( z[1]=='-' ){ - for(i=2; (c=z[i])!=0 && c!='\n'; i++){} - *tokenType = TK_SPACE; /* IMP: R-22934-25134 */ - return i; - } - *tokenType = TK_MINUS; - return 1; - } - case '(': { - *tokenType = TK_LP; - return 1; - } - case ')': { - *tokenType = TK_RP; - return 1; - } - case ';': { - *tokenType = TK_SEMI; - return 1; - } - case '+': { - *tokenType = TK_PLUS; - return 1; - } - case '*': { - *tokenType = TK_STAR; - return 1; - } - case '/': { - if( z[1]!='*' || z[2]==0 ){ - *tokenType = TK_SLASH; - return 1; - } - for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} - if( c ) i++; - *tokenType = TK_SPACE; /* IMP: R-22934-25134 */ - return i; - } - case '%': { - *tokenType = TK_REM; - return 1; - } - case '=': { - *tokenType = TK_EQ; - return 1 + (z[1]=='='); - } - case '<': { - if( (c=z[1])=='=' ){ - *tokenType = TK_LE; - return 2; - }else if( c=='>' ){ - *tokenType = TK_NE; - return 2; - }else if( c=='<' ){ - *tokenType = TK_LSHIFT; - return 2; - }else{ - *tokenType = TK_LT; - return 1; - } - } - case '>': { - if( (c=z[1])=='=' ){ - *tokenType = TK_GE; - return 2; - }else if( c=='>' ){ - *tokenType = TK_RSHIFT; - return 2; - }else{ - *tokenType = TK_GT; - return 1; - } - } - case '!': { - if( z[1]!='=' ){ - *tokenType = TK_ILLEGAL; - return 2; - }else{ - *tokenType = TK_NE; - return 2; - } - } - case '|': { - if( z[1]!='|' ){ - *tokenType = TK_BITOR; - return 1; - }else{ - *tokenType = TK_CONCAT; - return 2; - } - } - case ',': { - *tokenType = TK_COMMA; - return 1; - } - case '&': { - *tokenType = TK_BITAND; - return 1; - } - case '~': { - *tokenType = TK_BITNOT; - return 1; - } - case '`': - case '\'': - case '"': { - int delim = z[0]; - testcase( delim=='`' ); - testcase( delim=='\'' ); - testcase( delim=='"' ); - for(i=1; (c=z[i])!=0; i++){ - if( c==delim ){ - if( z[i+1]==delim ){ - i++; - }else{ - break; - } - } - } - if( c=='\'' ){ - *tokenType = TK_STRING; - return i+1; - }else if( c!=0 ){ - *tokenType = TK_ID; - return i+1; - }else{ - *tokenType = TK_ILLEGAL; - return i; - } - } - case '.': { -#ifndef SQLITE_OMIT_FLOATING_POINT - if( !sqlite3Isdigit(z[1]) ) -#endif - { - *tokenType = TK_DOT; - return 1; - } - /* If the next character is a digit, this is a floating point - ** number that begins with ".". Fall thru into the next case */ - } - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': { - testcase( z[0]=='0' ); testcase( z[0]=='1' ); testcase( z[0]=='2' ); - testcase( z[0]=='3' ); testcase( z[0]=='4' ); testcase( z[0]=='5' ); - testcase( z[0]=='6' ); testcase( z[0]=='7' ); testcase( z[0]=='8' ); - testcase( z[0]=='9' ); - *tokenType = TK_INTEGER; -#ifndef SQLITE_OMIT_HEX_INTEGER - if( z[0]=='0' && (z[1]=='x' || z[1]=='X') && sqlite3Isxdigit(z[2]) ){ - for(i=3; sqlite3Isxdigit(z[i]); i++){} - return i; - } -#endif - for(i=0; sqlite3Isdigit(z[i]); i++){} -#ifndef SQLITE_OMIT_FLOATING_POINT - if( z[i]=='.' ){ - i++; - while( sqlite3Isdigit(z[i]) ){ i++; } - *tokenType = TK_FLOAT; - } - if( (z[i]=='e' || z[i]=='E') && - ( sqlite3Isdigit(z[i+1]) - || ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2])) - ) - ){ - i += 2; - while( sqlite3Isdigit(z[i]) ){ i++; } - *tokenType = TK_FLOAT; - } +SQLITE_API int SQLITE_STDCALL sqlite3_busy_handler( + sqlite3 *db, + int (*xBusy)(void*,int), + void *pArg +){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; #endif - while( IdChar(z[i]) ){ - *tokenType = TK_ILLEGAL; - i++; - } - return i; - } - case '[': { - for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} - *tokenType = c==']' ? TK_ID : TK_ILLEGAL; - return i; - } - case '?': { - *tokenType = TK_VARIABLE; - for(i=1; sqlite3Isdigit(z[i]); i++){} - return i; - } -#ifndef SQLITE_OMIT_TCL_VARIABLE - case '$': + sqlite3_mutex_enter(db->mutex); + db->busyHandler.xFunc = xBusy; + db->busyHandler.pArg = pArg; + db->busyHandler.nBusy = 0; + db->busyTimeout = 0; + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; +} + +#ifndef SQLITE_OMIT_PROGRESS_CALLBACK +/* +** This routine sets the progress callback for an Sqlite database to the +** given callback function with the given argument. The progress callback will +** be invoked every nOps opcodes. +*/ +SQLITE_API void SQLITE_STDCALL sqlite3_progress_handler( + sqlite3 *db, + int nOps, + int (*xProgress)(void*), + void *pArg +){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return; + } #endif - case '@': /* For compatibility with MS SQL Server */ - case '#': - case ':': { - int n = 0; - testcase( z[0]=='$' ); testcase( z[0]=='@' ); - testcase( z[0]==':' ); testcase( z[0]=='#' ); - *tokenType = TK_VARIABLE; - for(i=1; (c=z[i])!=0; i++){ - if( IdChar(c) ){ - n++; -#ifndef SQLITE_OMIT_TCL_VARIABLE - }else if( c=='(' && n>0 ){ - do{ - i++; - }while( (c=z[i])!=0 && !sqlite3Isspace(c) && c!=')' ); - if( c==')' ){ - i++; - }else{ - *tokenType = TK_ILLEGAL; - } - break; - }else if( c==':' && z[i+1]==':' ){ - i++; + sqlite3_mutex_enter(db->mutex); + if( nOps>0 ){ + db->xProgress = xProgress; + db->nProgressOps = (unsigned)nOps; + db->pProgressArg = pArg; + }else{ + db->xProgress = 0; + db->nProgressOps = 0; + db->pProgressArg = 0; + } + sqlite3_mutex_leave(db->mutex); +} #endif - }else{ - break; - } - } - if( n==0 ) *tokenType = TK_ILLEGAL; - return i; - } -#ifndef SQLITE_OMIT_BLOB_LITERAL - case 'x': case 'X': { - testcase( z[0]=='x' ); testcase( z[0]=='X' ); - if( z[1]=='\'' ){ - *tokenType = TK_BLOB; - for(i=2; sqlite3Isxdigit(z[i]); i++){} - if( z[i]!='\'' || i%2 ){ - *tokenType = TK_ILLEGAL; - while( z[i] && z[i]!='\'' ){ i++; } - } - if( z[i] ) i++; - return i; - } - /* Otherwise fall through to the next case */ - } + + +/* +** This routine installs a default busy handler that waits for the +** specified number of milliseconds before returning 0. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_busy_timeout(sqlite3 *db, int ms){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; #endif - default: { - if( !IdChar(*z) ){ - break; - } - for(i=1; IdChar(z[i]); i++){} - *tokenType = keywordCode((char*)z, i); - return i; - } + if( ms>0 ){ + sqlite3_busy_handler(db, sqliteDefaultBusyCallback, (void*)db); + db->busyTimeout = ms; + }else{ + sqlite3_busy_handler(db, 0, 0); } - *tokenType = TK_ILLEGAL; - return 1; + return SQLITE_OK; } /* -** Run the parser on the given SQL string. The parser structure is -** passed in. An SQLITE_ status code is returned. If an error occurs -** then an and attempt is made to write an error message into -** memory obtained from sqlite3_malloc() and to make *pzErrMsg point to that -** error message. +** Cause any pending operation to stop at its earliest opportunity. */ -SQLITE_PRIVATE int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ - int nErr = 0; /* Number of errors encountered */ - int i; /* Loop counter */ - void *pEngine; /* The LEMON-generated LALR(1) parser */ - int tokenType; /* type of the next token */ - int lastTokenParsed = -1; /* type of the previous token */ - u8 enableLookaside; /* Saved value of db->lookaside.bEnabled */ - sqlite3 *db = pParse->db; /* The database connection */ - int mxSqlLen; /* Max length of an SQL string */ - - assert( zSql!=0 ); - mxSqlLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH]; - if( db->nVdbeActive==0 ){ - db->u1.isInterrupted = 0; +SQLITE_API void SQLITE_STDCALL sqlite3_interrupt(sqlite3 *db){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return; } - pParse->rc = SQLITE_OK; - pParse->zTail = zSql; - i = 0; - assert( pzErrMsg!=0 ); - pEngine = sqlite3ParserAlloc(sqlite3Malloc); - if( pEngine==0 ){ - db->mallocFailed = 1; - return SQLITE_NOMEM; +#endif + db->u1.isInterrupted = 1; +} + + +/* +** This function is exactly the same as sqlite3_create_function(), except +** that it is designed to be called by internal code. The difference is +** that if a malloc() fails in sqlite3_create_function(), an error code +** is returned and the mallocFailed flag cleared. +*/ +SQLITE_PRIVATE int sqlite3CreateFunc( + sqlite3 *db, + const char *zFunctionName, + int nArg, + int enc, + void *pUserData, + void (*xFunc)(sqlite3_context*,int,sqlite3_value **), + void (*xStep)(sqlite3_context*,int,sqlite3_value **), + void (*xFinal)(sqlite3_context*), + FuncDestructor *pDestructor +){ + FuncDef *p; + int nName; + int extraFlags; + + assert( sqlite3_mutex_held(db->mutex) ); + if( zFunctionName==0 || + (xFunc && (xFinal || xStep)) || + (!xFunc && (xFinal && !xStep)) || + (!xFunc && (!xFinal && xStep)) || + (nArg<-1 || nArg>SQLITE_MAX_FUNCTION_ARG) || + (255<(nName = sqlite3Strlen30( zFunctionName))) ){ + return SQLITE_MISUSE_BKPT; } - assert( pParse->pNewTable==0 ); - assert( pParse->pNewTrigger==0 ); - assert( pParse->nVar==0 ); - assert( pParse->nzVar==0 ); - assert( pParse->azVar==0 ); - enableLookaside = db->lookaside.bEnabled; - if( db->lookaside.pStart ) db->lookaside.bEnabled = 1; - while( !db->mallocFailed && zSql[i]!=0 ){ - assert( i>=0 ); - pParse->sLastToken.z = &zSql[i]; - pParse->sLastToken.n = sqlite3GetToken((unsigned char*)&zSql[i],&tokenType); - i += pParse->sLastToken.n; - if( i>mxSqlLen ){ - pParse->rc = SQLITE_TOOBIG; - break; + + assert( SQLITE_FUNC_CONSTANT==SQLITE_DETERMINISTIC ); + extraFlags = enc & SQLITE_DETERMINISTIC; + enc &= (SQLITE_FUNC_ENCMASK|SQLITE_ANY); + +#ifndef SQLITE_OMIT_UTF16 + /* If SQLITE_UTF16 is specified as the encoding type, transform this + ** to one of SQLITE_UTF16LE or SQLITE_UTF16BE using the + ** SQLITE_UTF16NATIVE macro. SQLITE_UTF16 is not used internally. + ** + ** If SQLITE_ANY is specified, add three versions of the function + ** to the hash table. + */ + if( enc==SQLITE_UTF16 ){ + enc = SQLITE_UTF16NATIVE; + }else if( enc==SQLITE_ANY ){ + int rc; + rc = sqlite3CreateFunc(db, zFunctionName, nArg, SQLITE_UTF8|extraFlags, + pUserData, xFunc, xStep, xFinal, pDestructor); + if( rc==SQLITE_OK ){ + rc = sqlite3CreateFunc(db, zFunctionName, nArg, SQLITE_UTF16LE|extraFlags, + pUserData, xFunc, xStep, xFinal, pDestructor); } - switch( tokenType ){ - case TK_SPACE: { - if( db->u1.isInterrupted ){ - sqlite3ErrorMsg(pParse, "interrupt"); - pParse->rc = SQLITE_INTERRUPT; - goto abort_parse; - } - break; - } - case TK_ILLEGAL: { - sqlite3DbFree(db, *pzErrMsg); - *pzErrMsg = sqlite3MPrintf(db, "unrecognized token: \"%T\"", - &pParse->sLastToken); - nErr++; - goto abort_parse; - } - case TK_SEMI: { - pParse->zTail = &zSql[i]; - /* Fall thru into the default case */ - } - default: { - sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse); - lastTokenParsed = tokenType; - if( pParse->rc!=SQLITE_OK ){ - goto abort_parse; - } - break; - } + if( rc!=SQLITE_OK ){ + return rc; } + enc = SQLITE_UTF16BE; } -abort_parse: - if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){ - if( lastTokenParsed!=TK_SEMI ){ - sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); - pParse->zTail = &zSql[i]; +#else + enc = SQLITE_UTF8; +#endif + + /* Check if an existing function is being overridden or deleted. If so, + ** and there are active VMs, then return SQLITE_BUSY. If a function + ** is being overridden/deleted but there are no active VMs, allow the + ** operation to continue but invalidate all precompiled statements. + */ + p = sqlite3FindFunction(db, zFunctionName, nName, nArg, (u8)enc, 0); + if( p && (p->funcFlags & SQLITE_FUNC_ENCMASK)==enc && p->nArg==nArg ){ + if( db->nVdbeActive ){ + sqlite3ErrorWithMsg(db, SQLITE_BUSY, + "unable to delete/modify user-function due to active statements"); + assert( !db->mallocFailed ); + return SQLITE_BUSY; + }else{ + sqlite3ExpirePreparedStatements(db); } - sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse); } -#ifdef YYTRACKMAXSTACKDEPTH - sqlite3_mutex_enter(sqlite3MallocMutex()); - sqlite3StatusSet(SQLITE_STATUS_PARSER_STACK, - sqlite3ParserStackPeak(pEngine) - ); - sqlite3_mutex_leave(sqlite3MallocMutex()); -#endif /* YYDEBUG */ - sqlite3ParserFree(pEngine, sqlite3_free); - db->lookaside.bEnabled = enableLookaside; - if( db->mallocFailed ){ - pParse->rc = SQLITE_NOMEM; + + p = sqlite3FindFunction(db, zFunctionName, nName, nArg, (u8)enc, 1); + assert(p || db->mallocFailed); + if( !p ){ + return SQLITE_NOMEM; } - if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ - sqlite3SetString(&pParse->zErrMsg, db, "%s", sqlite3ErrStr(pParse->rc)); + + /* If an older version of the function with a configured destructor is + ** being replaced invoke the destructor function here. */ + functionDestroy(db, p); + + if( pDestructor ){ + pDestructor->nRef++; } - assert( pzErrMsg!=0 ); - if( pParse->zErrMsg ){ - *pzErrMsg = pParse->zErrMsg; - sqlite3_log(pParse->rc, "%s", *pzErrMsg); - pParse->zErrMsg = 0; - nErr++; + p->pDestructor = pDestructor; + p->funcFlags = (p->funcFlags & SQLITE_FUNC_ENCMASK) | extraFlags; + testcase( p->funcFlags & SQLITE_DETERMINISTIC ); + p->xFunc = xFunc; + p->xStep = xStep; + p->xFinalize = xFinal; + p->pUserData = pUserData; + p->nArg = (u16)nArg; + return SQLITE_OK; +} + +/* +** Create new user functions. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_create_function( + sqlite3 *db, + const char *zFunc, + int nArg, + int enc, + void *p, + void (*xFunc)(sqlite3_context*,int,sqlite3_value **), + void (*xStep)(sqlite3_context*,int,sqlite3_value **), + void (*xFinal)(sqlite3_context*) +){ + return sqlite3_create_function_v2(db, zFunc, nArg, enc, p, xFunc, xStep, + xFinal, 0); +} + +SQLITE_API int SQLITE_STDCALL sqlite3_create_function_v2( + sqlite3 *db, + const char *zFunc, + int nArg, + int enc, + void *p, + void (*xFunc)(sqlite3_context*,int,sqlite3_value **), + void (*xStep)(sqlite3_context*,int,sqlite3_value **), + void (*xFinal)(sqlite3_context*), + void (*xDestroy)(void *) +){ + int rc = SQLITE_ERROR; + FuncDestructor *pArg = 0; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + return SQLITE_MISUSE_BKPT; } - if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){ - sqlite3VdbeDelete(pParse->pVdbe); - pParse->pVdbe = 0; +#endif + sqlite3_mutex_enter(db->mutex); + if( xDestroy ){ + pArg = (FuncDestructor *)sqlite3DbMallocZero(db, sizeof(FuncDestructor)); + if( !pArg ){ + xDestroy(p); + goto out; + } + pArg->xDestroy = xDestroy; + pArg->pUserData = p; } -#ifndef SQLITE_OMIT_SHARED_CACHE - if( pParse->nested==0 ){ - sqlite3DbFree(db, pParse->aTableLock); - pParse->aTableLock = 0; - pParse->nTableLock = 0; + rc = sqlite3CreateFunc(db, zFunc, nArg, enc, p, xFunc, xStep, xFinal, pArg); + if( pArg && pArg->nRef==0 ){ + assert( rc!=SQLITE_OK ); + xDestroy(p); + sqlite3DbFree(db, pArg); } + + out: + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +} + +#ifndef SQLITE_OMIT_UTF16 +SQLITE_API int SQLITE_STDCALL sqlite3_create_function16( + sqlite3 *db, + const void *zFunctionName, + int nArg, + int eTextRep, + void *p, + void (*xFunc)(sqlite3_context*,int,sqlite3_value**), + void (*xStep)(sqlite3_context*,int,sqlite3_value**), + void (*xFinal)(sqlite3_context*) +){ + int rc; + char *zFunc8; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) || zFunctionName==0 ) return SQLITE_MISUSE_BKPT; #endif -#ifndef SQLITE_OMIT_VIRTUALTABLE - sqlite3_free(pParse->apVtabLock); + sqlite3_mutex_enter(db->mutex); + assert( !db->mallocFailed ); + zFunc8 = sqlite3Utf16to8(db, zFunctionName, -1, SQLITE_UTF16NATIVE); + rc = sqlite3CreateFunc(db, zFunc8, nArg, eTextRep, p, xFunc, xStep, xFinal,0); + sqlite3DbFree(db, zFunc8); + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +} #endif - if( !IN_DECLARE_VTAB ){ - /* If the pParse->declareVtab flag is set, do not delete any table - ** structure built up in pParse->pNewTable. The calling code (see vtab.c) - ** will take responsibility for freeing the Table structure. - */ - sqlite3DeleteTable(db, pParse->pNewTable); - } - if( pParse->bFreeWith ) sqlite3WithDelete(db, pParse->pWith); - sqlite3DeleteTrigger(db, pParse->pNewTrigger); - for(i=pParse->nzVar-1; i>=0; i--) sqlite3DbFree(db, pParse->azVar[i]); - sqlite3DbFree(db, pParse->azVar); - while( pParse->pAinc ){ - AutoincInfo *p = pParse->pAinc; - pParse->pAinc = p->pNext; - sqlite3DbFree(db, p); - } - while( pParse->pZombieTab ){ - Table *p = pParse->pZombieTab; - pParse->pZombieTab = p->pNextZombie; - sqlite3DeleteTable(db, p); +/* +** Declare that a function has been overloaded by a virtual table. +** +** If the function already exists as a regular global function, then +** this routine is a no-op. If the function does not exist, then create +** a new one that always throws a run-time error. +** +** When virtual tables intend to provide an overloaded function, they +** should call this routine to make sure the global function exists. +** A global function must exist in order for name resolution to work +** properly. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_overload_function( + sqlite3 *db, + const char *zName, + int nArg +){ + int nName = sqlite3Strlen30(zName); + int rc = SQLITE_OK; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) || zName==0 || nArg<-2 ){ + return SQLITE_MISUSE_BKPT; } - if( nErr>0 && pParse->rc==SQLITE_OK ){ - pParse->rc = SQLITE_ERROR; +#endif + sqlite3_mutex_enter(db->mutex); + if( sqlite3FindFunction(db, zName, nName, nArg, SQLITE_UTF8, 0)==0 ){ + rc = sqlite3CreateFunc(db, zName, nArg, SQLITE_UTF8, + 0, sqlite3InvalidFunction, 0, 0, 0); } - return nErr; + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; } -/************** End of tokenize.c ********************************************/ -/************** Begin file complete.c ****************************************/ +#ifndef SQLITE_OMIT_TRACE /* -** 2001 September 15 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** An tokenizer for SQL +** Register a trace function. The pArg from the previously registered trace +** is returned. ** -** This file contains C code that implements the sqlite3_complete() API. -** This code used to be part of the tokenizer.c source file. But by -** separating it out, the code will be automatically omitted from -** static links that do not use it. +** A NULL trace function means that no tracing is executes. A non-NULL +** trace is a pointer to a function that is invoked at the start of each +** SQL statement. */ -#ifndef SQLITE_OMIT_COMPLETE +SQLITE_API void *SQLITE_STDCALL sqlite3_trace(sqlite3 *db, void (*xTrace)(void*,const char*), void *pArg){ + void *pOld; +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + sqlite3_mutex_enter(db->mutex); + pOld = db->pTraceArg; + db->xTrace = xTrace; + db->pTraceArg = pArg; + sqlite3_mutex_leave(db->mutex); + return pOld; +} /* -** This is defined in tokenize.c. We just have to import the definition. +** Register a profile function. The pArg from the previously registered +** profile function is returned. +** +** A NULL profile function means that no profiling is executes. A non-NULL +** profile is a pointer to a function that is invoked at the conclusion of +** each SQL statement that is run. */ -#ifndef SQLITE_AMALGAMATION -#ifdef SQLITE_ASCII -#define IdChar(C) ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0) +SQLITE_API void *SQLITE_STDCALL sqlite3_profile( + sqlite3 *db, + void (*xProfile)(void*,const char*,sqlite_uint64), + void *pArg +){ + void *pOld; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } #endif -#ifdef SQLITE_EBCDIC -SQLITE_PRIVATE const char sqlite3IsEbcdicIdChar[]; -#define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40])) + sqlite3_mutex_enter(db->mutex); + pOld = db->pProfileArg; + db->xProfile = xProfile; + db->pProfileArg = pArg; + sqlite3_mutex_leave(db->mutex); + return pOld; +} +#endif /* SQLITE_OMIT_TRACE */ + +/* +** Register a function to be invoked when a transaction commits. +** If the invoked function returns non-zero, then the commit becomes a +** rollback. +*/ +SQLITE_API void *SQLITE_STDCALL sqlite3_commit_hook( + sqlite3 *db, /* Attach the hook to this database */ + int (*xCallback)(void*), /* Function to invoke on each commit */ + void *pArg /* Argument to the function */ +){ + void *pOld; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } #endif -#endif /* SQLITE_AMALGAMATION */ + sqlite3_mutex_enter(db->mutex); + pOld = db->pCommitArg; + db->xCommitCallback = xCallback; + db->pCommitArg = pArg; + sqlite3_mutex_leave(db->mutex); + return pOld; +} + +/* +** Register a callback to be invoked each time a row is updated, +** inserted or deleted using this database connection. +*/ +SQLITE_API void *SQLITE_STDCALL sqlite3_update_hook( + sqlite3 *db, /* Attach the hook to this database */ + void (*xCallback)(void*,int,char const *,char const *,sqlite_int64), + void *pArg /* Argument to the function */ +){ + void *pRet; +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + sqlite3_mutex_enter(db->mutex); + pRet = db->pUpdateArg; + db->xUpdateCallback = xCallback; + db->pUpdateArg = pArg; + sqlite3_mutex_leave(db->mutex); + return pRet; +} /* -** Token types used by the sqlite3_complete() routine. See the header -** comments on that procedure for additional information. +** Register a callback to be invoked each time a transaction is rolled +** back by this database connection. */ -#define tkSEMI 0 -#define tkWS 1 -#define tkOTHER 2 -#ifndef SQLITE_OMIT_TRIGGER -#define tkEXPLAIN 3 -#define tkCREATE 4 -#define tkTEMP 5 -#define tkTRIGGER 6 -#define tkEND 7 +SQLITE_API void *SQLITE_STDCALL sqlite3_rollback_hook( + sqlite3 *db, /* Attach the hook to this database */ + void (*xCallback)(void*), /* Callback function */ + void *pArg /* Argument to the function */ +){ + void *pRet; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } #endif + sqlite3_mutex_enter(db->mutex); + pRet = db->pRollbackArg; + db->xRollbackCallback = xCallback; + db->pRollbackArg = pArg; + sqlite3_mutex_leave(db->mutex); + return pRet; +} +#ifndef SQLITE_OMIT_WAL /* -** Return TRUE if the given SQL string ends in a semicolon. -** -** Special handling is require for CREATE TRIGGER statements. -** Whenever the CREATE TRIGGER keywords are seen, the statement -** must end with ";END;". -** -** This implementation uses a state machine with 8 states: -** -** (0) INVALID We have not yet seen a non-whitespace character. -** -** (1) START At the beginning or end of an SQL statement. This routine -** returns 1 if it ends in the START state and 0 if it ends -** in any other state. -** -** (2) NORMAL We are in the middle of statement which ends with a single -** semicolon. -** -** (3) EXPLAIN The keyword EXPLAIN has been seen at the beginning of -** a statement. -** -** (4) CREATE The keyword CREATE has been seen at the beginning of a -** statement, possibly preceded by EXPLAIN and/or followed by -** TEMP or TEMPORARY -** -** (5) TRIGGER We are in the middle of a trigger definition that must be -** ended by a semicolon, the keyword END, and another semicolon. -** -** (6) SEMI We've seen the first semicolon in the ";END;" that occurs at -** the end of a trigger definition. -** -** (7) END We've seen the ";END" of the ";END;" that occurs at the end -** of a trigger definition. -** -** Transitions between states above are determined by tokens extracted -** from the input. The following tokens are significant: -** -** (0) tkSEMI A semicolon. -** (1) tkWS Whitespace. -** (2) tkOTHER Any other SQL token. -** (3) tkEXPLAIN The "explain" keyword. -** (4) tkCREATE The "create" keyword. -** (5) tkTEMP The "temp" or "temporary" keyword. -** (6) tkTRIGGER The "trigger" keyword. -** (7) tkEND The "end" keyword. -** -** Whitespace never causes a state transition and is always ignored. -** This means that a SQL string of all whitespace is invalid. +** The sqlite3_wal_hook() callback registered by sqlite3_wal_autocheckpoint(). +** Invoke sqlite3_wal_checkpoint if the number of frames in the log file +** is greater than sqlite3.pWalArg cast to an integer (the value configured by +** wal_autocheckpoint()). +*/ +SQLITE_PRIVATE int sqlite3WalDefaultHook( + void *pClientData, /* Argument */ + sqlite3 *db, /* Connection */ + const char *zDb, /* Database */ + int nFrame /* Size of WAL */ +){ + if( nFrame>=SQLITE_PTR_TO_INT(pClientData) ){ + sqlite3BeginBenignMalloc(); + sqlite3_wal_checkpoint(db, zDb); + sqlite3EndBenignMalloc(); + } + return SQLITE_OK; +} +#endif /* SQLITE_OMIT_WAL */ + +/* +** Configure an sqlite3_wal_hook() callback to automatically checkpoint +** a database after committing a transaction if there are nFrame or +** more frames in the log file. Passing zero or a negative value as the +** nFrame parameter disables automatic checkpoints entirely. ** -** If we compile with SQLITE_OMIT_TRIGGER, all of the computation needed -** to recognize the end of a trigger can be omitted. All we have to do -** is look for a semicolon that is not part of an string or comment. +** The callback registered by this function replaces any existing callback +** registered using sqlite3_wal_hook(). Likewise, registering a callback +** using sqlite3_wal_hook() disables the automatic checkpoint mechanism +** configured by this function. */ -SQLITE_API int SQLITE_STDCALL sqlite3_complete(const char *zSql){ - u8 state = 0; /* Current state, using numbers defined in header comment */ - u8 token; /* Value of the next token */ - -#ifndef SQLITE_OMIT_TRIGGER - /* A complex statement machine used to detect the end of a CREATE TRIGGER - ** statement. This is the normal case. - */ - static const u8 trans[8][8] = { - /* Token: */ - /* State: ** SEMI WS OTHER EXPLAIN CREATE TEMP TRIGGER END */ - /* 0 INVALID: */ { 1, 0, 2, 3, 4, 2, 2, 2, }, - /* 1 START: */ { 1, 1, 2, 3, 4, 2, 2, 2, }, - /* 2 NORMAL: */ { 1, 2, 2, 2, 2, 2, 2, 2, }, - /* 3 EXPLAIN: */ { 1, 3, 3, 2, 4, 2, 2, 2, }, - /* 4 CREATE: */ { 1, 4, 2, 2, 2, 4, 5, 2, }, - /* 5 TRIGGER: */ { 6, 5, 5, 5, 5, 5, 5, 5, }, - /* 6 SEMI: */ { 6, 6, 5, 5, 5, 5, 5, 7, }, - /* 7 END: */ { 1, 7, 5, 5, 5, 5, 5, 5, }, - }; +SQLITE_API int SQLITE_STDCALL sqlite3_wal_autocheckpoint(sqlite3 *db, int nFrame){ +#ifdef SQLITE_OMIT_WAL + UNUSED_PARAMETER(db); + UNUSED_PARAMETER(nFrame); #else - /* If triggers are not supported by this compile then the statement machine - ** used to detect the end of a statement is much simpler - */ - static const u8 trans[3][3] = { - /* Token: */ - /* State: ** SEMI WS OTHER */ - /* 0 INVALID: */ { 1, 0, 2, }, - /* 1 START: */ { 1, 1, 2, }, - /* 2 NORMAL: */ { 1, 2, 2, }, - }; -#endif /* SQLITE_OMIT_TRIGGER */ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + if( nFrame>0 ){ + sqlite3_wal_hook(db, sqlite3WalDefaultHook, SQLITE_INT_TO_PTR(nFrame)); + }else{ + sqlite3_wal_hook(db, 0, 0); + } +#endif + return SQLITE_OK; +} +/* +** Register a callback to be invoked each time a transaction is written +** into the write-ahead-log by this database connection. +*/ +SQLITE_API void *SQLITE_STDCALL sqlite3_wal_hook( + sqlite3 *db, /* Attach the hook to this db handle */ + int(*xCallback)(void *, sqlite3*, const char*, int), + void *pArg /* First argument passed to xCallback() */ +){ +#ifndef SQLITE_OMIT_WAL + void *pRet; #ifdef SQLITE_ENABLE_API_ARMOR - if( zSql==0 ){ + if( !sqlite3SafetyCheckOk(db) ){ (void)SQLITE_MISUSE_BKPT; return 0; } #endif - - while( *zSql ){ - switch( *zSql ){ - case ';': { /* A semicolon */ - token = tkSEMI; - break; - } - case ' ': - case '\r': - case '\t': - case '\n': - case '\f': { /* White space is ignored */ - token = tkWS; - break; - } - case '/': { /* C-style comments */ - if( zSql[1]!='*' ){ - token = tkOTHER; - break; - } - zSql += 2; - while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; } - if( zSql[0]==0 ) return 0; - zSql++; - token = tkWS; - break; - } - case '-': { /* SQL-style comments from "--" to end of line */ - if( zSql[1]!='-' ){ - token = tkOTHER; - break; - } - while( *zSql && *zSql!='\n' ){ zSql++; } - if( *zSql==0 ) return state==1; - token = tkWS; - break; - } - case '[': { /* Microsoft-style identifiers in [...] */ - zSql++; - while( *zSql && *zSql!=']' ){ zSql++; } - if( *zSql==0 ) return 0; - token = tkOTHER; - break; - } - case '`': /* Grave-accent quoted symbols used by MySQL */ - case '"': /* single- and double-quoted strings */ - case '\'': { - int c = *zSql; - zSql++; - while( *zSql && *zSql!=c ){ zSql++; } - if( *zSql==0 ) return 0; - token = tkOTHER; - break; - } - default: { -#ifdef SQLITE_EBCDIC - unsigned char c; -#endif - if( IdChar((u8)*zSql) ){ - /* Keywords and unquoted identifiers */ - int nId; - for(nId=1; IdChar(zSql[nId]); nId++){} -#ifdef SQLITE_OMIT_TRIGGER - token = tkOTHER; + sqlite3_mutex_enter(db->mutex); + pRet = db->pWalArg; + db->xWalCallback = xCallback; + db->pWalArg = pArg; + sqlite3_mutex_leave(db->mutex); + return pRet; #else - switch( *zSql ){ - case 'c': case 'C': { - if( nId==6 && sqlite3StrNICmp(zSql, "create", 6)==0 ){ - token = tkCREATE; - }else{ - token = tkOTHER; - } - break; - } - case 't': case 'T': { - if( nId==7 && sqlite3StrNICmp(zSql, "trigger", 7)==0 ){ - token = tkTRIGGER; - }else if( nId==4 && sqlite3StrNICmp(zSql, "temp", 4)==0 ){ - token = tkTEMP; - }else if( nId==9 && sqlite3StrNICmp(zSql, "temporary", 9)==0 ){ - token = tkTEMP; - }else{ - token = tkOTHER; - } - break; - } - case 'e': case 'E': { - if( nId==3 && sqlite3StrNICmp(zSql, "end", 3)==0 ){ - token = tkEND; - }else -#ifndef SQLITE_OMIT_EXPLAIN - if( nId==7 && sqlite3StrNICmp(zSql, "explain", 7)==0 ){ - token = tkEXPLAIN; - }else + return 0; #endif - { - token = tkOTHER; - } - break; - } - default: { - token = tkOTHER; - break; - } - } -#endif /* SQLITE_OMIT_TRIGGER */ - zSql += nId-1; - }else{ - /* Operators and special symbols */ - token = tkOTHER; - } - break; - } - } - state = trans[state][token]; - zSql++; - } - return state==1; } -#ifndef SQLITE_OMIT_UTF16 /* -** This routine is the same as the sqlite3_complete() routine described -** above, except that the parameter is required to be UTF-16 encoded, not -** UTF-8. +** Checkpoint database zDb. */ -SQLITE_API int SQLITE_STDCALL sqlite3_complete16(const void *zSql){ - sqlite3_value *pVal; - char const *zSql8; - int rc = SQLITE_NOMEM; +SQLITE_API int SQLITE_STDCALL sqlite3_wal_checkpoint_v2( + sqlite3 *db, /* Database handle */ + const char *zDb, /* Name of attached database (or NULL) */ + int eMode, /* SQLITE_CHECKPOINT_* value */ + int *pnLog, /* OUT: Size of WAL log in frames */ + int *pnCkpt /* OUT: Total number of frames checkpointed */ +){ +#ifdef SQLITE_OMIT_WAL + return SQLITE_OK; +#else + int rc; /* Return code */ + int iDb = SQLITE_MAX_ATTACHED; /* sqlite3.aDb[] index of db to checkpoint */ -#ifndef SQLITE_OMIT_AUTOINIT - rc = sqlite3_initialize(); - if( rc ) return rc; +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; #endif - pVal = sqlite3ValueNew(0); - sqlite3ValueSetStr(pVal, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC); - zSql8 = sqlite3ValueText(pVal, SQLITE_UTF8); - if( zSql8 ){ - rc = sqlite3_complete(zSql8); + + /* Initialize the output variables to -1 in case an error occurs. */ + if( pnLog ) *pnLog = -1; + if( pnCkpt ) *pnCkpt = -1; + + assert( SQLITE_CHECKPOINT_PASSIVE==0 ); + assert( SQLITE_CHECKPOINT_FULL==1 ); + assert( SQLITE_CHECKPOINT_RESTART==2 ); + assert( SQLITE_CHECKPOINT_TRUNCATE==3 ); + if( eModeSQLITE_CHECKPOINT_TRUNCATE ){ + /* EVIDENCE-OF: R-03996-12088 The M parameter must be a valid checkpoint + ** mode: */ + return SQLITE_MISUSE; + } + + sqlite3_mutex_enter(db->mutex); + if( zDb && zDb[0] ){ + iDb = sqlite3FindDbName(db, zDb); + } + if( iDb<0 ){ + rc = SQLITE_ERROR; + sqlite3ErrorWithMsg(db, SQLITE_ERROR, "unknown database: %s", zDb); }else{ - rc = SQLITE_NOMEM; + db->busyHandler.nBusy = 0; + rc = sqlite3Checkpoint(db, iDb, eMode, pnLog, pnCkpt); + sqlite3Error(db, rc); } - sqlite3ValueFree(pVal); - return sqlite3ApiExit(0, rc); + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +#endif } -#endif /* SQLITE_OMIT_UTF16 */ -#endif /* SQLITE_OMIT_COMPLETE */ -/************** End of complete.c ********************************************/ -/************** Begin file main.c ********************************************/ + /* -** 2001 September 15 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** Main file for the SQLite library. The routines in this file -** implement the programmer interface to the library. Routines in -** other files are for internal use by SQLite and should not be -** accessed by users of the library. +** Checkpoint database zDb. If zDb is NULL, or if the buffer zDb points +** to contains a zero-length string, all attached databases are +** checkpointed. */ +SQLITE_API int SQLITE_STDCALL sqlite3_wal_checkpoint(sqlite3 *db, const char *zDb){ + /* EVIDENCE-OF: R-41613-20553 The sqlite3_wal_checkpoint(D,X) is equivalent to + ** sqlite3_wal_checkpoint_v2(D,X,SQLITE_CHECKPOINT_PASSIVE,0,0). */ + return sqlite3_wal_checkpoint_v2(db,zDb,SQLITE_CHECKPOINT_PASSIVE,0,0); +} -#ifdef SQLITE_ENABLE_FTS3 -/************** Include fts3.h in the middle of main.c ***********************/ -/************** Begin file fts3.h ********************************************/ +#ifndef SQLITE_OMIT_WAL /* -** 2006 Oct 10 +** Run a checkpoint on database iDb. This is a no-op if database iDb is +** not currently open in WAL mode. ** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: +** If a transaction is open on the database being checkpointed, this +** function returns SQLITE_LOCKED and a checkpoint is not attempted. If +** an error occurs while running the checkpoint, an SQLite error code is +** returned (i.e. SQLITE_IOERR). Otherwise, SQLITE_OK. ** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. +** The mutex on database handle db should be held by the caller. The mutex +** associated with the specific b-tree being checkpointed is taken by +** this function while the checkpoint is running. ** -****************************************************************************** +** If iDb is passed SQLITE_MAX_ATTACHED, then all attached databases are +** checkpointed. If an error is encountered it is returned immediately - +** no attempt is made to checkpoint any remaining databases. ** -** This header file is used by programs that want to link against the -** FTS3 library. All it does is declare the sqlite3Fts3Init() interface. +** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART. */ +SQLITE_PRIVATE int sqlite3Checkpoint(sqlite3 *db, int iDb, int eMode, int *pnLog, int *pnCkpt){ + int rc = SQLITE_OK; /* Return code */ + int i; /* Used to iterate through attached dbs */ + int bBusy = 0; /* True if SQLITE_BUSY has been encountered */ -#if 0 -extern "C" { -#endif /* __cplusplus */ + assert( sqlite3_mutex_held(db->mutex) ); + assert( !pnLog || *pnLog==-1 ); + assert( !pnCkpt || *pnCkpt==-1 ); -SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db); + for(i=0; inDb && rc==SQLITE_OK; i++){ + if( i==iDb || iDb==SQLITE_MAX_ATTACHED ){ + rc = sqlite3BtreeCheckpoint(db->aDb[i].pBt, eMode, pnLog, pnCkpt); + pnLog = 0; + pnCkpt = 0; + if( rc==SQLITE_BUSY ){ + bBusy = 1; + rc = SQLITE_OK; + } + } + } -#if 0 -} /* extern "C" */ -#endif /* __cplusplus */ + return (rc==SQLITE_OK && bBusy) ? SQLITE_BUSY : rc; +} +#endif /* SQLITE_OMIT_WAL */ -/************** End of fts3.h ************************************************/ -/************** Continuing where we left off in main.c ***********************/ -#endif -#ifdef SQLITE_ENABLE_RTREE -/************** Include rtree.h in the middle of main.c **********************/ -/************** Begin file rtree.h *******************************************/ /* -** 2008 May 26 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** +** This function returns true if main-memory should be used instead of +** a temporary file for transient pager files and statement journals. +** The value returned depends on the value of db->temp_store (runtime +** parameter) and the compile time value of SQLITE_TEMP_STORE. The +** following table describes the relationship between these two values +** and this functions return value. ** -** This header file is used by programs that want to link against the -** RTREE library. All it does is declare the sqlite3RtreeInit() interface. +** SQLITE_TEMP_STORE db->temp_store Location of temporary database +** ----------------- -------------- ------------------------------ +** 0 any file (return 0) +** 1 1 file (return 0) +** 1 2 memory (return 1) +** 1 0 file (return 0) +** 2 1 file (return 0) +** 2 2 memory (return 1) +** 2 0 memory (return 1) +** 3 any memory (return 1) */ +SQLITE_PRIVATE int sqlite3TempInMemory(const sqlite3 *db){ +#if SQLITE_TEMP_STORE==1 + return ( db->temp_store==2 ); +#endif +#if SQLITE_TEMP_STORE==2 + return ( db->temp_store!=1 ); +#endif +#if SQLITE_TEMP_STORE==3 + UNUSED_PARAMETER(db); + return 1; +#endif +#if SQLITE_TEMP_STORE<1 || SQLITE_TEMP_STORE>3 + UNUSED_PARAMETER(db); + return 0; +#endif +} -#if 0 -extern "C" { -#endif /* __cplusplus */ - -SQLITE_PRIVATE int sqlite3RtreeInit(sqlite3 *db); - -#if 0 -} /* extern "C" */ -#endif /* __cplusplus */ +/* +** Return UTF-8 encoded English language explanation of the most recent +** error. +*/ +SQLITE_API const char *SQLITE_STDCALL sqlite3_errmsg(sqlite3 *db){ + const char *z; + if( !db ){ + return sqlite3ErrStr(SQLITE_NOMEM); + } + if( !sqlite3SafetyCheckSickOrOk(db) ){ + return sqlite3ErrStr(SQLITE_MISUSE_BKPT); + } + sqlite3_mutex_enter(db->mutex); + if( db->mallocFailed ){ + z = sqlite3ErrStr(SQLITE_NOMEM); + }else{ + testcase( db->pErr==0 ); + z = (char*)sqlite3_value_text(db->pErr); + assert( !db->mallocFailed ); + if( z==0 ){ + z = sqlite3ErrStr(db->errCode); + } + } + sqlite3_mutex_leave(db->mutex); + return z; +} -/************** End of rtree.h ***********************************************/ -/************** Continuing where we left off in main.c ***********************/ -#endif -#ifdef SQLITE_ENABLE_ICU -/************** Include sqliteicu.h in the middle of main.c ******************/ -/************** Begin file sqliteicu.h ***************************************/ +#ifndef SQLITE_OMIT_UTF16 /* -** 2008 May 26 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This header file is used by programs that want to link against the -** ICU extension. All it does is declare the sqlite3IcuInit() interface. +** Return UTF-16 encoded English language explanation of the most recent +** error. */ +SQLITE_API const void *SQLITE_STDCALL sqlite3_errmsg16(sqlite3 *db){ + static const u16 outOfMem[] = { + 'o', 'u', 't', ' ', 'o', 'f', ' ', 'm', 'e', 'm', 'o', 'r', 'y', 0 + }; + static const u16 misuse[] = { + 'l', 'i', 'b', 'r', 'a', 'r', 'y', ' ', + 'r', 'o', 'u', 't', 'i', 'n', 'e', ' ', + 'c', 'a', 'l', 'l', 'e', 'd', ' ', + 'o', 'u', 't', ' ', + 'o', 'f', ' ', + 's', 'e', 'q', 'u', 'e', 'n', 'c', 'e', 0 + }; -#if 0 -extern "C" { -#endif /* __cplusplus */ + const void *z; + if( !db ){ + return (void *)outOfMem; + } + if( !sqlite3SafetyCheckSickOrOk(db) ){ + return (void *)misuse; + } + sqlite3_mutex_enter(db->mutex); + if( db->mallocFailed ){ + z = (void *)outOfMem; + }else{ + z = sqlite3_value_text16(db->pErr); + if( z==0 ){ + sqlite3ErrorWithMsg(db, db->errCode, sqlite3ErrStr(db->errCode)); + z = sqlite3_value_text16(db->pErr); + } + /* A malloc() may have failed within the call to sqlite3_value_text16() + ** above. If this is the case, then the db->mallocFailed flag needs to + ** be cleared before returning. Do this directly, instead of via + ** sqlite3ApiExit(), to avoid setting the database handle error message. + */ + db->mallocFailed = 0; + } + sqlite3_mutex_leave(db->mutex); + return z; +} +#endif /* SQLITE_OMIT_UTF16 */ -SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db); +/* +** Return the most recent error code generated by an SQLite routine. If NULL is +** passed to this function, we assume a malloc() failed during sqlite3_open(). +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_errcode(sqlite3 *db){ + if( db && !sqlite3SafetyCheckSickOrOk(db) ){ + return SQLITE_MISUSE_BKPT; + } + if( !db || db->mallocFailed ){ + return SQLITE_NOMEM; + } + return db->errCode & db->errMask; +} +SQLITE_API int SQLITE_STDCALL sqlite3_extended_errcode(sqlite3 *db){ + if( db && !sqlite3SafetyCheckSickOrOk(db) ){ + return SQLITE_MISUSE_BKPT; + } + if( !db || db->mallocFailed ){ + return SQLITE_NOMEM; + } + return db->errCode; +} -#if 0 -} /* extern "C" */ -#endif /* __cplusplus */ +/* +** Return a string that describes the kind of error specified in the +** argument. For now, this simply calls the internal sqlite3ErrStr() +** function. +*/ +SQLITE_API const char *SQLITE_STDCALL sqlite3_errstr(int rc){ + return sqlite3ErrStr(rc); +} +/* +** Create a new collating function for database "db". The name is zName +** and the encoding is enc. +*/ +static int createCollation( + sqlite3* db, + const char *zName, + u8 enc, + void* pCtx, + int(*xCompare)(void*,int,const void*,int,const void*), + void(*xDel)(void*) +){ + CollSeq *pColl; + int enc2; + + assert( sqlite3_mutex_held(db->mutex) ); -/************** End of sqliteicu.h *******************************************/ -/************** Continuing where we left off in main.c ***********************/ -#endif + /* If SQLITE_UTF16 is specified as the encoding type, transform this + ** to one of SQLITE_UTF16LE or SQLITE_UTF16BE using the + ** SQLITE_UTF16NATIVE macro. SQLITE_UTF16 is not used internally. + */ + enc2 = enc; + testcase( enc2==SQLITE_UTF16 ); + testcase( enc2==SQLITE_UTF16_ALIGNED ); + if( enc2==SQLITE_UTF16 || enc2==SQLITE_UTF16_ALIGNED ){ + enc2 = SQLITE_UTF16NATIVE; + } + if( enc2SQLITE_UTF16BE ){ + return SQLITE_MISUSE_BKPT; + } -#ifndef SQLITE_AMALGAMATION -/* IMPLEMENTATION-OF: R-46656-45156 The sqlite3_version[] string constant -** contains the text of SQLITE_VERSION macro. -*/ -SQLITE_API const char sqlite3_version[] = SQLITE_VERSION; -#endif + /* Check if this call is removing or replacing an existing collation + ** sequence. If so, and there are active VMs, return busy. If there + ** are no active VMs, invalidate any pre-compiled statements. + */ + pColl = sqlite3FindCollSeq(db, (u8)enc2, zName, 0); + if( pColl && pColl->xCmp ){ + if( db->nVdbeActive ){ + sqlite3ErrorWithMsg(db, SQLITE_BUSY, + "unable to delete/modify collation sequence due to active statements"); + return SQLITE_BUSY; + } + sqlite3ExpirePreparedStatements(db); -/* IMPLEMENTATION-OF: R-53536-42575 The sqlite3_libversion() function returns -** a pointer to the to the sqlite3_version[] string constant. -*/ -SQLITE_API const char *SQLITE_STDCALL sqlite3_libversion(void){ return sqlite3_version; } + /* If collation sequence pColl was created directly by a call to + ** sqlite3_create_collation, and not generated by synthCollSeq(), + ** then any copies made by synthCollSeq() need to be invalidated. + ** Also, collation destructor - CollSeq.xDel() - function may need + ** to be called. + */ + if( (pColl->enc & ~SQLITE_UTF16_ALIGNED)==enc2 ){ + CollSeq *aColl = sqlite3HashFind(&db->aCollSeq, zName); + int j; + for(j=0; j<3; j++){ + CollSeq *p = &aColl[j]; + if( p->enc==pColl->enc ){ + if( p->xDel ){ + p->xDel(p->pUser); + } + p->xCmp = 0; + } + } + } + } -/* IMPLEMENTATION-OF: R-63124-39300 The sqlite3_sourceid() function returns a -** pointer to a string constant whose value is the same as the -** SQLITE_SOURCE_ID C preprocessor macro. -*/ -SQLITE_API const char *SQLITE_STDCALL sqlite3_sourceid(void){ return SQLITE_SOURCE_ID; } + pColl = sqlite3FindCollSeq(db, (u8)enc2, zName, 1); + if( pColl==0 ) return SQLITE_NOMEM; + pColl->xCmp = xCompare; + pColl->pUser = pCtx; + pColl->xDel = xDel; + pColl->enc = (u8)(enc2 | (enc & SQLITE_UTF16_ALIGNED)); + sqlite3Error(db, SQLITE_OK); + return SQLITE_OK; +} -/* IMPLEMENTATION-OF: R-35210-63508 The sqlite3_libversion_number() function -** returns an integer equal to SQLITE_VERSION_NUMBER. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_libversion_number(void){ return SQLITE_VERSION_NUMBER; } -/* IMPLEMENTATION-OF: R-20790-14025 The sqlite3_threadsafe() function returns -** zero if and only if SQLite was compiled with mutexing code omitted due to -** the SQLITE_THREADSAFE compile-time option being set to 0. +/* +** This array defines hard upper bounds on limit values. The +** initializer must be kept in sync with the SQLITE_LIMIT_* +** #defines in sqlite3.h. */ -SQLITE_API int SQLITE_STDCALL sqlite3_threadsafe(void){ return SQLITE_THREADSAFE; } +static const int aHardLimit[] = { + SQLITE_MAX_LENGTH, + SQLITE_MAX_SQL_LENGTH, + SQLITE_MAX_COLUMN, + SQLITE_MAX_EXPR_DEPTH, + SQLITE_MAX_COMPOUND_SELECT, + SQLITE_MAX_VDBE_OP, + SQLITE_MAX_FUNCTION_ARG, + SQLITE_MAX_ATTACHED, + SQLITE_MAX_LIKE_PATTERN_LENGTH, + SQLITE_MAX_VARIABLE_NUMBER, /* IMP: R-38091-32352 */ + SQLITE_MAX_TRIGGER_DEPTH, + SQLITE_MAX_WORKER_THREADS, +}; -#if !defined(SQLITE_OMIT_TRACE) && defined(SQLITE_ENABLE_IOTRACE) /* -** If the following function pointer is not NULL and if -** SQLITE_ENABLE_IOTRACE is enabled, then messages describing -** I/O active are written using this function. These messages -** are intended for debugging activity only. +** Make sure the hard limits are set to reasonable values */ -SQLITE_API void (SQLITE_CDECL *sqlite3IoTrace)(const char*, ...) = 0; +#if SQLITE_MAX_LENGTH<100 +# error SQLITE_MAX_LENGTH must be at least 100 +#endif +#if SQLITE_MAX_SQL_LENGTH<100 +# error SQLITE_MAX_SQL_LENGTH must be at least 100 +#endif +#if SQLITE_MAX_SQL_LENGTH>SQLITE_MAX_LENGTH +# error SQLITE_MAX_SQL_LENGTH must not be greater than SQLITE_MAX_LENGTH +#endif +#if SQLITE_MAX_COMPOUND_SELECT<2 +# error SQLITE_MAX_COMPOUND_SELECT must be at least 2 +#endif +#if SQLITE_MAX_VDBE_OP<40 +# error SQLITE_MAX_VDBE_OP must be at least 40 +#endif +#if SQLITE_MAX_FUNCTION_ARG<0 || SQLITE_MAX_FUNCTION_ARG>1000 +# error SQLITE_MAX_FUNCTION_ARG must be between 0 and 1000 +#endif +#if SQLITE_MAX_ATTACHED<0 || SQLITE_MAX_ATTACHED>125 +# error SQLITE_MAX_ATTACHED must be between 0 and 125 +#endif +#if SQLITE_MAX_LIKE_PATTERN_LENGTH<1 +# error SQLITE_MAX_LIKE_PATTERN_LENGTH must be at least 1 +#endif +#if SQLITE_MAX_COLUMN>32767 +# error SQLITE_MAX_COLUMN must not exceed 32767 +#endif +#if SQLITE_MAX_TRIGGER_DEPTH<1 +# error SQLITE_MAX_TRIGGER_DEPTH must be at least 1 +#endif +#if SQLITE_MAX_WORKER_THREADS<0 || SQLITE_MAX_WORKER_THREADS>50 +# error SQLITE_MAX_WORKER_THREADS must be between 0 and 50 #endif -/* -** If the following global variable points to a string which is the -** name of a directory, then that directory will be used to store -** temporary files. -** -** See also the "PRAGMA temp_store_directory" SQL command. -*/ -SQLITE_API char *sqlite3_temp_directory = 0; /* -** If the following global variable points to a string which is the -** name of a directory, then that directory will be used to store -** all database files specified with a relative pathname. +** Change the value of a limit. Report the old value. +** If an invalid limit index is supplied, report -1. +** Make no changes but still report the old value if the +** new limit is negative. ** -** See also the "PRAGMA data_store_directory" SQL command. +** A new lower limit does not shrink existing constructs. +** It merely prevents new constructs that exceed the limit +** from forming. */ -SQLITE_API char *sqlite3_data_directory = 0; +SQLITE_API int SQLITE_STDCALL sqlite3_limit(sqlite3 *db, int limitId, int newLimit){ + int oldLimit; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return -1; + } +#endif + + /* EVIDENCE-OF: R-30189-54097 For each limit category SQLITE_LIMIT_NAME + ** there is a hard upper bound set at compile-time by a C preprocessor + ** macro called SQLITE_MAX_NAME. (The "_LIMIT_" in the name is changed to + ** "_MAX_".) + */ + assert( aHardLimit[SQLITE_LIMIT_LENGTH]==SQLITE_MAX_LENGTH ); + assert( aHardLimit[SQLITE_LIMIT_SQL_LENGTH]==SQLITE_MAX_SQL_LENGTH ); + assert( aHardLimit[SQLITE_LIMIT_COLUMN]==SQLITE_MAX_COLUMN ); + assert( aHardLimit[SQLITE_LIMIT_EXPR_DEPTH]==SQLITE_MAX_EXPR_DEPTH ); + assert( aHardLimit[SQLITE_LIMIT_COMPOUND_SELECT]==SQLITE_MAX_COMPOUND_SELECT); + assert( aHardLimit[SQLITE_LIMIT_VDBE_OP]==SQLITE_MAX_VDBE_OP ); + assert( aHardLimit[SQLITE_LIMIT_FUNCTION_ARG]==SQLITE_MAX_FUNCTION_ARG ); + assert( aHardLimit[SQLITE_LIMIT_ATTACHED]==SQLITE_MAX_ATTACHED ); + assert( aHardLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]== + SQLITE_MAX_LIKE_PATTERN_LENGTH ); + assert( aHardLimit[SQLITE_LIMIT_VARIABLE_NUMBER]==SQLITE_MAX_VARIABLE_NUMBER); + assert( aHardLimit[SQLITE_LIMIT_TRIGGER_DEPTH]==SQLITE_MAX_TRIGGER_DEPTH ); + assert( aHardLimit[SQLITE_LIMIT_WORKER_THREADS]==SQLITE_MAX_WORKER_THREADS ); + assert( SQLITE_LIMIT_WORKER_THREADS==(SQLITE_N_LIMIT-1) ); + + + if( limitId<0 || limitId>=SQLITE_N_LIMIT ){ + return -1; + } + oldLimit = db->aLimit[limitId]; + if( newLimit>=0 ){ /* IMP: R-52476-28732 */ + if( newLimit>aHardLimit[limitId] ){ + newLimit = aHardLimit[limitId]; /* IMP: R-51463-25634 */ + } + db->aLimit[limitId] = newLimit; + } + return oldLimit; /* IMP: R-53341-35419 */ +} /* -** Initialize SQLite. -** -** This routine must be called to initialize the memory allocation, -** VFS, and mutex subsystems prior to doing any serious work with -** SQLite. But as long as you do not compile with SQLITE_OMIT_AUTOINIT -** this routine will be called automatically by key routines such as -** sqlite3_open(). -** -** This routine is a no-op except on its very first call for the process, -** or for the first call after a call to sqlite3_shutdown. -** -** The first thread to call this routine runs the initialization to -** completion. If subsequent threads call this routine before the first -** thread has finished the initialization process, then the subsequent -** threads must block until the first thread finishes with the initialization. -** -** The first thread might call this routine recursively. Recursive -** calls to this routine should not block, of course. Otherwise the -** initialization process would never complete. +** This function is used to parse both URIs and non-URI filenames passed by the +** user to API functions sqlite3_open() or sqlite3_open_v2(), and for database +** URIs specified as part of ATTACH statements. ** -** Let X be the first thread to enter this routine. Let Y be some other -** thread. Then while the initial invocation of this routine by X is -** incomplete, it is required that: +** The first argument to this function is the name of the VFS to use (or +** a NULL to signify the default VFS) if the URI does not contain a "vfs=xxx" +** query parameter. The second argument contains the URI (or non-URI filename) +** itself. When this function is called the *pFlags variable should contain +** the default flags to open the database handle with. The value stored in +** *pFlags may be updated before returning if the URI filename contains +** "cache=xxx" or "mode=xxx" query parameters. ** -** * Calls to this routine from Y must block until the outer-most -** call by X completes. +** If successful, SQLITE_OK is returned. In this case *ppVfs is set to point to +** the VFS that should be used to open the database file. *pzFile is set to +** point to a buffer containing the name of the file to open. It is the +** responsibility of the caller to eventually call sqlite3_free() to release +** this buffer. ** -** * Recursive calls to this routine from thread X return immediately -** without blocking. +** If an error occurs, then an SQLite error code is returned and *pzErrMsg +** may be set to point to a buffer containing an English language error +** message. It is the responsibility of the caller to eventually release +** this buffer by calling sqlite3_free(). */ -SQLITE_API int SQLITE_STDCALL sqlite3_initialize(void){ - MUTEX_LOGIC( sqlite3_mutex *pMaster; ) /* The main static mutex */ - int rc; /* Result code */ -#ifdef SQLITE_EXTRA_INIT - int bRunExtraInit = 0; /* Extra initialization needed */ -#endif +SQLITE_PRIVATE int sqlite3ParseUri( + const char *zDefaultVfs, /* VFS to use if no "vfs=xxx" query option */ + const char *zUri, /* Nul-terminated URI to parse */ + unsigned int *pFlags, /* IN/OUT: SQLITE_OPEN_XXX flags */ + sqlite3_vfs **ppVfs, /* OUT: VFS to use */ + char **pzFile, /* OUT: Filename component of URI */ + char **pzErrMsg /* OUT: Error message (if rc!=SQLITE_OK) */ +){ + int rc = SQLITE_OK; + unsigned int flags = *pFlags; + const char *zVfs = zDefaultVfs; + char *zFile; + char c; + int nUri = sqlite3Strlen30(zUri); -#ifdef SQLITE_OMIT_WSD - rc = sqlite3_wsd_init(4096, 24); - if( rc!=SQLITE_OK ){ - return rc; - } -#endif + assert( *pzErrMsg==0 ); - /* If the following assert() fails on some obscure processor/compiler - ** combination, the work-around is to set the correct pointer - ** size at compile-time using -DSQLITE_PTRSIZE=n compile-time option */ - assert( SQLITE_PTRSIZE==sizeof(char*) ); + if( ((flags & SQLITE_OPEN_URI) /* IMP: R-48725-32206 */ + || sqlite3GlobalConfig.bOpenUri) /* IMP: R-51689-46548 */ + && nUri>=5 && memcmp(zUri, "file:", 5)==0 /* IMP: R-57884-37496 */ + ){ + char *zOpt; + int eState; /* Parser state when parsing URI */ + int iIn; /* Input character index */ + int iOut = 0; /* Output character index */ + u64 nByte = nUri+2; /* Bytes of space to allocate */ - /* If SQLite is already completely initialized, then this call - ** to sqlite3_initialize() should be a no-op. But the initialization - ** must be complete. So isInit must not be set until the very end - ** of this routine. - */ - if( sqlite3GlobalConfig.isInit ) return SQLITE_OK; + /* Make sure the SQLITE_OPEN_URI flag is set to indicate to the VFS xOpen + ** method that there may be extra parameters following the file-name. */ + flags |= SQLITE_OPEN_URI; - /* Make sure the mutex subsystem is initialized. If unable to - ** initialize the mutex subsystem, return early with the error. - ** If the system is so sick that we are unable to allocate a mutex, - ** there is not much SQLite is going to be able to do. - ** - ** The mutex subsystem must take care of serializing its own - ** initialization. - */ - rc = sqlite3MutexInit(); - if( rc ) return rc; + for(iIn=0; iIn=0 && octet<256 ); + if( octet==0 ){ + /* This branch is taken when "%00" appears within the URI. In this + ** case we ignore all text in the remainder of the path, name or + ** value currently being parsed. So ignore the current character + ** and skip to the next "?", "=" or "&", as appropriate. */ + while( (c = zUri[iIn])!=0 && c!='#' + && (eState!=0 || c!='?') + && (eState!=1 || (c!='=' && c!='&')) + && (eState!=2 || c!='&') + ){ + iIn++; + } + continue; + } + c = octet; + }else if( eState==1 && (c=='&' || c=='=') ){ + if( zFile[iOut-1]==0 ){ + /* An empty option name. Ignore this option altogether. */ + while( zUri[iIn] && zUri[iIn]!='#' && zUri[iIn-1]!='&' ) iIn++; + continue; + } + if( c=='&' ){ + zFile[iOut++] = '\0'; + }else{ + eState = 2; + } + c = 0; + }else if( (eState==0 && c=='?') || (eState==2 && c=='&') ){ + c = 0; + eState = 1; + } + zFile[iOut++] = c; + } + if( eState==1 ) zFile[iOut++] = '\0'; + zFile[iOut++] = '\0'; + zFile[iOut++] = '\0'; + + /* Check if there were any options specified that should be interpreted + ** here. Options that are interpreted here include "vfs" and those that + ** correspond to flags that may be passed to the sqlite3_open_v2() + ** method. */ + zOpt = &zFile[sqlite3Strlen30(zFile)+1]; + while( zOpt[0] ){ + int nOpt = sqlite3Strlen30(zOpt); + char *zVal = &zOpt[nOpt+1]; + int nVal = sqlite3Strlen30(zVal); + + if( nOpt==3 && memcmp("vfs", zOpt, 3)==0 ){ + zVfs = zVal; + }else{ + struct OpenMode { + const char *z; + int mode; + } *aMode = 0; + char *zModeType = 0; + int mask = 0; + int limit = 0; + + if( nOpt==5 && memcmp("cache", zOpt, 5)==0 ){ + static struct OpenMode aCacheMode[] = { + { "shared", SQLITE_OPEN_SHAREDCACHE }, + { "private", SQLITE_OPEN_PRIVATECACHE }, + { 0, 0 } + }; + + mask = SQLITE_OPEN_SHAREDCACHE|SQLITE_OPEN_PRIVATECACHE; + aMode = aCacheMode; + limit = mask; + zModeType = "cache"; + } + if( nOpt==4 && memcmp("mode", zOpt, 4)==0 ){ + static struct OpenMode aOpenMode[] = { + { "ro", SQLITE_OPEN_READONLY }, + { "rw", SQLITE_OPEN_READWRITE }, + { "rwc", SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE }, + { "memory", SQLITE_OPEN_MEMORY }, + { 0, 0 } + }; + + mask = SQLITE_OPEN_READONLY | SQLITE_OPEN_READWRITE + | SQLITE_OPEN_CREATE | SQLITE_OPEN_MEMORY; + aMode = aOpenMode; + limit = mask & flags; + zModeType = "access"; + } + + if( aMode ){ + int i; + int mode = 0; + for(i=0; aMode[i].z; i++){ + const char *z = aMode[i].z; + if( nVal==sqlite3Strlen30(z) && 0==memcmp(zVal, z, nVal) ){ + mode = aMode[i].mode; + break; + } + } + if( mode==0 ){ + *pzErrMsg = sqlite3_mprintf("no such %s mode: %s", zModeType, zVal); + rc = SQLITE_ERROR; + goto parse_uri_out; + } + if( (mode & ~SQLITE_OPEN_MEMORY)>limit ){ + *pzErrMsg = sqlite3_mprintf("%s mode not allowed: %s", + zModeType, zVal); + rc = SQLITE_PERM; + goto parse_uri_out; + } + flags = (flags & ~mask) | mode; + } + } - /* Do the rest of the initialization under the recursive mutex so - ** that we will be able to handle recursive calls into - ** sqlite3_initialize(). The recursive calls normally come through - ** sqlite3_os_init() when it invokes sqlite3_vfs_register(), but other - ** recursive calls might also be possible. - ** - ** IMPLEMENTATION-OF: R-00140-37445 SQLite automatically serializes calls - ** to the xInit method, so the xInit method need not be threadsafe. - ** - ** The following mutex is what serializes access to the appdef pcache xInit - ** methods. The sqlite3_pcache_methods.xInit() all is embedded in the - ** call to sqlite3PcacheInitialize(). - */ - sqlite3_mutex_enter(sqlite3GlobalConfig.pInitMutex); - if( sqlite3GlobalConfig.isInit==0 && sqlite3GlobalConfig.inProgress==0 ){ - FuncDefHash *pHash = &GLOBAL(FuncDefHash, sqlite3GlobalFunctions); - sqlite3GlobalConfig.inProgress = 1; - memset(pHash, 0, sizeof(sqlite3GlobalFunctions)); - sqlite3RegisterGlobalFunctions(); - if( sqlite3GlobalConfig.isPCacheInit==0 ){ - rc = sqlite3PcacheInitialize(); - } - if( rc==SQLITE_OK ){ - sqlite3GlobalConfig.isPCacheInit = 1; - rc = sqlite3OsInit(); - } - if( rc==SQLITE_OK ){ - sqlite3PCacheBufferSetup( sqlite3GlobalConfig.pPage, - sqlite3GlobalConfig.szPage, sqlite3GlobalConfig.nPage); - sqlite3GlobalConfig.isInit = 1; -#ifdef SQLITE_EXTRA_INIT - bRunExtraInit = 1; -#endif + zOpt = &zVal[nVal+1]; } - sqlite3GlobalConfig.inProgress = 0; - } - sqlite3_mutex_leave(sqlite3GlobalConfig.pInitMutex); - /* Go back under the static mutex and clean up the recursive - ** mutex to prevent a resource leak. - */ - sqlite3_mutex_enter(pMaster); - sqlite3GlobalConfig.nRefInitMutex--; - if( sqlite3GlobalConfig.nRefInitMutex<=0 ){ - assert( sqlite3GlobalConfig.nRefInitMutex==0 ); - sqlite3_mutex_free(sqlite3GlobalConfig.pInitMutex); - sqlite3GlobalConfig.pInitMutex = 0; + }else{ + zFile = sqlite3_malloc64(nUri+2); + if( !zFile ) return SQLITE_NOMEM; + memcpy(zFile, zUri, nUri); + zFile[nUri] = '\0'; + zFile[nUri+1] = '\0'; + flags &= ~SQLITE_OPEN_URI; } - sqlite3_mutex_leave(pMaster); - /* The following is just a sanity check to make sure SQLite has - ** been compiled correctly. It is important to run this code, but - ** we don't want to run it too often and soak up CPU cycles for no - ** reason. So we run it once during initialization. - */ -#ifndef NDEBUG -#ifndef SQLITE_OMIT_FLOATING_POINT - /* This section of code's only "output" is via assert() statements. */ - if ( rc==SQLITE_OK ){ - u64 x = (((u64)1)<<63)-1; - double y; - assert(sizeof(x)==8); - assert(sizeof(x)==sizeof(y)); - memcpy(&y, &x, 8); - assert( sqlite3IsNaN(y) ); + *ppVfs = sqlite3_vfs_find(zVfs); + if( *ppVfs==0 ){ + *pzErrMsg = sqlite3_mprintf("no such vfs: %s", zVfs); + rc = SQLITE_ERROR; } -#endif -#endif - - /* Do extra initialization steps requested by the SQLITE_EXTRA_INIT - ** compile-time option. - */ -#ifdef SQLITE_EXTRA_INIT - if( bRunExtraInit ){ - int SQLITE_EXTRA_INIT(const char*); - rc = SQLITE_EXTRA_INIT(0); + parse_uri_out: + if( rc!=SQLITE_OK ){ + sqlite3_free(zFile); + zFile = 0; } -#endif - + *pFlags = flags; + *pzFile = zFile; return rc; } + /* -** Undo the effects of sqlite3_initialize(). Must not be called while -** there are outstanding database connections or memory allocations or -** while any part of SQLite is otherwise in use in any thread. This -** routine is not threadsafe. But it is safe to invoke this routine -** on when SQLite is already shut down. If SQLite is already shut down -** when this routine is invoked, then this routine is a harmless no-op. +** This routine does the work of opening a database on behalf of +** sqlite3_open() and sqlite3_open16(). The database filename "zFilename" +** is UTF-8 encoded. */ -SQLITE_API int SQLITE_STDCALL sqlite3_shutdown(void){ -#ifdef SQLITE_OMIT_WSD - int rc = sqlite3_wsd_init(4096, 24); - if( rc!=SQLITE_OK ){ - return rc; - } -#endif +static int openDatabase( + const char *zFilename, /* Database filename UTF-8 encoded */ + sqlite3 **ppDb, /* OUT: Returned database handle */ + unsigned int flags, /* Operational flags */ + const char *zVfs /* Name of the VFS to use */ +){ + sqlite3 *db; /* Store allocated handle here */ + int rc; /* Return code */ + int isThreadsafe; /* True for threadsafe connections */ + char *zOpen = 0; /* Filename argument to pass to BtreeOpen() */ + char *zErrMsg = 0; /* Error message from sqlite3ParseUri() */ - if( sqlite3GlobalConfig.isInit ){ -#ifdef SQLITE_EXTRA_SHUTDOWN - void SQLITE_EXTRA_SHUTDOWN(void); - SQLITE_EXTRA_SHUTDOWN(); +#ifdef SQLITE_ENABLE_API_ARMOR + if( ppDb==0 ) return SQLITE_MISUSE_BKPT; #endif - sqlite3_os_end(); - sqlite3_reset_auto_extension(); - sqlite3GlobalConfig.isInit = 0; - } - if( sqlite3GlobalConfig.isPCacheInit ){ - sqlite3PcacheShutdown(); - sqlite3GlobalConfig.isPCacheInit = 0; + *ppDb = 0; +#ifndef SQLITE_OMIT_AUTOINIT + rc = sqlite3_initialize(); + if( rc ) return rc; +#endif + + /* Only allow sensible combinations of bits in the flags argument. + ** Throw an error if any non-sense combination is used. If we + ** do not block illegal combinations here, it could trigger + ** assert() statements in deeper layers. Sensible combinations + ** are: + ** + ** 1: SQLITE_OPEN_READONLY + ** 2: SQLITE_OPEN_READWRITE + ** 6: SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE + */ + assert( SQLITE_OPEN_READONLY == 0x01 ); + assert( SQLITE_OPEN_READWRITE == 0x02 ); + assert( SQLITE_OPEN_CREATE == 0x04 ); + testcase( (1<<(flags&7))==0x02 ); /* READONLY */ + testcase( (1<<(flags&7))==0x04 ); /* READWRITE */ + testcase( (1<<(flags&7))==0x40 ); /* READWRITE | CREATE */ + if( ((1<<(flags&7)) & 0x46)==0 ){ + return SQLITE_MISUSE_BKPT; /* IMP: R-65497-44594 */ } - if( sqlite3GlobalConfig.isMallocInit ){ - sqlite3MallocEnd(); - sqlite3GlobalConfig.isMallocInit = 0; -#ifndef SQLITE_OMIT_SHUTDOWN_DIRECTORIES - /* The heap subsystem has now been shutdown and these values are supposed - ** to be NULL or point to memory that was obtained from sqlite3_malloc(), - ** which would rely on that heap subsystem; therefore, make sure these - ** values cannot refer to heap memory that was just invalidated when the - ** heap subsystem was shutdown. This is only done if the current call to - ** this function resulted in the heap subsystem actually being shutdown. - */ - sqlite3_data_directory = 0; - sqlite3_temp_directory = 0; -#endif + if( sqlite3GlobalConfig.bCoreMutex==0 ){ + isThreadsafe = 0; + }else if( flags & SQLITE_OPEN_NOMUTEX ){ + isThreadsafe = 0; + }else if( flags & SQLITE_OPEN_FULLMUTEX ){ + isThreadsafe = 1; + }else{ + isThreadsafe = sqlite3GlobalConfig.bFullMutex; } - if( sqlite3GlobalConfig.isMutexInit ){ - sqlite3MutexEnd(); - sqlite3GlobalConfig.isMutexInit = 0; + if( flags & SQLITE_OPEN_PRIVATECACHE ){ + flags &= ~SQLITE_OPEN_SHAREDCACHE; + }else if( sqlite3GlobalConfig.sharedCacheEnabled ){ + flags |= SQLITE_OPEN_SHAREDCACHE; } - return SQLITE_OK; -} - -/* -** This API allows applications to modify the global configuration of -** the SQLite library at run-time. -** -** This routine should only be called when there are no outstanding -** database connections or memory allocations. This routine is not -** threadsafe. Failure to heed these warnings can lead to unpredictable -** behavior. -*/ -SQLITE_API int SQLITE_CDECL sqlite3_config(int op, ...){ - va_list ap; - int rc = SQLITE_OK; - - /* sqlite3_config() shall return SQLITE_MISUSE if it is invoked while - ** the SQLite library is in use. */ - if( sqlite3GlobalConfig.isInit ) return SQLITE_MISUSE_BKPT; - - va_start(ap, op); - switch( op ){ + /* Remove harmful bits from the flags parameter + ** + ** The SQLITE_OPEN_NOMUTEX and SQLITE_OPEN_FULLMUTEX flags were + ** dealt with in the previous code block. Besides these, the only + ** valid input flags for sqlite3_open_v2() are SQLITE_OPEN_READONLY, + ** SQLITE_OPEN_READWRITE, SQLITE_OPEN_CREATE, SQLITE_OPEN_SHAREDCACHE, + ** SQLITE_OPEN_PRIVATECACHE, and some reserved bits. Silently mask + ** off all other flags. + */ + flags &= ~( SQLITE_OPEN_DELETEONCLOSE | + SQLITE_OPEN_EXCLUSIVE | + SQLITE_OPEN_MAIN_DB | + SQLITE_OPEN_TEMP_DB | + SQLITE_OPEN_TRANSIENT_DB | + SQLITE_OPEN_MAIN_JOURNAL | + SQLITE_OPEN_TEMP_JOURNAL | + SQLITE_OPEN_SUBJOURNAL | + SQLITE_OPEN_MASTER_JOURNAL | + SQLITE_OPEN_NOMUTEX | + SQLITE_OPEN_FULLMUTEX | + SQLITE_OPEN_WAL + ); - /* Mutex configuration options are only available in a threadsafe - ** compile. - */ -#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-54466-46756 */ - case SQLITE_CONFIG_SINGLETHREAD: { - /* EVIDENCE-OF: R-02748-19096 This option sets the threading mode to - ** Single-thread. */ - sqlite3GlobalConfig.bCoreMutex = 0; /* Disable mutex on core */ - sqlite3GlobalConfig.bFullMutex = 0; /* Disable mutex on connections */ - break; + /* Allocate the sqlite data structure */ + db = sqlite3MallocZero( sizeof(sqlite3) ); + if( db==0 ) goto opendb_out; + if( isThreadsafe ){ + db->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_RECURSIVE); + if( db->mutex==0 ){ + sqlite3_free(db); + db = 0; + goto opendb_out; } + } + sqlite3_mutex_enter(db->mutex); + db->errMask = 0xff; + db->nDb = 2; + db->magic = SQLITE_MAGIC_BUSY; + db->aDb = db->aDbStatic; + + assert( sizeof(db->aLimit)==sizeof(aHardLimit) ); + memcpy(db->aLimit, aHardLimit, sizeof(db->aLimit)); + db->aLimit[SQLITE_LIMIT_WORKER_THREADS] = SQLITE_DEFAULT_WORKER_THREADS; + db->autoCommit = 1; + db->nextAutovac = -1; + db->szMmap = sqlite3GlobalConfig.szMmap; + db->nextPagesize = 0; + db->nMaxSorterMmap = 0x7FFFFFFF; + db->flags |= SQLITE_ShortColNames | SQLITE_EnableTrigger | SQLITE_CacheSpill +#if !defined(SQLITE_DEFAULT_AUTOMATIC_INDEX) || SQLITE_DEFAULT_AUTOMATIC_INDEX + | SQLITE_AutoIndex #endif -#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-20520-54086 */ - case SQLITE_CONFIG_MULTITHREAD: { - /* EVIDENCE-OF: R-14374-42468 This option sets the threading mode to - ** Multi-thread. */ - sqlite3GlobalConfig.bCoreMutex = 1; /* Enable mutex on core */ - sqlite3GlobalConfig.bFullMutex = 0; /* Disable mutex on connections */ - break; - } +#if SQLITE_DEFAULT_CKPTFULLFSYNC + | SQLITE_CkptFullFSync #endif -#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-59593-21810 */ - case SQLITE_CONFIG_SERIALIZED: { - /* EVIDENCE-OF: R-41220-51800 This option sets the threading mode to - ** Serialized. */ - sqlite3GlobalConfig.bCoreMutex = 1; /* Enable mutex on core */ - sqlite3GlobalConfig.bFullMutex = 1; /* Enable mutex on connections */ - break; - } +#if SQLITE_DEFAULT_FILE_FORMAT<4 + | SQLITE_LegacyFileFmt #endif -#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-63666-48755 */ - case SQLITE_CONFIG_MUTEX: { - /* Specify an alternative mutex implementation */ - sqlite3GlobalConfig.mutex = *va_arg(ap, sqlite3_mutex_methods*); - break; - } +#ifdef SQLITE_ENABLE_LOAD_EXTENSION + | SQLITE_LoadExtension #endif -#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-14450-37597 */ - case SQLITE_CONFIG_GETMUTEX: { - /* Retrieve the current mutex implementation */ - *va_arg(ap, sqlite3_mutex_methods*) = sqlite3GlobalConfig.mutex; - break; - } +#if SQLITE_DEFAULT_RECURSIVE_TRIGGERS + | SQLITE_RecTriggers +#endif +#if defined(SQLITE_DEFAULT_FOREIGN_KEYS) && SQLITE_DEFAULT_FOREIGN_KEYS + | SQLITE_ForeignKeys +#endif +#if defined(SQLITE_REVERSE_UNORDERED_SELECTS) + | SQLITE_ReverseOrder +#endif +#if defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK) + | SQLITE_CellSizeCk +#endif + ; + sqlite3HashInit(&db->aCollSeq); +#ifndef SQLITE_OMIT_VIRTUALTABLE + sqlite3HashInit(&db->aModule); #endif - case SQLITE_CONFIG_MALLOC: { - /* EVIDENCE-OF: R-55594-21030 The SQLITE_CONFIG_MALLOC option takes a - ** single argument which is a pointer to an instance of the - ** sqlite3_mem_methods structure. The argument specifies alternative - ** low-level memory allocation routines to be used in place of the memory - ** allocation routines built into SQLite. */ - sqlite3GlobalConfig.m = *va_arg(ap, sqlite3_mem_methods*); - break; - } - case SQLITE_CONFIG_GETMALLOC: { - /* EVIDENCE-OF: R-51213-46414 The SQLITE_CONFIG_GETMALLOC option takes a - ** single argument which is a pointer to an instance of the - ** sqlite3_mem_methods structure. The sqlite3_mem_methods structure is - ** filled with the currently defined memory allocation routines. */ - if( sqlite3GlobalConfig.m.xMalloc==0 ) sqlite3MemSetDefault(); - *va_arg(ap, sqlite3_mem_methods*) = sqlite3GlobalConfig.m; - break; - } - case SQLITE_CONFIG_MEMSTATUS: { - /* EVIDENCE-OF: R-61275-35157 The SQLITE_CONFIG_MEMSTATUS option takes - ** single argument of type int, interpreted as a boolean, which enables - ** or disables the collection of memory allocation statistics. */ - sqlite3GlobalConfig.bMemstat = va_arg(ap, int); - break; - } - case SQLITE_CONFIG_SCRATCH: { - /* EVIDENCE-OF: R-08404-60887 There are three arguments to - ** SQLITE_CONFIG_SCRATCH: A pointer an 8-byte aligned memory buffer from - ** which the scratch allocations will be drawn, the size of each scratch - ** allocation (sz), and the maximum number of scratch allocations (N). */ - sqlite3GlobalConfig.pScratch = va_arg(ap, void*); - sqlite3GlobalConfig.szScratch = va_arg(ap, int); - sqlite3GlobalConfig.nScratch = va_arg(ap, int); - break; - } - case SQLITE_CONFIG_PAGECACHE: { - /* EVIDENCE-OF: R-31408-40510 There are three arguments to - ** SQLITE_CONFIG_PAGECACHE: A pointer to 8-byte aligned memory, the size - ** of each page buffer (sz), and the number of pages (N). */ - sqlite3GlobalConfig.pPage = va_arg(ap, void*); - sqlite3GlobalConfig.szPage = va_arg(ap, int); - sqlite3GlobalConfig.nPage = va_arg(ap, int); - break; - } - case SQLITE_CONFIG_PCACHE_HDRSZ: { - /* EVIDENCE-OF: R-39100-27317 The SQLITE_CONFIG_PCACHE_HDRSZ option takes - ** a single parameter which is a pointer to an integer and writes into - ** that integer the number of extra bytes per page required for each page - ** in SQLITE_CONFIG_PAGECACHE. */ - *va_arg(ap, int*) = - sqlite3HeaderSizeBtree() + - sqlite3HeaderSizePcache() + - sqlite3HeaderSizePcache1(); - break; - } + /* Add the default collation sequence BINARY. BINARY works for both UTF-8 + ** and UTF-16, so add a version for each to avoid any unnecessary + ** conversions. The only error that can occur here is a malloc() failure. + ** + ** EVIDENCE-OF: R-52786-44878 SQLite defines three built-in collating + ** functions: + */ + createCollation(db, "BINARY", SQLITE_UTF8, 0, binCollFunc, 0); + createCollation(db, "BINARY", SQLITE_UTF16BE, 0, binCollFunc, 0); + createCollation(db, "BINARY", SQLITE_UTF16LE, 0, binCollFunc, 0); + createCollation(db, "NOCASE", SQLITE_UTF8, 0, nocaseCollatingFunc, 0); + createCollation(db, "RTRIM", SQLITE_UTF8, (void*)1, binCollFunc, 0); + if( db->mallocFailed ){ + goto opendb_out; + } + /* EVIDENCE-OF: R-08308-17224 The default collating function for all + ** strings is BINARY. + */ + db->pDfltColl = sqlite3FindCollSeq(db, SQLITE_UTF8, "BINARY", 0); + assert( db->pDfltColl!=0 ); - case SQLITE_CONFIG_PCACHE: { - /* no-op */ - break; - } - case SQLITE_CONFIG_GETPCACHE: { - /* now an error */ - rc = SQLITE_ERROR; - break; - } + /* Parse the filename/URI argument. */ + db->openFlags = flags; + rc = sqlite3ParseUri(zVfs, zFilename, &flags, &db->pVfs, &zOpen, &zErrMsg); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_NOMEM ) db->mallocFailed = 1; + sqlite3ErrorWithMsg(db, rc, zErrMsg ? "%s" : 0, zErrMsg); + sqlite3_free(zErrMsg); + goto opendb_out; + } - case SQLITE_CONFIG_PCACHE2: { - /* EVIDENCE-OF: R-63325-48378 The SQLITE_CONFIG_PCACHE2 option takes a - ** single argument which is a pointer to an sqlite3_pcache_methods2 - ** object. This object specifies the interface to a custom page cache - ** implementation. */ - sqlite3GlobalConfig.pcache2 = *va_arg(ap, sqlite3_pcache_methods2*); - break; - } - case SQLITE_CONFIG_GETPCACHE2: { - /* EVIDENCE-OF: R-22035-46182 The SQLITE_CONFIG_GETPCACHE2 option takes a - ** single argument which is a pointer to an sqlite3_pcache_methods2 - ** object. SQLite copies of the current page cache implementation into - ** that object. */ - if( sqlite3GlobalConfig.pcache2.xInit==0 ){ - sqlite3PCacheSetDefault(); - } - *va_arg(ap, sqlite3_pcache_methods2*) = sqlite3GlobalConfig.pcache2; - break; + /* Open the backend database driver */ + rc = sqlite3BtreeOpen(db->pVfs, zOpen, db, &db->aDb[0].pBt, 0, + flags | SQLITE_OPEN_MAIN_DB); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_IOERR_NOMEM ){ + rc = SQLITE_NOMEM; } + sqlite3Error(db, rc); + goto opendb_out; + } + sqlite3BtreeEnter(db->aDb[0].pBt); + db->aDb[0].pSchema = sqlite3SchemaGet(db, db->aDb[0].pBt); + if( !db->mallocFailed ) ENC(db) = SCHEMA_ENC(db); + sqlite3BtreeLeave(db->aDb[0].pBt); + db->aDb[1].pSchema = sqlite3SchemaGet(db, 0); -/* EVIDENCE-OF: R-06626-12911 The SQLITE_CONFIG_HEAP option is only -** available if SQLite is compiled with either SQLITE_ENABLE_MEMSYS3 or -** SQLITE_ENABLE_MEMSYS5 and returns SQLITE_ERROR if invoked otherwise. */ -#if defined(SQLITE_ENABLE_MEMSYS3) || defined(SQLITE_ENABLE_MEMSYS5) - case SQLITE_CONFIG_HEAP: { - /* EVIDENCE-OF: R-19854-42126 There are three arguments to - ** SQLITE_CONFIG_HEAP: An 8-byte aligned pointer to the memory, the - ** number of bytes in the memory buffer, and the minimum allocation size. - */ - sqlite3GlobalConfig.pHeap = va_arg(ap, void*); - sqlite3GlobalConfig.nHeap = va_arg(ap, int); - sqlite3GlobalConfig.mnReq = va_arg(ap, int); + /* The default safety_level for the main database is 'full'; for the temp + ** database it is 'NONE'. This matches the pager layer defaults. + */ + db->aDb[0].zName = "main"; + db->aDb[0].safety_level = 3; + db->aDb[1].zName = "temp"; + db->aDb[1].safety_level = 1; - if( sqlite3GlobalConfig.mnReq<1 ){ - sqlite3GlobalConfig.mnReq = 1; - }else if( sqlite3GlobalConfig.mnReq>(1<<12) ){ - /* cap min request size at 2^12 */ - sqlite3GlobalConfig.mnReq = (1<<12); - } + db->magic = SQLITE_MAGIC_OPEN; + if( db->mallocFailed ){ + goto opendb_out; + } - if( sqlite3GlobalConfig.pHeap==0 ){ - /* EVIDENCE-OF: R-49920-60189 If the first pointer (the memory pointer) - ** is NULL, then SQLite reverts to using its default memory allocator - ** (the system malloc() implementation), undoing any prior invocation of - ** SQLITE_CONFIG_MALLOC. - ** - ** Setting sqlite3GlobalConfig.m to all zeros will cause malloc to - ** revert to its default implementation when sqlite3_initialize() is run - */ - memset(&sqlite3GlobalConfig.m, 0, sizeof(sqlite3GlobalConfig.m)); - }else{ - /* EVIDENCE-OF: R-61006-08918 If the memory pointer is not NULL then the - ** alternative memory allocator is engaged to handle all of SQLites - ** memory allocation needs. */ -#ifdef SQLITE_ENABLE_MEMSYS3 - sqlite3GlobalConfig.m = *sqlite3MemGetMemsys3(); -#endif -#ifdef SQLITE_ENABLE_MEMSYS5 - sqlite3GlobalConfig.m = *sqlite3MemGetMemsys5(); -#endif - } - break; + /* Register all built-in functions, but do not attempt to read the + ** database schema yet. This is delayed until the first time the database + ** is accessed. + */ + sqlite3Error(db, SQLITE_OK); + sqlite3RegisterBuiltinFunctions(db); + + /* Load automatic extensions - extensions that have been registered + ** using the sqlite3_automatic_extension() API. + */ + rc = sqlite3_errcode(db); + if( rc==SQLITE_OK ){ + sqlite3AutoLoadExtensions(db); + rc = sqlite3_errcode(db); + if( rc!=SQLITE_OK ){ + goto opendb_out; } + } + +#ifdef SQLITE_ENABLE_FTS1 + if( !db->mallocFailed ){ + extern int sqlite3Fts1Init(sqlite3*); + rc = sqlite3Fts1Init(db); + } #endif - case SQLITE_CONFIG_LOOKASIDE: { - sqlite3GlobalConfig.szLookaside = va_arg(ap, int); - sqlite3GlobalConfig.nLookaside = va_arg(ap, int); - break; - } - - /* Record a pointer to the logger function and its first argument. - ** The default is NULL. Logging is disabled if the function pointer is - ** NULL. - */ - case SQLITE_CONFIG_LOG: { - /* MSVC is picky about pulling func ptrs from va lists. - ** http://support.microsoft.com/kb/47961 - ** sqlite3GlobalConfig.xLog = va_arg(ap, void(*)(void*,int,const char*)); - */ - typedef void(*LOGFUNC_t)(void*,int,const char*); - sqlite3GlobalConfig.xLog = va_arg(ap, LOGFUNC_t); - sqlite3GlobalConfig.pLogArg = va_arg(ap, void*); - break; - } +#ifdef SQLITE_ENABLE_FTS2 + if( !db->mallocFailed && rc==SQLITE_OK ){ + extern int sqlite3Fts2Init(sqlite3*); + rc = sqlite3Fts2Init(db); + } +#endif - /* EVIDENCE-OF: R-55548-33817 The compile-time setting for URI filenames - ** can be changed at start-time using the - ** sqlite3_config(SQLITE_CONFIG_URI,1) or - ** sqlite3_config(SQLITE_CONFIG_URI,0) configuration calls. - */ - case SQLITE_CONFIG_URI: { - /* EVIDENCE-OF: R-25451-61125 The SQLITE_CONFIG_URI option takes a single - ** argument of type int. If non-zero, then URI handling is globally - ** enabled. If the parameter is zero, then URI handling is globally - ** disabled. */ - sqlite3GlobalConfig.bOpenUri = va_arg(ap, int); - break; - } +#ifdef SQLITE_ENABLE_FTS3 + if( !db->mallocFailed && rc==SQLITE_OK ){ + rc = sqlite3Fts3Init(db); + } +#endif - case SQLITE_CONFIG_COVERING_INDEX_SCAN: { - /* EVIDENCE-OF: R-36592-02772 The SQLITE_CONFIG_COVERING_INDEX_SCAN - ** option takes a single integer argument which is interpreted as a - ** boolean in order to enable or disable the use of covering indices for - ** full table scans in the query optimizer. */ - sqlite3GlobalConfig.bUseCis = va_arg(ap, int); - break; - } +#ifdef SQLITE_ENABLE_ICU + if( !db->mallocFailed && rc==SQLITE_OK ){ + rc = sqlite3IcuInit(db); + } +#endif -#ifdef SQLITE_ENABLE_SQLLOG - case SQLITE_CONFIG_SQLLOG: { - typedef void(*SQLLOGFUNC_t)(void*, sqlite3*, const char*, int); - sqlite3GlobalConfig.xSqllog = va_arg(ap, SQLLOGFUNC_t); - sqlite3GlobalConfig.pSqllogArg = va_arg(ap, void *); - break; - } +#ifdef SQLITE_ENABLE_RTREE + if( !db->mallocFailed && rc==SQLITE_OK){ + rc = sqlite3RtreeInit(db); + } #endif - case SQLITE_CONFIG_MMAP_SIZE: { - /* EVIDENCE-OF: R-58063-38258 SQLITE_CONFIG_MMAP_SIZE takes two 64-bit - ** integer (sqlite3_int64) values that are the default mmap size limit - ** (the default setting for PRAGMA mmap_size) and the maximum allowed - ** mmap size limit. */ - sqlite3_int64 szMmap = va_arg(ap, sqlite3_int64); - sqlite3_int64 mxMmap = va_arg(ap, sqlite3_int64); - /* EVIDENCE-OF: R-53367-43190 If either argument to this option is - ** negative, then that argument is changed to its compile-time default. - ** - ** EVIDENCE-OF: R-34993-45031 The maximum allowed mmap size will be - ** silently truncated if necessary so that it does not exceed the - ** compile-time maximum mmap size set by the SQLITE_MAX_MMAP_SIZE - ** compile-time option. - */ - if( mxMmap<0 || mxMmap>SQLITE_MAX_MMAP_SIZE ){ - mxMmap = SQLITE_MAX_MMAP_SIZE; - } - if( szMmap<0 ) szMmap = SQLITE_DEFAULT_MMAP_SIZE; - if( szMmap>mxMmap) szMmap = mxMmap; - sqlite3GlobalConfig.mxMmap = mxMmap; - sqlite3GlobalConfig.szMmap = szMmap; - break; - } +#ifdef SQLITE_ENABLE_DBSTAT_VTAB + if( !db->mallocFailed && rc==SQLITE_OK){ + rc = sqlite3DbstatRegister(db); + } +#endif -#if SQLITE_OS_WIN && defined(SQLITE_WIN32_MALLOC) /* IMP: R-04780-55815 */ - case SQLITE_CONFIG_WIN32_HEAPSIZE: { - /* EVIDENCE-OF: R-34926-03360 SQLITE_CONFIG_WIN32_HEAPSIZE takes a 32-bit - ** unsigned integer value that specifies the maximum size of the created - ** heap. */ - sqlite3GlobalConfig.nHeap = va_arg(ap, int); - break; - } + /* -DSQLITE_DEFAULT_LOCKING_MODE=1 makes EXCLUSIVE the default locking + ** mode. -DSQLITE_DEFAULT_LOCKING_MODE=0 make NORMAL the default locking + ** mode. Doing nothing at all also makes NORMAL the default. + */ +#ifdef SQLITE_DEFAULT_LOCKING_MODE + db->dfltLockMode = SQLITE_DEFAULT_LOCKING_MODE; + sqlite3PagerLockingMode(sqlite3BtreePager(db->aDb[0].pBt), + SQLITE_DEFAULT_LOCKING_MODE); #endif - case SQLITE_CONFIG_PMASZ: { - sqlite3GlobalConfig.szPma = va_arg(ap, unsigned int); - break; - } + if( rc ) sqlite3Error(db, rc); - default: { - rc = SQLITE_ERROR; - break; - } + /* Enable the lookaside-malloc subsystem */ + setupLookaside(db, 0, sqlite3GlobalConfig.szLookaside, + sqlite3GlobalConfig.nLookaside); + + sqlite3_wal_autocheckpoint(db, SQLITE_DEFAULT_WAL_AUTOCHECKPOINT); + +opendb_out: + sqlite3_free(zOpen); + if( db ){ + assert( db->mutex!=0 || isThreadsafe==0 + || sqlite3GlobalConfig.bFullMutex==0 ); + sqlite3_mutex_leave(db->mutex); } - va_end(ap); - return rc; + rc = sqlite3_errcode(db); + assert( db!=0 || rc==SQLITE_NOMEM ); + if( rc==SQLITE_NOMEM ){ + sqlite3_close(db); + db = 0; + }else if( rc!=SQLITE_OK ){ + db->magic = SQLITE_MAGIC_SICK; + } + *ppDb = db; +#ifdef SQLITE_ENABLE_SQLLOG + if( sqlite3GlobalConfig.xSqllog ){ + /* Opening a db handle. Fourth parameter is passed 0. */ + void *pArg = sqlite3GlobalConfig.pSqllogArg; + sqlite3GlobalConfig.xSqllog(pArg, db, zFilename, 0); + } +#endif + return rc & 0xff; } /* -** Set up the lookaside buffers for a database connection. -** Return SQLITE_OK on success. -** If lookaside is already active, return SQLITE_BUSY. -** -** The sz parameter is the number of bytes in each lookaside slot. -** The cnt parameter is the number of slots. If pStart is NULL the -** space for the lookaside memory is obtained from sqlite3_malloc(). -** If pStart is not NULL then it is sz*cnt bytes of memory to use for -** the lookaside memory. +** Open a new database handle. */ -static int setupLookaside(sqlite3 *db, void *pBuf, int sz, int cnt){ - void *pStart; - if( db->lookaside.nOut ){ - return SQLITE_BUSY; - } - /* Free any existing lookaside buffer for this handle before - ** allocating a new one so we don't have to have space for - ** both at the same time. - */ - if( db->lookaside.bMalloced ){ - sqlite3_free(db->lookaside.pStart); - } - /* The size of a lookaside slot after ROUNDDOWN8 needs to be larger - ** than a pointer to be useful. - */ - sz = ROUNDDOWN8(sz); /* IMP: R-33038-09382 */ - if( sz<=(int)sizeof(LookasideSlot*) ) sz = 0; - if( cnt<0 ) cnt = 0; - if( sz==0 || cnt==0 ){ - sz = 0; - pStart = 0; - }else if( pBuf==0 ){ - sqlite3BeginBenignMalloc(); - pStart = sqlite3Malloc( sz*cnt ); /* IMP: R-61949-35727 */ - sqlite3EndBenignMalloc(); - if( pStart ) cnt = sqlite3MallocSize(pStart)/sz; - }else{ - pStart = pBuf; - } - db->lookaside.pStart = pStart; - db->lookaside.pFree = 0; - db->lookaside.sz = (u16)sz; - if( pStart ){ - int i; - LookasideSlot *p; - assert( sz > (int)sizeof(LookasideSlot*) ); - p = (LookasideSlot*)pStart; - for(i=cnt-1; i>=0; i--){ - p->pNext = db->lookaside.pFree; - db->lookaside.pFree = p; - p = (LookasideSlot*)&((u8*)p)[sz]; +SQLITE_API int SQLITE_STDCALL sqlite3_open( + const char *zFilename, + sqlite3 **ppDb +){ + return openDatabase(zFilename, ppDb, + SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, 0); +} +SQLITE_API int SQLITE_STDCALL sqlite3_open_v2( + const char *filename, /* Database filename (UTF-8) */ + sqlite3 **ppDb, /* OUT: SQLite db handle */ + int flags, /* Flags */ + const char *zVfs /* Name of VFS module to use */ +){ + return openDatabase(filename, ppDb, (unsigned int)flags, zVfs); +} + +#ifndef SQLITE_OMIT_UTF16 +/* +** Open a new database handle. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_open16( + const void *zFilename, + sqlite3 **ppDb +){ + char const *zFilename8; /* zFilename encoded in UTF-8 instead of UTF-16 */ + sqlite3_value *pVal; + int rc; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( ppDb==0 ) return SQLITE_MISUSE_BKPT; +#endif + *ppDb = 0; +#ifndef SQLITE_OMIT_AUTOINIT + rc = sqlite3_initialize(); + if( rc ) return rc; +#endif + if( zFilename==0 ) zFilename = "\000\000"; + pVal = sqlite3ValueNew(0); + sqlite3ValueSetStr(pVal, -1, zFilename, SQLITE_UTF16NATIVE, SQLITE_STATIC); + zFilename8 = sqlite3ValueText(pVal, SQLITE_UTF8); + if( zFilename8 ){ + rc = openDatabase(zFilename8, ppDb, + SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, 0); + assert( *ppDb || rc==SQLITE_NOMEM ); + if( rc==SQLITE_OK && !DbHasProperty(*ppDb, 0, DB_SchemaLoaded) ){ + SCHEMA_ENC(*ppDb) = ENC(*ppDb) = SQLITE_UTF16NATIVE; } - db->lookaside.pEnd = p; - db->lookaside.bEnabled = 1; - db->lookaside.bMalloced = pBuf==0 ?1:0; }else{ - db->lookaside.pStart = db; - db->lookaside.pEnd = db; - db->lookaside.bEnabled = 0; - db->lookaside.bMalloced = 0; + rc = SQLITE_NOMEM; } - return SQLITE_OK; + sqlite3ValueFree(pVal); + + return rc & 0xff; } +#endif /* SQLITE_OMIT_UTF16 */ /* -** Return the mutex associated with a database connection. +** Register a new collation sequence with the database handle db. */ -SQLITE_API sqlite3_mutex *SQLITE_STDCALL sqlite3_db_mutex(sqlite3 *db){ +SQLITE_API int SQLITE_STDCALL sqlite3_create_collation( + sqlite3* db, + const char *zName, + int enc, + void* pCtx, + int(*xCompare)(void*,int,const void*,int,const void*) +){ + return sqlite3_create_collation_v2(db, zName, enc, pCtx, xCompare, 0); +} + +/* +** Register a new collation sequence with the database handle db. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_create_collation_v2( + sqlite3* db, + const char *zName, + int enc, + void* pCtx, + int(*xCompare)(void*,int,const void*,int,const void*), + void(*xDel)(void*) +){ + int rc; + #ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ){ - (void)SQLITE_MISUSE_BKPT; - return 0; - } + if( !sqlite3SafetyCheckOk(db) || zName==0 ) return SQLITE_MISUSE_BKPT; #endif - return db->mutex; + sqlite3_mutex_enter(db->mutex); + assert( !db->mallocFailed ); + rc = createCollation(db, zName, (u8)enc, pCtx, xCompare, xDel); + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; } +#ifndef SQLITE_OMIT_UTF16 /* -** Free up as much memory as we can from the given database -** connection. +** Register a new collation sequence with the database handle db. */ -SQLITE_API int SQLITE_STDCALL sqlite3_db_release_memory(sqlite3 *db){ - int i; +SQLITE_API int SQLITE_STDCALL sqlite3_create_collation16( + sqlite3* db, + const void *zName, + int enc, + void* pCtx, + int(*xCompare)(void*,int,const void*,int,const void*) +){ + int rc = SQLITE_OK; + char *zName8; #ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; + if( !sqlite3SafetyCheckOk(db) || zName==0 ) return SQLITE_MISUSE_BKPT; #endif sqlite3_mutex_enter(db->mutex); - sqlite3BtreeEnterAll(db); - for(i=0; inDb; i++){ - Btree *pBt = db->aDb[i].pBt; - if( pBt ){ - Pager *pPager = sqlite3BtreePager(pBt); - sqlite3PagerShrink(pPager); - } + assert( !db->mallocFailed ); + zName8 = sqlite3Utf16to8(db, zName, -1, SQLITE_UTF16NATIVE); + if( zName8 ){ + rc = createCollation(db, zName8, (u8)enc, pCtx, xCompare, 0); + sqlite3DbFree(db, zName8); } - sqlite3BtreeLeaveAll(db); + rc = sqlite3ApiExit(db, rc); sqlite3_mutex_leave(db->mutex); - return SQLITE_OK; + return rc; } +#endif /* SQLITE_OMIT_UTF16 */ /* -** Configuration settings for an individual database connection +** Register a collation sequence factory callback with the database handle +** db. Replace any previously installed collation sequence factory. */ -SQLITE_API int SQLITE_CDECL sqlite3_db_config(sqlite3 *db, int op, ...){ - va_list ap; - int rc; - va_start(ap, op); - switch( op ){ - case SQLITE_DBCONFIG_LOOKASIDE: { - void *pBuf = va_arg(ap, void*); /* IMP: R-26835-10964 */ - int sz = va_arg(ap, int); /* IMP: R-47871-25994 */ - int cnt = va_arg(ap, int); /* IMP: R-04460-53386 */ - rc = setupLookaside(db, pBuf, sz, cnt); - break; - } - default: { - static const struct { - int op; /* The opcode */ - u32 mask; /* Mask of the bit in sqlite3.flags to set/clear */ - } aFlagOp[] = { - { SQLITE_DBCONFIG_ENABLE_FKEY, SQLITE_ForeignKeys }, - { SQLITE_DBCONFIG_ENABLE_TRIGGER, SQLITE_EnableTrigger }, - }; - unsigned int i; - rc = SQLITE_ERROR; /* IMP: R-42790-23372 */ - for(i=0; iflags; - if( onoff>0 ){ - db->flags |= aFlagOp[i].mask; - }else if( onoff==0 ){ - db->flags &= ~aFlagOp[i].mask; - } - if( oldFlags!=db->flags ){ - sqlite3ExpirePreparedStatements(db); - } - if( pRes ){ - *pRes = (db->flags & aFlagOp[i].mask)!=0; - } - rc = SQLITE_OK; - break; - } - } - break; - } - } - va_end(ap); - return rc; +SQLITE_API int SQLITE_STDCALL sqlite3_collation_needed( + sqlite3 *db, + void *pCollNeededArg, + void(*xCollNeeded)(void*,sqlite3*,int eTextRep,const char*) +){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + db->xCollNeeded = xCollNeeded; + db->xCollNeeded16 = 0; + db->pCollNeededArg = pCollNeededArg; + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; } +#ifndef SQLITE_OMIT_UTF16 +/* +** Register a collation sequence factory callback with the database handle +** db. Replace any previously installed collation sequence factory. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_collation_needed16( + sqlite3 *db, + void *pCollNeededArg, + void(*xCollNeeded16)(void*,sqlite3*,int eTextRep,const void*) +){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + db->xCollNeeded = 0; + db->xCollNeeded16 = xCollNeeded16; + db->pCollNeededArg = pCollNeededArg; + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; +} +#endif /* SQLITE_OMIT_UTF16 */ +#ifndef SQLITE_OMIT_DEPRECATED /* -** Return true if the buffer z[0..n-1] contains all spaces. +** This function is now an anachronism. It used to be used to recover from a +** malloc() failure, but SQLite now does this automatically. */ -static int allSpaces(const char *z, int n){ - while( n>0 && z[n-1]==' ' ){ n--; } - return n==0; +SQLITE_API int SQLITE_STDCALL sqlite3_global_recover(void){ + return SQLITE_OK; } +#endif /* -** This is the default collating function named "BINARY" which is always -** available. -** -** If the padFlag argument is not NULL then space padding at the end -** of strings is ignored. This implements the RTRIM collation. +** Test to see whether or not the database connection is in autocommit +** mode. Return TRUE if it is and FALSE if not. Autocommit mode is on +** by default. Autocommit is disabled by a BEGIN statement and reenabled +** by the next COMMIT or ROLLBACK. */ -static int binCollFunc( - void *padFlag, - int nKey1, const void *pKey1, - int nKey2, const void *pKey2 -){ - int rc, n; - n = nKey1autoCommit; } /* -** Another built-in collating sequence: NOCASE. +** The following routines are substitutes for constants SQLITE_CORRUPT, +** SQLITE_MISUSE, SQLITE_CANTOPEN, SQLITE_IOERR and possibly other error +** constants. They serve two purposes: ** -** This collating sequence is intended to be used for "case independent -** comparison". SQLite's knowledge of upper and lower case equivalents -** extends only to the 26 characters used in the English language. +** 1. Serve as a convenient place to set a breakpoint in a debugger +** to detect when version error conditions occurs. ** -** At the moment there is only a UTF-8 implementation. +** 2. Invoke sqlite3_log() to provide the source code location where +** a low-level error is first detected. */ -static int nocaseCollatingFunc( - void *NotUsed, - int nKey1, const void *pKey1, - int nKey2, const void *pKey2 -){ - int r = sqlite3StrNICmp( - (const char *)pKey1, (const char *)pKey2, (nKey1lastRowid; -} +#ifndef SQLITE_OMIT_DEPRECATED /* -** Return the number of changes in the most recent call to sqlite3_exec(). +** This is a convenience routine that makes sure that all thread-specific +** data for this thread has been deallocated. +** +** SQLite no longer uses thread-specific data so this routine is now a +** no-op. It is retained for historical compatibility. */ -SQLITE_API int SQLITE_STDCALL sqlite3_changes(sqlite3 *db){ -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ){ - (void)SQLITE_MISUSE_BKPT; - return 0; - } -#endif - return db->nChange; +SQLITE_API void SQLITE_STDCALL sqlite3_thread_cleanup(void){ } +#endif /* -** Return the number of changes since the database handle was opened. +** Return meta information about a specific column of a database table. +** See comment in sqlite3.h (sqlite.h.in) for details. */ -SQLITE_API int SQLITE_STDCALL sqlite3_total_changes(sqlite3 *db){ +SQLITE_API int SQLITE_STDCALL sqlite3_table_column_metadata( + sqlite3 *db, /* Connection handle */ + const char *zDbName, /* Database name or NULL */ + const char *zTableName, /* Table name */ + const char *zColumnName, /* Column name */ + char const **pzDataType, /* OUTPUT: Declared data type */ + char const **pzCollSeq, /* OUTPUT: Collation sequence name */ + int *pNotNull, /* OUTPUT: True if NOT NULL constraint exists */ + int *pPrimaryKey, /* OUTPUT: True if column part of PK */ + int *pAutoinc /* OUTPUT: True if column is auto-increment */ +){ + int rc; + char *zErrMsg = 0; + Table *pTab = 0; + Column *pCol = 0; + int iCol = 0; + char const *zDataType = 0; + char const *zCollSeq = 0; + int notnull = 0; + int primarykey = 0; + int autoinc = 0; + + #ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ){ - (void)SQLITE_MISUSE_BKPT; - return 0; + if( !sqlite3SafetyCheckOk(db) || zTableName==0 ){ + return SQLITE_MISUSE_BKPT; } #endif - return db->nTotalChange; -} -/* -** Close all open savepoints. This function only manipulates fields of the -** database handle object, it does not close any savepoints that may be open -** at the b-tree/pager level. -*/ -SQLITE_PRIVATE void sqlite3CloseSavepoints(sqlite3 *db){ - while( db->pSavepoint ){ - Savepoint *pTmp = db->pSavepoint; - db->pSavepoint = pTmp->pNext; - sqlite3DbFree(db, pTmp); + /* Ensure the database schema has been loaded */ + sqlite3_mutex_enter(db->mutex); + sqlite3BtreeEnterAll(db); + rc = sqlite3Init(db, &zErrMsg); + if( SQLITE_OK!=rc ){ + goto error_out; } - db->nSavepoint = 0; - db->nStatement = 0; - db->isTransactionSavepoint = 0; -} -/* -** Invoke the destructor function associated with FuncDef p, if any. Except, -** if this is not the last copy of the function, do not invoke it. Multiple -** copies of a single function are created when create_function() is called -** with SQLITE_ANY as the encoding. -*/ -static void functionDestroy(sqlite3 *db, FuncDef *p){ - FuncDestructor *pDestructor = p->pDestructor; - if( pDestructor ){ - pDestructor->nRef--; - if( pDestructor->nRef==0 ){ - pDestructor->xDestroy(pDestructor->pUserData); - sqlite3DbFree(db, pDestructor); - } + /* Locate the table in question */ + pTab = sqlite3FindTable(db, zTableName, zDbName); + if( !pTab || pTab->pSelect ){ + pTab = 0; + goto error_out; } -} -/* -** Disconnect all sqlite3_vtab objects that belong to database connection -** db. This is called when db is being closed. -*/ -static void disconnectAllVtab(sqlite3 *db){ -#ifndef SQLITE_OMIT_VIRTUALTABLE - int i; - sqlite3BtreeEnterAll(db); - for(i=0; inDb; i++){ - Schema *pSchema = db->aDb[i].pSchema; - if( db->aDb[i].pSchema ){ - HashElem *p; - for(p=sqliteHashFirst(&pSchema->tblHash); p; p=sqliteHashNext(p)){ - Table *pTab = (Table *)sqliteHashData(p); - if( IsVirtual(pTab) ) sqlite3VtabDisconnect(db, pTab); + /* Find the column for which info is requested */ + if( zColumnName==0 ){ + /* Query for existance of table only */ + }else{ + for(iCol=0; iColnCol; iCol++){ + pCol = &pTab->aCol[iCol]; + if( 0==sqlite3StrICmp(pCol->zName, zColumnName) ){ + break; + } + } + if( iCol==pTab->nCol ){ + if( HasRowid(pTab) && sqlite3IsRowid(zColumnName) ){ + iCol = pTab->iPKey; + pCol = iCol>=0 ? &pTab->aCol[iCol] : 0; + }else{ + pTab = 0; + goto error_out; } } } - sqlite3VtabUnlockList(db); - sqlite3BtreeLeaveAll(db); -#else - UNUSED_PARAMETER(db); -#endif -} - -/* -** Return TRUE if database connection db has unfinalized prepared -** statements or unfinished sqlite3_backup objects. -*/ -static int connectionIsBusy(sqlite3 *db){ - int j; - assert( sqlite3_mutex_held(db->mutex) ); - if( db->pVdbe ) return 1; - for(j=0; jnDb; j++){ - Btree *pBt = db->aDb[j].pBt; - if( pBt && sqlite3BtreeIsInBackup(pBt) ) return 1; - } - return 0; -} -/* -** Close an existing SQLite database -*/ -static int sqlite3Close(sqlite3 *db, int forceZombie){ - if( !db ){ - /* EVIDENCE-OF: R-63257-11740 Calling sqlite3_close() or - ** sqlite3_close_v2() with a NULL pointer argument is a harmless no-op. */ - return SQLITE_OK; + /* The following block stores the meta information that will be returned + ** to the caller in local variables zDataType, zCollSeq, notnull, primarykey + ** and autoinc. At this point there are two possibilities: + ** + ** 1. The specified column name was rowid", "oid" or "_rowid_" + ** and there is no explicitly declared IPK column. + ** + ** 2. The table is not a view and the column name identified an + ** explicitly declared column. Copy meta information from *pCol. + */ + if( pCol ){ + zDataType = pCol->zType; + zCollSeq = pCol->zColl; + notnull = pCol->notNull!=0; + primarykey = (pCol->colFlags & COLFLAG_PRIMKEY)!=0; + autoinc = pTab->iPKey==iCol && (pTab->tabFlags & TF_Autoincrement)!=0; + }else{ + zDataType = "INTEGER"; + primarykey = 1; } - if( !sqlite3SafetyCheckSickOrOk(db) ){ - return SQLITE_MISUSE_BKPT; + if( !zCollSeq ){ + zCollSeq = "BINARY"; } - sqlite3_mutex_enter(db->mutex); - /* Force xDisconnect calls on all virtual tables */ - disconnectAllVtab(db); +error_out: + sqlite3BtreeLeaveAll(db); - /* If a transaction is open, the disconnectAllVtab() call above - ** will not have called the xDisconnect() method on any virtual - ** tables in the db->aVTrans[] array. The following sqlite3VtabRollback() - ** call will do so. We need to do this before the check for active - ** SQL statements below, as the v-table implementation may be storing - ** some prepared statements internally. + /* Whether the function call succeeded or failed, set the output parameters + ** to whatever their local counterparts contain. If an error did occur, + ** this has the effect of zeroing all output parameters. */ - sqlite3VtabRollback(db); + if( pzDataType ) *pzDataType = zDataType; + if( pzCollSeq ) *pzCollSeq = zCollSeq; + if( pNotNull ) *pNotNull = notnull; + if( pPrimaryKey ) *pPrimaryKey = primarykey; + if( pAutoinc ) *pAutoinc = autoinc; - /* Legacy behavior (sqlite3_close() behavior) is to return - ** SQLITE_BUSY if the connection can not be closed immediately. - */ - if( !forceZombie && connectionIsBusy(db) ){ - sqlite3ErrorWithMsg(db, SQLITE_BUSY, "unable to close due to unfinalized " - "statements or unfinished backups"); - sqlite3_mutex_leave(db->mutex); - return SQLITE_BUSY; + if( SQLITE_OK==rc && !pTab ){ + sqlite3DbFree(db, zErrMsg); + zErrMsg = sqlite3MPrintf(db, "no such table column: %s.%s", zTableName, + zColumnName); + rc = SQLITE_ERROR; } + sqlite3ErrorWithMsg(db, rc, (zErrMsg?"%s":0), zErrMsg); + sqlite3DbFree(db, zErrMsg); + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +} -#ifdef SQLITE_ENABLE_SQLLOG - if( sqlite3GlobalConfig.xSqllog ){ - /* Closing the handle. Fourth parameter is passed the value 2. */ - sqlite3GlobalConfig.xSqllog(sqlite3GlobalConfig.pSqllogArg, db, 0, 2); - } -#endif +/* +** Sleep for a little while. Return the amount of time slept. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_sleep(int ms){ + sqlite3_vfs *pVfs; + int rc; + pVfs = sqlite3_vfs_find(0); + if( pVfs==0 ) return 0; - /* Convert the connection into a zombie and then close it. + /* This function works in milliseconds, but the underlying OsSleep() + ** API uses microseconds. Hence the 1000's. */ - db->magic = SQLITE_MAGIC_ZOMBIE; - sqlite3LeaveMutexAndCloseZombie(db); - return SQLITE_OK; + rc = (sqlite3OsSleep(pVfs, 1000*ms)/1000); + return rc; } /* -** Two variations on the public interface for closing a database -** connection. The sqlite3_close() version returns SQLITE_BUSY and -** leaves the connection option if there are unfinalized prepared -** statements or unfinished sqlite3_backups. The sqlite3_close_v2() -** version forces the connection to become a zombie if there are -** unclosed resources, and arranges for deallocation when the last -** prepare statement or sqlite3_backup closes. +** Enable or disable the extended result codes. */ -SQLITE_API int SQLITE_STDCALL sqlite3_close(sqlite3 *db){ return sqlite3Close(db,0); } -SQLITE_API int SQLITE_STDCALL sqlite3_close_v2(sqlite3 *db){ return sqlite3Close(db,1); } - +SQLITE_API int SQLITE_STDCALL sqlite3_extended_result_codes(sqlite3 *db, int onoff){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + db->errMask = onoff ? 0xffffffff : 0xff; + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; +} /* -** Close the mutex on database connection db. -** -** Furthermore, if database connection db is a zombie (meaning that there -** has been a prior call to sqlite3_close(db) or sqlite3_close_v2(db)) and -** every sqlite3_stmt has now been finalized and every sqlite3_backup has -** finished, then free all resources. +** Invoke the xFileControl method on a particular database. */ -SQLITE_PRIVATE void sqlite3LeaveMutexAndCloseZombie(sqlite3 *db){ - HashElem *i; /* Hash table iterator */ - int j; +SQLITE_API int SQLITE_STDCALL sqlite3_file_control(sqlite3 *db, const char *zDbName, int op, void *pArg){ + int rc = SQLITE_ERROR; + Btree *pBtree; - /* If there are outstanding sqlite3_stmt or sqlite3_backup objects - ** or if the connection has not yet been closed by sqlite3_close_v2(), - ** then just leave the mutex and return. - */ - if( db->magic!=SQLITE_MAGIC_ZOMBIE || connectionIsBusy(db) ){ - sqlite3_mutex_leave(db->mutex); - return; +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); + pBtree = sqlite3DbNameToBtree(db, zDbName); + if( pBtree ){ + Pager *pPager; + sqlite3_file *fd; + sqlite3BtreeEnter(pBtree); + pPager = sqlite3BtreePager(pBtree); + assert( pPager!=0 ); + fd = sqlite3PagerFile(pPager); + assert( fd!=0 ); + if( op==SQLITE_FCNTL_FILE_POINTER ){ + *(sqlite3_file**)pArg = fd; + rc = SQLITE_OK; + }else if( fd->pMethods ){ + rc = sqlite3OsFileControl(fd, op, pArg); + }else{ + rc = SQLITE_NOTFOUND; + } + sqlite3BtreeLeave(pBtree); } + sqlite3_mutex_leave(db->mutex); + return rc; +} - /* If we reach this point, it means that the database connection has - ** closed all sqlite3_stmt and sqlite3_backup objects and has been - ** passed to sqlite3_close (meaning that it is a zombie). Therefore, - ** go ahead and free all resources. - */ +/* +** Interface to the testing logic. +*/ +SQLITE_API int SQLITE_CDECL sqlite3_test_control(int op, ...){ + int rc = 0; +#ifdef SQLITE_OMIT_BUILTIN_TEST + UNUSED_PARAMETER(op); +#else + va_list ap; + va_start(ap, op); + switch( op ){ - /* If a transaction is open, roll it back. This also ensures that if - ** any database schemas have been modified by an uncommitted transaction - ** they are reset. And that the required b-tree mutex is held to make - ** the pager rollback and schema reset an atomic operation. */ - sqlite3RollbackAll(db, SQLITE_OK); + /* + ** Save the current state of the PRNG. + */ + case SQLITE_TESTCTRL_PRNG_SAVE: { + sqlite3PrngSaveState(); + break; + } - /* Free any outstanding Savepoint structures. */ - sqlite3CloseSavepoints(db); + /* + ** Restore the state of the PRNG to the last state saved using + ** PRNG_SAVE. If PRNG_SAVE has never before been called, then + ** this verb acts like PRNG_RESET. + */ + case SQLITE_TESTCTRL_PRNG_RESTORE: { + sqlite3PrngRestoreState(); + break; + } - /* Close all database connections */ - for(j=0; jnDb; j++){ - struct Db *pDb = &db->aDb[j]; - if( pDb->pBt ){ - sqlite3BtreeClose(pDb->pBt); - pDb->pBt = 0; - if( j!=1 ){ - pDb->pSchema = 0; - } + /* + ** Reset the PRNG back to its uninitialized state. The next call + ** to sqlite3_randomness() will reseed the PRNG using a single call + ** to the xRandomness method of the default VFS. + */ + case SQLITE_TESTCTRL_PRNG_RESET: { + sqlite3_randomness(0,0); + break; } - } - /* Clear the TEMP schema separately and last */ - if( db->aDb[1].pSchema ){ - sqlite3SchemaClear(db->aDb[1].pSchema); - } - sqlite3VtabUnlockList(db); - /* Free up the array of auxiliary databases */ - sqlite3CollapseDatabaseArray(db); - assert( db->nDb<=2 ); - assert( db->aDb==db->aDbStatic ); + /* + ** sqlite3_test_control(BITVEC_TEST, size, program) + ** + ** Run a test against a Bitvec object of size. The program argument + ** is an array of integers that defines the test. Return -1 on a + ** memory allocation error, 0 on success, or non-zero for an error. + ** See the sqlite3BitvecBuiltinTest() for additional information. + */ + case SQLITE_TESTCTRL_BITVEC_TEST: { + int sz = va_arg(ap, int); + int *aProg = va_arg(ap, int*); + rc = sqlite3BitvecBuiltinTest(sz, aProg); + break; + } - /* Tell the code in notify.c that the connection no longer holds any - ** locks and does not require any further unlock-notify callbacks. - */ - sqlite3ConnectionClosed(db); + /* + ** sqlite3_test_control(FAULT_INSTALL, xCallback) + ** + ** Arrange to invoke xCallback() whenever sqlite3FaultSim() is called, + ** if xCallback is not NULL. + ** + ** As a test of the fault simulator mechanism itself, sqlite3FaultSim(0) + ** is called immediately after installing the new callback and the return + ** value from sqlite3FaultSim(0) becomes the return from + ** sqlite3_test_control(). + */ + case SQLITE_TESTCTRL_FAULT_INSTALL: { + /* MSVC is picky about pulling func ptrs from va lists. + ** http://support.microsoft.com/kb/47961 + ** sqlite3GlobalConfig.xTestCallback = va_arg(ap, int(*)(int)); + */ + typedef int(*TESTCALLBACKFUNC_t)(int); + sqlite3GlobalConfig.xTestCallback = va_arg(ap, TESTCALLBACKFUNC_t); + rc = sqlite3FaultSim(0); + break; + } - for(j=0; jaFunc.a); j++){ - FuncDef *pNext, *pHash, *p; - for(p=db->aFunc.a[j]; p; p=pHash){ - pHash = p->pHash; - while( p ){ - functionDestroy(db, p); - pNext = p->pNext; - sqlite3DbFree(db, p); - p = pNext; - } + /* + ** sqlite3_test_control(BENIGN_MALLOC_HOOKS, xBegin, xEnd) + ** + ** Register hooks to call to indicate which malloc() failures + ** are benign. + */ + case SQLITE_TESTCTRL_BENIGN_MALLOC_HOOKS: { + typedef void (*void_function)(void); + void_function xBenignBegin; + void_function xBenignEnd; + xBenignBegin = va_arg(ap, void_function); + xBenignEnd = va_arg(ap, void_function); + sqlite3BenignMallocHooks(xBenignBegin, xBenignEnd); + break; } - } - for(i=sqliteHashFirst(&db->aCollSeq); i; i=sqliteHashNext(i)){ - CollSeq *pColl = (CollSeq *)sqliteHashData(i); - /* Invoke any destructors registered for collation sequence user data. */ - for(j=0; j<3; j++){ - if( pColl[j].xDel ){ - pColl[j].xDel(pColl[j].pUser); + + /* + ** sqlite3_test_control(SQLITE_TESTCTRL_PENDING_BYTE, unsigned int X) + ** + ** Set the PENDING byte to the value in the argument, if X>0. + ** Make no changes if X==0. Return the value of the pending byte + ** as it existing before this routine was called. + ** + ** IMPORTANT: Changing the PENDING byte from 0x40000000 results in + ** an incompatible database file format. Changing the PENDING byte + ** while any database connection is open results in undefined and + ** deleterious behavior. + */ + case SQLITE_TESTCTRL_PENDING_BYTE: { + rc = PENDING_BYTE; +#ifndef SQLITE_OMIT_WSD + { + unsigned int newVal = va_arg(ap, unsigned int); + if( newVal ) sqlite3PendingByte = newVal; } - } - sqlite3DbFree(db, pColl); - } - sqlite3HashClear(&db->aCollSeq); -#ifndef SQLITE_OMIT_VIRTUALTABLE - for(i=sqliteHashFirst(&db->aModule); i; i=sqliteHashNext(i)){ - Module *pMod = (Module *)sqliteHashData(i); - if( pMod->xDestroy ){ - pMod->xDestroy(pMod->pAux); - } - sqlite3DbFree(db, pMod); - } - sqlite3HashClear(&db->aModule); #endif + break; + } - sqlite3Error(db, SQLITE_OK); /* Deallocates any cached error strings. */ - sqlite3ValueFree(db->pErr); - sqlite3CloseExtensions(db); -#if SQLITE_USER_AUTHENTICATION - sqlite3_free(db->auth.zAuthUser); - sqlite3_free(db->auth.zAuthPW); -#endif + /* + ** sqlite3_test_control(SQLITE_TESTCTRL_ASSERT, int X) + ** + ** This action provides a run-time test to see whether or not + ** assert() was enabled at compile-time. If X is true and assert() + ** is enabled, then the return value is true. If X is true and + ** assert() is disabled, then the return value is zero. If X is + ** false and assert() is enabled, then the assertion fires and the + ** process aborts. If X is false and assert() is disabled, then the + ** return value is zero. + */ + case SQLITE_TESTCTRL_ASSERT: { + volatile int x = 0; + assert( (x = va_arg(ap,int))!=0 ); + rc = x; + break; + } - db->magic = SQLITE_MAGIC_ERROR; - /* The temp-database schema is allocated differently from the other schema - ** objects (using sqliteMalloc() directly, instead of sqlite3BtreeSchema()). - ** So it needs to be freed here. Todo: Why not roll the temp schema into - ** the same sqliteMalloc() as the one that allocates the database - ** structure? - */ - sqlite3DbFree(db, db->aDb[1].pSchema); - sqlite3_mutex_leave(db->mutex); - db->magic = SQLITE_MAGIC_CLOSED; - sqlite3_mutex_free(db->mutex); - assert( db->lookaside.nOut==0 ); /* Fails on a lookaside memory leak */ - if( db->lookaside.bMalloced ){ - sqlite3_free(db->lookaside.pStart); - } - sqlite3_free(db); -} + /* + ** sqlite3_test_control(SQLITE_TESTCTRL_ALWAYS, int X) + ** + ** This action provides a run-time test to see how the ALWAYS and + ** NEVER macros were defined at compile-time. + ** + ** The return value is ALWAYS(X). + ** + ** The recommended test is X==2. If the return value is 2, that means + ** ALWAYS() and NEVER() are both no-op pass-through macros, which is the + ** default setting. If the return value is 1, then ALWAYS() is either + ** hard-coded to true or else it asserts if its argument is false. + ** The first behavior (hard-coded to true) is the case if + ** SQLITE_TESTCTRL_ASSERT shows that assert() is disabled and the second + ** behavior (assert if the argument to ALWAYS() is false) is the case if + ** SQLITE_TESTCTRL_ASSERT shows that assert() is enabled. + ** + ** The run-time test procedure might look something like this: + ** + ** if( sqlite3_test_control(SQLITE_TESTCTRL_ALWAYS, 2)==2 ){ + ** // ALWAYS() and NEVER() are no-op pass-through macros + ** }else if( sqlite3_test_control(SQLITE_TESTCTRL_ASSERT, 1) ){ + ** // ALWAYS(x) asserts that x is true. NEVER(x) asserts x is false. + ** }else{ + ** // ALWAYS(x) is a constant 1. NEVER(x) is a constant 0. + ** } + */ + case SQLITE_TESTCTRL_ALWAYS: { + int x = va_arg(ap,int); + rc = ALWAYS(x); + break; + } -/* -** Rollback all database files. If tripCode is not SQLITE_OK, then -** any write cursors are invalidated ("tripped" - as in "tripping a circuit -** breaker") and made to return tripCode if there are any further -** attempts to use that cursor. Read cursors remain open and valid -** but are "saved" in case the table pages are moved around. -*/ -SQLITE_PRIVATE void sqlite3RollbackAll(sqlite3 *db, int tripCode){ - int i; - int inTrans = 0; - int schemaChange; - assert( sqlite3_mutex_held(db->mutex) ); - sqlite3BeginBenignMalloc(); + /* + ** sqlite3_test_control(SQLITE_TESTCTRL_BYTEORDER); + ** + ** The integer returned reveals the byte-order of the computer on which + ** SQLite is running: + ** + ** 1 big-endian, determined at run-time + ** 10 little-endian, determined at run-time + ** 432101 big-endian, determined at compile-time + ** 123410 little-endian, determined at compile-time + */ + case SQLITE_TESTCTRL_BYTEORDER: { + rc = SQLITE_BYTEORDER*100 + SQLITE_LITTLEENDIAN*10 + SQLITE_BIGENDIAN; + break; + } - /* Obtain all b-tree mutexes before making any calls to BtreeRollback(). - ** This is important in case the transaction being rolled back has - ** modified the database schema. If the b-tree mutexes are not taken - ** here, then another shared-cache connection might sneak in between - ** the database rollback and schema reset, which can cause false - ** corruption reports in some cases. */ - sqlite3BtreeEnterAll(db); - schemaChange = (db->flags & SQLITE_InternChanges)!=0 && db->init.busy==0; + /* sqlite3_test_control(SQLITE_TESTCTRL_RESERVE, sqlite3 *db, int N) + ** + ** Set the nReserve size to N for the main database on the database + ** connection db. + */ + case SQLITE_TESTCTRL_RESERVE: { + sqlite3 *db = va_arg(ap, sqlite3*); + int x = va_arg(ap,int); + sqlite3_mutex_enter(db->mutex); + sqlite3BtreeSetPageSize(db->aDb[0].pBt, 0, x, 0); + sqlite3_mutex_leave(db->mutex); + break; + } - for(i=0; inDb; i++){ - Btree *p = db->aDb[i].pBt; - if( p ){ - if( sqlite3BtreeIsInTrans(p) ){ - inTrans = 1; - } - sqlite3BtreeRollback(p, tripCode, !schemaChange); + /* sqlite3_test_control(SQLITE_TESTCTRL_OPTIMIZATIONS, sqlite3 *db, int N) + ** + ** Enable or disable various optimizations for testing purposes. The + ** argument N is a bitmask of optimizations to be disabled. For normal + ** operation N should be 0. The idea is that a test program (like the + ** SQL Logic Test or SLT test module) can run the same SQL multiple times + ** with various optimizations disabled to verify that the same answer + ** is obtained in every case. + */ + case SQLITE_TESTCTRL_OPTIMIZATIONS: { + sqlite3 *db = va_arg(ap, sqlite3*); + db->dbOptFlags = (u16)(va_arg(ap, int) & 0xffff); + break; } - } - sqlite3VtabRollback(db); - sqlite3EndBenignMalloc(); - if( (db->flags&SQLITE_InternChanges)!=0 && db->init.busy==0 ){ - sqlite3ExpirePreparedStatements(db); - sqlite3ResetAllSchemasOfConnection(db); - } - sqlite3BtreeLeaveAll(db); +#ifdef SQLITE_N_KEYWORD + /* sqlite3_test_control(SQLITE_TESTCTRL_ISKEYWORD, const char *zWord) + ** + ** If zWord is a keyword recognized by the parser, then return the + ** number of keywords. Or if zWord is not a keyword, return 0. + ** + ** This test feature is only available in the amalgamation since + ** the SQLITE_N_KEYWORD macro is not defined in this file if SQLite + ** is built using separate source files. + */ + case SQLITE_TESTCTRL_ISKEYWORD: { + const char *zWord = va_arg(ap, const char*); + int n = sqlite3Strlen30(zWord); + rc = (sqlite3KeywordCode((u8*)zWord, n)!=TK_ID) ? SQLITE_N_KEYWORD : 0; + break; + } +#endif - /* Any deferred constraint violations have now been resolved. */ - db->nDeferredCons = 0; - db->nDeferredImmCons = 0; - db->flags &= ~SQLITE_DeferFKs; + /* sqlite3_test_control(SQLITE_TESTCTRL_SCRATCHMALLOC, sz, &pNew, pFree); + ** + ** Pass pFree into sqlite3ScratchFree(). + ** If sz>0 then allocate a scratch buffer into pNew. + */ + case SQLITE_TESTCTRL_SCRATCHMALLOC: { + void *pFree, **ppNew; + int sz; + sz = va_arg(ap, int); + ppNew = va_arg(ap, void**); + pFree = va_arg(ap, void*); + if( sz ) *ppNew = sqlite3ScratchMalloc(sz); + sqlite3ScratchFree(pFree); + break; + } - /* If one has been configured, invoke the rollback-hook callback */ - if( db->xRollbackCallback && (inTrans || !db->autoCommit) ){ - db->xRollbackCallback(db->pRollbackArg); - } -} + /* sqlite3_test_control(SQLITE_TESTCTRL_LOCALTIME_FAULT, int onoff); + ** + ** If parameter onoff is non-zero, configure the wrappers so that all + ** subsequent calls to localtime() and variants fail. If onoff is zero, + ** undo this setting. + */ + case SQLITE_TESTCTRL_LOCALTIME_FAULT: { + sqlite3GlobalConfig.bLocaltimeFault = va_arg(ap, int); + break; + } -/* -** Return a static string containing the name corresponding to the error code -** specified in the argument. -*/ -#if (defined(SQLITE_DEBUG) && SQLITE_OS_WIN) || defined(SQLITE_TEST) -SQLITE_PRIVATE const char *sqlite3ErrName(int rc){ - const char *zName = 0; - int i, origRc = rc; - for(i=0; i<2 && zName==0; i++, rc &= 0xff){ - switch( rc ){ - case SQLITE_OK: zName = "SQLITE_OK"; break; - case SQLITE_ERROR: zName = "SQLITE_ERROR"; break; - case SQLITE_INTERNAL: zName = "SQLITE_INTERNAL"; break; - case SQLITE_PERM: zName = "SQLITE_PERM"; break; - case SQLITE_ABORT: zName = "SQLITE_ABORT"; break; - case SQLITE_ABORT_ROLLBACK: zName = "SQLITE_ABORT_ROLLBACK"; break; - case SQLITE_BUSY: zName = "SQLITE_BUSY"; break; - case SQLITE_BUSY_RECOVERY: zName = "SQLITE_BUSY_RECOVERY"; break; - case SQLITE_BUSY_SNAPSHOT: zName = "SQLITE_BUSY_SNAPSHOT"; break; - case SQLITE_LOCKED: zName = "SQLITE_LOCKED"; break; - case SQLITE_LOCKED_SHAREDCACHE: zName = "SQLITE_LOCKED_SHAREDCACHE";break; - case SQLITE_NOMEM: zName = "SQLITE_NOMEM"; break; - case SQLITE_READONLY: zName = "SQLITE_READONLY"; break; - case SQLITE_READONLY_RECOVERY: zName = "SQLITE_READONLY_RECOVERY"; break; - case SQLITE_READONLY_CANTLOCK: zName = "SQLITE_READONLY_CANTLOCK"; break; - case SQLITE_READONLY_ROLLBACK: zName = "SQLITE_READONLY_ROLLBACK"; break; - case SQLITE_READONLY_DBMOVED: zName = "SQLITE_READONLY_DBMOVED"; break; - case SQLITE_INTERRUPT: zName = "SQLITE_INTERRUPT"; break; - case SQLITE_IOERR: zName = "SQLITE_IOERR"; break; - case SQLITE_IOERR_READ: zName = "SQLITE_IOERR_READ"; break; - case SQLITE_IOERR_SHORT_READ: zName = "SQLITE_IOERR_SHORT_READ"; break; - case SQLITE_IOERR_WRITE: zName = "SQLITE_IOERR_WRITE"; break; - case SQLITE_IOERR_FSYNC: zName = "SQLITE_IOERR_FSYNC"; break; - case SQLITE_IOERR_DIR_FSYNC: zName = "SQLITE_IOERR_DIR_FSYNC"; break; - case SQLITE_IOERR_TRUNCATE: zName = "SQLITE_IOERR_TRUNCATE"; break; - case SQLITE_IOERR_FSTAT: zName = "SQLITE_IOERR_FSTAT"; break; - case SQLITE_IOERR_UNLOCK: zName = "SQLITE_IOERR_UNLOCK"; break; - case SQLITE_IOERR_RDLOCK: zName = "SQLITE_IOERR_RDLOCK"; break; - case SQLITE_IOERR_DELETE: zName = "SQLITE_IOERR_DELETE"; break; - case SQLITE_IOERR_NOMEM: zName = "SQLITE_IOERR_NOMEM"; break; - case SQLITE_IOERR_ACCESS: zName = "SQLITE_IOERR_ACCESS"; break; - case SQLITE_IOERR_CHECKRESERVEDLOCK: - zName = "SQLITE_IOERR_CHECKRESERVEDLOCK"; break; - case SQLITE_IOERR_LOCK: zName = "SQLITE_IOERR_LOCK"; break; - case SQLITE_IOERR_CLOSE: zName = "SQLITE_IOERR_CLOSE"; break; - case SQLITE_IOERR_DIR_CLOSE: zName = "SQLITE_IOERR_DIR_CLOSE"; break; - case SQLITE_IOERR_SHMOPEN: zName = "SQLITE_IOERR_SHMOPEN"; break; - case SQLITE_IOERR_SHMSIZE: zName = "SQLITE_IOERR_SHMSIZE"; break; - case SQLITE_IOERR_SHMLOCK: zName = "SQLITE_IOERR_SHMLOCK"; break; - case SQLITE_IOERR_SHMMAP: zName = "SQLITE_IOERR_SHMMAP"; break; - case SQLITE_IOERR_SEEK: zName = "SQLITE_IOERR_SEEK"; break; - case SQLITE_IOERR_DELETE_NOENT: zName = "SQLITE_IOERR_DELETE_NOENT";break; - case SQLITE_IOERR_MMAP: zName = "SQLITE_IOERR_MMAP"; break; - case SQLITE_IOERR_GETTEMPPATH: zName = "SQLITE_IOERR_GETTEMPPATH"; break; - case SQLITE_IOERR_CONVPATH: zName = "SQLITE_IOERR_CONVPATH"; break; - case SQLITE_CORRUPT: zName = "SQLITE_CORRUPT"; break; - case SQLITE_CORRUPT_VTAB: zName = "SQLITE_CORRUPT_VTAB"; break; - case SQLITE_NOTFOUND: zName = "SQLITE_NOTFOUND"; break; - case SQLITE_FULL: zName = "SQLITE_FULL"; break; - case SQLITE_CANTOPEN: zName = "SQLITE_CANTOPEN"; break; - case SQLITE_CANTOPEN_NOTEMPDIR: zName = "SQLITE_CANTOPEN_NOTEMPDIR";break; - case SQLITE_CANTOPEN_ISDIR: zName = "SQLITE_CANTOPEN_ISDIR"; break; - case SQLITE_CANTOPEN_FULLPATH: zName = "SQLITE_CANTOPEN_FULLPATH"; break; - case SQLITE_CANTOPEN_CONVPATH: zName = "SQLITE_CANTOPEN_CONVPATH"; break; - case SQLITE_PROTOCOL: zName = "SQLITE_PROTOCOL"; break; - case SQLITE_EMPTY: zName = "SQLITE_EMPTY"; break; - case SQLITE_SCHEMA: zName = "SQLITE_SCHEMA"; break; - case SQLITE_TOOBIG: zName = "SQLITE_TOOBIG"; break; - case SQLITE_CONSTRAINT: zName = "SQLITE_CONSTRAINT"; break; - case SQLITE_CONSTRAINT_UNIQUE: zName = "SQLITE_CONSTRAINT_UNIQUE"; break; - case SQLITE_CONSTRAINT_TRIGGER: zName = "SQLITE_CONSTRAINT_TRIGGER";break; - case SQLITE_CONSTRAINT_FOREIGNKEY: - zName = "SQLITE_CONSTRAINT_FOREIGNKEY"; break; - case SQLITE_CONSTRAINT_CHECK: zName = "SQLITE_CONSTRAINT_CHECK"; break; - case SQLITE_CONSTRAINT_PRIMARYKEY: - zName = "SQLITE_CONSTRAINT_PRIMARYKEY"; break; - case SQLITE_CONSTRAINT_NOTNULL: zName = "SQLITE_CONSTRAINT_NOTNULL";break; - case SQLITE_CONSTRAINT_COMMITHOOK: - zName = "SQLITE_CONSTRAINT_COMMITHOOK"; break; - case SQLITE_CONSTRAINT_VTAB: zName = "SQLITE_CONSTRAINT_VTAB"; break; - case SQLITE_CONSTRAINT_FUNCTION: - zName = "SQLITE_CONSTRAINT_FUNCTION"; break; - case SQLITE_CONSTRAINT_ROWID: zName = "SQLITE_CONSTRAINT_ROWID"; break; - case SQLITE_MISMATCH: zName = "SQLITE_MISMATCH"; break; - case SQLITE_MISUSE: zName = "SQLITE_MISUSE"; break; - case SQLITE_NOLFS: zName = "SQLITE_NOLFS"; break; - case SQLITE_AUTH: zName = "SQLITE_AUTH"; break; - case SQLITE_FORMAT: zName = "SQLITE_FORMAT"; break; - case SQLITE_RANGE: zName = "SQLITE_RANGE"; break; - case SQLITE_NOTADB: zName = "SQLITE_NOTADB"; break; - case SQLITE_ROW: zName = "SQLITE_ROW"; break; - case SQLITE_NOTICE: zName = "SQLITE_NOTICE"; break; - case SQLITE_NOTICE_RECOVER_WAL: zName = "SQLITE_NOTICE_RECOVER_WAL";break; - case SQLITE_NOTICE_RECOVER_ROLLBACK: - zName = "SQLITE_NOTICE_RECOVER_ROLLBACK"; break; - case SQLITE_WARNING: zName = "SQLITE_WARNING"; break; - case SQLITE_WARNING_AUTOINDEX: zName = "SQLITE_WARNING_AUTOINDEX"; break; - case SQLITE_DONE: zName = "SQLITE_DONE"; break; + /* sqlite3_test_control(SQLITE_TESTCTRL_NEVER_CORRUPT, int); + ** + ** Set or clear a flag that indicates that the database file is always well- + ** formed and never corrupt. This flag is clear by default, indicating that + ** database files might have arbitrary corruption. Setting the flag during + ** testing causes certain assert() statements in the code to be activated + ** that demonstrat invariants on well-formed database files. + */ + case SQLITE_TESTCTRL_NEVER_CORRUPT: { + sqlite3GlobalConfig.neverCorrupt = va_arg(ap, int); + break; } - } - if( zName==0 ){ - static char zBuf[50]; - sqlite3_snprintf(sizeof(zBuf), zBuf, "SQLITE_UNKNOWN(%d)", origRc); - zName = zBuf; - } - return zName; -} -#endif -/* -** Return a static string that describes the kind of error specified in the -** argument. -*/ -SQLITE_PRIVATE const char *sqlite3ErrStr(int rc){ - static const char* const aMsg[] = { - /* SQLITE_OK */ "not an error", - /* SQLITE_ERROR */ "SQL logic error or missing database", - /* SQLITE_INTERNAL */ 0, - /* SQLITE_PERM */ "access permission denied", - /* SQLITE_ABORT */ "callback requested query abort", - /* SQLITE_BUSY */ "database is locked", - /* SQLITE_LOCKED */ "database table is locked", - /* SQLITE_NOMEM */ "out of memory", - /* SQLITE_READONLY */ "attempt to write a readonly database", - /* SQLITE_INTERRUPT */ "interrupted", - /* SQLITE_IOERR */ "disk I/O error", - /* SQLITE_CORRUPT */ "database disk image is malformed", - /* SQLITE_NOTFOUND */ "unknown operation", - /* SQLITE_FULL */ "database or disk is full", - /* SQLITE_CANTOPEN */ "unable to open database file", - /* SQLITE_PROTOCOL */ "locking protocol", - /* SQLITE_EMPTY */ "table contains no data", - /* SQLITE_SCHEMA */ "database schema has changed", - /* SQLITE_TOOBIG */ "string or blob too big", - /* SQLITE_CONSTRAINT */ "constraint failed", - /* SQLITE_MISMATCH */ "datatype mismatch", - /* SQLITE_MISUSE */ "library routine called out of sequence", - /* SQLITE_NOLFS */ "large file support is disabled", - /* SQLITE_AUTH */ "authorization denied", - /* SQLITE_FORMAT */ "auxiliary database format error", - /* SQLITE_RANGE */ "bind or column index out of range", - /* SQLITE_NOTADB */ "file is encrypted or is not a database", - }; - const char *zErr = "unknown error"; - switch( rc ){ - case SQLITE_ABORT_ROLLBACK: { - zErr = "abort due to ROLLBACK"; + + /* sqlite3_test_control(SQLITE_TESTCTRL_VDBE_COVERAGE, xCallback, ptr); + ** + ** Set the VDBE coverage callback function to xCallback with context + ** pointer ptr. + */ + case SQLITE_TESTCTRL_VDBE_COVERAGE: { +#ifdef SQLITE_VDBE_COVERAGE + typedef void (*branch_callback)(void*,int,u8,u8); + sqlite3GlobalConfig.xVdbeBranch = va_arg(ap,branch_callback); + sqlite3GlobalConfig.pVdbeBranchArg = va_arg(ap,void*); +#endif break; } - default: { - rc &= 0xff; - if( ALWAYS(rc>=0) && rcnMaxSorterMmap = va_arg(ap, int); break; } - } - return zErr; -} -/* -** This routine implements a busy callback that sleeps and tries -** again until a timeout value is reached. The timeout value is -** an integer number of milliseconds passed in as the first -** argument. -*/ -static int sqliteDefaultBusyCallback( - void *ptr, /* Database connection */ - int count /* Number of times table has been busy */ -){ -#if SQLITE_OS_WIN || HAVE_USLEEP - static const u8 delays[] = - { 1, 2, 5, 10, 15, 20, 25, 25, 25, 50, 50, 100 }; - static const u8 totals[] = - { 0, 1, 3, 8, 18, 33, 53, 78, 103, 128, 178, 228 }; -# define NDELAY ArraySize(delays) - sqlite3 *db = (sqlite3 *)ptr; - int timeout = db->busyTimeout; - int delay, prior; + /* sqlite3_test_control(SQLITE_TESTCTRL_ISINIT); + ** + ** Return SQLITE_OK if SQLite has been initialized and SQLITE_ERROR if + ** not. + */ + case SQLITE_TESTCTRL_ISINIT: { + if( sqlite3GlobalConfig.isInit==0 ) rc = SQLITE_ERROR; + break; + } - assert( count>=0 ); - if( count < NDELAY ){ - delay = delays[count]; - prior = totals[count]; - }else{ - delay = delays[NDELAY-1]; - prior = totals[NDELAY-1] + delay*(count-(NDELAY-1)); - } - if( prior + delay > timeout ){ - delay = timeout - prior; - if( delay<=0 ) return 0; - } - sqlite3OsSleep(db->pVfs, delay*1000); - return 1; -#else - sqlite3 *db = (sqlite3 *)ptr; - int timeout = ((sqlite3 *)ptr)->busyTimeout; - if( (count+1)*1000 > timeout ){ - return 0; + /* sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, db, dbName, onOff, tnum); + ** + ** This test control is used to create imposter tables. "db" is a pointer + ** to the database connection. dbName is the database name (ex: "main" or + ** "temp") which will receive the imposter. "onOff" turns imposter mode on + ** or off. "tnum" is the root page of the b-tree to which the imposter + ** table should connect. + ** + ** Enable imposter mode only when the schema has already been parsed. Then + ** run a single CREATE TABLE statement to construct the imposter table in + ** the parsed schema. Then turn imposter mode back off again. + ** + ** If onOff==0 and tnum>0 then reset the schema for all databases, causing + ** the schema to be reparsed the next time it is needed. This has the + ** effect of erasing all imposter tables. + */ + case SQLITE_TESTCTRL_IMPOSTER: { + sqlite3 *db = va_arg(ap, sqlite3*); + sqlite3_mutex_enter(db->mutex); + db->init.iDb = sqlite3FindDbName(db, va_arg(ap,const char*)); + db->init.busy = db->init.imposterTable = va_arg(ap,int); + db->init.newTnum = va_arg(ap,int); + if( db->init.busy==0 && db->init.newTnum>0 ){ + sqlite3ResetAllSchemasOfConnection(db); + } + sqlite3_mutex_leave(db->mutex); + break; + } } - sqlite3OsSleep(db->pVfs, 1000000); - return 1; -#endif + va_end(ap); +#endif /* SQLITE_OMIT_BUILTIN_TEST */ + return rc; } /* -** Invoke the given busy handler. +** This is a utility routine, useful to VFS implementations, that checks +** to see if a database file was a URI that contained a specific query +** parameter, and if so obtains the value of the query parameter. ** -** This routine is called when an operation failed with a lock. -** If this routine returns non-zero, the lock is retried. If it -** returns 0, the operation aborts with an SQLITE_BUSY error. -*/ -SQLITE_PRIVATE int sqlite3InvokeBusyHandler(BusyHandler *p){ - int rc; - if( NEVER(p==0) || p->xFunc==0 || p->nBusy<0 ) return 0; - rc = p->xFunc(p->pArg, p->nBusy); - if( rc==0 ){ - p->nBusy = -1; - }else{ - p->nBusy++; - } - return rc; -} - -/* -** This routine sets the busy callback for an Sqlite database to the -** given callback function with the given argument. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_busy_handler( - sqlite3 *db, - int (*xBusy)(void*,int), - void *pArg -){ -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; -#endif - sqlite3_mutex_enter(db->mutex); - db->busyHandler.xFunc = xBusy; - db->busyHandler.pArg = pArg; - db->busyHandler.nBusy = 0; - db->busyTimeout = 0; - sqlite3_mutex_leave(db->mutex); - return SQLITE_OK; -} - -#ifndef SQLITE_OMIT_PROGRESS_CALLBACK -/* -** This routine sets the progress callback for an Sqlite database to the -** given callback function with the given argument. The progress callback will -** be invoked every nOps opcodes. +** The zFilename argument is the filename pointer passed into the xOpen() +** method of a VFS implementation. The zParam argument is the name of the +** query parameter we seek. This routine returns the value of the zParam +** parameter if it exists. If the parameter does not exist, this routine +** returns a NULL pointer. */ -SQLITE_API void SQLITE_STDCALL sqlite3_progress_handler( - sqlite3 *db, - int nOps, - int (*xProgress)(void*), - void *pArg -){ -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ){ - (void)SQLITE_MISUSE_BKPT; - return; - } -#endif - sqlite3_mutex_enter(db->mutex); - if( nOps>0 ){ - db->xProgress = xProgress; - db->nProgressOps = (unsigned)nOps; - db->pProgressArg = pArg; - }else{ - db->xProgress = 0; - db->nProgressOps = 0; - db->pProgressArg = 0; +SQLITE_API const char *SQLITE_STDCALL sqlite3_uri_parameter(const char *zFilename, const char *zParam){ + if( zFilename==0 || zParam==0 ) return 0; + zFilename += sqlite3Strlen30(zFilename) + 1; + while( zFilename[0] ){ + int x = strcmp(zFilename, zParam); + zFilename += sqlite3Strlen30(zFilename) + 1; + if( x==0 ) return zFilename; + zFilename += sqlite3Strlen30(zFilename) + 1; } - sqlite3_mutex_leave(db->mutex); + return 0; } -#endif - /* -** This routine installs a default busy handler that waits for the -** specified number of milliseconds before returning 0. +** Return a boolean value for a query parameter. */ -SQLITE_API int SQLITE_STDCALL sqlite3_busy_timeout(sqlite3 *db, int ms){ -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; -#endif - if( ms>0 ){ - sqlite3_busy_handler(db, sqliteDefaultBusyCallback, (void*)db); - db->busyTimeout = ms; - }else{ - sqlite3_busy_handler(db, 0, 0); - } - return SQLITE_OK; +SQLITE_API int SQLITE_STDCALL sqlite3_uri_boolean(const char *zFilename, const char *zParam, int bDflt){ + const char *z = sqlite3_uri_parameter(zFilename, zParam); + bDflt = bDflt!=0; + return z ? sqlite3GetBoolean(z, bDflt) : bDflt; } /* -** Cause any pending operation to stop at its earliest opportunity. +** Return a 64-bit integer value for a query parameter. */ -SQLITE_API void SQLITE_STDCALL sqlite3_interrupt(sqlite3 *db){ -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ){ - (void)SQLITE_MISUSE_BKPT; - return; +SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_uri_int64( + const char *zFilename, /* Filename as passed to xOpen */ + const char *zParam, /* URI parameter sought */ + sqlite3_int64 bDflt /* return if parameter is missing */ +){ + const char *z = sqlite3_uri_parameter(zFilename, zParam); + sqlite3_int64 v; + if( z && sqlite3DecOrHexToI64(z, &v)==SQLITE_OK ){ + bDflt = v; } -#endif - db->u1.isInterrupted = 1; + return bDflt; } - /* -** This function is exactly the same as sqlite3_create_function(), except -** that it is designed to be called by internal code. The difference is -** that if a malloc() fails in sqlite3_create_function(), an error code -** is returned and the mallocFailed flag cleared. +** Return the Btree pointer identified by zDbName. Return NULL if not found. */ -SQLITE_PRIVATE int sqlite3CreateFunc( - sqlite3 *db, - const char *zFunctionName, - int nArg, - int enc, - void *pUserData, - void (*xFunc)(sqlite3_context*,int,sqlite3_value **), - void (*xStep)(sqlite3_context*,int,sqlite3_value **), - void (*xFinal)(sqlite3_context*), - FuncDestructor *pDestructor -){ - FuncDef *p; - int nName; - int extraFlags; - - assert( sqlite3_mutex_held(db->mutex) ); - if( zFunctionName==0 || - (xFunc && (xFinal || xStep)) || - (!xFunc && (xFinal && !xStep)) || - (!xFunc && (!xFinal && xStep)) || - (nArg<-1 || nArg>SQLITE_MAX_FUNCTION_ARG) || - (255<(nName = sqlite3Strlen30( zFunctionName))) ){ - return SQLITE_MISUSE_BKPT; - } - - assert( SQLITE_FUNC_CONSTANT==SQLITE_DETERMINISTIC ); - extraFlags = enc & SQLITE_DETERMINISTIC; - enc &= (SQLITE_FUNC_ENCMASK|SQLITE_ANY); - -#ifndef SQLITE_OMIT_UTF16 - /* If SQLITE_UTF16 is specified as the encoding type, transform this - ** to one of SQLITE_UTF16LE or SQLITE_UTF16BE using the - ** SQLITE_UTF16NATIVE macro. SQLITE_UTF16 is not used internally. - ** - ** If SQLITE_ANY is specified, add three versions of the function - ** to the hash table. - */ - if( enc==SQLITE_UTF16 ){ - enc = SQLITE_UTF16NATIVE; - }else if( enc==SQLITE_ANY ){ - int rc; - rc = sqlite3CreateFunc(db, zFunctionName, nArg, SQLITE_UTF8|extraFlags, - pUserData, xFunc, xStep, xFinal, pDestructor); - if( rc==SQLITE_OK ){ - rc = sqlite3CreateFunc(db, zFunctionName, nArg, SQLITE_UTF16LE|extraFlags, - pUserData, xFunc, xStep, xFinal, pDestructor); - } - if( rc!=SQLITE_OK ){ - return rc; - } - enc = SQLITE_UTF16BE; - } -#else - enc = SQLITE_UTF8; -#endif - - /* Check if an existing function is being overridden or deleted. If so, - ** and there are active VMs, then return SQLITE_BUSY. If a function - ** is being overridden/deleted but there are no active VMs, allow the - ** operation to continue but invalidate all precompiled statements. - */ - p = sqlite3FindFunction(db, zFunctionName, nName, nArg, (u8)enc, 0); - if( p && (p->funcFlags & SQLITE_FUNC_ENCMASK)==enc && p->nArg==nArg ){ - if( db->nVdbeActive ){ - sqlite3ErrorWithMsg(db, SQLITE_BUSY, - "unable to delete/modify user-function due to active statements"); - assert( !db->mallocFailed ); - return SQLITE_BUSY; - }else{ - sqlite3ExpirePreparedStatements(db); +SQLITE_PRIVATE Btree *sqlite3DbNameToBtree(sqlite3 *db, const char *zDbName){ + int i; + for(i=0; inDb; i++){ + if( db->aDb[i].pBt + && (zDbName==0 || sqlite3StrICmp(zDbName, db->aDb[i].zName)==0) + ){ + return db->aDb[i].pBt; } } - - p = sqlite3FindFunction(db, zFunctionName, nName, nArg, (u8)enc, 1); - assert(p || db->mallocFailed); - if( !p ){ - return SQLITE_NOMEM; - } - - /* If an older version of the function with a configured destructor is - ** being replaced invoke the destructor function here. */ - functionDestroy(db, p); - - if( pDestructor ){ - pDestructor->nRef++; - } - p->pDestructor = pDestructor; - p->funcFlags = (p->funcFlags & SQLITE_FUNC_ENCMASK) | extraFlags; - testcase( p->funcFlags & SQLITE_DETERMINISTIC ); - p->xFunc = xFunc; - p->xStep = xStep; - p->xFinalize = xFinal; - p->pUserData = pUserData; - p->nArg = (u16)nArg; - return SQLITE_OK; + return 0; } /* -** Create new user functions. +** Return the filename of the database associated with a database +** connection. */ -SQLITE_API int SQLITE_STDCALL sqlite3_create_function( - sqlite3 *db, - const char *zFunc, - int nArg, - int enc, - void *p, - void (*xFunc)(sqlite3_context*,int,sqlite3_value **), - void (*xStep)(sqlite3_context*,int,sqlite3_value **), - void (*xFinal)(sqlite3_context*) -){ - return sqlite3_create_function_v2(db, zFunc, nArg, enc, p, xFunc, xStep, - xFinal, 0); -} - -SQLITE_API int SQLITE_STDCALL sqlite3_create_function_v2( - sqlite3 *db, - const char *zFunc, - int nArg, - int enc, - void *p, - void (*xFunc)(sqlite3_context*,int,sqlite3_value **), - void (*xStep)(sqlite3_context*,int,sqlite3_value **), - void (*xFinal)(sqlite3_context*), - void (*xDestroy)(void *) -){ - int rc = SQLITE_ERROR; - FuncDestructor *pArg = 0; - +SQLITE_API const char *SQLITE_STDCALL sqlite3_db_filename(sqlite3 *db, const char *zDbName){ + Btree *pBt; #ifdef SQLITE_ENABLE_API_ARMOR if( !sqlite3SafetyCheckOk(db) ){ - return SQLITE_MISUSE_BKPT; + (void)SQLITE_MISUSE_BKPT; + return 0; } #endif - sqlite3_mutex_enter(db->mutex); - if( xDestroy ){ - pArg = (FuncDestructor *)sqlite3DbMallocZero(db, sizeof(FuncDestructor)); - if( !pArg ){ - xDestroy(p); - goto out; - } - pArg->xDestroy = xDestroy; - pArg->pUserData = p; - } - rc = sqlite3CreateFunc(db, zFunc, nArg, enc, p, xFunc, xStep, xFinal, pArg); - if( pArg && pArg->nRef==0 ){ - assert( rc!=SQLITE_OK ); - xDestroy(p); - sqlite3DbFree(db, pArg); - } - - out: - rc = sqlite3ApiExit(db, rc); - sqlite3_mutex_leave(db->mutex); - return rc; + pBt = sqlite3DbNameToBtree(db, zDbName); + return pBt ? sqlite3BtreeGetFilename(pBt) : 0; } -#ifndef SQLITE_OMIT_UTF16 -SQLITE_API int SQLITE_STDCALL sqlite3_create_function16( - sqlite3 *db, - const void *zFunctionName, - int nArg, - int eTextRep, - void *p, - void (*xFunc)(sqlite3_context*,int,sqlite3_value**), - void (*xStep)(sqlite3_context*,int,sqlite3_value**), - void (*xFinal)(sqlite3_context*) -){ - int rc; - char *zFunc8; - +/* +** Return 1 if database is read-only or 0 if read/write. Return -1 if +** no such database exists. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3_db_readonly(sqlite3 *db, const char *zDbName){ + Btree *pBt; #ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) || zFunctionName==0 ) return SQLITE_MISUSE_BKPT; + if( !sqlite3SafetyCheckOk(db) ){ + (void)SQLITE_MISUSE_BKPT; + return -1; + } #endif - sqlite3_mutex_enter(db->mutex); - assert( !db->mallocFailed ); - zFunc8 = sqlite3Utf16to8(db, zFunctionName, -1, SQLITE_UTF16NATIVE); - rc = sqlite3CreateFunc(db, zFunc8, nArg, eTextRep, p, xFunc, xStep, xFinal,0); - sqlite3DbFree(db, zFunc8); - rc = sqlite3ApiExit(db, rc); - sqlite3_mutex_leave(db->mutex); - return rc; + pBt = sqlite3DbNameToBtree(db, zDbName); + return pBt ? sqlite3BtreeIsReadonly(pBt) : -1; } -#endif - +/************** End of main.c ************************************************/ +/************** Begin file notify.c ******************************************/ /* -** Declare that a function has been overloaded by a virtual table. +** 2009 March 3 ** -** If the function already exists as a regular global function, then -** this routine is a no-op. If the function does not exist, then create -** a new one that always throws a run-time error. +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: ** -** When virtual tables intend to provide an overloaded function, they -** should call this routine to make sure the global function exists. -** A global function must exist in order for name resolution to work -** properly. +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains the implementation of the sqlite3_unlock_notify() +** API method and its associated functionality. */ -SQLITE_API int SQLITE_STDCALL sqlite3_overload_function( - sqlite3 *db, - const char *zName, - int nArg -){ - int nName = sqlite3Strlen30(zName); - int rc = SQLITE_OK; +/* #include "sqliteInt.h" */ +/* #include "btreeInt.h" */ -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) || zName==0 || nArg<-2 ){ - return SQLITE_MISUSE_BKPT; - } -#endif - sqlite3_mutex_enter(db->mutex); - if( sqlite3FindFunction(db, zName, nName, nArg, SQLITE_UTF8, 0)==0 ){ - rc = sqlite3CreateFunc(db, zName, nArg, SQLITE_UTF8, - 0, sqlite3InvalidFunction, 0, 0, 0); - } - rc = sqlite3ApiExit(db, rc); - sqlite3_mutex_leave(db->mutex); - return rc; -} +/* Omit this entire file if SQLITE_ENABLE_UNLOCK_NOTIFY is not defined. */ +#ifdef SQLITE_ENABLE_UNLOCK_NOTIFY -#ifndef SQLITE_OMIT_TRACE /* -** Register a trace function. The pArg from the previously registered trace -** is returned. +** Public interfaces: ** -** A NULL trace function means that no tracing is executes. A non-NULL -** trace is a pointer to a function that is invoked at the start of each -** SQL statement. +** sqlite3ConnectionBlocked() +** sqlite3ConnectionUnlocked() +** sqlite3ConnectionClosed() +** sqlite3_unlock_notify() */ -SQLITE_API void *SQLITE_STDCALL sqlite3_trace(sqlite3 *db, void (*xTrace)(void*,const char*), void *pArg){ - void *pOld; -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ){ - (void)SQLITE_MISUSE_BKPT; - return 0; - } -#endif - sqlite3_mutex_enter(db->mutex); - pOld = db->pTraceArg; - db->xTrace = xTrace; - db->pTraceArg = pArg; - sqlite3_mutex_leave(db->mutex); - return pOld; -} +#define assertMutexHeld() \ + assert( sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)) ) + /* -** Register a profile function. The pArg from the previously registered -** profile function is returned. +** Head of a linked list of all sqlite3 objects created by this process +** for which either sqlite3.pBlockingConnection or sqlite3.pUnlockConnection +** is not NULL. This variable may only accessed while the STATIC_MASTER +** mutex is held. +*/ +static sqlite3 *SQLITE_WSD sqlite3BlockedList = 0; + +#ifndef NDEBUG +/* +** This function is a complex assert() that verifies the following +** properties of the blocked connections list: ** -** A NULL profile function means that no profiling is executes. A non-NULL -** profile is a pointer to a function that is invoked at the conclusion of -** each SQL statement that is run. +** 1) Each entry in the list has a non-NULL value for either +** pUnlockConnection or pBlockingConnection, or both. +** +** 2) All entries in the list that share a common value for +** xUnlockNotify are grouped together. +** +** 3) If the argument db is not NULL, then none of the entries in the +** blocked connections list have pUnlockConnection or pBlockingConnection +** set to db. This is used when closing connection db. */ -SQLITE_API void *SQLITE_STDCALL sqlite3_profile( - sqlite3 *db, - void (*xProfile)(void*,const char*,sqlite_uint64), - void *pArg -){ - void *pOld; +static void checkListProperties(sqlite3 *db){ + sqlite3 *p; + for(p=sqlite3BlockedList; p; p=p->pNextBlocked){ + int seen = 0; + sqlite3 *p2; -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ){ - (void)SQLITE_MISUSE_BKPT; - return 0; + /* Verify property (1) */ + assert( p->pUnlockConnection || p->pBlockingConnection ); + + /* Verify property (2) */ + for(p2=sqlite3BlockedList; p2!=p; p2=p2->pNextBlocked){ + if( p2->xUnlockNotify==p->xUnlockNotify ) seen = 1; + assert( p2->xUnlockNotify==p->xUnlockNotify || !seen ); + assert( db==0 || p->pUnlockConnection!=db ); + assert( db==0 || p->pBlockingConnection!=db ); + } } -#endif - sqlite3_mutex_enter(db->mutex); - pOld = db->pProfileArg; - db->xProfile = xProfile; - db->pProfileArg = pArg; - sqlite3_mutex_leave(db->mutex); - return pOld; } -#endif /* SQLITE_OMIT_TRACE */ +#else +# define checkListProperties(x) +#endif /* -** Register a function to be invoked when a transaction commits. -** If the invoked function returns non-zero, then the commit becomes a -** rollback. +** Remove connection db from the blocked connections list. If connection +** db is not currently a part of the list, this function is a no-op. */ -SQLITE_API void *SQLITE_STDCALL sqlite3_commit_hook( - sqlite3 *db, /* Attach the hook to this database */ - int (*xCallback)(void*), /* Function to invoke on each commit */ - void *pArg /* Argument to the function */ -){ - void *pOld; - -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ){ - (void)SQLITE_MISUSE_BKPT; - return 0; +static void removeFromBlockedList(sqlite3 *db){ + sqlite3 **pp; + assertMutexHeld(); + for(pp=&sqlite3BlockedList; *pp; pp = &(*pp)->pNextBlocked){ + if( *pp==db ){ + *pp = (*pp)->pNextBlocked; + break; + } } -#endif - sqlite3_mutex_enter(db->mutex); - pOld = db->pCommitArg; - db->xCommitCallback = xCallback; - db->pCommitArg = pArg; - sqlite3_mutex_leave(db->mutex); - return pOld; } /* -** Register a callback to be invoked each time a row is updated, -** inserted or deleted using this database connection. +** Add connection db to the blocked connections list. It is assumed +** that it is not already a part of the list. */ -SQLITE_API void *SQLITE_STDCALL sqlite3_update_hook( - sqlite3 *db, /* Attach the hook to this database */ - void (*xCallback)(void*,int,char const *,char const *,sqlite_int64), - void *pArg /* Argument to the function */ -){ - void *pRet; - -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ){ - (void)SQLITE_MISUSE_BKPT; - return 0; - } -#endif - sqlite3_mutex_enter(db->mutex); - pRet = db->pUpdateArg; - db->xUpdateCallback = xCallback; - db->pUpdateArg = pArg; - sqlite3_mutex_leave(db->mutex); - return pRet; +static void addToBlockedList(sqlite3 *db){ + sqlite3 **pp; + assertMutexHeld(); + for( + pp=&sqlite3BlockedList; + *pp && (*pp)->xUnlockNotify!=db->xUnlockNotify; + pp=&(*pp)->pNextBlocked + ); + db->pNextBlocked = *pp; + *pp = db; } /* -** Register a callback to be invoked each time a transaction is rolled -** back by this database connection. +** Obtain the STATIC_MASTER mutex. */ -SQLITE_API void *SQLITE_STDCALL sqlite3_rollback_hook( - sqlite3 *db, /* Attach the hook to this database */ - void (*xCallback)(void*), /* Callback function */ - void *pArg /* Argument to the function */ -){ - void *pRet; - -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ){ - (void)SQLITE_MISUSE_BKPT; - return 0; - } -#endif - sqlite3_mutex_enter(db->mutex); - pRet = db->pRollbackArg; - db->xRollbackCallback = xCallback; - db->pRollbackArg = pArg; - sqlite3_mutex_leave(db->mutex); - return pRet; +static void enterMutex(void){ + sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); + checkListProperties(0); } -#ifndef SQLITE_OMIT_WAL /* -** The sqlite3_wal_hook() callback registered by sqlite3_wal_autocheckpoint(). -** Invoke sqlite3_wal_checkpoint if the number of frames in the log file -** is greater than sqlite3.pWalArg cast to an integer (the value configured by -** wal_autocheckpoint()). -*/ -SQLITE_PRIVATE int sqlite3WalDefaultHook( - void *pClientData, /* Argument */ - sqlite3 *db, /* Connection */ - const char *zDb, /* Database */ - int nFrame /* Size of WAL */ -){ - if( nFrame>=SQLITE_PTR_TO_INT(pClientData) ){ - sqlite3BeginBenignMalloc(); - sqlite3_wal_checkpoint(db, zDb); - sqlite3EndBenignMalloc(); - } - return SQLITE_OK; +** Release the STATIC_MASTER mutex. +*/ +static void leaveMutex(void){ + assertMutexHeld(); + checkListProperties(0); + sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); } -#endif /* SQLITE_OMIT_WAL */ /* -** Configure an sqlite3_wal_hook() callback to automatically checkpoint -** a database after committing a transaction if there are nFrame or -** more frames in the log file. Passing zero or a negative value as the -** nFrame parameter disables automatic checkpoints entirely. +** Register an unlock-notify callback. ** -** The callback registered by this function replaces any existing callback -** registered using sqlite3_wal_hook(). Likewise, registering a callback -** using sqlite3_wal_hook() disables the automatic checkpoint mechanism -** configured by this function. +** This is called after connection "db" has attempted some operation +** but has received an SQLITE_LOCKED error because another connection +** (call it pOther) in the same process was busy using the same shared +** cache. pOther is found by looking at db->pBlockingConnection. +** +** If there is no blocking connection, the callback is invoked immediately, +** before this routine returns. +** +** If pOther is already blocked on db, then report SQLITE_LOCKED, to indicate +** a deadlock. +** +** Otherwise, make arrangements to invoke xNotify when pOther drops +** its locks. +** +** Each call to this routine overrides any prior callbacks registered +** on the same "db". If xNotify==0 then any prior callbacks are immediately +** cancelled. */ -SQLITE_API int SQLITE_STDCALL sqlite3_wal_autocheckpoint(sqlite3 *db, int nFrame){ -#ifdef SQLITE_OMIT_WAL - UNUSED_PARAMETER(db); - UNUSED_PARAMETER(nFrame); -#else -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; -#endif - if( nFrame>0 ){ - sqlite3_wal_hook(db, sqlite3WalDefaultHook, SQLITE_INT_TO_PTR(nFrame)); +SQLITE_API int SQLITE_STDCALL sqlite3_unlock_notify( + sqlite3 *db, + void (*xNotify)(void **, int), + void *pArg +){ + int rc = SQLITE_OK; + + sqlite3_mutex_enter(db->mutex); + enterMutex(); + + if( xNotify==0 ){ + removeFromBlockedList(db); + db->pBlockingConnection = 0; + db->pUnlockConnection = 0; + db->xUnlockNotify = 0; + db->pUnlockArg = 0; + }else if( 0==db->pBlockingConnection ){ + /* The blocking transaction has been concluded. Or there never was a + ** blocking transaction. In either case, invoke the notify callback + ** immediately. + */ + xNotify(&pArg, 1); }else{ - sqlite3_wal_hook(db, 0, 0); + sqlite3 *p; + + for(p=db->pBlockingConnection; p && p!=db; p=p->pUnlockConnection){} + if( p ){ + rc = SQLITE_LOCKED; /* Deadlock detected. */ + }else{ + db->pUnlockConnection = db->pBlockingConnection; + db->xUnlockNotify = xNotify; + db->pUnlockArg = pArg; + removeFromBlockedList(db); + addToBlockedList(db); + } } -#endif - return SQLITE_OK; + + leaveMutex(); + assert( !db->mallocFailed ); + sqlite3ErrorWithMsg(db, rc, (rc?"database is deadlocked":0)); + sqlite3_mutex_leave(db->mutex); + return rc; } /* -** Register a callback to be invoked each time a transaction is written -** into the write-ahead-log by this database connection. +** This function is called while stepping or preparing a statement +** associated with connection db. The operation will return SQLITE_LOCKED +** to the user because it requires a lock that will not be available +** until connection pBlocker concludes its current transaction. */ -SQLITE_API void *SQLITE_STDCALL sqlite3_wal_hook( - sqlite3 *db, /* Attach the hook to this db handle */ - int(*xCallback)(void *, sqlite3*, const char*, int), - void *pArg /* First argument passed to xCallback() */ -){ -#ifndef SQLITE_OMIT_WAL - void *pRet; -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ){ - (void)SQLITE_MISUSE_BKPT; - return 0; +SQLITE_PRIVATE void sqlite3ConnectionBlocked(sqlite3 *db, sqlite3 *pBlocker){ + enterMutex(); + if( db->pBlockingConnection==0 && db->pUnlockConnection==0 ){ + addToBlockedList(db); } -#endif - sqlite3_mutex_enter(db->mutex); - pRet = db->pWalArg; - db->xWalCallback = xCallback; - db->pWalArg = pArg; - sqlite3_mutex_leave(db->mutex); - return pRet; -#else - return 0; -#endif + db->pBlockingConnection = pBlocker; + leaveMutex(); } /* -** Checkpoint database zDb. +** This function is called when +** the transaction opened by database db has just finished. Locks held +** by database connection db have been released. +** +** This function loops through each entry in the blocked connections +** list and does the following: +** +** 1) If the sqlite3.pBlockingConnection member of a list entry is +** set to db, then set pBlockingConnection=0. +** +** 2) If the sqlite3.pUnlockConnection member of a list entry is +** set to db, then invoke the configured unlock-notify callback and +** set pUnlockConnection=0. +** +** 3) If the two steps above mean that pBlockingConnection==0 and +** pUnlockConnection==0, remove the entry from the blocked connections +** list. */ -SQLITE_API int SQLITE_STDCALL sqlite3_wal_checkpoint_v2( - sqlite3 *db, /* Database handle */ - const char *zDb, /* Name of attached database (or NULL) */ - int eMode, /* SQLITE_CHECKPOINT_* value */ - int *pnLog, /* OUT: Size of WAL log in frames */ - int *pnCkpt /* OUT: Total number of frames checkpointed */ -){ -#ifdef SQLITE_OMIT_WAL - return SQLITE_OK; -#else - int rc; /* Return code */ - int iDb = SQLITE_MAX_ATTACHED; /* sqlite3.aDb[] index of db to checkpoint */ +SQLITE_PRIVATE void sqlite3ConnectionUnlocked(sqlite3 *db){ + void (*xUnlockNotify)(void **, int) = 0; /* Unlock-notify cb to invoke */ + int nArg = 0; /* Number of entries in aArg[] */ + sqlite3 **pp; /* Iterator variable */ + void **aArg; /* Arguments to the unlock callback */ + void **aDyn = 0; /* Dynamically allocated space for aArg[] */ + void *aStatic[16]; /* Starter space for aArg[]. No malloc required */ -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; -#endif + aArg = aStatic; + enterMutex(); /* Enter STATIC_MASTER mutex */ - /* Initialize the output variables to -1 in case an error occurs. */ - if( pnLog ) *pnLog = -1; - if( pnCkpt ) *pnCkpt = -1; + /* This loop runs once for each entry in the blocked-connections list. */ + for(pp=&sqlite3BlockedList; *pp; /* no-op */ ){ + sqlite3 *p = *pp; - assert( SQLITE_CHECKPOINT_PASSIVE==0 ); - assert( SQLITE_CHECKPOINT_FULL==1 ); - assert( SQLITE_CHECKPOINT_RESTART==2 ); - assert( SQLITE_CHECKPOINT_TRUNCATE==3 ); - if( eModeSQLITE_CHECKPOINT_TRUNCATE ){ - /* EVIDENCE-OF: R-03996-12088 The M parameter must be a valid checkpoint - ** mode: */ - return SQLITE_MISUSE; - } + /* Step 1. */ + if( p->pBlockingConnection==db ){ + p->pBlockingConnection = 0; + } - sqlite3_mutex_enter(db->mutex); - if( zDb && zDb[0] ){ - iDb = sqlite3FindDbName(db, zDb); + /* Step 2. */ + if( p->pUnlockConnection==db ){ + assert( p->xUnlockNotify ); + if( p->xUnlockNotify!=xUnlockNotify && nArg!=0 ){ + xUnlockNotify(aArg, nArg); + nArg = 0; + } + + sqlite3BeginBenignMalloc(); + assert( aArg==aDyn || (aDyn==0 && aArg==aStatic) ); + assert( nArg<=(int)ArraySize(aStatic) || aArg==aDyn ); + if( (!aDyn && nArg==(int)ArraySize(aStatic)) + || (aDyn && nArg==(int)(sqlite3MallocSize(aDyn)/sizeof(void*))) + ){ + /* The aArg[] array needs to grow. */ + void **pNew = (void **)sqlite3Malloc(nArg*sizeof(void *)*2); + if( pNew ){ + memcpy(pNew, aArg, nArg*sizeof(void *)); + sqlite3_free(aDyn); + aDyn = aArg = pNew; + }else{ + /* This occurs when the array of context pointers that need to + ** be passed to the unlock-notify callback is larger than the + ** aStatic[] array allocated on the stack and the attempt to + ** allocate a larger array from the heap has failed. + ** + ** This is a difficult situation to handle. Returning an error + ** code to the caller is insufficient, as even if an error code + ** is returned the transaction on connection db will still be + ** closed and the unlock-notify callbacks on blocked connections + ** will go unissued. This might cause the application to wait + ** indefinitely for an unlock-notify callback that will never + ** arrive. + ** + ** Instead, invoke the unlock-notify callback with the context + ** array already accumulated. We can then clear the array and + ** begin accumulating any further context pointers without + ** requiring any dynamic allocation. This is sub-optimal because + ** it means that instead of one callback with a large array of + ** context pointers the application will receive two or more + ** callbacks with smaller arrays of context pointers, which will + ** reduce the applications ability to prioritize multiple + ** connections. But it is the best that can be done under the + ** circumstances. + */ + xUnlockNotify(aArg, nArg); + nArg = 0; + } + } + sqlite3EndBenignMalloc(); + + aArg[nArg++] = p->pUnlockArg; + xUnlockNotify = p->xUnlockNotify; + p->pUnlockConnection = 0; + p->xUnlockNotify = 0; + p->pUnlockArg = 0; + } + + /* Step 3. */ + if( p->pBlockingConnection==0 && p->pUnlockConnection==0 ){ + /* Remove connection p from the blocked connections list. */ + *pp = p->pNextBlocked; + p->pNextBlocked = 0; + }else{ + pp = &p->pNextBlocked; + } } - if( iDb<0 ){ - rc = SQLITE_ERROR; - sqlite3ErrorWithMsg(db, SQLITE_ERROR, "unknown database: %s", zDb); - }else{ - db->busyHandler.nBusy = 0; - rc = sqlite3Checkpoint(db, iDb, eMode, pnLog, pnCkpt); - sqlite3Error(db, rc); + + if( nArg!=0 ){ + xUnlockNotify(aArg, nArg); } - rc = sqlite3ApiExit(db, rc); - sqlite3_mutex_leave(db->mutex); - return rc; -#endif + sqlite3_free(aDyn); + leaveMutex(); /* Leave STATIC_MASTER mutex */ } - /* -** Checkpoint database zDb. If zDb is NULL, or if the buffer zDb points -** to contains a zero-length string, all attached databases are -** checkpointed. +** This is called when the database connection passed as an argument is +** being closed. The connection is removed from the blocked list. */ -SQLITE_API int SQLITE_STDCALL sqlite3_wal_checkpoint(sqlite3 *db, const char *zDb){ - /* EVIDENCE-OF: R-41613-20553 The sqlite3_wal_checkpoint(D,X) is equivalent to - ** sqlite3_wal_checkpoint_v2(D,X,SQLITE_CHECKPOINT_PASSIVE,0,0). */ - return sqlite3_wal_checkpoint_v2(db,zDb,SQLITE_CHECKPOINT_PASSIVE,0,0); +SQLITE_PRIVATE void sqlite3ConnectionClosed(sqlite3 *db){ + sqlite3ConnectionUnlocked(db); + enterMutex(); + removeFromBlockedList(db); + checkListProperties(db); + leaveMutex(); } +#endif -#ifndef SQLITE_OMIT_WAL +/************** End of notify.c **********************************************/ +/************** Begin file fts3.c ********************************************/ /* -** Run a checkpoint on database iDb. This is a no-op if database iDb is -** not currently open in WAL mode. +** 2006 Oct 10 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This is an SQLite module implementing full-text search. +*/ + +/* +** The code in this file is only compiled if: +** +** * The FTS3 module is being built as an extension +** (in which case SQLITE_CORE is not defined), or +** +** * The FTS3 module is being built into the core of +** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). +*/ + +/* The full-text index is stored in a series of b+tree (-like) +** structures called segments which map terms to doclists. The +** structures are like b+trees in layout, but are constructed from the +** bottom up in optimal fashion and are not updatable. Since trees +** are built from the bottom up, things will be described from the +** bottom up. +** +** +**** Varints **** +** The basic unit of encoding is a variable-length integer called a +** varint. We encode variable-length integers in little-endian order +** using seven bits * per byte as follows: +** +** KEY: +** A = 0xxxxxxx 7 bits of data and one flag bit +** B = 1xxxxxxx 7 bits of data and one flag bit +** +** 7 bits - A +** 14 bits - BA +** 21 bits - BBA +** and so on. +** +** This is similar in concept to how sqlite encodes "varints" but +** the encoding is not the same. SQLite varints are big-endian +** are are limited to 9 bytes in length whereas FTS3 varints are +** little-endian and can be up to 10 bytes in length (in theory). +** +** Example encodings: +** +** 1: 0x01 +** 127: 0x7f +** 128: 0x81 0x00 +** +** +**** Document lists **** +** A doclist (document list) holds a docid-sorted list of hits for a +** given term. Doclists hold docids and associated token positions. +** A docid is the unique integer identifier for a single document. +** A position is the index of a word within the document. The first +** word of the document has a position of 0. +** +** FTS3 used to optionally store character offsets using a compile-time +** option. But that functionality is no longer supported. +** +** A doclist is stored like this: +** +** array { +** varint docid; (delta from previous doclist) +** array { (position list for column 0) +** varint position; (2 more than the delta from previous position) +** } +** array { +** varint POS_COLUMN; (marks start of position list for new column) +** varint column; (index of new column) +** array { +** varint position; (2 more than the delta from previous position) +** } +** } +** varint POS_END; (marks end of positions for this document. +** } +** +** Here, array { X } means zero or more occurrences of X, adjacent in +** memory. A "position" is an index of a token in the token stream +** generated by the tokenizer. Note that POS_END and POS_COLUMN occur +** in the same logical place as the position element, and act as sentinals +** ending a position list array. POS_END is 0. POS_COLUMN is 1. +** The positions numbers are not stored literally but rather as two more +** than the difference from the prior position, or the just the position plus +** 2 for the first position. Example: +** +** label: A B C D E F G H I J K +** value: 123 5 9 1 1 14 35 0 234 72 0 +** +** The 123 value is the first docid. For column zero in this document +** there are two matches at positions 3 and 10 (5-2 and 9-2+3). The 1 +** at D signals the start of a new column; the 1 at E indicates that the +** new column is column number 1. There are two positions at 12 and 45 +** (14-2 and 35-2+12). The 0 at H indicate the end-of-document. The +** 234 at I is the delta to next docid (357). It has one position 70 +** (72-2) and then terminates with the 0 at K. +** +** A "position-list" is the list of positions for multiple columns for +** a single docid. A "column-list" is the set of positions for a single +** column. Hence, a position-list consists of one or more column-lists, +** a document record consists of a docid followed by a position-list and +** a doclist consists of one or more document records. +** +** A bare doclist omits the position information, becoming an +** array of varint-encoded docids. +** +**** Segment leaf nodes **** +** Segment leaf nodes store terms and doclists, ordered by term. Leaf +** nodes are written using LeafWriter, and read using LeafReader (to +** iterate through a single leaf node's data) and LeavesReader (to +** iterate through a segment's entire leaf layer). Leaf nodes have +** the format: +** +** varint iHeight; (height from leaf level, always 0) +** varint nTerm; (length of first term) +** char pTerm[nTerm]; (content of first term) +** varint nDoclist; (length of term's associated doclist) +** char pDoclist[nDoclist]; (content of doclist) +** array { +** (further terms are delta-encoded) +** varint nPrefix; (length of prefix shared with previous term) +** varint nSuffix; (length of unshared suffix) +** char pTermSuffix[nSuffix];(unshared suffix of next term) +** varint nDoclist; (length of term's associated doclist) +** char pDoclist[nDoclist]; (content of doclist) +** } +** +** Here, array { X } means zero or more occurrences of X, adjacent in +** memory. +** +** Leaf nodes are broken into blocks which are stored contiguously in +** the %_segments table in sorted order. This means that when the end +** of a node is reached, the next term is in the node with the next +** greater node id. +** +** New data is spilled to a new leaf node when the current node +** exceeds LEAF_MAX bytes (default 2048). New data which itself is +** larger than STANDALONE_MIN (default 1024) is placed in a standalone +** node (a leaf node with a single term and doclist). The goal of +** these settings is to pack together groups of small doclists while +** making it efficient to directly access large doclists. The +** assumption is that large doclists represent terms which are more +** likely to be query targets. +** +** TODO(shess) It may be useful for blocking decisions to be more +** dynamic. For instance, it may make more sense to have a 2.5k leaf +** node rather than splitting into 2k and .5k nodes. My intuition is +** that this might extend through 2x or 4x the pagesize. +** +** +**** Segment interior nodes **** +** Segment interior nodes store blockids for subtree nodes and terms +** to describe what data is stored by the each subtree. Interior +** nodes are written using InteriorWriter, and read using +** InteriorReader. InteriorWriters are created as needed when +** SegmentWriter creates new leaf nodes, or when an interior node +** itself grows too big and must be split. The format of interior +** nodes: +** +** varint iHeight; (height from leaf level, always >0) +** varint iBlockid; (block id of node's leftmost subtree) +** optional { +** varint nTerm; (length of first term) +** char pTerm[nTerm]; (content of first term) +** array { +** (further terms are delta-encoded) +** varint nPrefix; (length of shared prefix with previous term) +** varint nSuffix; (length of unshared suffix) +** char pTermSuffix[nSuffix]; (unshared suffix of next term) +** } +** } +** +** Here, optional { X } means an optional element, while array { X } +** means zero or more occurrences of X, adjacent in memory. +** +** An interior node encodes n terms separating n+1 subtrees. The +** subtree blocks are contiguous, so only the first subtree's blockid +** is encoded. The subtree at iBlockid will contain all terms less +** than the first term encoded (or all terms if no term is encoded). +** Otherwise, for terms greater than or equal to pTerm[i] but less +** than pTerm[i+1], the subtree for that term will be rooted at +** iBlockid+i. Interior nodes only store enough term data to +** distinguish adjacent children (if the rightmost term of the left +** child is "something", and the leftmost term of the right child is +** "wicked", only "w" is stored). +** +** New data is spilled to a new interior node at the same height when +** the current node exceeds INTERIOR_MAX bytes (default 2048). +** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing +** interior nodes and making the tree too skinny. The interior nodes +** at a given height are naturally tracked by interior nodes at +** height+1, and so on. +** +** +**** Segment directory **** +** The segment directory in table %_segdir stores meta-information for +** merging and deleting segments, and also the root node of the +** segment's tree. +** +** The root node is the top node of the segment's tree after encoding +** the entire segment, restricted to ROOT_MAX bytes (default 1024). +** This could be either a leaf node or an interior node. If the top +** node requires more than ROOT_MAX bytes, it is flushed to %_segments +** and a new root interior node is generated (which should always fit +** within ROOT_MAX because it only needs space for 2 varints, the +** height and the blockid of the previous root). +** +** The meta-information in the segment directory is: +** level - segment level (see below) +** idx - index within level +** - (level,idx uniquely identify a segment) +** start_block - first leaf node +** leaves_end_block - last leaf node +** end_block - last block (including interior nodes) +** root - contents of root node +** +** If the root node is a leaf node, then start_block, +** leaves_end_block, and end_block are all 0. +** +** +**** Segment merging **** +** To amortize update costs, segments are grouped into levels and +** merged in batches. Each increase in level represents exponentially +** more documents. +** +** New documents (actually, document updates) are tokenized and +** written individually (using LeafWriter) to a level 0 segment, with +** incrementing idx. When idx reaches MERGE_COUNT (default 16), all +** level 0 segments are merged into a single level 1 segment. Level 1 +** is populated like level 0, and eventually MERGE_COUNT level 1 +** segments are merged to a single level 2 segment (representing +** MERGE_COUNT^2 updates), and so on. +** +** A segment merge traverses all segments at a given level in +** parallel, performing a straightforward sorted merge. Since segment +** leaf nodes are written in to the %_segments table in order, this +** merge traverses the underlying sqlite disk structures efficiently. +** After the merge, all segment blocks from the merged level are +** deleted. +** +** MERGE_COUNT controls how often we merge segments. 16 seems to be +** somewhat of a sweet spot for insertion performance. 32 and 64 show +** very similar performance numbers to 16 on insertion, though they're +** a tiny bit slower (perhaps due to more overhead in merge-time +** sorting). 8 is about 20% slower than 16, 4 about 50% slower than +** 16, 2 about 66% slower than 16. ** -** If a transaction is open on the database being checkpointed, this -** function returns SQLITE_LOCKED and a checkpoint is not attempted. If -** an error occurs while running the checkpoint, an SQLite error code is -** returned (i.e. SQLITE_IOERR). Otherwise, SQLITE_OK. +** At query time, high MERGE_COUNT increases the number of segments +** which need to be scanned and merged. For instance, with 100k docs +** inserted: ** -** The mutex on database handle db should be held by the caller. The mutex -** associated with the specific b-tree being checkpointed is taken by -** this function while the checkpoint is running. +** MERGE_COUNT segments +** 16 25 +** 8 12 +** 4 10 +** 2 6 ** -** If iDb is passed SQLITE_MAX_ATTACHED, then all attached databases are -** checkpointed. If an error is encountered it is returned immediately - -** no attempt is made to checkpoint any remaining databases. +** This appears to have only a moderate impact on queries for very +** frequent terms (which are somewhat dominated by segment merge +** costs), and infrequent and non-existent terms still seem to be fast +** even with many segments. ** -** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART. +** TODO(shess) That said, it would be nice to have a better query-side +** argument for MERGE_COUNT of 16. Also, it is possible/likely that +** optimizations to things like doclist merging will swing the sweet +** spot around. +** +** +** +**** Handling of deletions and updates **** +** Since we're using a segmented structure, with no docid-oriented +** index into the term index, we clearly cannot simply update the term +** index when a document is deleted or updated. For deletions, we +** write an empty doclist (varint(docid) varint(POS_END)), for updates +** we simply write the new doclist. Segment merges overwrite older +** data for a particular docid with newer data, so deletes or updates +** will eventually overtake the earlier data and knock it out. The +** query logic likewise merges doclists so that newer data knocks out +** older data. */ -SQLITE_PRIVATE int sqlite3Checkpoint(sqlite3 *db, int iDb, int eMode, int *pnLog, int *pnCkpt){ - int rc = SQLITE_OK; /* Return code */ - int i; /* Used to iterate through attached dbs */ - int bBusy = 0; /* True if SQLITE_BUSY has been encountered */ - - assert( sqlite3_mutex_held(db->mutex) ); - assert( !pnLog || *pnLog==-1 ); - assert( !pnCkpt || *pnCkpt==-1 ); - - for(i=0; inDb && rc==SQLITE_OK; i++){ - if( i==iDb || iDb==SQLITE_MAX_ATTACHED ){ - rc = sqlite3BtreeCheckpoint(db->aDb[i].pBt, eMode, pnLog, pnCkpt); - pnLog = 0; - pnCkpt = 0; - if( rc==SQLITE_BUSY ){ - bBusy = 1; - rc = SQLITE_OK; - } - } - } - - return (rc==SQLITE_OK && bBusy) ? SQLITE_BUSY : rc; -} -#endif /* SQLITE_OMIT_WAL */ +/************** Include fts3Int.h in the middle of fts3.c ********************/ +/************** Begin file fts3Int.h *****************************************/ /* -** This function returns true if main-memory should be used instead of -** a temporary file for transient pager files and statement journals. -** The value returned depends on the value of db->temp_store (runtime -** parameter) and the compile time value of SQLITE_TEMP_STORE. The -** following table describes the relationship between these two values -** and this functions return value. +** 2009 Nov 12 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** ** -** SQLITE_TEMP_STORE db->temp_store Location of temporary database -** ----------------- -------------- ------------------------------ -** 0 any file (return 0) -** 1 1 file (return 0) -** 1 2 memory (return 1) -** 1 0 file (return 0) -** 2 1 file (return 0) -** 2 2 memory (return 1) -** 2 0 memory (return 1) -** 3 any memory (return 1) */ -SQLITE_PRIVATE int sqlite3TempInMemory(const sqlite3 *db){ -#if SQLITE_TEMP_STORE==1 - return ( db->temp_store==2 ); -#endif -#if SQLITE_TEMP_STORE==2 - return ( db->temp_store!=1 ); -#endif -#if SQLITE_TEMP_STORE==3 - return 1; -#endif -#if SQLITE_TEMP_STORE<1 || SQLITE_TEMP_STORE>3 - return 0; +#ifndef _FTSINT_H +#define _FTSINT_H + +#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) +# define NDEBUG 1 #endif -} /* -** Return UTF-8 encoded English language explanation of the most recent -** error. +** FTS4 is really an extension for FTS3. It is enabled using the +** SQLITE_ENABLE_FTS3 macro. But to avoid confusion we also all +** the SQLITE_ENABLE_FTS4 macro to serve as an alisse for SQLITE_ENABLE_FTS3. */ -SQLITE_API const char *SQLITE_STDCALL sqlite3_errmsg(sqlite3 *db){ - const char *z; - if( !db ){ - return sqlite3ErrStr(SQLITE_NOMEM); - } - if( !sqlite3SafetyCheckSickOrOk(db) ){ - return sqlite3ErrStr(SQLITE_MISUSE_BKPT); - } - sqlite3_mutex_enter(db->mutex); - if( db->mallocFailed ){ - z = sqlite3ErrStr(SQLITE_NOMEM); - }else{ - testcase( db->pErr==0 ); - z = (char*)sqlite3_value_text(db->pErr); - assert( !db->mallocFailed ); - if( z==0 ){ - z = sqlite3ErrStr(db->errCode); - } - } - sqlite3_mutex_leave(db->mutex); - return z; -} +#if defined(SQLITE_ENABLE_FTS4) && !defined(SQLITE_ENABLE_FTS3) +# define SQLITE_ENABLE_FTS3 +#endif -#ifndef SQLITE_OMIT_UTF16 -/* -** Return UTF-16 encoded English language explanation of the most recent -** error. -*/ -SQLITE_API const void *SQLITE_STDCALL sqlite3_errmsg16(sqlite3 *db){ - static const u16 outOfMem[] = { - 'o', 'u', 't', ' ', 'o', 'f', ' ', 'm', 'e', 'm', 'o', 'r', 'y', 0 - }; - static const u16 misuse[] = { - 'l', 'i', 'b', 'r', 'a', 'r', 'y', ' ', - 'r', 'o', 'u', 't', 'i', 'n', 'e', ' ', - 'c', 'a', 'l', 'l', 'e', 'd', ' ', - 'o', 'u', 't', ' ', - 'o', 'f', ' ', - 's', 'e', 'q', 'u', 'e', 'n', 'c', 'e', 0 - }; +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) - const void *z; - if( !db ){ - return (void *)outOfMem; - } - if( !sqlite3SafetyCheckSickOrOk(db) ){ - return (void *)misuse; - } - sqlite3_mutex_enter(db->mutex); - if( db->mallocFailed ){ - z = (void *)outOfMem; - }else{ - z = sqlite3_value_text16(db->pErr); - if( z==0 ){ - sqlite3ErrorWithMsg(db, db->errCode, sqlite3ErrStr(db->errCode)); - z = sqlite3_value_text16(db->pErr); - } - /* A malloc() may have failed within the call to sqlite3_value_text16() - ** above. If this is the case, then the db->mallocFailed flag needs to - ** be cleared before returning. Do this directly, instead of via - ** sqlite3ApiExit(), to avoid setting the database handle error message. - */ - db->mallocFailed = 0; - } - sqlite3_mutex_leave(db->mutex); - return z; -} -#endif /* SQLITE_OMIT_UTF16 */ +/* If not building as part of the core, include sqlite3ext.h. */ +#ifndef SQLITE_CORE +/* # include "sqlite3ext.h" */ +SQLITE_EXTENSION_INIT3 +#endif +/* #include "sqlite3.h" */ +/************** Include fts3_tokenizer.h in the middle of fts3Int.h **********/ +/************** Begin file fts3_tokenizer.h **********************************/ /* -** Return the most recent error code generated by an SQLite routine. If NULL is -** passed to this function, we assume a malloc() failed during sqlite3_open(). +** 2006 July 10 +** +** The author disclaims copyright to this source code. +** +************************************************************************* +** Defines the interface to tokenizers used by fulltext-search. There +** are three basic components: +** +** sqlite3_tokenizer_module is a singleton defining the tokenizer +** interface functions. This is essentially the class structure for +** tokenizers. +** +** sqlite3_tokenizer is used to define a particular tokenizer, perhaps +** including customization information defined at creation time. +** +** sqlite3_tokenizer_cursor is generated by a tokenizer to generate +** tokens from a particular input. */ -SQLITE_API int SQLITE_STDCALL sqlite3_errcode(sqlite3 *db){ - if( db && !sqlite3SafetyCheckSickOrOk(db) ){ - return SQLITE_MISUSE_BKPT; - } - if( !db || db->mallocFailed ){ - return SQLITE_NOMEM; - } - return db->errCode & db->errMask; -} -SQLITE_API int SQLITE_STDCALL sqlite3_extended_errcode(sqlite3 *db){ - if( db && !sqlite3SafetyCheckSickOrOk(db) ){ - return SQLITE_MISUSE_BKPT; - } - if( !db || db->mallocFailed ){ - return SQLITE_NOMEM; - } - return db->errCode; -} +#ifndef _FTS3_TOKENIZER_H_ +#define _FTS3_TOKENIZER_H_ -/* -** Return a string that describes the kind of error specified in the -** argument. For now, this simply calls the internal sqlite3ErrStr() -** function. +/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time. +** If tokenizers are to be allowed to call sqlite3_*() functions, then +** we will need a way to register the API consistently. */ -SQLITE_API const char *SQLITE_STDCALL sqlite3_errstr(int rc){ - return sqlite3ErrStr(rc); -} +/* #include "sqlite3.h" */ /* -** Create a new collating function for database "db". The name is zName -** and the encoding is enc. +** Structures used by the tokenizer interface. When a new tokenizer +** implementation is registered, the caller provides a pointer to +** an sqlite3_tokenizer_module containing pointers to the callback +** functions that make up an implementation. +** +** When an fts3 table is created, it passes any arguments passed to +** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the +** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer +** implementation. The xCreate() function in turn returns an +** sqlite3_tokenizer structure representing the specific tokenizer to +** be used for the fts3 table (customized by the tokenizer clause arguments). +** +** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen() +** method is called. It returns an sqlite3_tokenizer_cursor object +** that may be used to tokenize a specific input buffer based on +** the tokenization rules supplied by a specific sqlite3_tokenizer +** object. */ -static int createCollation( - sqlite3* db, - const char *zName, - u8 enc, - void* pCtx, - int(*xCompare)(void*,int,const void*,int,const void*), - void(*xDel)(void*) -){ - CollSeq *pColl; - int enc2; - - assert( sqlite3_mutex_held(db->mutex) ); +typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; +typedef struct sqlite3_tokenizer sqlite3_tokenizer; +typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; - /* If SQLITE_UTF16 is specified as the encoding type, transform this - ** to one of SQLITE_UTF16LE or SQLITE_UTF16BE using the - ** SQLITE_UTF16NATIVE macro. SQLITE_UTF16 is not used internally. +struct sqlite3_tokenizer_module { + + /* + ** Structure version. Should always be set to 0 or 1. */ - enc2 = enc; - testcase( enc2==SQLITE_UTF16 ); - testcase( enc2==SQLITE_UTF16_ALIGNED ); - if( enc2==SQLITE_UTF16 || enc2==SQLITE_UTF16_ALIGNED ){ - enc2 = SQLITE_UTF16NATIVE; - } - if( enc2SQLITE_UTF16BE ){ - return SQLITE_MISUSE_BKPT; - } + int iVersion; - /* Check if this call is removing or replacing an existing collation - ** sequence. If so, and there are active VMs, return busy. If there - ** are no active VMs, invalidate any pre-compiled statements. + /* + ** Create a new tokenizer. The values in the argv[] array are the + ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL + ** TABLE statement that created the fts3 table. For example, if + ** the following SQL is executed: + ** + ** CREATE .. USING fts3( ... , tokenizer arg1 arg2) + ** + ** then argc is set to 2, and the argv[] array contains pointers + ** to the strings "arg1" and "arg2". + ** + ** This method should return either SQLITE_OK (0), or an SQLite error + ** code. If SQLITE_OK is returned, then *ppTokenizer should be set + ** to point at the newly created tokenizer structure. The generic + ** sqlite3_tokenizer.pModule variable should not be initialized by + ** this callback. The caller will do so. */ - pColl = sqlite3FindCollSeq(db, (u8)enc2, zName, 0); - if( pColl && pColl->xCmp ){ - if( db->nVdbeActive ){ - sqlite3ErrorWithMsg(db, SQLITE_BUSY, - "unable to delete/modify collation sequence due to active statements"); - return SQLITE_BUSY; - } - sqlite3ExpirePreparedStatements(db); + int (*xCreate)( + int argc, /* Size of argv array */ + const char *const*argv, /* Tokenizer argument strings */ + sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ + ); - /* If collation sequence pColl was created directly by a call to - ** sqlite3_create_collation, and not generated by synthCollSeq(), - ** then any copies made by synthCollSeq() need to be invalidated. - ** Also, collation destructor - CollSeq.xDel() - function may need - ** to be called. - */ - if( (pColl->enc & ~SQLITE_UTF16_ALIGNED)==enc2 ){ - CollSeq *aColl = sqlite3HashFind(&db->aCollSeq, zName); - int j; - for(j=0; j<3; j++){ - CollSeq *p = &aColl[j]; - if( p->enc==pColl->enc ){ - if( p->xDel ){ - p->xDel(p->pUser); - } - p->xCmp = 0; - } - } - } - } + /* + ** Destroy an existing tokenizer. The fts3 module calls this method + ** exactly once for each successful call to xCreate(). + */ + int (*xDestroy)(sqlite3_tokenizer *pTokenizer); - pColl = sqlite3FindCollSeq(db, (u8)enc2, zName, 1); - if( pColl==0 ) return SQLITE_NOMEM; - pColl->xCmp = xCompare; - pColl->pUser = pCtx; - pColl->xDel = xDel; - pColl->enc = (u8)(enc2 | (enc & SQLITE_UTF16_ALIGNED)); - sqlite3Error(db, SQLITE_OK); - return SQLITE_OK; -} + /* + ** Create a tokenizer cursor to tokenize an input buffer. The caller + ** is responsible for ensuring that the input buffer remains valid + ** until the cursor is closed (using the xClose() method). + */ + int (*xOpen)( + sqlite3_tokenizer *pTokenizer, /* Tokenizer object */ + const char *pInput, int nBytes, /* Input buffer */ + sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */ + ); + /* + ** Destroy an existing tokenizer cursor. The fts3 module calls this + ** method exactly once for each successful call to xOpen(). + */ + int (*xClose)(sqlite3_tokenizer_cursor *pCursor); -/* -** This array defines hard upper bounds on limit values. The -** initializer must be kept in sync with the SQLITE_LIMIT_* -** #defines in sqlite3.h. -*/ -static const int aHardLimit[] = { - SQLITE_MAX_LENGTH, - SQLITE_MAX_SQL_LENGTH, - SQLITE_MAX_COLUMN, - SQLITE_MAX_EXPR_DEPTH, - SQLITE_MAX_COMPOUND_SELECT, - SQLITE_MAX_VDBE_OP, - SQLITE_MAX_FUNCTION_ARG, - SQLITE_MAX_ATTACHED, - SQLITE_MAX_LIKE_PATTERN_LENGTH, - SQLITE_MAX_VARIABLE_NUMBER, /* IMP: R-38091-32352 */ - SQLITE_MAX_TRIGGER_DEPTH, - SQLITE_MAX_WORKER_THREADS, -}; + /* + ** Retrieve the next token from the tokenizer cursor pCursor. This + ** method should either return SQLITE_OK and set the values of the + ** "OUT" variables identified below, or SQLITE_DONE to indicate that + ** the end of the buffer has been reached, or an SQLite error code. + ** + ** *ppToken should be set to point at a buffer containing the + ** normalized version of the token (i.e. after any case-folding and/or + ** stemming has been performed). *pnBytes should be set to the length + ** of this buffer in bytes. The input text that generated the token is + ** identified by the byte offsets returned in *piStartOffset and + ** *piEndOffset. *piStartOffset should be set to the index of the first + ** byte of the token in the input buffer. *piEndOffset should be set + ** to the index of the first byte just past the end of the token in + ** the input buffer. + ** + ** The buffer *ppToken is set to point at is managed by the tokenizer + ** implementation. It is only required to be valid until the next call + ** to xNext() or xClose(). + */ + /* TODO(shess) current implementation requires pInput to be + ** nul-terminated. This should either be fixed, or pInput/nBytes + ** should be converted to zInput. + */ + int (*xNext)( + sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */ + const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */ + int *piStartOffset, /* OUT: Byte offset of token in input buffer */ + int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */ + int *piPosition /* OUT: Number of tokens returned before this one */ + ); -/* -** Make sure the hard limits are set to reasonable values -*/ -#if SQLITE_MAX_LENGTH<100 -# error SQLITE_MAX_LENGTH must be at least 100 -#endif -#if SQLITE_MAX_SQL_LENGTH<100 -# error SQLITE_MAX_SQL_LENGTH must be at least 100 -#endif -#if SQLITE_MAX_SQL_LENGTH>SQLITE_MAX_LENGTH -# error SQLITE_MAX_SQL_LENGTH must not be greater than SQLITE_MAX_LENGTH -#endif -#if SQLITE_MAX_COMPOUND_SELECT<2 -# error SQLITE_MAX_COMPOUND_SELECT must be at least 2 -#endif -#if SQLITE_MAX_VDBE_OP<40 -# error SQLITE_MAX_VDBE_OP must be at least 40 -#endif -#if SQLITE_MAX_FUNCTION_ARG<0 || SQLITE_MAX_FUNCTION_ARG>1000 -# error SQLITE_MAX_FUNCTION_ARG must be between 0 and 1000 -#endif -#if SQLITE_MAX_ATTACHED<0 || SQLITE_MAX_ATTACHED>125 -# error SQLITE_MAX_ATTACHED must be between 0 and 125 -#endif -#if SQLITE_MAX_LIKE_PATTERN_LENGTH<1 -# error SQLITE_MAX_LIKE_PATTERN_LENGTH must be at least 1 -#endif -#if SQLITE_MAX_COLUMN>32767 -# error SQLITE_MAX_COLUMN must not exceed 32767 -#endif -#if SQLITE_MAX_TRIGGER_DEPTH<1 -# error SQLITE_MAX_TRIGGER_DEPTH must be at least 1 -#endif -#if SQLITE_MAX_WORKER_THREADS<0 || SQLITE_MAX_WORKER_THREADS>50 -# error SQLITE_MAX_WORKER_THREADS must be between 0 and 50 -#endif + /*********************************************************************** + ** Methods below this point are only available if iVersion>=1. + */ + /* + ** Configure the language id of a tokenizer cursor. + */ + int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid); +}; -/* -** Change the value of a limit. Report the old value. -** If an invalid limit index is supplied, report -1. -** Make no changes but still report the old value if the -** new limit is negative. -** -** A new lower limit does not shrink existing constructs. -** It merely prevents new constructs that exceed the limit -** from forming. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_limit(sqlite3 *db, int limitId, int newLimit){ - int oldLimit; +struct sqlite3_tokenizer { + const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */ + /* Tokenizer implementations will typically add additional fields */ +}; -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ){ - (void)SQLITE_MISUSE_BKPT; - return -1; - } -#endif +struct sqlite3_tokenizer_cursor { + sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */ + /* Tokenizer implementations will typically add additional fields */ +}; - /* EVIDENCE-OF: R-30189-54097 For each limit category SQLITE_LIMIT_NAME - ** there is a hard upper bound set at compile-time by a C preprocessor - ** macro called SQLITE_MAX_NAME. (The "_LIMIT_" in the name is changed to - ** "_MAX_".) - */ - assert( aHardLimit[SQLITE_LIMIT_LENGTH]==SQLITE_MAX_LENGTH ); - assert( aHardLimit[SQLITE_LIMIT_SQL_LENGTH]==SQLITE_MAX_SQL_LENGTH ); - assert( aHardLimit[SQLITE_LIMIT_COLUMN]==SQLITE_MAX_COLUMN ); - assert( aHardLimit[SQLITE_LIMIT_EXPR_DEPTH]==SQLITE_MAX_EXPR_DEPTH ); - assert( aHardLimit[SQLITE_LIMIT_COMPOUND_SELECT]==SQLITE_MAX_COMPOUND_SELECT); - assert( aHardLimit[SQLITE_LIMIT_VDBE_OP]==SQLITE_MAX_VDBE_OP ); - assert( aHardLimit[SQLITE_LIMIT_FUNCTION_ARG]==SQLITE_MAX_FUNCTION_ARG ); - assert( aHardLimit[SQLITE_LIMIT_ATTACHED]==SQLITE_MAX_ATTACHED ); - assert( aHardLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]== - SQLITE_MAX_LIKE_PATTERN_LENGTH ); - assert( aHardLimit[SQLITE_LIMIT_VARIABLE_NUMBER]==SQLITE_MAX_VARIABLE_NUMBER); - assert( aHardLimit[SQLITE_LIMIT_TRIGGER_DEPTH]==SQLITE_MAX_TRIGGER_DEPTH ); - assert( aHardLimit[SQLITE_LIMIT_WORKER_THREADS]==SQLITE_MAX_WORKER_THREADS ); - assert( SQLITE_LIMIT_WORKER_THREADS==(SQLITE_N_LIMIT-1) ); +int fts3_global_term_cnt(int iTerm, int iCol); +int fts3_term_cnt(int iTerm, int iCol); - if( limitId<0 || limitId>=SQLITE_N_LIMIT ){ - return -1; - } - oldLimit = db->aLimit[limitId]; - if( newLimit>=0 ){ /* IMP: R-52476-28732 */ - if( newLimit>aHardLimit[limitId] ){ - newLimit = aHardLimit[limitId]; /* IMP: R-51463-25634 */ - } - db->aLimit[limitId] = newLimit; - } - return oldLimit; /* IMP: R-53341-35419 */ -} +#endif /* _FTS3_TOKENIZER_H_ */ +/************** End of fts3_tokenizer.h **************************************/ +/************** Continuing where we left off in fts3Int.h ********************/ +/************** Include fts3_hash.h in the middle of fts3Int.h ***************/ +/************** Begin file fts3_hash.h ***************************************/ /* -** This function is used to parse both URIs and non-URI filenames passed by the -** user to API functions sqlite3_open() or sqlite3_open_v2(), and for database -** URIs specified as part of ATTACH statements. +** 2001 September 22 ** -** The first argument to this function is the name of the VFS to use (or -** a NULL to signify the default VFS) if the URI does not contain a "vfs=xxx" -** query parameter. The second argument contains the URI (or non-URI filename) -** itself. When this function is called the *pFlags variable should contain -** the default flags to open the database handle with. The value stored in -** *pFlags may be updated before returning if the URI filename contains -** "cache=xxx" or "mode=xxx" query parameters. +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: ** -** If successful, SQLITE_OK is returned. In this case *ppVfs is set to point to -** the VFS that should be used to open the database file. *pzFile is set to -** point to a buffer containing the name of the file to open. It is the -** responsibility of the caller to eventually call sqlite3_free() to release -** this buffer. +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This is the header file for the generic hash-table implementation +** used in SQLite. We've modified it slightly to serve as a standalone +** hash table implementation for the full-text indexing module. ** -** If an error occurs, then an SQLite error code is returned and *pzErrMsg -** may be set to point to a buffer containing an English language error -** message. It is the responsibility of the caller to eventually release -** this buffer by calling sqlite3_free(). */ -SQLITE_PRIVATE int sqlite3ParseUri( - const char *zDefaultVfs, /* VFS to use if no "vfs=xxx" query option */ - const char *zUri, /* Nul-terminated URI to parse */ - unsigned int *pFlags, /* IN/OUT: SQLITE_OPEN_XXX flags */ - sqlite3_vfs **ppVfs, /* OUT: VFS to use */ - char **pzFile, /* OUT: Filename component of URI */ - char **pzErrMsg /* OUT: Error message (if rc!=SQLITE_OK) */ -){ - int rc = SQLITE_OK; - unsigned int flags = *pFlags; - const char *zVfs = zDefaultVfs; - char *zFile; - char c; - int nUri = sqlite3Strlen30(zUri); - - assert( *pzErrMsg==0 ); - - if( ((flags & SQLITE_OPEN_URI) /* IMP: R-48725-32206 */ - || sqlite3GlobalConfig.bOpenUri) /* IMP: R-51689-46548 */ - && nUri>=5 && memcmp(zUri, "file:", 5)==0 /* IMP: R-57884-37496 */ - ){ - char *zOpt; - int eState; /* Parser state when parsing URI */ - int iIn; /* Input character index */ - int iOut = 0; /* Output character index */ - int nByte = nUri+2; /* Bytes of space to allocate */ - - /* Make sure the SQLITE_OPEN_URI flag is set to indicate to the VFS xOpen - ** method that there may be extra parameters following the file-name. */ - flags |= SQLITE_OPEN_URI; - - for(iIn=0; iIn=0 && octet<256 ); - if( octet==0 ){ - /* This branch is taken when "%00" appears within the URI. In this - ** case we ignore all text in the remainder of the path, name or - ** value currently being parsed. So ignore the current character - ** and skip to the next "?", "=" or "&", as appropriate. */ - while( (c = zUri[iIn])!=0 && c!='#' - && (eState!=0 || c!='?') - && (eState!=1 || (c!='=' && c!='&')) - && (eState!=2 || c!='&') - ){ - iIn++; - } - continue; - } - c = octet; - }else if( eState==1 && (c=='&' || c=='=') ){ - if( zFile[iOut-1]==0 ){ - /* An empty option name. Ignore this option altogether. */ - while( zUri[iIn] && zUri[iIn]!='#' && zUri[iIn-1]!='&' ) iIn++; - continue; - } - if( c=='&' ){ - zFile[iOut++] = '\0'; - }else{ - eState = 2; - } - c = 0; - }else if( (eState==0 && c=='?') || (eState==2 && c=='&') ){ - c = 0; - eState = 1; - } - zFile[iOut++] = c; - } - if( eState==1 ) zFile[iOut++] = '\0'; - zFile[iOut++] = '\0'; - zFile[iOut++] = '\0'; - - /* Check if there were any options specified that should be interpreted - ** here. Options that are interpreted here include "vfs" and those that - ** correspond to flags that may be passed to the sqlite3_open_v2() - ** method. */ - zOpt = &zFile[sqlite3Strlen30(zFile)+1]; - while( zOpt[0] ){ - int nOpt = sqlite3Strlen30(zOpt); - char *zVal = &zOpt[nOpt+1]; - int nVal = sqlite3Strlen30(zVal); - - if( nOpt==3 && memcmp("vfs", zOpt, 3)==0 ){ - zVfs = zVal; - }else{ - struct OpenMode { - const char *z; - int mode; - } *aMode = 0; - char *zModeType = 0; - int mask = 0; - int limit = 0; - - if( nOpt==5 && memcmp("cache", zOpt, 5)==0 ){ - static struct OpenMode aCacheMode[] = { - { "shared", SQLITE_OPEN_SHAREDCACHE }, - { "private", SQLITE_OPEN_PRIVATECACHE }, - { 0, 0 } - }; - - mask = SQLITE_OPEN_SHAREDCACHE|SQLITE_OPEN_PRIVATECACHE; - aMode = aCacheMode; - limit = mask; - zModeType = "cache"; - } - if( nOpt==4 && memcmp("mode", zOpt, 4)==0 ){ - static struct OpenMode aOpenMode[] = { - { "ro", SQLITE_OPEN_READONLY }, - { "rw", SQLITE_OPEN_READWRITE }, - { "rwc", SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE }, - { "memory", SQLITE_OPEN_MEMORY }, - { 0, 0 } - }; - - mask = SQLITE_OPEN_READONLY | SQLITE_OPEN_READWRITE - | SQLITE_OPEN_CREATE | SQLITE_OPEN_MEMORY; - aMode = aOpenMode; - limit = mask & flags; - zModeType = "access"; - } - - if( aMode ){ - int i; - int mode = 0; - for(i=0; aMode[i].z; i++){ - const char *z = aMode[i].z; - if( nVal==sqlite3Strlen30(z) && 0==memcmp(zVal, z, nVal) ){ - mode = aMode[i].mode; - break; - } - } - if( mode==0 ){ - *pzErrMsg = sqlite3_mprintf("no such %s mode: %s", zModeType, zVal); - rc = SQLITE_ERROR; - goto parse_uri_out; - } - if( (mode & ~SQLITE_OPEN_MEMORY)>limit ){ - *pzErrMsg = sqlite3_mprintf("%s mode not allowed: %s", - zModeType, zVal); - rc = SQLITE_PERM; - goto parse_uri_out; - } - flags = (flags & ~mask) | mode; - } - } - - zOpt = &zVal[nVal+1]; - } +#ifndef _FTS3_HASH_H_ +#define _FTS3_HASH_H_ - }else{ - zFile = sqlite3_malloc(nUri+2); - if( !zFile ) return SQLITE_NOMEM; - memcpy(zFile, zUri, nUri); - zFile[nUri] = '\0'; - zFile[nUri+1] = '\0'; - flags &= ~SQLITE_OPEN_URI; - } +/* Forward declarations of structures. */ +typedef struct Fts3Hash Fts3Hash; +typedef struct Fts3HashElem Fts3HashElem; - *ppVfs = sqlite3_vfs_find(zVfs); - if( *ppVfs==0 ){ - *pzErrMsg = sqlite3_mprintf("no such vfs: %s", zVfs); - rc = SQLITE_ERROR; - } - parse_uri_out: - if( rc!=SQLITE_OK ){ - sqlite3_free(zFile); - zFile = 0; - } - *pFlags = flags; - *pzFile = zFile; - return rc; -} +/* A complete hash table is an instance of the following structure. +** The internals of this structure are intended to be opaque -- client +** code should not attempt to access or modify the fields of this structure +** directly. Change this structure only by using the routines below. +** However, many of the "procedures" and "functions" for modifying and +** accessing this structure are really macros, so we can't really make +** this structure opaque. +*/ +struct Fts3Hash { + char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */ + char copyKey; /* True if copy of key made on insert */ + int count; /* Number of entries in this table */ + Fts3HashElem *first; /* The first element of the array */ + int htsize; /* Number of buckets in the hash table */ + struct _fts3ht { /* the hash table */ + int count; /* Number of entries with this hash */ + Fts3HashElem *chain; /* Pointer to first entry with this hash */ + } *ht; +}; +/* Each element in the hash table is an instance of the following +** structure. All elements are stored on a single doubly-linked list. +** +** Again, this structure is intended to be opaque, but it can't really +** be opaque because it is used by macros. +*/ +struct Fts3HashElem { + Fts3HashElem *next, *prev; /* Next and previous elements in the table */ + void *data; /* Data associated with this element */ + void *pKey; int nKey; /* Key associated with this element */ +}; /* -** This routine does the work of opening a database on behalf of -** sqlite3_open() and sqlite3_open16(). The database filename "zFilename" -** is UTF-8 encoded. +** There are 2 different modes of operation for a hash table: +** +** FTS3_HASH_STRING pKey points to a string that is nKey bytes long +** (including the null-terminator, if any). Case +** is respected in comparisons. +** +** FTS3_HASH_BINARY pKey points to binary data nKey bytes long. +** memcmp() is used to compare keys. +** +** A copy of the key is made if the copyKey parameter to fts3HashInit is 1. */ -static int openDatabase( - const char *zFilename, /* Database filename UTF-8 encoded */ - sqlite3 **ppDb, /* OUT: Returned database handle */ - unsigned int flags, /* Operational flags */ - const char *zVfs /* Name of the VFS to use */ -){ - sqlite3 *db; /* Store allocated handle here */ - int rc; /* Return code */ - int isThreadsafe; /* True for threadsafe connections */ - char *zOpen = 0; /* Filename argument to pass to BtreeOpen() */ - char *zErrMsg = 0; /* Error message from sqlite3ParseUri() */ - -#ifdef SQLITE_ENABLE_API_ARMOR - if( ppDb==0 ) return SQLITE_MISUSE_BKPT; -#endif - *ppDb = 0; -#ifndef SQLITE_OMIT_AUTOINIT - rc = sqlite3_initialize(); - if( rc ) return rc; -#endif - - /* Only allow sensible combinations of bits in the flags argument. - ** Throw an error if any non-sense combination is used. If we - ** do not block illegal combinations here, it could trigger - ** assert() statements in deeper layers. Sensible combinations - ** are: - ** - ** 1: SQLITE_OPEN_READONLY - ** 2: SQLITE_OPEN_READWRITE - ** 6: SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE - */ - assert( SQLITE_OPEN_READONLY == 0x01 ); - assert( SQLITE_OPEN_READWRITE == 0x02 ); - assert( SQLITE_OPEN_CREATE == 0x04 ); - testcase( (1<<(flags&7))==0x02 ); /* READONLY */ - testcase( (1<<(flags&7))==0x04 ); /* READWRITE */ - testcase( (1<<(flags&7))==0x40 ); /* READWRITE | CREATE */ - if( ((1<<(flags&7)) & 0x46)==0 ){ - return SQLITE_MISUSE_BKPT; /* IMP: R-65497-44594 */ - } - - if( sqlite3GlobalConfig.bCoreMutex==0 ){ - isThreadsafe = 0; - }else if( flags & SQLITE_OPEN_NOMUTEX ){ - isThreadsafe = 0; - }else if( flags & SQLITE_OPEN_FULLMUTEX ){ - isThreadsafe = 1; - }else{ - isThreadsafe = sqlite3GlobalConfig.bFullMutex; - } - if( flags & SQLITE_OPEN_PRIVATECACHE ){ - flags &= ~SQLITE_OPEN_SHAREDCACHE; - }else if( sqlite3GlobalConfig.sharedCacheEnabled ){ - flags |= SQLITE_OPEN_SHAREDCACHE; - } - - /* Remove harmful bits from the flags parameter - ** - ** The SQLITE_OPEN_NOMUTEX and SQLITE_OPEN_FULLMUTEX flags were - ** dealt with in the previous code block. Besides these, the only - ** valid input flags for sqlite3_open_v2() are SQLITE_OPEN_READONLY, - ** SQLITE_OPEN_READWRITE, SQLITE_OPEN_CREATE, SQLITE_OPEN_SHAREDCACHE, - ** SQLITE_OPEN_PRIVATECACHE, and some reserved bits. Silently mask - ** off all other flags. - */ - flags &= ~( SQLITE_OPEN_DELETEONCLOSE | - SQLITE_OPEN_EXCLUSIVE | - SQLITE_OPEN_MAIN_DB | - SQLITE_OPEN_TEMP_DB | - SQLITE_OPEN_TRANSIENT_DB | - SQLITE_OPEN_MAIN_JOURNAL | - SQLITE_OPEN_TEMP_JOURNAL | - SQLITE_OPEN_SUBJOURNAL | - SQLITE_OPEN_MASTER_JOURNAL | - SQLITE_OPEN_NOMUTEX | - SQLITE_OPEN_FULLMUTEX | - SQLITE_OPEN_WAL - ); - - /* Allocate the sqlite data structure */ - db = sqlite3MallocZero( sizeof(sqlite3) ); - if( db==0 ) goto opendb_out; - if( isThreadsafe ){ - db->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_RECURSIVE); - if( db->mutex==0 ){ - sqlite3_free(db); - db = 0; - goto opendb_out; - } - } - sqlite3_mutex_enter(db->mutex); - db->errMask = 0xff; - db->nDb = 2; - db->magic = SQLITE_MAGIC_BUSY; - db->aDb = db->aDbStatic; - - assert( sizeof(db->aLimit)==sizeof(aHardLimit) ); - memcpy(db->aLimit, aHardLimit, sizeof(db->aLimit)); - db->aLimit[SQLITE_LIMIT_WORKER_THREADS] = SQLITE_DEFAULT_WORKER_THREADS; - db->autoCommit = 1; - db->nextAutovac = -1; - db->szMmap = sqlite3GlobalConfig.szMmap; - db->nextPagesize = 0; - db->nMaxSorterMmap = 0x7FFFFFFF; - db->flags |= SQLITE_ShortColNames | SQLITE_EnableTrigger | SQLITE_CacheSpill -#if !defined(SQLITE_DEFAULT_AUTOMATIC_INDEX) || SQLITE_DEFAULT_AUTOMATIC_INDEX - | SQLITE_AutoIndex -#endif -#if SQLITE_DEFAULT_CKPTFULLFSYNC - | SQLITE_CkptFullFSync -#endif -#if SQLITE_DEFAULT_FILE_FORMAT<4 - | SQLITE_LegacyFileFmt -#endif -#ifdef SQLITE_ENABLE_LOAD_EXTENSION - | SQLITE_LoadExtension -#endif -#if SQLITE_DEFAULT_RECURSIVE_TRIGGERS - | SQLITE_RecTriggers -#endif -#if defined(SQLITE_DEFAULT_FOREIGN_KEYS) && SQLITE_DEFAULT_FOREIGN_KEYS - | SQLITE_ForeignKeys -#endif -#if defined(SQLITE_REVERSE_UNORDERED_SELECTS) - | SQLITE_ReverseOrder -#endif - ; - sqlite3HashInit(&db->aCollSeq); -#ifndef SQLITE_OMIT_VIRTUALTABLE - sqlite3HashInit(&db->aModule); -#endif - - /* Add the default collation sequence BINARY. BINARY works for both UTF-8 - ** and UTF-16, so add a version for each to avoid any unnecessary - ** conversions. The only error that can occur here is a malloc() failure. - ** - ** EVIDENCE-OF: R-52786-44878 SQLite defines three built-in collating - ** functions: - */ - createCollation(db, "BINARY", SQLITE_UTF8, 0, binCollFunc, 0); - createCollation(db, "BINARY", SQLITE_UTF16BE, 0, binCollFunc, 0); - createCollation(db, "BINARY", SQLITE_UTF16LE, 0, binCollFunc, 0); - createCollation(db, "NOCASE", SQLITE_UTF8, 0, nocaseCollatingFunc, 0); - createCollation(db, "RTRIM", SQLITE_UTF8, (void*)1, binCollFunc, 0); - if( db->mallocFailed ){ - goto opendb_out; - } - /* EVIDENCE-OF: R-08308-17224 The default collating function for all - ** strings is BINARY. - */ - db->pDfltColl = sqlite3FindCollSeq(db, SQLITE_UTF8, "BINARY", 0); - assert( db->pDfltColl!=0 ); - - /* Parse the filename/URI argument. */ - db->openFlags = flags; - rc = sqlite3ParseUri(zVfs, zFilename, &flags, &db->pVfs, &zOpen, &zErrMsg); - if( rc!=SQLITE_OK ){ - if( rc==SQLITE_NOMEM ) db->mallocFailed = 1; - sqlite3ErrorWithMsg(db, rc, zErrMsg ? "%s" : 0, zErrMsg); - sqlite3_free(zErrMsg); - goto opendb_out; - } - - /* Open the backend database driver */ - rc = sqlite3BtreeOpen(db->pVfs, zOpen, db, &db->aDb[0].pBt, 0, - flags | SQLITE_OPEN_MAIN_DB); - if( rc!=SQLITE_OK ){ - if( rc==SQLITE_IOERR_NOMEM ){ - rc = SQLITE_NOMEM; - } - sqlite3Error(db, rc); - goto opendb_out; - } - sqlite3BtreeEnter(db->aDb[0].pBt); - db->aDb[0].pSchema = sqlite3SchemaGet(db, db->aDb[0].pBt); - if( !db->mallocFailed ) ENC(db) = SCHEMA_ENC(db); - sqlite3BtreeLeave(db->aDb[0].pBt); - db->aDb[1].pSchema = sqlite3SchemaGet(db, 0); - - /* The default safety_level for the main database is 'full'; for the temp - ** database it is 'NONE'. This matches the pager layer defaults. - */ - db->aDb[0].zName = "main"; - db->aDb[0].safety_level = 3; - db->aDb[1].zName = "temp"; - db->aDb[1].safety_level = 1; - - db->magic = SQLITE_MAGIC_OPEN; - if( db->mallocFailed ){ - goto opendb_out; - } - - /* Register all built-in functions, but do not attempt to read the - ** database schema yet. This is delayed until the first time the database - ** is accessed. - */ - sqlite3Error(db, SQLITE_OK); - sqlite3RegisterBuiltinFunctions(db); +#define FTS3_HASH_STRING 1 +#define FTS3_HASH_BINARY 2 - /* Load automatic extensions - extensions that have been registered - ** using the sqlite3_automatic_extension() API. - */ - rc = sqlite3_errcode(db); - if( rc==SQLITE_OK ){ - sqlite3AutoLoadExtensions(db); - rc = sqlite3_errcode(db); - if( rc!=SQLITE_OK ){ - goto opendb_out; - } - } +/* +** Access routines. To delete, insert a NULL pointer. +*/ +SQLITE_PRIVATE void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey); +SQLITE_PRIVATE void *sqlite3Fts3HashInsert(Fts3Hash*, const void *pKey, int nKey, void *pData); +SQLITE_PRIVATE void *sqlite3Fts3HashFind(const Fts3Hash*, const void *pKey, int nKey); +SQLITE_PRIVATE void sqlite3Fts3HashClear(Fts3Hash*); +SQLITE_PRIVATE Fts3HashElem *sqlite3Fts3HashFindElem(const Fts3Hash *, const void *, int); -#ifdef SQLITE_ENABLE_FTS1 - if( !db->mallocFailed ){ - extern int sqlite3Fts1Init(sqlite3*); - rc = sqlite3Fts1Init(db); - } -#endif +/* +** Shorthand for the functions above +*/ +#define fts3HashInit sqlite3Fts3HashInit +#define fts3HashInsert sqlite3Fts3HashInsert +#define fts3HashFind sqlite3Fts3HashFind +#define fts3HashClear sqlite3Fts3HashClear +#define fts3HashFindElem sqlite3Fts3HashFindElem -#ifdef SQLITE_ENABLE_FTS2 - if( !db->mallocFailed && rc==SQLITE_OK ){ - extern int sqlite3Fts2Init(sqlite3*); - rc = sqlite3Fts2Init(db); - } -#endif +/* +** Macros for looping over all elements of a hash table. The idiom is +** like this: +** +** Fts3Hash h; +** Fts3HashElem *p; +** ... +** for(p=fts3HashFirst(&h); p; p=fts3HashNext(p)){ +** SomeStructure *pData = fts3HashData(p); +** // do something with pData +** } +*/ +#define fts3HashFirst(H) ((H)->first) +#define fts3HashNext(E) ((E)->next) +#define fts3HashData(E) ((E)->data) +#define fts3HashKey(E) ((E)->pKey) +#define fts3HashKeysize(E) ((E)->nKey) -#ifdef SQLITE_ENABLE_FTS3 - if( !db->mallocFailed && rc==SQLITE_OK ){ - rc = sqlite3Fts3Init(db); - } -#endif +/* +** Number of entries in a hash table +*/ +#define fts3HashCount(H) ((H)->count) -#ifdef SQLITE_ENABLE_ICU - if( !db->mallocFailed && rc==SQLITE_OK ){ - rc = sqlite3IcuInit(db); - } -#endif +#endif /* _FTS3_HASH_H_ */ -#ifdef SQLITE_ENABLE_RTREE - if( !db->mallocFailed && rc==SQLITE_OK){ - rc = sqlite3RtreeInit(db); - } -#endif +/************** End of fts3_hash.h *******************************************/ +/************** Continuing where we left off in fts3Int.h ********************/ - /* -DSQLITE_DEFAULT_LOCKING_MODE=1 makes EXCLUSIVE the default locking - ** mode. -DSQLITE_DEFAULT_LOCKING_MODE=0 make NORMAL the default locking - ** mode. Doing nothing at all also makes NORMAL the default. - */ -#ifdef SQLITE_DEFAULT_LOCKING_MODE - db->dfltLockMode = SQLITE_DEFAULT_LOCKING_MODE; - sqlite3PagerLockingMode(sqlite3BtreePager(db->aDb[0].pBt), - SQLITE_DEFAULT_LOCKING_MODE); +/* +** This constant determines the maximum depth of an FTS expression tree +** that the library will create and use. FTS uses recursion to perform +** various operations on the query tree, so the disadvantage of a large +** limit is that it may allow very large queries to use large amounts +** of stack space (perhaps causing a stack overflow). +*/ +#ifndef SQLITE_FTS3_MAX_EXPR_DEPTH +# define SQLITE_FTS3_MAX_EXPR_DEPTH 12 #endif - if( rc ) sqlite3Error(db, rc); - - /* Enable the lookaside-malloc subsystem */ - setupLookaside(db, 0, sqlite3GlobalConfig.szLookaside, - sqlite3GlobalConfig.nLookaside); - - sqlite3_wal_autocheckpoint(db, SQLITE_DEFAULT_WAL_AUTOCHECKPOINT); -opendb_out: - sqlite3_free(zOpen); - if( db ){ - assert( db->mutex!=0 || isThreadsafe==0 - || sqlite3GlobalConfig.bFullMutex==0 ); - sqlite3_mutex_leave(db->mutex); - } - rc = sqlite3_errcode(db); - assert( db!=0 || rc==SQLITE_NOMEM ); - if( rc==SQLITE_NOMEM ){ - sqlite3_close(db); - db = 0; - }else if( rc!=SQLITE_OK ){ - db->magic = SQLITE_MAGIC_SICK; - } - *ppDb = db; -#ifdef SQLITE_ENABLE_SQLLOG - if( sqlite3GlobalConfig.xSqllog ){ - /* Opening a db handle. Fourth parameter is passed 0. */ - void *pArg = sqlite3GlobalConfig.pSqllogArg; - sqlite3GlobalConfig.xSqllog(pArg, db, zFilename, 0); - } -#endif - return sqlite3ApiExit(0, rc); -} +/* +** This constant controls how often segments are merged. Once there are +** FTS3_MERGE_COUNT segments of level N, they are merged into a single +** segment of level N+1. +*/ +#define FTS3_MERGE_COUNT 16 /* -** Open a new database handle. +** This is the maximum amount of data (in bytes) to store in the +** Fts3Table.pendingTerms hash table. Normally, the hash table is +** populated as documents are inserted/updated/deleted in a transaction +** and used to create a new segment when the transaction is committed. +** However if this limit is reached midway through a transaction, a new +** segment is created and the hash table cleared immediately. */ -SQLITE_API int SQLITE_STDCALL sqlite3_open( - const char *zFilename, - sqlite3 **ppDb -){ - return openDatabase(zFilename, ppDb, - SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, 0); -} -SQLITE_API int SQLITE_STDCALL sqlite3_open_v2( - const char *filename, /* Database filename (UTF-8) */ - sqlite3 **ppDb, /* OUT: SQLite db handle */ - int flags, /* Flags */ - const char *zVfs /* Name of VFS module to use */ -){ - return openDatabase(filename, ppDb, (unsigned int)flags, zVfs); -} +#define FTS3_MAX_PENDING_DATA (1*1024*1024) -#ifndef SQLITE_OMIT_UTF16 /* -** Open a new database handle. +** Macro to return the number of elements in an array. SQLite has a +** similar macro called ArraySize(). Use a different name to avoid +** a collision when building an amalgamation with built-in FTS3. */ -SQLITE_API int SQLITE_STDCALL sqlite3_open16( - const void *zFilename, - sqlite3 **ppDb -){ - char const *zFilename8; /* zFilename encoded in UTF-8 instead of UTF-16 */ - sqlite3_value *pVal; - int rc; +#define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0]))) -#ifdef SQLITE_ENABLE_API_ARMOR - if( ppDb==0 ) return SQLITE_MISUSE_BKPT; + +#ifndef MIN +# define MIN(x,y) ((x)<(y)?(x):(y)) #endif - *ppDb = 0; -#ifndef SQLITE_OMIT_AUTOINIT - rc = sqlite3_initialize(); - if( rc ) return rc; +#ifndef MAX +# define MAX(x,y) ((x)>(y)?(x):(y)) #endif - if( zFilename==0 ) zFilename = "\000\000"; - pVal = sqlite3ValueNew(0); - sqlite3ValueSetStr(pVal, -1, zFilename, SQLITE_UTF16NATIVE, SQLITE_STATIC); - zFilename8 = sqlite3ValueText(pVal, SQLITE_UTF8); - if( zFilename8 ){ - rc = openDatabase(zFilename8, ppDb, - SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, 0); - assert( *ppDb || rc==SQLITE_NOMEM ); - if( rc==SQLITE_OK && !DbHasProperty(*ppDb, 0, DB_SchemaLoaded) ){ - SCHEMA_ENC(*ppDb) = ENC(*ppDb) = SQLITE_UTF16NATIVE; - } - }else{ - rc = SQLITE_NOMEM; - } - sqlite3ValueFree(pVal); - - return sqlite3ApiExit(0, rc); -} -#endif /* SQLITE_OMIT_UTF16 */ /* -** Register a new collation sequence with the database handle db. +** Maximum length of a varint encoded integer. The varint format is different +** from that used by SQLite, so the maximum length is 10, not 9. */ -SQLITE_API int SQLITE_STDCALL sqlite3_create_collation( - sqlite3* db, - const char *zName, - int enc, - void* pCtx, - int(*xCompare)(void*,int,const void*,int,const void*) -){ - return sqlite3_create_collation_v2(db, zName, enc, pCtx, xCompare, 0); -} +#define FTS3_VARINT_MAX 10 /* -** Register a new collation sequence with the database handle db. +** FTS4 virtual tables may maintain multiple indexes - one index of all terms +** in the document set and zero or more prefix indexes. All indexes are stored +** as one or more b+-trees in the %_segments and %_segdir tables. +** +** It is possible to determine which index a b+-tree belongs to based on the +** value stored in the "%_segdir.level" column. Given this value L, the index +** that the b+-tree belongs to is (L<<10). In other words, all b+-trees with +** level values between 0 and 1023 (inclusive) belong to index 0, all levels +** between 1024 and 2047 to index 1, and so on. +** +** It is considered impossible for an index to use more than 1024 levels. In +** theory though this may happen, but only after at least +** (FTS3_MERGE_COUNT^1024) separate flushes of the pending-terms tables. */ -SQLITE_API int SQLITE_STDCALL sqlite3_create_collation_v2( - sqlite3* db, - const char *zName, - int enc, - void* pCtx, - int(*xCompare)(void*,int,const void*,int,const void*), - void(*xDel)(void*) -){ - int rc; - -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) || zName==0 ) return SQLITE_MISUSE_BKPT; -#endif - sqlite3_mutex_enter(db->mutex); - assert( !db->mallocFailed ); - rc = createCollation(db, zName, (u8)enc, pCtx, xCompare, xDel); - rc = sqlite3ApiExit(db, rc); - sqlite3_mutex_leave(db->mutex); - return rc; -} +#define FTS3_SEGDIR_MAXLEVEL 1024 +#define FTS3_SEGDIR_MAXLEVEL_STR "1024" -#ifndef SQLITE_OMIT_UTF16 /* -** Register a new collation sequence with the database handle db. +** The testcase() macro is only used by the amalgamation. If undefined, +** make it a no-op. */ -SQLITE_API int SQLITE_STDCALL sqlite3_create_collation16( - sqlite3* db, - const void *zName, - int enc, - void* pCtx, - int(*xCompare)(void*,int,const void*,int,const void*) -){ - int rc = SQLITE_OK; - char *zName8; - -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) || zName==0 ) return SQLITE_MISUSE_BKPT; +#ifndef testcase +# define testcase(X) #endif - sqlite3_mutex_enter(db->mutex); - assert( !db->mallocFailed ); - zName8 = sqlite3Utf16to8(db, zName, -1, SQLITE_UTF16NATIVE); - if( zName8 ){ - rc = createCollation(db, zName8, (u8)enc, pCtx, xCompare, 0); - sqlite3DbFree(db, zName8); - } - rc = sqlite3ApiExit(db, rc); - sqlite3_mutex_leave(db->mutex); - return rc; -} -#endif /* SQLITE_OMIT_UTF16 */ /* -** Register a collation sequence factory callback with the database handle -** db. Replace any previously installed collation sequence factory. +** Terminator values for position-lists and column-lists. */ -SQLITE_API int SQLITE_STDCALL sqlite3_collation_needed( - sqlite3 *db, - void *pCollNeededArg, - void(*xCollNeeded)(void*,sqlite3*,int eTextRep,const char*) -){ -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; -#endif - sqlite3_mutex_enter(db->mutex); - db->xCollNeeded = xCollNeeded; - db->xCollNeeded16 = 0; - db->pCollNeededArg = pCollNeededArg; - sqlite3_mutex_leave(db->mutex); - return SQLITE_OK; -} +#define POS_COLUMN (1) /* Column-list terminator */ +#define POS_END (0) /* Position-list terminator */ -#ifndef SQLITE_OMIT_UTF16 /* -** Register a collation sequence factory callback with the database handle -** db. Replace any previously installed collation sequence factory. +** This section provides definitions to allow the +** FTS3 extension to be compiled outside of the +** amalgamation. */ -SQLITE_API int SQLITE_STDCALL sqlite3_collation_needed16( - sqlite3 *db, - void *pCollNeededArg, - void(*xCollNeeded16)(void*,sqlite3*,int eTextRep,const void*) -){ -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; -#endif - sqlite3_mutex_enter(db->mutex); - db->xCollNeeded = 0; - db->xCollNeeded16 = xCollNeeded16; - db->pCollNeededArg = pCollNeededArg; - sqlite3_mutex_leave(db->mutex); - return SQLITE_OK; -} -#endif /* SQLITE_OMIT_UTF16 */ - -#ifndef SQLITE_OMIT_DEPRECATED +#ifndef SQLITE_AMALGAMATION /* -** This function is now an anachronism. It used to be used to recover from a -** malloc() failure, but SQLite now does this automatically. +** Macros indicating that conditional expressions are always true or +** false. */ -SQLITE_API int SQLITE_STDCALL sqlite3_global_recover(void){ - return SQLITE_OK; -} +#ifdef SQLITE_COVERAGE_TEST +# define ALWAYS(x) (1) +# define NEVER(X) (0) +#elif defined(SQLITE_DEBUG) +# define ALWAYS(x) sqlite3Fts3Always((x)!=0) +# define NEVER(x) sqlite3Fts3Never((x)!=0) +SQLITE_PRIVATE int sqlite3Fts3Always(int b); +SQLITE_PRIVATE int sqlite3Fts3Never(int b); +#else +# define ALWAYS(x) (x) +# define NEVER(x) (x) #endif /* -** Test to see whether or not the database connection is in autocommit -** mode. Return TRUE if it is and FALSE if not. Autocommit mode is on -** by default. Autocommit is disabled by a BEGIN statement and reenabled -** by the next COMMIT or ROLLBACK. +** Internal types used by SQLite. */ -SQLITE_API int SQLITE_STDCALL sqlite3_get_autocommit(sqlite3 *db){ -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ){ - (void)SQLITE_MISUSE_BKPT; - return 0; - } -#endif - return db->autoCommit; -} +typedef unsigned char u8; /* 1-byte (or larger) unsigned integer */ +typedef short int i16; /* 2-byte (or larger) signed integer */ +typedef unsigned int u32; /* 4-byte unsigned integer */ +typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */ +typedef sqlite3_int64 i64; /* 8-byte signed integer */ /* -** The following routines are substitutes for constants SQLITE_CORRUPT, -** SQLITE_MISUSE, SQLITE_CANTOPEN, SQLITE_IOERR and possibly other error -** constants. They serve two purposes: -** -** 1. Serve as a convenient place to set a breakpoint in a debugger -** to detect when version error conditions occurs. -** -** 2. Invoke sqlite3_log() to provide the source code location where -** a low-level error is first detected. +** Macro used to suppress compiler warnings for unused parameters. */ -SQLITE_PRIVATE int sqlite3CorruptError(int lineno){ - testcase( sqlite3GlobalConfig.xLog!=0 ); - sqlite3_log(SQLITE_CORRUPT, - "database corruption at line %d of [%.10s]", - lineno, 20+sqlite3_sourceid()); - return SQLITE_CORRUPT; -} -SQLITE_PRIVATE int sqlite3MisuseError(int lineno){ - testcase( sqlite3GlobalConfig.xLog!=0 ); - sqlite3_log(SQLITE_MISUSE, - "misuse at line %d of [%.10s]", - lineno, 20+sqlite3_sourceid()); - return SQLITE_MISUSE; -} -SQLITE_PRIVATE int sqlite3CantopenError(int lineno){ - testcase( sqlite3GlobalConfig.xLog!=0 ); - sqlite3_log(SQLITE_CANTOPEN, - "cannot open file at line %d of [%.10s]", - lineno, 20+sqlite3_sourceid()); - return SQLITE_CANTOPEN; -} - +#define UNUSED_PARAMETER(x) (void)(x) -#ifndef SQLITE_OMIT_DEPRECATED /* -** This is a convenience routine that makes sure that all thread-specific -** data for this thread has been deallocated. -** -** SQLite no longer uses thread-specific data so this routine is now a -** no-op. It is retained for historical compatibility. +** Activate assert() only if SQLITE_TEST is enabled. */ -SQLITE_API void SQLITE_STDCALL sqlite3_thread_cleanup(void){ -} +#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) +# define NDEBUG 1 #endif /* -** Return meta information about a specific column of a database table. -** See comment in sqlite3.h (sqlite.h.in) for details. +** The TESTONLY macro is used to enclose variable declarations or +** other bits of code that are needed to support the arguments +** within testcase() and assert() macros. */ -SQLITE_API int SQLITE_STDCALL sqlite3_table_column_metadata( - sqlite3 *db, /* Connection handle */ - const char *zDbName, /* Database name or NULL */ - const char *zTableName, /* Table name */ - const char *zColumnName, /* Column name */ - char const **pzDataType, /* OUTPUT: Declared data type */ - char const **pzCollSeq, /* OUTPUT: Collation sequence name */ - int *pNotNull, /* OUTPUT: True if NOT NULL constraint exists */ - int *pPrimaryKey, /* OUTPUT: True if column part of PK */ - int *pAutoinc /* OUTPUT: True if column is auto-increment */ -){ - int rc; - char *zErrMsg = 0; - Table *pTab = 0; - Column *pCol = 0; - int iCol = 0; - char const *zDataType = 0; - char const *zCollSeq = 0; - int notnull = 0; - int primarykey = 0; - int autoinc = 0; +#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) +# define TESTONLY(X) X +#else +# define TESTONLY(X) +#endif +#endif /* SQLITE_AMALGAMATION */ -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) || zTableName==0 ){ - return SQLITE_MISUSE_BKPT; - } +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3Fts3Corrupt(void); +# define FTS_CORRUPT_VTAB sqlite3Fts3Corrupt() +#else +# define FTS_CORRUPT_VTAB SQLITE_CORRUPT_VTAB #endif - /* Ensure the database schema has been loaded */ - sqlite3_mutex_enter(db->mutex); - sqlite3BtreeEnterAll(db); - rc = sqlite3Init(db, &zErrMsg); - if( SQLITE_OK!=rc ){ - goto error_out; - } +typedef struct Fts3Table Fts3Table; +typedef struct Fts3Cursor Fts3Cursor; +typedef struct Fts3Expr Fts3Expr; +typedef struct Fts3Phrase Fts3Phrase; +typedef struct Fts3PhraseToken Fts3PhraseToken; - /* Locate the table in question */ - pTab = sqlite3FindTable(db, zTableName, zDbName); - if( !pTab || pTab->pSelect ){ - pTab = 0; - goto error_out; - } +typedef struct Fts3Doclist Fts3Doclist; +typedef struct Fts3SegFilter Fts3SegFilter; +typedef struct Fts3DeferredToken Fts3DeferredToken; +typedef struct Fts3SegReader Fts3SegReader; +typedef struct Fts3MultiSegReader Fts3MultiSegReader; - /* Find the column for which info is requested */ - if( zColumnName==0 ){ - /* Query for existance of table only */ - }else{ - for(iCol=0; iColnCol; iCol++){ - pCol = &pTab->aCol[iCol]; - if( 0==sqlite3StrICmp(pCol->zName, zColumnName) ){ - break; - } - } - if( iCol==pTab->nCol ){ - if( HasRowid(pTab) && sqlite3IsRowid(zColumnName) ){ - iCol = pTab->iPKey; - pCol = iCol>=0 ? &pTab->aCol[iCol] : 0; - }else{ - pTab = 0; - goto error_out; - } - } - } +typedef struct MatchinfoBuffer MatchinfoBuffer; - /* The following block stores the meta information that will be returned - ** to the caller in local variables zDataType, zCollSeq, notnull, primarykey - ** and autoinc. At this point there are two possibilities: - ** - ** 1. The specified column name was rowid", "oid" or "_rowid_" - ** and there is no explicitly declared IPK column. - ** - ** 2. The table is not a view and the column name identified an - ** explicitly declared column. Copy meta information from *pCol. - */ - if( pCol ){ - zDataType = pCol->zType; - zCollSeq = pCol->zColl; - notnull = pCol->notNull!=0; - primarykey = (pCol->colFlags & COLFLAG_PRIMKEY)!=0; - autoinc = pTab->iPKey==iCol && (pTab->tabFlags & TF_Autoincrement)!=0; - }else{ - zDataType = "INTEGER"; - primarykey = 1; - } - if( !zCollSeq ){ - zCollSeq = "BINARY"; - } +/* +** A connection to a fulltext index is an instance of the following +** structure. The xCreate and xConnect methods create an instance +** of this structure and xDestroy and xDisconnect free that instance. +** All other methods receive a pointer to the structure as one of their +** arguments. +*/ +struct Fts3Table { + sqlite3_vtab base; /* Base class used by SQLite core */ + sqlite3 *db; /* The database connection */ + const char *zDb; /* logical database name */ + const char *zName; /* virtual table name */ + int nColumn; /* number of named columns in virtual table */ + char **azColumn; /* column names. malloced */ + u8 *abNotindexed; /* True for 'notindexed' columns */ + sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ + char *zContentTbl; /* content=xxx option, or NULL */ + char *zLanguageid; /* languageid=xxx option, or NULL */ + int nAutoincrmerge; /* Value configured by 'automerge' */ + u32 nLeafAdd; /* Number of leaf blocks added this trans */ -error_out: - sqlite3BtreeLeaveAll(db); + /* Precompiled statements used by the implementation. Each of these + ** statements is run and reset within a single virtual table API call. + */ + sqlite3_stmt *aStmt[40]; - /* Whether the function call succeeded or failed, set the output parameters - ** to whatever their local counterparts contain. If an error did occur, - ** this has the effect of zeroing all output parameters. + char *zReadExprlist; + char *zWriteExprlist; + + int nNodeSize; /* Soft limit for node size */ + u8 bFts4; /* True for FTS4, false for FTS3 */ + u8 bHasStat; /* True if %_stat table exists (2==unknown) */ + u8 bHasDocsize; /* True if %_docsize table exists */ + u8 bDescIdx; /* True if doclists are in reverse order */ + u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */ + int nPgsz; /* Page size for host database */ + char *zSegmentsTbl; /* Name of %_segments table */ + sqlite3_blob *pSegments; /* Blob handle open on %_segments table */ + + /* + ** The following array of hash tables is used to buffer pending index + ** updates during transactions. All pending updates buffered at any one + ** time must share a common language-id (see the FTS4 langid= feature). + ** The current language id is stored in variable iPrevLangid. + ** + ** A single FTS4 table may have multiple full-text indexes. For each index + ** there is an entry in the aIndex[] array. Index 0 is an index of all the + ** terms that appear in the document set. Each subsequent index in aIndex[] + ** is an index of prefixes of a specific length. + ** + ** Variable nPendingData contains an estimate the memory consumed by the + ** pending data structures, including hash table overhead, but not including + ** malloc overhead. When nPendingData exceeds nMaxPendingData, all hash + ** tables are flushed to disk. Variable iPrevDocid is the docid of the most + ** recently inserted record. */ - if( pzDataType ) *pzDataType = zDataType; - if( pzCollSeq ) *pzCollSeq = zCollSeq; - if( pNotNull ) *pNotNull = notnull; - if( pPrimaryKey ) *pPrimaryKey = primarykey; - if( pAutoinc ) *pAutoinc = autoinc; + int nIndex; /* Size of aIndex[] */ + struct Fts3Index { + int nPrefix; /* Prefix length (0 for main terms index) */ + Fts3Hash hPending; /* Pending terms table for this index */ + } *aIndex; + int nMaxPendingData; /* Max pending data before flush to disk */ + int nPendingData; /* Current bytes of pending data */ + sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */ + int iPrevLangid; /* Langid of recently inserted document */ - if( SQLITE_OK==rc && !pTab ){ - sqlite3DbFree(db, zErrMsg); - zErrMsg = sqlite3MPrintf(db, "no such table column: %s.%s", zTableName, - zColumnName); - rc = SQLITE_ERROR; - } - sqlite3ErrorWithMsg(db, rc, (zErrMsg?"%s":0), zErrMsg); - sqlite3DbFree(db, zErrMsg); - rc = sqlite3ApiExit(db, rc); - sqlite3_mutex_leave(db->mutex); - return rc; -} +#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) + /* State variables used for validating that the transaction control + ** methods of the virtual table are called at appropriate times. These + ** values do not contribute to FTS functionality; they are used for + ** verifying the operation of the SQLite core. + */ + int inTransaction; /* True after xBegin but before xCommit/xRollback */ + int mxSavepoint; /* Largest valid xSavepoint integer */ +#endif + +#ifdef SQLITE_TEST + /* True to disable the incremental doclist optimization. This is controled + ** by special insert command 'test-no-incr-doclist'. */ + int bNoIncrDoclist; +#endif +}; /* -** Sleep for a little while. Return the amount of time slept. +** When the core wants to read from the virtual table, it creates a +** virtual table cursor (an instance of the following structure) using +** the xOpen method. Cursors are destroyed using the xClose method. */ -SQLITE_API int SQLITE_STDCALL sqlite3_sleep(int ms){ - sqlite3_vfs *pVfs; - int rc; - pVfs = sqlite3_vfs_find(0); - if( pVfs==0 ) return 0; +struct Fts3Cursor { + sqlite3_vtab_cursor base; /* Base class used by SQLite core */ + i16 eSearch; /* Search strategy (see below) */ + u8 isEof; /* True if at End Of Results */ + u8 isRequireSeek; /* True if must seek pStmt to %_content row */ + sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ + Fts3Expr *pExpr; /* Parsed MATCH query string */ + int iLangid; /* Language being queried for */ + int nPhrase; /* Number of matchable phrases in query */ + Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ + sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ + char *pNextId; /* Pointer into the body of aDoclist */ + char *aDoclist; /* List of docids for full-text queries */ + int nDoclist; /* Size of buffer at aDoclist */ + u8 bDesc; /* True to sort in descending order */ + int eEvalmode; /* An FTS3_EVAL_XX constant */ + int nRowAvg; /* Average size of database rows, in pages */ + sqlite3_int64 nDoc; /* Documents in table */ + i64 iMinDocid; /* Minimum docid to return */ + i64 iMaxDocid; /* Maximum docid to return */ + int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ + MatchinfoBuffer *pMIBuffer; /* Buffer for matchinfo data */ +}; - /* This function works in milliseconds, but the underlying OsSleep() - ** API uses microseconds. Hence the 1000's. - */ - rc = (sqlite3OsSleep(pVfs, 1000*ms)/1000); - return rc; -} +#define FTS3_EVAL_FILTER 0 +#define FTS3_EVAL_NEXT 1 +#define FTS3_EVAL_MATCHINFO 2 /* -** Enable or disable the extended result codes. +** The Fts3Cursor.eSearch member is always set to one of the following. +** Actualy, Fts3Cursor.eSearch can be greater than or equal to +** FTS3_FULLTEXT_SEARCH. If so, then Fts3Cursor.eSearch - 2 is the index +** of the column to be searched. For example, in +** +** CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d); +** SELECT docid FROM ex1 WHERE b MATCH 'one two three'; +** +** Because the LHS of the MATCH operator is 2nd column "b", +** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1. (+0 for a, +** +1 for b, +2 for c, +3 for d.) If the LHS of MATCH were "ex1" +** indicating that all columns should be searched, +** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4. */ -SQLITE_API int SQLITE_STDCALL sqlite3_extended_result_codes(sqlite3 *db, int onoff){ -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; -#endif - sqlite3_mutex_enter(db->mutex); - db->errMask = onoff ? 0xffffffff : 0xff; - sqlite3_mutex_leave(db->mutex); - return SQLITE_OK; -} +#define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */ +#define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */ +#define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */ /* -** Invoke the xFileControl method on a particular database. +** The lower 16-bits of the sqlite3_index_info.idxNum value set by +** the xBestIndex() method contains the Fts3Cursor.eSearch value described +** above. The upper 16-bits contain a combination of the following +** bits, used to describe extra constraints on full-text searches. */ -SQLITE_API int SQLITE_STDCALL sqlite3_file_control(sqlite3 *db, const char *zDbName, int op, void *pArg){ - int rc = SQLITE_ERROR; - Btree *pBtree; +#define FTS3_HAVE_LANGID 0x00010000 /* languageid=? */ +#define FTS3_HAVE_DOCID_GE 0x00020000 /* docid>=? */ +#define FTS3_HAVE_DOCID_LE 0x00040000 /* docid<=? */ -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; -#endif - sqlite3_mutex_enter(db->mutex); - pBtree = sqlite3DbNameToBtree(db, zDbName); - if( pBtree ){ - Pager *pPager; - sqlite3_file *fd; - sqlite3BtreeEnter(pBtree); - pPager = sqlite3BtreePager(pBtree); - assert( pPager!=0 ); - fd = sqlite3PagerFile(pPager); - assert( fd!=0 ); - if( op==SQLITE_FCNTL_FILE_POINTER ){ - *(sqlite3_file**)pArg = fd; - rc = SQLITE_OK; - }else if( fd->pMethods ){ - rc = sqlite3OsFileControl(fd, op, pArg); - }else{ - rc = SQLITE_NOTFOUND; - } - sqlite3BtreeLeave(pBtree); - } - sqlite3_mutex_leave(db->mutex); - return rc; -} +struct Fts3Doclist { + char *aAll; /* Array containing doclist (or NULL) */ + int nAll; /* Size of a[] in bytes */ + char *pNextDocid; /* Pointer to next docid */ + + sqlite3_int64 iDocid; /* Current docid (if pList!=0) */ + int bFreeList; /* True if pList should be sqlite3_free()d */ + char *pList; /* Pointer to position list following iDocid */ + int nList; /* Length of position list */ +}; /* -** Interface to the testing logic. +** A "phrase" is a sequence of one or more tokens that must match in +** sequence. A single token is the base case and the most common case. +** For a sequence of tokens contained in double-quotes (i.e. "one two three") +** nToken will be the number of tokens in the string. */ -SQLITE_API int SQLITE_CDECL sqlite3_test_control(int op, ...){ - int rc = 0; -#ifndef SQLITE_OMIT_BUILTIN_TEST - va_list ap; - va_start(ap, op); - switch( op ){ +struct Fts3PhraseToken { + char *z; /* Text of the token */ + int n; /* Number of bytes in buffer z */ + int isPrefix; /* True if token ends with a "*" character */ + int bFirst; /* True if token must appear at position 0 */ - /* - ** Save the current state of the PRNG. - */ - case SQLITE_TESTCTRL_PRNG_SAVE: { - sqlite3PrngSaveState(); - break; - } + /* Variables above this point are populated when the expression is + ** parsed (by code in fts3_expr.c). Below this point the variables are + ** used when evaluating the expression. */ + Fts3DeferredToken *pDeferred; /* Deferred token object for this token */ + Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */ +}; - /* - ** Restore the state of the PRNG to the last state saved using - ** PRNG_SAVE. If PRNG_SAVE has never before been called, then - ** this verb acts like PRNG_RESET. - */ - case SQLITE_TESTCTRL_PRNG_RESTORE: { - sqlite3PrngRestoreState(); - break; - } +struct Fts3Phrase { + /* Cache of doclist for this phrase. */ + Fts3Doclist doclist; + int bIncr; /* True if doclist is loaded incrementally */ + int iDoclistToken; - /* - ** Reset the PRNG back to its uninitialized state. The next call - ** to sqlite3_randomness() will reseed the PRNG using a single call - ** to the xRandomness method of the default VFS. - */ - case SQLITE_TESTCTRL_PRNG_RESET: { - sqlite3_randomness(0,0); - break; - } + /* Used by sqlite3Fts3EvalPhrasePoslist() if this is a descendent of an + ** OR condition. */ + char *pOrPoslist; + i64 iOrDocid; - /* - ** sqlite3_test_control(BITVEC_TEST, size, program) - ** - ** Run a test against a Bitvec object of size. The program argument - ** is an array of integers that defines the test. Return -1 on a - ** memory allocation error, 0 on success, or non-zero for an error. - ** See the sqlite3BitvecBuiltinTest() for additional information. - */ - case SQLITE_TESTCTRL_BITVEC_TEST: { - int sz = va_arg(ap, int); - int *aProg = va_arg(ap, int*); - rc = sqlite3BitvecBuiltinTest(sz, aProg); - break; - } + /* Variables below this point are populated by fts3_expr.c when parsing + ** a MATCH expression. Everything above is part of the evaluation phase. + */ + int nToken; /* Number of tokens in the phrase */ + int iColumn; /* Index of column this phrase must match */ + Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */ +}; - /* - ** sqlite3_test_control(FAULT_INSTALL, xCallback) - ** - ** Arrange to invoke xCallback() whenever sqlite3FaultSim() is called, - ** if xCallback is not NULL. - ** - ** As a test of the fault simulator mechanism itself, sqlite3FaultSim(0) - ** is called immediately after installing the new callback and the return - ** value from sqlite3FaultSim(0) becomes the return from - ** sqlite3_test_control(). - */ - case SQLITE_TESTCTRL_FAULT_INSTALL: { - /* MSVC is picky about pulling func ptrs from va lists. - ** http://support.microsoft.com/kb/47961 - ** sqlite3GlobalConfig.xTestCallback = va_arg(ap, int(*)(int)); - */ - typedef int(*TESTCALLBACKFUNC_t)(int); - sqlite3GlobalConfig.xTestCallback = va_arg(ap, TESTCALLBACKFUNC_t); - rc = sqlite3FaultSim(0); - break; - } +/* +** A tree of these objects forms the RHS of a MATCH operator. +** +** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist +** points to a malloced buffer, size nDoclist bytes, containing the results +** of this phrase query in FTS3 doclist format. As usual, the initial +** "Length" field found in doclists stored on disk is omitted from this +** buffer. +** +** Variable aMI is used only for FTSQUERY_NEAR nodes to store the global +** matchinfo data. If it is not NULL, it points to an array of size nCol*3, +** where nCol is the number of columns in the queried FTS table. The array +** is populated as follows: +** +** aMI[iCol*3 + 0] = Undefined +** aMI[iCol*3 + 1] = Number of occurrences +** aMI[iCol*3 + 2] = Number of rows containing at least one instance +** +** The aMI array is allocated using sqlite3_malloc(). It should be freed +** when the expression node is. +*/ +struct Fts3Expr { + int eType; /* One of the FTSQUERY_XXX values defined below */ + int nNear; /* Valid if eType==FTSQUERY_NEAR */ + Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */ + Fts3Expr *pLeft; /* Left operand */ + Fts3Expr *pRight; /* Right operand */ + Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ - /* - ** sqlite3_test_control(BENIGN_MALLOC_HOOKS, xBegin, xEnd) - ** - ** Register hooks to call to indicate which malloc() failures - ** are benign. - */ - case SQLITE_TESTCTRL_BENIGN_MALLOC_HOOKS: { - typedef void (*void_function)(void); - void_function xBenignBegin; - void_function xBenignEnd; - xBenignBegin = va_arg(ap, void_function); - xBenignEnd = va_arg(ap, void_function); - sqlite3BenignMallocHooks(xBenignBegin, xBenignEnd); - break; - } + /* The following are used by the fts3_eval.c module. */ + sqlite3_int64 iDocid; /* Current docid */ + u8 bEof; /* True this expression is at EOF already */ + u8 bStart; /* True if iDocid is valid */ + u8 bDeferred; /* True if this expression is entirely deferred */ - /* - ** sqlite3_test_control(SQLITE_TESTCTRL_PENDING_BYTE, unsigned int X) - ** - ** Set the PENDING byte to the value in the argument, if X>0. - ** Make no changes if X==0. Return the value of the pending byte - ** as it existing before this routine was called. - ** - ** IMPORTANT: Changing the PENDING byte from 0x40000000 results in - ** an incompatible database file format. Changing the PENDING byte - ** while any database connection is open results in undefined and - ** deleterious behavior. - */ - case SQLITE_TESTCTRL_PENDING_BYTE: { - rc = PENDING_BYTE; -#ifndef SQLITE_OMIT_WSD - { - unsigned int newVal = va_arg(ap, unsigned int); - if( newVal ) sqlite3PendingByte = newVal; - } + /* The following are used by the fts3_snippet.c module. */ + int iPhrase; /* Index of this phrase in matchinfo() results */ + u32 *aMI; /* See above */ +}; + +/* +** Candidate values for Fts3Query.eType. Note that the order of the first +** four values is in order of precedence when parsing expressions. For +** example, the following: +** +** "a OR b AND c NOT d NEAR e" +** +** is equivalent to: +** +** "a OR (b AND (c NOT (d NEAR e)))" +*/ +#define FTSQUERY_NEAR 1 +#define FTSQUERY_NOT 2 +#define FTSQUERY_AND 3 +#define FTSQUERY_OR 4 +#define FTSQUERY_PHRASE 5 + + +/* fts3_write.c */ +SQLITE_PRIVATE int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*); +SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *); +SQLITE_PRIVATE void sqlite3Fts3PendingTermsClear(Fts3Table *); +SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *); +SQLITE_PRIVATE int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64, + sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**); +SQLITE_PRIVATE int sqlite3Fts3SegReaderPending( + Fts3Table*,int,const char*,int,int,Fts3SegReader**); +SQLITE_PRIVATE void sqlite3Fts3SegReaderFree(Fts3SegReader *); +SQLITE_PRIVATE int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **); +SQLITE_PRIVATE int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*); + +SQLITE_PRIVATE int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **); +SQLITE_PRIVATE int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **); + +#ifndef SQLITE_DISABLE_FTS4_DEFERRED +SQLITE_PRIVATE void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *); +SQLITE_PRIVATE int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int); +SQLITE_PRIVATE int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *); +SQLITE_PRIVATE void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *); +SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *); +#else +# define sqlite3Fts3FreeDeferredTokens(x) +# define sqlite3Fts3DeferToken(x,y,z) SQLITE_OK +# define sqlite3Fts3CacheDeferredDoclists(x) SQLITE_OK +# define sqlite3Fts3FreeDeferredDoclists(x) +# define sqlite3Fts3DeferredTokenList(x,y,z) SQLITE_OK #endif - break; - } - /* - ** sqlite3_test_control(SQLITE_TESTCTRL_ASSERT, int X) - ** - ** This action provides a run-time test to see whether or not - ** assert() was enabled at compile-time. If X is true and assert() - ** is enabled, then the return value is true. If X is true and - ** assert() is disabled, then the return value is zero. If X is - ** false and assert() is enabled, then the assertion fires and the - ** process aborts. If X is false and assert() is disabled, then the - ** return value is zero. - */ - case SQLITE_TESTCTRL_ASSERT: { - volatile int x = 0; - assert( (x = va_arg(ap,int))!=0 ); - rc = x; - break; - } +SQLITE_PRIVATE void sqlite3Fts3SegmentsClose(Fts3Table *); +SQLITE_PRIVATE int sqlite3Fts3MaxLevel(Fts3Table *, int *); + +/* Special values interpreted by sqlite3SegReaderCursor() */ +#define FTS3_SEGCURSOR_PENDING -1 +#define FTS3_SEGCURSOR_ALL -2 + +SQLITE_PRIVATE int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*); +SQLITE_PRIVATE int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *); +SQLITE_PRIVATE void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *); + +SQLITE_PRIVATE int sqlite3Fts3SegReaderCursor(Fts3Table *, + int, int, int, const char *, int, int, int, Fts3MultiSegReader *); + +/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ +#define FTS3_SEGMENT_REQUIRE_POS 0x00000001 +#define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 +#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004 +#define FTS3_SEGMENT_PREFIX 0x00000008 +#define FTS3_SEGMENT_SCAN 0x00000010 +#define FTS3_SEGMENT_FIRST 0x00000020 + +/* Type passed as 4th argument to SegmentReaderIterate() */ +struct Fts3SegFilter { + const char *zTerm; + int nTerm; + int iCol; + int flags; +}; + +struct Fts3MultiSegReader { + /* Used internally by sqlite3Fts3SegReaderXXX() calls */ + Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */ + int nSegment; /* Size of apSegment array */ + int nAdvance; /* How many seg-readers to advance */ + Fts3SegFilter *pFilter; /* Pointer to filter object */ + char *aBuffer; /* Buffer to merge doclists in */ + int nBuffer; /* Allocated size of aBuffer[] in bytes */ + + int iColFilter; /* If >=0, filter for this column */ + int bRestart; + + /* Used by fts3.c only. */ + int nCost; /* Cost of running iterator */ + int bLookup; /* True if a lookup of a single entry. */ + + /* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */ + char *zTerm; /* Pointer to term buffer */ + int nTerm; /* Size of zTerm in bytes */ + char *aDoclist; /* Pointer to doclist buffer */ + int nDoclist; /* Size of aDoclist[] in bytes */ +}; + +SQLITE_PRIVATE int sqlite3Fts3Incrmerge(Fts3Table*,int,int); + +#define fts3GetVarint32(p, piVal) ( \ + (*(u8*)(p)&0x80) ? sqlite3Fts3GetVarint32(p, piVal) : (*piVal=*(u8*)(p), 1) \ +) + +/* fts3.c */ +SQLITE_PRIVATE void sqlite3Fts3ErrMsg(char**,const char*,...); +SQLITE_PRIVATE int sqlite3Fts3PutVarint(char *, sqlite3_int64); +SQLITE_PRIVATE int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); +SQLITE_PRIVATE int sqlite3Fts3GetVarint32(const char *, int *); +SQLITE_PRIVATE int sqlite3Fts3VarintLen(sqlite3_uint64); +SQLITE_PRIVATE void sqlite3Fts3Dequote(char *); +SQLITE_PRIVATE void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*); +SQLITE_PRIVATE int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *); +SQLITE_PRIVATE int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *); +SQLITE_PRIVATE void sqlite3Fts3CreateStatTable(int*, Fts3Table*); +SQLITE_PRIVATE int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc); + +/* fts3_tokenizer.c */ +SQLITE_PRIVATE const char *sqlite3Fts3NextToken(const char *, int *); +SQLITE_PRIVATE int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *); +SQLITE_PRIVATE int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, + sqlite3_tokenizer **, char ** +); +SQLITE_PRIVATE int sqlite3Fts3IsIdChar(char); +/* fts3_snippet.c */ +SQLITE_PRIVATE void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*); +SQLITE_PRIVATE void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *, + const char *, const char *, int, int +); +SQLITE_PRIVATE void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *); +SQLITE_PRIVATE void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p); - /* - ** sqlite3_test_control(SQLITE_TESTCTRL_ALWAYS, int X) - ** - ** This action provides a run-time test to see how the ALWAYS and - ** NEVER macros were defined at compile-time. - ** - ** The return value is ALWAYS(X). - ** - ** The recommended test is X==2. If the return value is 2, that means - ** ALWAYS() and NEVER() are both no-op pass-through macros, which is the - ** default setting. If the return value is 1, then ALWAYS() is either - ** hard-coded to true or else it asserts if its argument is false. - ** The first behavior (hard-coded to true) is the case if - ** SQLITE_TESTCTRL_ASSERT shows that assert() is disabled and the second - ** behavior (assert if the argument to ALWAYS() is false) is the case if - ** SQLITE_TESTCTRL_ASSERT shows that assert() is enabled. - ** - ** The run-time test procedure might look something like this: - ** - ** if( sqlite3_test_control(SQLITE_TESTCTRL_ALWAYS, 2)==2 ){ - ** // ALWAYS() and NEVER() are no-op pass-through macros - ** }else if( sqlite3_test_control(SQLITE_TESTCTRL_ASSERT, 1) ){ - ** // ALWAYS(x) asserts that x is true. NEVER(x) asserts x is false. - ** }else{ - ** // ALWAYS(x) is a constant 1. NEVER(x) is a constant 0. - ** } - */ - case SQLITE_TESTCTRL_ALWAYS: { - int x = va_arg(ap,int); - rc = ALWAYS(x); - break; - } +/* fts3_expr.c */ +SQLITE_PRIVATE int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int, + char **, int, int, int, const char *, int, Fts3Expr **, char ** +); +SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *); +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3 *db); +SQLITE_PRIVATE int sqlite3Fts3InitTerm(sqlite3 *db); +#endif - /* - ** sqlite3_test_control(SQLITE_TESTCTRL_BYTEORDER); - ** - ** The integer returned reveals the byte-order of the computer on which - ** SQLite is running: - ** - ** 1 big-endian, determined at run-time - ** 10 little-endian, determined at run-time - ** 432101 big-endian, determined at compile-time - ** 123410 little-endian, determined at compile-time - */ - case SQLITE_TESTCTRL_BYTEORDER: { - rc = SQLITE_BYTEORDER*100 + SQLITE_LITTLEENDIAN*10 + SQLITE_BIGENDIAN; - break; - } +SQLITE_PRIVATE int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int, + sqlite3_tokenizer_cursor ** +); - /* sqlite3_test_control(SQLITE_TESTCTRL_RESERVE, sqlite3 *db, int N) - ** - ** Set the nReserve size to N for the main database on the database - ** connection db. - */ - case SQLITE_TESTCTRL_RESERVE: { - sqlite3 *db = va_arg(ap, sqlite3*); - int x = va_arg(ap,int); - sqlite3_mutex_enter(db->mutex); - sqlite3BtreeSetPageSize(db->aDb[0].pBt, 0, x, 0); - sqlite3_mutex_leave(db->mutex); - break; - } +/* fts3_aux.c */ +SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db); - /* sqlite3_test_control(SQLITE_TESTCTRL_OPTIMIZATIONS, sqlite3 *db, int N) - ** - ** Enable or disable various optimizations for testing purposes. The - ** argument N is a bitmask of optimizations to be disabled. For normal - ** operation N should be 0. The idea is that a test program (like the - ** SQL Logic Test or SLT test module) can run the same SQL multiple times - ** with various optimizations disabled to verify that the same answer - ** is obtained in every case. - */ - case SQLITE_TESTCTRL_OPTIMIZATIONS: { - sqlite3 *db = va_arg(ap, sqlite3*); - db->dbOptFlags = (u16)(va_arg(ap, int) & 0xffff); - break; - } +SQLITE_PRIVATE void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *); -#ifdef SQLITE_N_KEYWORD - /* sqlite3_test_control(SQLITE_TESTCTRL_ISKEYWORD, const char *zWord) - ** - ** If zWord is a keyword recognized by the parser, then return the - ** number of keywords. Or if zWord is not a keyword, return 0. - ** - ** This test feature is only available in the amalgamation since - ** the SQLITE_N_KEYWORD macro is not defined in this file if SQLite - ** is built using separate source files. - */ - case SQLITE_TESTCTRL_ISKEYWORD: { - const char *zWord = va_arg(ap, const char*); - int n = sqlite3Strlen30(zWord); - rc = (sqlite3KeywordCode((u8*)zWord, n)!=TK_ID) ? SQLITE_N_KEYWORD : 0; - break; - } -#endif +SQLITE_PRIVATE int sqlite3Fts3MsrIncrStart( + Fts3Table*, Fts3MultiSegReader*, int, const char*, int); +SQLITE_PRIVATE int sqlite3Fts3MsrIncrNext( + Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *); +SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **); +SQLITE_PRIVATE int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *); +SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr); - /* sqlite3_test_control(SQLITE_TESTCTRL_SCRATCHMALLOC, sz, &pNew, pFree); - ** - ** Pass pFree into sqlite3ScratchFree(). - ** If sz>0 then allocate a scratch buffer into pNew. - */ - case SQLITE_TESTCTRL_SCRATCHMALLOC: { - void *pFree, **ppNew; - int sz; - sz = va_arg(ap, int); - ppNew = va_arg(ap, void**); - pFree = va_arg(ap, void*); - if( sz ) *ppNew = sqlite3ScratchMalloc(sz); - sqlite3ScratchFree(pFree); - break; - } +/* fts3_tokenize_vtab.c */ +SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *); - /* sqlite3_test_control(SQLITE_TESTCTRL_LOCALTIME_FAULT, int onoff); - ** - ** If parameter onoff is non-zero, configure the wrappers so that all - ** subsequent calls to localtime() and variants fail. If onoff is zero, - ** undo this setting. - */ - case SQLITE_TESTCTRL_LOCALTIME_FAULT: { - sqlite3GlobalConfig.bLocaltimeFault = va_arg(ap, int); - break; - } +/* fts3_unicode2.c (functions generated by parsing unicode text files) */ +#ifndef SQLITE_DISABLE_FTS3_UNICODE +SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int, int); +SQLITE_PRIVATE int sqlite3FtsUnicodeIsalnum(int); +SQLITE_PRIVATE int sqlite3FtsUnicodeIsdiacritic(int); +#endif - /* sqlite3_test_control(SQLITE_TESTCTRL_NEVER_CORRUPT, int); - ** - ** Set or clear a flag that indicates that the database file is always well- - ** formed and never corrupt. This flag is clear by default, indicating that - ** database files might have arbitrary corruption. Setting the flag during - ** testing causes certain assert() statements in the code to be activated - ** that demonstrat invariants on well-formed database files. - */ - case SQLITE_TESTCTRL_NEVER_CORRUPT: { - sqlite3GlobalConfig.neverCorrupt = va_arg(ap, int); - break; - } +#endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */ +#endif /* _FTSINT_H */ +/************** End of fts3Int.h *********************************************/ +/************** Continuing where we left off in fts3.c ***********************/ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) - /* sqlite3_test_control(SQLITE_TESTCTRL_VDBE_COVERAGE, xCallback, ptr); - ** - ** Set the VDBE coverage callback function to xCallback with context - ** pointer ptr. - */ - case SQLITE_TESTCTRL_VDBE_COVERAGE: { -#ifdef SQLITE_VDBE_COVERAGE - typedef void (*branch_callback)(void*,int,u8,u8); - sqlite3GlobalConfig.xVdbeBranch = va_arg(ap,branch_callback); - sqlite3GlobalConfig.pVdbeBranchArg = va_arg(ap,void*); +#if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_CORE) +# define SQLITE_CORE 1 #endif - break; - } - /* sqlite3_test_control(SQLITE_TESTCTRL_SORTER_MMAP, db, nMax); */ - case SQLITE_TESTCTRL_SORTER_MMAP: { - sqlite3 *db = va_arg(ap, sqlite3*); - db->nMaxSorterMmap = va_arg(ap, int); - break; - } +/* #include */ +/* #include */ +/* #include */ +/* #include */ +/* #include */ +/* #include */ - /* sqlite3_test_control(SQLITE_TESTCTRL_ISINIT); - ** - ** Return SQLITE_OK if SQLite has been initialized and SQLITE_ERROR if - ** not. - */ - case SQLITE_TESTCTRL_ISINIT: { - if( sqlite3GlobalConfig.isInit==0 ) rc = SQLITE_ERROR; - break; - } +/* #include "fts3.h" */ +#ifndef SQLITE_CORE +/* # include "sqlite3ext.h" */ + SQLITE_EXTENSION_INIT1 +#endif - /* sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, db, dbName, onOff, tnum); - ** - ** This test control is used to create imposter tables. "db" is a pointer - ** to the database connection. dbName is the database name (ex: "main" or - ** "temp") which will receive the imposter. "onOff" turns imposter mode on - ** or off. "tnum" is the root page of the b-tree to which the imposter - ** table should connect. - ** - ** Enable imposter mode only when the schema has already been parsed. Then - ** run a single CREATE TABLE statement to construct the imposter table in - ** the parsed schema. Then turn imposter mode back off again. - ** - ** If onOff==0 and tnum>0 then reset the schema for all databases, causing - ** the schema to be reparsed the next time it is needed. This has the - ** effect of erasing all imposter tables. - */ - case SQLITE_TESTCTRL_IMPOSTER: { - sqlite3 *db = va_arg(ap, sqlite3*); - sqlite3_mutex_enter(db->mutex); - db->init.iDb = sqlite3FindDbName(db, va_arg(ap,const char*)); - db->init.busy = db->init.imposterTable = va_arg(ap,int); - db->init.newTnum = va_arg(ap,int); - if( db->init.busy==0 && db->init.newTnum>0 ){ - sqlite3ResetAllSchemasOfConnection(db); - } - sqlite3_mutex_leave(db->mutex); - break; - } - } - va_end(ap); -#endif /* SQLITE_OMIT_BUILTIN_TEST */ - return rc; -} +static int fts3EvalNext(Fts3Cursor *pCsr); +static int fts3EvalStart(Fts3Cursor *pCsr); +static int fts3TermSegReaderCursor( + Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **); -/* -** This is a utility routine, useful to VFS implementations, that checks -** to see if a database file was a URI that contained a specific query -** parameter, and if so obtains the value of the query parameter. -** -** The zFilename argument is the filename pointer passed into the xOpen() -** method of a VFS implementation. The zParam argument is the name of the -** query parameter we seek. This routine returns the value of the zParam -** parameter if it exists. If the parameter does not exist, this routine -** returns a NULL pointer. -*/ -SQLITE_API const char *SQLITE_STDCALL sqlite3_uri_parameter(const char *zFilename, const char *zParam){ - if( zFilename==0 || zParam==0 ) return 0; - zFilename += sqlite3Strlen30(zFilename) + 1; - while( zFilename[0] ){ - int x = strcmp(zFilename, zParam); - zFilename += sqlite3Strlen30(zFilename) + 1; - if( x==0 ) return zFilename; - zFilename += sqlite3Strlen30(zFilename) + 1; - } - return 0; -} +#ifndef SQLITE_AMALGAMATION +# if defined(SQLITE_DEBUG) +SQLITE_PRIVATE int sqlite3Fts3Always(int b) { assert( b ); return b; } +SQLITE_PRIVATE int sqlite3Fts3Never(int b) { assert( !b ); return b; } +# endif +#endif -/* -** Return a boolean value for a query parameter. +/* +** Write a 64-bit variable-length integer to memory starting at p[0]. +** The length of data written will be between 1 and FTS3_VARINT_MAX bytes. +** The number of bytes written is returned. */ -SQLITE_API int SQLITE_STDCALL sqlite3_uri_boolean(const char *zFilename, const char *zParam, int bDflt){ - const char *z = sqlite3_uri_parameter(zFilename, zParam); - bDflt = bDflt!=0; - return z ? sqlite3GetBoolean(z, bDflt) : bDflt; +SQLITE_PRIVATE int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){ + unsigned char *q = (unsigned char *) p; + sqlite_uint64 vu = v; + do{ + *q++ = (unsigned char) ((vu & 0x7f) | 0x80); + vu >>= 7; + }while( vu!=0 ); + q[-1] &= 0x7f; /* turn off high bit in final byte */ + assert( q - (unsigned char *)p <= FTS3_VARINT_MAX ); + return (int) (q - (unsigned char *)p); } -/* -** Return a 64-bit integer value for a query parameter. -*/ -SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_uri_int64( - const char *zFilename, /* Filename as passed to xOpen */ - const char *zParam, /* URI parameter sought */ - sqlite3_int64 bDflt /* return if parameter is missing */ -){ - const char *z = sqlite3_uri_parameter(zFilename, zParam); - sqlite3_int64 v; - if( z && sqlite3DecOrHexToI64(z, &v)==SQLITE_OK ){ - bDflt = v; - } - return bDflt; -} +#define GETVARINT_STEP(v, ptr, shift, mask1, mask2, var, ret) \ + v = (v & mask1) | ( (*ptr++) << shift ); \ + if( (v & mask2)==0 ){ var = v; return ret; } +#define GETVARINT_INIT(v, ptr, shift, mask1, mask2, var, ret) \ + v = (*ptr++); \ + if( (v & mask2)==0 ){ var = v; return ret; } -/* -** Return the Btree pointer identified by zDbName. Return NULL if not found. +/* +** Read a 64-bit variable-length integer from memory starting at p[0]. +** Return the number of bytes read, or 0 on error. +** The value is stored in *v. */ -SQLITE_PRIVATE Btree *sqlite3DbNameToBtree(sqlite3 *db, const char *zDbName){ - int i; - for(i=0; inDb; i++){ - if( db->aDb[i].pBt - && (zDbName==0 || sqlite3StrICmp(zDbName, db->aDb[i].zName)==0) - ){ - return db->aDb[i].pBt; - } +SQLITE_PRIVATE int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){ + const char *pStart = p; + u32 a; + u64 b; + int shift; + + GETVARINT_INIT(a, p, 0, 0x00, 0x80, *v, 1); + GETVARINT_STEP(a, p, 7, 0x7F, 0x4000, *v, 2); + GETVARINT_STEP(a, p, 14, 0x3FFF, 0x200000, *v, 3); + GETVARINT_STEP(a, p, 21, 0x1FFFFF, 0x10000000, *v, 4); + b = (a & 0x0FFFFFFF ); + + for(shift=28; shift<=63; shift+=7){ + u64 c = *p++; + b += (c&0x7F) << shift; + if( (c & 0x80)==0 ) break; } - return 0; + *v = b; + return (int)(p - pStart); } /* -** Return the filename of the database associated with a database -** connection. +** Similar to sqlite3Fts3GetVarint(), except that the output is truncated to a +** 32-bit integer before it is returned. */ -SQLITE_API const char *SQLITE_STDCALL sqlite3_db_filename(sqlite3 *db, const char *zDbName){ - Btree *pBt; -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ){ - (void)SQLITE_MISUSE_BKPT; - return 0; - } +SQLITE_PRIVATE int sqlite3Fts3GetVarint32(const char *p, int *pi){ + u32 a; + +#ifndef fts3GetVarint32 + GETVARINT_INIT(a, p, 0, 0x00, 0x80, *pi, 1); +#else + a = (*p++); + assert( a & 0x80 ); #endif - pBt = sqlite3DbNameToBtree(db, zDbName); - return pBt ? sqlite3BtreeGetFilename(pBt) : 0; + + GETVARINT_STEP(a, p, 7, 0x7F, 0x4000, *pi, 2); + GETVARINT_STEP(a, p, 14, 0x3FFF, 0x200000, *pi, 3); + GETVARINT_STEP(a, p, 21, 0x1FFFFF, 0x10000000, *pi, 4); + a = (a & 0x0FFFFFFF ); + *pi = (int)(a | ((u32)(*p & 0x0F) << 28)); + return 5; } /* -** Return 1 if database is read-only or 0 if read/write. Return -1 if -** no such database exists. +** Return the number of bytes required to encode v as a varint */ -SQLITE_API int SQLITE_STDCALL sqlite3_db_readonly(sqlite3 *db, const char *zDbName){ - Btree *pBt; -#ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) ){ - (void)SQLITE_MISUSE_BKPT; - return -1; - } -#endif - pBt = sqlite3DbNameToBtree(db, zDbName); - return pBt ? sqlite3BtreeIsReadonly(pBt) : -1; +SQLITE_PRIVATE int sqlite3Fts3VarintLen(sqlite3_uint64 v){ + int i = 0; + do{ + i++; + v >>= 7; + }while( v!=0 ); + return i; } -/************** End of main.c ************************************************/ -/************** Begin file notify.c ******************************************/ /* -** 2009 March 3 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: +** Convert an SQL-style quoted string into a normal string by removing +** the quote characters. The conversion is done in-place. If the +** input does not begin with a quote character, then this routine +** is a no-op. ** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. +** Examples: ** -************************************************************************* +** "abc" becomes abc +** 'xyz' becomes xyz +** [pqr] becomes pqr +** `mno` becomes mno ** -** This file contains the implementation of the sqlite3_unlock_notify() -** API method and its associated functionality. */ +SQLITE_PRIVATE void sqlite3Fts3Dequote(char *z){ + char quote; /* Quote character (if any ) */ -/* Omit this entire file if SQLITE_ENABLE_UNLOCK_NOTIFY is not defined. */ -#ifdef SQLITE_ENABLE_UNLOCK_NOTIFY + quote = z[0]; + if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){ + int iIn = 1; /* Index of next byte to read from input */ + int iOut = 0; /* Index of next byte to write to output */ -/* -** Public interfaces: -** -** sqlite3ConnectionBlocked() -** sqlite3ConnectionUnlocked() -** sqlite3ConnectionClosed() -** sqlite3_unlock_notify() -*/ + /* If the first byte was a '[', then the close-quote character is a ']' */ + if( quote=='[' ) quote = ']'; -#define assertMutexHeld() \ - assert( sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)) ) + while( z[iIn] ){ + if( z[iIn]==quote ){ + if( z[iIn+1]!=quote ) break; + z[iOut++] = quote; + iIn += 2; + }else{ + z[iOut++] = z[iIn++]; + } + } + z[iOut] = '\0'; + } +} /* -** Head of a linked list of all sqlite3 objects created by this process -** for which either sqlite3.pBlockingConnection or sqlite3.pUnlockConnection -** is not NULL. This variable may only accessed while the STATIC_MASTER -** mutex is held. +** Read a single varint from the doclist at *pp and advance *pp to point +** to the first byte past the end of the varint. Add the value of the varint +** to *pVal. */ -static sqlite3 *SQLITE_WSD sqlite3BlockedList = 0; +static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){ + sqlite3_int64 iVal; + *pp += sqlite3Fts3GetVarint(*pp, &iVal); + *pVal += iVal; +} -#ifndef NDEBUG /* -** This function is a complex assert() that verifies the following -** properties of the blocked connections list: -** -** 1) Each entry in the list has a non-NULL value for either -** pUnlockConnection or pBlockingConnection, or both. -** -** 2) All entries in the list that share a common value for -** xUnlockNotify are grouped together. +** When this function is called, *pp points to the first byte following a +** varint that is part of a doclist (or position-list, or any other list +** of varints). This function moves *pp to point to the start of that varint, +** and sets *pVal by the varint value. ** -** 3) If the argument db is not NULL, then none of the entries in the -** blocked connections list have pUnlockConnection or pBlockingConnection -** set to db. This is used when closing connection db. +** Argument pStart points to the first byte of the doclist that the +** varint is part of. */ -static void checkListProperties(sqlite3 *db){ - sqlite3 *p; - for(p=sqlite3BlockedList; p; p=p->pNextBlocked){ - int seen = 0; - sqlite3 *p2; +static void fts3GetReverseVarint( + char **pp, + char *pStart, + sqlite3_int64 *pVal +){ + sqlite3_int64 iVal; + char *p; - /* Verify property (1) */ - assert( p->pUnlockConnection || p->pBlockingConnection ); + /* Pointer p now points at the first byte past the varint we are + ** interested in. So, unless the doclist is corrupt, the 0x80 bit is + ** clear on character p[-1]. */ + for(p = (*pp)-2; p>=pStart && *p&0x80; p--); + p++; + *pp = p; - /* Verify property (2) */ - for(p2=sqlite3BlockedList; p2!=p; p2=p2->pNextBlocked){ - if( p2->xUnlockNotify==p->xUnlockNotify ) seen = 1; - assert( p2->xUnlockNotify==p->xUnlockNotify || !seen ); - assert( db==0 || p->pUnlockConnection!=db ); - assert( db==0 || p->pBlockingConnection!=db ); - } - } + sqlite3Fts3GetVarint(p, &iVal); + *pVal = iVal; } -#else -# define checkListProperties(x) -#endif /* -** Remove connection db from the blocked connections list. If connection -** db is not currently a part of the list, this function is a no-op. +** The xDisconnect() virtual table method. */ -static void removeFromBlockedList(sqlite3 *db){ - sqlite3 **pp; - assertMutexHeld(); - for(pp=&sqlite3BlockedList; *pp; pp = &(*pp)->pNextBlocked){ - if( *pp==db ){ - *pp = (*pp)->pNextBlocked; - break; - } +static int fts3DisconnectMethod(sqlite3_vtab *pVtab){ + Fts3Table *p = (Fts3Table *)pVtab; + int i; + + assert( p->nPendingData==0 ); + assert( p->pSegments==0 ); + + /* Free any prepared statements held */ + for(i=0; iaStmt); i++){ + sqlite3_finalize(p->aStmt[i]); } -} + sqlite3_free(p->zSegmentsTbl); + sqlite3_free(p->zReadExprlist); + sqlite3_free(p->zWriteExprlist); + sqlite3_free(p->zContentTbl); + sqlite3_free(p->zLanguageid); -/* -** Add connection db to the blocked connections list. It is assumed -** that it is not already a part of the list. -*/ -static void addToBlockedList(sqlite3 *db){ - sqlite3 **pp; - assertMutexHeld(); - for( - pp=&sqlite3BlockedList; - *pp && (*pp)->xUnlockNotify!=db->xUnlockNotify; - pp=&(*pp)->pNextBlocked - ); - db->pNextBlocked = *pp; - *pp = db; -} + /* Invoke the tokenizer destructor to free the tokenizer. */ + p->pTokenizer->pModule->xDestroy(p->pTokenizer); -/* -** Obtain the STATIC_MASTER mutex. -*/ -static void enterMutex(void){ - sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); - checkListProperties(0); + sqlite3_free(p); + return SQLITE_OK; } /* -** Release the STATIC_MASTER mutex. +** Write an error message into *pzErr */ -static void leaveMutex(void){ - assertMutexHeld(); - checkListProperties(0); - sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); +SQLITE_PRIVATE void sqlite3Fts3ErrMsg(char **pzErr, const char *zFormat, ...){ + va_list ap; + sqlite3_free(*pzErr); + va_start(ap, zFormat); + *pzErr = sqlite3_vmprintf(zFormat, ap); + va_end(ap); } /* -** Register an unlock-notify callback. -** -** This is called after connection "db" has attempted some operation -** but has received an SQLITE_LOCKED error because another connection -** (call it pOther) in the same process was busy using the same shared -** cache. pOther is found by looking at db->pBlockingConnection. -** -** If there is no blocking connection, the callback is invoked immediately, -** before this routine returns. -** -** If pOther is already blocked on db, then report SQLITE_LOCKED, to indicate -** a deadlock. -** -** Otherwise, make arrangements to invoke xNotify when pOther drops -** its locks. +** Construct one or more SQL statements from the format string given +** and then evaluate those statements. The success code is written +** into *pRc. ** -** Each call to this routine overrides any prior callbacks registered -** on the same "db". If xNotify==0 then any prior callbacks are immediately -** cancelled. +** If *pRc is initially non-zero then this routine is a no-op. */ -SQLITE_API int SQLITE_STDCALL sqlite3_unlock_notify( - sqlite3 *db, - void (*xNotify)(void **, int), - void *pArg +static void fts3DbExec( + int *pRc, /* Success code */ + sqlite3 *db, /* Database in which to run SQL */ + const char *zFormat, /* Format string for SQL */ + ... /* Arguments to the format string */ ){ - int rc = SQLITE_OK; - - sqlite3_mutex_enter(db->mutex); - enterMutex(); - - if( xNotify==0 ){ - removeFromBlockedList(db); - db->pBlockingConnection = 0; - db->pUnlockConnection = 0; - db->xUnlockNotify = 0; - db->pUnlockArg = 0; - }else if( 0==db->pBlockingConnection ){ - /* The blocking transaction has been concluded. Or there never was a - ** blocking transaction. In either case, invoke the notify callback - ** immediately. - */ - xNotify(&pArg, 1); + va_list ap; + char *zSql; + if( *pRc ) return; + va_start(ap, zFormat); + zSql = sqlite3_vmprintf(zFormat, ap); + va_end(ap); + if( zSql==0 ){ + *pRc = SQLITE_NOMEM; }else{ - sqlite3 *p; - - for(p=db->pBlockingConnection; p && p!=db; p=p->pUnlockConnection){} - if( p ){ - rc = SQLITE_LOCKED; /* Deadlock detected. */ - }else{ - db->pUnlockConnection = db->pBlockingConnection; - db->xUnlockNotify = xNotify; - db->pUnlockArg = pArg; - removeFromBlockedList(db); - addToBlockedList(db); - } + *pRc = sqlite3_exec(db, zSql, 0, 0, 0); + sqlite3_free(zSql); } - - leaveMutex(); - assert( !db->mallocFailed ); - sqlite3ErrorWithMsg(db, rc, (rc?"database is deadlocked":0)); - sqlite3_mutex_leave(db->mutex); - return rc; } /* -** This function is called while stepping or preparing a statement -** associated with connection db. The operation will return SQLITE_LOCKED -** to the user because it requires a lock that will not be available -** until connection pBlocker concludes its current transaction. +** The xDestroy() virtual table method. */ -SQLITE_PRIVATE void sqlite3ConnectionBlocked(sqlite3 *db, sqlite3 *pBlocker){ - enterMutex(); - if( db->pBlockingConnection==0 && db->pUnlockConnection==0 ){ - addToBlockedList(db); +static int fts3DestroyMethod(sqlite3_vtab *pVtab){ + Fts3Table *p = (Fts3Table *)pVtab; + int rc = SQLITE_OK; /* Return code */ + const char *zDb = p->zDb; /* Name of database (e.g. "main", "temp") */ + sqlite3 *db = p->db; /* Database handle */ + + /* Drop the shadow tables */ + if( p->zContentTbl==0 ){ + fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", zDb, p->zName); } - db->pBlockingConnection = pBlocker; - leaveMutex(); + fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", zDb,p->zName); + fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", zDb, p->zName); + fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", zDb, p->zName); + fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", zDb, p->zName); + + /* If everything has worked, invoke fts3DisconnectMethod() to free the + ** memory associated with the Fts3Table structure and return SQLITE_OK. + ** Otherwise, return an SQLite error code. + */ + return (rc==SQLITE_OK ? fts3DisconnectMethod(pVtab) : rc); } + /* -** This function is called when -** the transaction opened by database db has just finished. Locks held -** by database connection db have been released. -** -** This function loops through each entry in the blocked connections -** list and does the following: -** -** 1) If the sqlite3.pBlockingConnection member of a list entry is -** set to db, then set pBlockingConnection=0. -** -** 2) If the sqlite3.pUnlockConnection member of a list entry is -** set to db, then invoke the configured unlock-notify callback and -** set pUnlockConnection=0. +** Invoke sqlite3_declare_vtab() to declare the schema for the FTS3 table +** passed as the first argument. This is done as part of the xConnect() +** and xCreate() methods. ** -** 3) If the two steps above mean that pBlockingConnection==0 and -** pUnlockConnection==0, remove the entry from the blocked connections -** list. +** If *pRc is non-zero when this function is called, it is a no-op. +** Otherwise, if an error occurs, an SQLite error code is stored in *pRc +** before returning. */ -SQLITE_PRIVATE void sqlite3ConnectionUnlocked(sqlite3 *db){ - void (*xUnlockNotify)(void **, int) = 0; /* Unlock-notify cb to invoke */ - int nArg = 0; /* Number of entries in aArg[] */ - sqlite3 **pp; /* Iterator variable */ - void **aArg; /* Arguments to the unlock callback */ - void **aDyn = 0; /* Dynamically allocated space for aArg[] */ - void *aStatic[16]; /* Starter space for aArg[]. No malloc required */ - - aArg = aStatic; - enterMutex(); /* Enter STATIC_MASTER mutex */ - - /* This loop runs once for each entry in the blocked-connections list. */ - for(pp=&sqlite3BlockedList; *pp; /* no-op */ ){ - sqlite3 *p = *pp; - - /* Step 1. */ - if( p->pBlockingConnection==db ){ - p->pBlockingConnection = 0; - } - - /* Step 2. */ - if( p->pUnlockConnection==db ){ - assert( p->xUnlockNotify ); - if( p->xUnlockNotify!=xUnlockNotify && nArg!=0 ){ - xUnlockNotify(aArg, nArg); - nArg = 0; - } +static void fts3DeclareVtab(int *pRc, Fts3Table *p){ + if( *pRc==SQLITE_OK ){ + int i; /* Iterator variable */ + int rc; /* Return code */ + char *zSql; /* SQL statement passed to declare_vtab() */ + char *zCols; /* List of user defined columns */ + const char *zLanguageid; - sqlite3BeginBenignMalloc(); - assert( aArg==aDyn || (aDyn==0 && aArg==aStatic) ); - assert( nArg<=(int)ArraySize(aStatic) || aArg==aDyn ); - if( (!aDyn && nArg==(int)ArraySize(aStatic)) - || (aDyn && nArg==(int)(sqlite3MallocSize(aDyn)/sizeof(void*))) - ){ - /* The aArg[] array needs to grow. */ - void **pNew = (void **)sqlite3Malloc(nArg*sizeof(void *)*2); - if( pNew ){ - memcpy(pNew, aArg, nArg*sizeof(void *)); - sqlite3_free(aDyn); - aDyn = aArg = pNew; - }else{ - /* This occurs when the array of context pointers that need to - ** be passed to the unlock-notify callback is larger than the - ** aStatic[] array allocated on the stack and the attempt to - ** allocate a larger array from the heap has failed. - ** - ** This is a difficult situation to handle. Returning an error - ** code to the caller is insufficient, as even if an error code - ** is returned the transaction on connection db will still be - ** closed and the unlock-notify callbacks on blocked connections - ** will go unissued. This might cause the application to wait - ** indefinitely for an unlock-notify callback that will never - ** arrive. - ** - ** Instead, invoke the unlock-notify callback with the context - ** array already accumulated. We can then clear the array and - ** begin accumulating any further context pointers without - ** requiring any dynamic allocation. This is sub-optimal because - ** it means that instead of one callback with a large array of - ** context pointers the application will receive two or more - ** callbacks with smaller arrays of context pointers, which will - ** reduce the applications ability to prioritize multiple - ** connections. But it is the best that can be done under the - ** circumstances. - */ - xUnlockNotify(aArg, nArg); - nArg = 0; - } - } - sqlite3EndBenignMalloc(); + zLanguageid = (p->zLanguageid ? p->zLanguageid : "__langid"); + sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); - aArg[nArg++] = p->pUnlockArg; - xUnlockNotify = p->xUnlockNotify; - p->pUnlockConnection = 0; - p->xUnlockNotify = 0; - p->pUnlockArg = 0; + /* Create a list of user columns for the virtual table */ + zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]); + for(i=1; zCols && inColumn; i++){ + zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]); } - /* Step 3. */ - if( p->pBlockingConnection==0 && p->pUnlockConnection==0 ){ - /* Remove connection p from the blocked connections list. */ - *pp = p->pNextBlocked; - p->pNextBlocked = 0; + /* Create the whole "CREATE TABLE" statement to pass to SQLite */ + zSql = sqlite3_mprintf( + "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)", + zCols, p->zName, zLanguageid + ); + if( !zCols || !zSql ){ + rc = SQLITE_NOMEM; }else{ - pp = &p->pNextBlocked; + rc = sqlite3_declare_vtab(p->db, zSql); } - } - if( nArg!=0 ){ - xUnlockNotify(aArg, nArg); + sqlite3_free(zSql); + sqlite3_free(zCols); + *pRc = rc; } - sqlite3_free(aDyn); - leaveMutex(); /* Leave STATIC_MASTER mutex */ } /* -** This is called when the database connection passed as an argument is -** being closed. The connection is removed from the blocked list. +** Create the %_stat table if it does not already exist. */ -SQLITE_PRIVATE void sqlite3ConnectionClosed(sqlite3 *db){ - sqlite3ConnectionUnlocked(db); - enterMutex(); - removeFromBlockedList(db); - checkListProperties(db); - leaveMutex(); +SQLITE_PRIVATE void sqlite3Fts3CreateStatTable(int *pRc, Fts3Table *p){ + fts3DbExec(pRc, p->db, + "CREATE TABLE IF NOT EXISTS %Q.'%q_stat'" + "(id INTEGER PRIMARY KEY, value BLOB);", + p->zDb, p->zName + ); + if( (*pRc)==SQLITE_OK ) p->bHasStat = 1; } -#endif - -/************** End of notify.c **********************************************/ -/************** Begin file fts3.c ********************************************/ -/* -** 2006 Oct 10 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This is an SQLite module implementing full-text search. -*/ /* -** The code in this file is only compiled if: -** -** * The FTS3 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS3 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). -*/ - -/* The full-text index is stored in a series of b+tree (-like) -** structures called segments which map terms to doclists. The -** structures are like b+trees in layout, but are constructed from the -** bottom up in optimal fashion and are not updatable. Since trees -** are built from the bottom up, things will be described from the -** bottom up. -** -** -**** Varints **** -** The basic unit of encoding is a variable-length integer called a -** varint. We encode variable-length integers in little-endian order -** using seven bits * per byte as follows: -** -** KEY: -** A = 0xxxxxxx 7 bits of data and one flag bit -** B = 1xxxxxxx 7 bits of data and one flag bit -** -** 7 bits - A -** 14 bits - BA -** 21 bits - BBA -** and so on. -** -** This is similar in concept to how sqlite encodes "varints" but -** the encoding is not the same. SQLite varints are big-endian -** are are limited to 9 bytes in length whereas FTS3 varints are -** little-endian and can be up to 10 bytes in length (in theory). -** -** Example encodings: -** -** 1: 0x01 -** 127: 0x7f -** 128: 0x81 0x00 -** -** -**** Document lists **** -** A doclist (document list) holds a docid-sorted list of hits for a -** given term. Doclists hold docids and associated token positions. -** A docid is the unique integer identifier for a single document. -** A position is the index of a word within the document. The first -** word of the document has a position of 0. -** -** FTS3 used to optionally store character offsets using a compile-time -** option. But that functionality is no longer supported. -** -** A doclist is stored like this: -** -** array { -** varint docid; (delta from previous doclist) -** array { (position list for column 0) -** varint position; (2 more than the delta from previous position) -** } -** array { -** varint POS_COLUMN; (marks start of position list for new column) -** varint column; (index of new column) -** array { -** varint position; (2 more than the delta from previous position) -** } -** } -** varint POS_END; (marks end of positions for this document. -** } -** -** Here, array { X } means zero or more occurrences of X, adjacent in -** memory. A "position" is an index of a token in the token stream -** generated by the tokenizer. Note that POS_END and POS_COLUMN occur -** in the same logical place as the position element, and act as sentinals -** ending a position list array. POS_END is 0. POS_COLUMN is 1. -** The positions numbers are not stored literally but rather as two more -** than the difference from the prior position, or the just the position plus -** 2 for the first position. Example: -** -** label: A B C D E F G H I J K -** value: 123 5 9 1 1 14 35 0 234 72 0 -** -** The 123 value is the first docid. For column zero in this document -** there are two matches at positions 3 and 10 (5-2 and 9-2+3). The 1 -** at D signals the start of a new column; the 1 at E indicates that the -** new column is column number 1. There are two positions at 12 and 45 -** (14-2 and 35-2+12). The 0 at H indicate the end-of-document. The -** 234 at I is the delta to next docid (357). It has one position 70 -** (72-2) and then terminates with the 0 at K. -** -** A "position-list" is the list of positions for multiple columns for -** a single docid. A "column-list" is the set of positions for a single -** column. Hence, a position-list consists of one or more column-lists, -** a document record consists of a docid followed by a position-list and -** a doclist consists of one or more document records. -** -** A bare doclist omits the position information, becoming an -** array of varint-encoded docids. -** -**** Segment leaf nodes **** -** Segment leaf nodes store terms and doclists, ordered by term. Leaf -** nodes are written using LeafWriter, and read using LeafReader (to -** iterate through a single leaf node's data) and LeavesReader (to -** iterate through a segment's entire leaf layer). Leaf nodes have -** the format: -** -** varint iHeight; (height from leaf level, always 0) -** varint nTerm; (length of first term) -** char pTerm[nTerm]; (content of first term) -** varint nDoclist; (length of term's associated doclist) -** char pDoclist[nDoclist]; (content of doclist) -** array { -** (further terms are delta-encoded) -** varint nPrefix; (length of prefix shared with previous term) -** varint nSuffix; (length of unshared suffix) -** char pTermSuffix[nSuffix];(unshared suffix of next term) -** varint nDoclist; (length of term's associated doclist) -** char pDoclist[nDoclist]; (content of doclist) -** } -** -** Here, array { X } means zero or more occurrences of X, adjacent in -** memory. -** -** Leaf nodes are broken into blocks which are stored contiguously in -** the %_segments table in sorted order. This means that when the end -** of a node is reached, the next term is in the node with the next -** greater node id. -** -** New data is spilled to a new leaf node when the current node -** exceeds LEAF_MAX bytes (default 2048). New data which itself is -** larger than STANDALONE_MIN (default 1024) is placed in a standalone -** node (a leaf node with a single term and doclist). The goal of -** these settings is to pack together groups of small doclists while -** making it efficient to directly access large doclists. The -** assumption is that large doclists represent terms which are more -** likely to be query targets. -** -** TODO(shess) It may be useful for blocking decisions to be more -** dynamic. For instance, it may make more sense to have a 2.5k leaf -** node rather than splitting into 2k and .5k nodes. My intuition is -** that this might extend through 2x or 4x the pagesize. -** -** -**** Segment interior nodes **** -** Segment interior nodes store blockids for subtree nodes and terms -** to describe what data is stored by the each subtree. Interior -** nodes are written using InteriorWriter, and read using -** InteriorReader. InteriorWriters are created as needed when -** SegmentWriter creates new leaf nodes, or when an interior node -** itself grows too big and must be split. The format of interior -** nodes: -** -** varint iHeight; (height from leaf level, always >0) -** varint iBlockid; (block id of node's leftmost subtree) -** optional { -** varint nTerm; (length of first term) -** char pTerm[nTerm]; (content of first term) -** array { -** (further terms are delta-encoded) -** varint nPrefix; (length of shared prefix with previous term) -** varint nSuffix; (length of unshared suffix) -** char pTermSuffix[nSuffix]; (unshared suffix of next term) -** } -** } -** -** Here, optional { X } means an optional element, while array { X } -** means zero or more occurrences of X, adjacent in memory. -** -** An interior node encodes n terms separating n+1 subtrees. The -** subtree blocks are contiguous, so only the first subtree's blockid -** is encoded. The subtree at iBlockid will contain all terms less -** than the first term encoded (or all terms if no term is encoded). -** Otherwise, for terms greater than or equal to pTerm[i] but less -** than pTerm[i+1], the subtree for that term will be rooted at -** iBlockid+i. Interior nodes only store enough term data to -** distinguish adjacent children (if the rightmost term of the left -** child is "something", and the leftmost term of the right child is -** "wicked", only "w" is stored). +** Create the backing store tables (%_content, %_segments and %_segdir) +** required by the FTS3 table passed as the only argument. This is done +** as part of the vtab xCreate() method. ** -** New data is spilled to a new interior node at the same height when -** the current node exceeds INTERIOR_MAX bytes (default 2048). -** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing -** interior nodes and making the tree too skinny. The interior nodes -** at a given height are naturally tracked by interior nodes at -** height+1, and so on. +** If the p->bHasDocsize boolean is true (indicating that this is an +** FTS4 table, not an FTS3 table) then also create the %_docsize and +** %_stat tables required by FTS4. +*/ +static int fts3CreateTables(Fts3Table *p){ + int rc = SQLITE_OK; /* Return code */ + int i; /* Iterator variable */ + sqlite3 *db = p->db; /* The database connection */ + + if( p->zContentTbl==0 ){ + const char *zLanguageid = p->zLanguageid; + char *zContentCols; /* Columns of %_content table */ + + /* Create a list of user columns for the content table */ + zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY"); + for(i=0; zContentCols && inColumn; i++){ + char *z = p->azColumn[i]; + zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z); + } + if( zLanguageid && zContentCols ){ + zContentCols = sqlite3_mprintf("%z, langid", zContentCols, zLanguageid); + } + if( zContentCols==0 ) rc = SQLITE_NOMEM; + + /* Create the content table */ + fts3DbExec(&rc, db, + "CREATE TABLE %Q.'%q_content'(%s)", + p->zDb, p->zName, zContentCols + ); + sqlite3_free(zContentCols); + } + + /* Create other tables */ + fts3DbExec(&rc, db, + "CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);", + p->zDb, p->zName + ); + fts3DbExec(&rc, db, + "CREATE TABLE %Q.'%q_segdir'(" + "level INTEGER," + "idx INTEGER," + "start_block INTEGER," + "leaves_end_block INTEGER," + "end_block INTEGER," + "root BLOB," + "PRIMARY KEY(level, idx)" + ");", + p->zDb, p->zName + ); + if( p->bHasDocsize ){ + fts3DbExec(&rc, db, + "CREATE TABLE %Q.'%q_docsize'(docid INTEGER PRIMARY KEY, size BLOB);", + p->zDb, p->zName + ); + } + assert( p->bHasStat==p->bFts4 ); + if( p->bHasStat ){ + sqlite3Fts3CreateStatTable(&rc, p); + } + return rc; +} + +/* +** Store the current database page-size in bytes in p->nPgsz. ** +** If *pRc is non-zero when this function is called, it is a no-op. +** Otherwise, if an error occurs, an SQLite error code is stored in *pRc +** before returning. +*/ +static void fts3DatabasePageSize(int *pRc, Fts3Table *p){ + if( *pRc==SQLITE_OK ){ + int rc; /* Return code */ + char *zSql; /* SQL text "PRAGMA %Q.page_size" */ + sqlite3_stmt *pStmt; /* Compiled "PRAGMA %Q.page_size" statement */ + + zSql = sqlite3_mprintf("PRAGMA %Q.page_size", p->zDb); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_step(pStmt); + p->nPgsz = sqlite3_column_int(pStmt, 0); + rc = sqlite3_finalize(pStmt); + }else if( rc==SQLITE_AUTH ){ + p->nPgsz = 1024; + rc = SQLITE_OK; + } + } + assert( p->nPgsz>0 || rc!=SQLITE_OK ); + sqlite3_free(zSql); + *pRc = rc; + } +} + +/* +** "Special" FTS4 arguments are column specifications of the following form: ** -**** Segment directory **** -** The segment directory in table %_segdir stores meta-information for -** merging and deleting segments, and also the root node of the -** segment's tree. +** = ** -** The root node is the top node of the segment's tree after encoding -** the entire segment, restricted to ROOT_MAX bytes (default 1024). -** This could be either a leaf node or an interior node. If the top -** node requires more than ROOT_MAX bytes, it is flushed to %_segments -** and a new root interior node is generated (which should always fit -** within ROOT_MAX because it only needs space for 2 varints, the -** height and the blockid of the previous root). +** There may not be whitespace surrounding the "=" character. The +** term may be quoted, but the may not. +*/ +static int fts3IsSpecialColumn( + const char *z, + int *pnKey, + char **pzValue +){ + char *zValue; + const char *zCsr = z; + + while( *zCsr!='=' ){ + if( *zCsr=='\0' ) return 0; + zCsr++; + } + + *pnKey = (int)(zCsr-z); + zValue = sqlite3_mprintf("%s", &zCsr[1]); + if( zValue ){ + sqlite3Fts3Dequote(zValue); + } + *pzValue = zValue; + return 1; +} + +/* +** Append the output of a printf() style formatting to an existing string. +*/ +static void fts3Appendf( + int *pRc, /* IN/OUT: Error code */ + char **pz, /* IN/OUT: Pointer to string buffer */ + const char *zFormat, /* Printf format string to append */ + ... /* Arguments for printf format string */ +){ + if( *pRc==SQLITE_OK ){ + va_list ap; + char *z; + va_start(ap, zFormat); + z = sqlite3_vmprintf(zFormat, ap); + va_end(ap); + if( z && *pz ){ + char *z2 = sqlite3_mprintf("%s%s", *pz, z); + sqlite3_free(z); + z = z2; + } + if( z==0 ) *pRc = SQLITE_NOMEM; + sqlite3_free(*pz); + *pz = z; + } +} + +/* +** Return a copy of input string zInput enclosed in double-quotes (") and +** with all double quote characters escaped. For example: ** -** The meta-information in the segment directory is: -** level - segment level (see below) -** idx - index within level -** - (level,idx uniquely identify a segment) -** start_block - first leaf node -** leaves_end_block - last leaf node -** end_block - last block (including interior nodes) -** root - contents of root node +** fts3QuoteId("un \"zip\"") -> "un \"\"zip\"\"" ** -** If the root node is a leaf node, then start_block, -** leaves_end_block, and end_block are all 0. +** The pointer returned points to memory obtained from sqlite3_malloc(). It +** is the callers responsibility to call sqlite3_free() to release this +** memory. +*/ +static char *fts3QuoteId(char const *zInput){ + int nRet; + char *zRet; + nRet = 2 + (int)strlen(zInput)*2 + 1; + zRet = sqlite3_malloc(nRet); + if( zRet ){ + int i; + char *z = zRet; + *(z++) = '"'; + for(i=0; zInput[i]; i++){ + if( zInput[i]=='"' ) *(z++) = '"'; + *(z++) = zInput[i]; + } + *(z++) = '"'; + *(z++) = '\0'; + } + return zRet; +} + +/* +** Return a list of comma separated SQL expressions and a FROM clause that +** could be used in a SELECT statement such as the following: ** +** SELECT FROM %_content AS x ... ** -**** Segment merging **** -** To amortize update costs, segments are grouped into levels and -** merged in batches. Each increase in level represents exponentially -** more documents. +** to return the docid, followed by each column of text data in order +** from left to write. If parameter zFunc is not NULL, then instead of +** being returned directly each column of text data is passed to an SQL +** function named zFunc first. For example, if zFunc is "unzip" and the +** table has the three user-defined columns "a", "b", and "c", the following +** string is returned: ** -** New documents (actually, document updates) are tokenized and -** written individually (using LeafWriter) to a level 0 segment, with -** incrementing idx. When idx reaches MERGE_COUNT (default 16), all -** level 0 segments are merged into a single level 1 segment. Level 1 -** is populated like level 0, and eventually MERGE_COUNT level 1 -** segments are merged to a single level 2 segment (representing -** MERGE_COUNT^2 updates), and so on. +** "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c') FROM %_content AS x" ** -** A segment merge traverses all segments at a given level in -** parallel, performing a straightforward sorted merge. Since segment -** leaf nodes are written in to the %_segments table in order, this -** merge traverses the underlying sqlite disk structures efficiently. -** After the merge, all segment blocks from the merged level are -** deleted. +** The pointer returned points to a buffer allocated by sqlite3_malloc(). It +** is the responsibility of the caller to eventually free it. ** -** MERGE_COUNT controls how often we merge segments. 16 seems to be -** somewhat of a sweet spot for insertion performance. 32 and 64 show -** very similar performance numbers to 16 on insertion, though they're -** a tiny bit slower (perhaps due to more overhead in merge-time -** sorting). 8 is about 20% slower than 16, 4 about 50% slower than -** 16, 2 about 66% slower than 16. +** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and +** a NULL pointer is returned). Otherwise, if an OOM error is encountered +** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If +** no error occurs, *pRc is left unmodified. +*/ +static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){ + char *zRet = 0; + char *zFree = 0; + char *zFunction; + int i; + + if( p->zContentTbl==0 ){ + if( !zFunc ){ + zFunction = ""; + }else{ + zFree = zFunction = fts3QuoteId(zFunc); + } + fts3Appendf(pRc, &zRet, "docid"); + for(i=0; inColumn; i++){ + fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]); + } + if( p->zLanguageid ){ + fts3Appendf(pRc, &zRet, ", x.%Q", "langid"); + } + sqlite3_free(zFree); + }else{ + fts3Appendf(pRc, &zRet, "rowid"); + for(i=0; inColumn; i++){ + fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]); + } + if( p->zLanguageid ){ + fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid); + } + } + fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x", + p->zDb, + (p->zContentTbl ? p->zContentTbl : p->zName), + (p->zContentTbl ? "" : "_content") + ); + return zRet; +} + +/* +** Return a list of N comma separated question marks, where N is the number +** of columns in the %_content table (one for the docid plus one for each +** user-defined text column). ** -** At query time, high MERGE_COUNT increases the number of segments -** which need to be scanned and merged. For instance, with 100k docs -** inserted: +** If argument zFunc is not NULL, then all but the first question mark +** is preceded by zFunc and an open bracket, and followed by a closed +** bracket. For example, if zFunc is "zip" and the FTS3 table has three +** user-defined text columns, the following string is returned: ** -** MERGE_COUNT segments -** 16 25 -** 8 12 -** 4 10 -** 2 6 +** "?, zip(?), zip(?), zip(?)" ** -** This appears to have only a moderate impact on queries for very -** frequent terms (which are somewhat dominated by segment merge -** costs), and infrequent and non-existent terms still seem to be fast -** even with many segments. +** The pointer returned points to a buffer allocated by sqlite3_malloc(). It +** is the responsibility of the caller to eventually free it. ** -** TODO(shess) That said, it would be nice to have a better query-side -** argument for MERGE_COUNT of 16. Also, it is possible/likely that -** optimizations to things like doclist merging will swing the sweet -** spot around. +** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and +** a NULL pointer is returned). Otherwise, if an OOM error is encountered +** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If +** no error occurs, *pRc is left unmodified. +*/ +static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){ + char *zRet = 0; + char *zFree = 0; + char *zFunction; + int i; + + if( !zFunc ){ + zFunction = ""; + }else{ + zFree = zFunction = fts3QuoteId(zFunc); + } + fts3Appendf(pRc, &zRet, "?"); + for(i=0; inColumn; i++){ + fts3Appendf(pRc, &zRet, ",%s(?)", zFunction); + } + if( p->zLanguageid ){ + fts3Appendf(pRc, &zRet, ", ?"); + } + sqlite3_free(zFree); + return zRet; +} + +/* +** This function interprets the string at (*pp) as a non-negative integer +** value. It reads the integer and sets *pnOut to the value read, then +** sets *pp to point to the byte immediately following the last byte of +** the integer value. ** +** Only decimal digits ('0'..'9') may be part of an integer value. ** +** If *pp does not being with a decimal digit SQLITE_ERROR is returned and +** the output value undefined. Otherwise SQLITE_OK is returned. ** -**** Handling of deletions and updates **** -** Since we're using a segmented structure, with no docid-oriented -** index into the term index, we clearly cannot simply update the term -** index when a document is deleted or updated. For deletions, we -** write an empty doclist (varint(docid) varint(POS_END)), for updates -** we simply write the new doclist. Segment merges overwrite older -** data for a particular docid with newer data, so deletes or updates -** will eventually overtake the earlier data and knock it out. The -** query logic likewise merges doclists so that newer data knocks out -** older data. +** This function is used when parsing the "prefix=" FTS4 parameter. */ +static int fts3GobbleInt(const char **pp, int *pnOut){ + const int MAX_NPREFIX = 10000000; + const char *p; /* Iterator pointer */ + int nInt = 0; /* Output value */ + + for(p=*pp; p[0]>='0' && p[0]<='9'; p++){ + nInt = nInt * 10 + (p[0] - '0'); + if( nInt>MAX_NPREFIX ){ + nInt = 0; + break; + } + } + if( p==*pp ) return SQLITE_ERROR; + *pnOut = nInt; + *pp = p; + return SQLITE_OK; +} -/************** Include fts3Int.h in the middle of fts3.c ********************/ -/************** Begin file fts3Int.h *****************************************/ /* -** 2009 Nov 12 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: +** This function is called to allocate an array of Fts3Index structures +** representing the indexes maintained by the current FTS table. FTS tables +** always maintain the main "terms" index, but may also maintain one or +** more "prefix" indexes, depending on the value of the "prefix=" parameter +** (if any) specified as part of the CREATE VIRTUAL TABLE statement. ** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. +** Argument zParam is passed the value of the "prefix=" option if one was +** specified, or NULL otherwise. ** -****************************************************************************** +** If no error occurs, SQLITE_OK is returned and *apIndex set to point to +** the allocated array. *pnIndex is set to the number of elements in the +** array. If an error does occur, an SQLite error code is returned. ** +** Regardless of whether or not an error is returned, it is the responsibility +** of the caller to call sqlite3_free() on the output array to free it. */ -#ifndef _FTSINT_H -#define _FTSINT_H +static int fts3PrefixParameter( + const char *zParam, /* ABC in prefix=ABC parameter to parse */ + int *pnIndex, /* OUT: size of *apIndex[] array */ + struct Fts3Index **apIndex /* OUT: Array of indexes for this table */ +){ + struct Fts3Index *aIndex; /* Allocated array */ + int nIndex = 1; /* Number of entries in array */ -#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) -# define NDEBUG 1 -#endif + if( zParam && zParam[0] ){ + const char *p; + nIndex++; + for(p=zParam; *p; p++){ + if( *p==',' ) nIndex++; + } + } -/* -** FTS4 is really an extension for FTS3. It is enabled using the -** SQLITE_ENABLE_FTS3 macro. But to avoid confusion we also all -** the SQLITE_ENABLE_FTS4 macro to serve as an alisse for SQLITE_ENABLE_FTS3. -*/ -#if defined(SQLITE_ENABLE_FTS4) && !defined(SQLITE_ENABLE_FTS3) -# define SQLITE_ENABLE_FTS3 -#endif + aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex); + *apIndex = aIndex; + if( !aIndex ){ + return SQLITE_NOMEM; + } -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + memset(aIndex, 0, sizeof(struct Fts3Index) * nIndex); + if( zParam ){ + const char *p = zParam; + int i; + for(i=1; i=0 ); + if( nPrefix==0 ){ + nIndex--; + i--; + }else{ + aIndex[i].nPrefix = nPrefix; + } + p++; + } + } -/* If not building as part of the core, include sqlite3ext.h. */ -#ifndef SQLITE_CORE -SQLITE_EXTENSION_INIT3 -#endif + *pnIndex = nIndex; + return SQLITE_OK; +} -/************** Include fts3_tokenizer.h in the middle of fts3Int.h **********/ -/************** Begin file fts3_tokenizer.h **********************************/ /* -** 2006 July 10 +** This function is called when initializing an FTS4 table that uses the +** content=xxx option. It determines the number of and names of the columns +** of the new FTS4 table. ** -** The author disclaims copyright to this source code. +** The third argument passed to this function is the value passed to the +** config=xxx option (i.e. "xxx"). This function queries the database for +** a table of that name. If found, the output variables are populated +** as follows: ** -************************************************************************* -** Defines the interface to tokenizers used by fulltext-search. There -** are three basic components: +** *pnCol: Set to the number of columns table xxx has, ** -** sqlite3_tokenizer_module is a singleton defining the tokenizer -** interface functions. This is essentially the class structure for -** tokenizers. +** *pnStr: Set to the total amount of space required to store a copy +** of each columns name, including the nul-terminator. ** -** sqlite3_tokenizer is used to define a particular tokenizer, perhaps -** including customization information defined at creation time. +** *pazCol: Set to point to an array of *pnCol strings. Each string is +** the name of the corresponding column in table xxx. The array +** and its contents are allocated using a single allocation. It +** is the responsibility of the caller to free this allocation +** by eventually passing the *pazCol value to sqlite3_free(). ** -** sqlite3_tokenizer_cursor is generated by a tokenizer to generate -** tokens from a particular input. +** If the table cannot be found, an error code is returned and the output +** variables are undefined. Or, if an OOM is encountered, SQLITE_NOMEM is +** returned (and the output variables are undefined). */ -#ifndef _FTS3_TOKENIZER_H_ -#define _FTS3_TOKENIZER_H_ +static int fts3ContentColumns( + sqlite3 *db, /* Database handle */ + const char *zDb, /* Name of db (i.e. "main", "temp" etc.) */ + const char *zTbl, /* Name of content table */ + const char ***pazCol, /* OUT: Malloc'd array of column names */ + int *pnCol, /* OUT: Size of array *pazCol */ + int *pnStr, /* OUT: Bytes of string content */ + char **pzErr /* OUT: error message */ +){ + int rc = SQLITE_OK; /* Return code */ + char *zSql; /* "SELECT *" statement on zTbl */ + sqlite3_stmt *pStmt = 0; /* Compiled version of zSql */ -/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time. -** If tokenizers are to be allowed to call sqlite3_*() functions, then -** we will need a way to register the API consistently. -*/ + zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zTbl); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); + if( rc!=SQLITE_OK ){ + sqlite3Fts3ErrMsg(pzErr, "%s", sqlite3_errmsg(db)); + } + } + sqlite3_free(zSql); + + if( rc==SQLITE_OK ){ + const char **azCol; /* Output array */ + int nStr = 0; /* Size of all column names (incl. 0x00) */ + int nCol; /* Number of table columns */ + int i; /* Used to iterate through columns */ + + /* Loop through the returned columns. Set nStr to the number of bytes of + ** space required to store a copy of each column name, including the + ** nul-terminator byte. */ + nCol = sqlite3_column_count(pStmt); + for(i=0; i module name ("fts3" or "fts4") +** argv[1] -> database name +** argv[2] -> table name +** argv[...] -> "column name" and other module argument fields. */ -typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; -typedef struct sqlite3_tokenizer sqlite3_tokenizer; -typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; +static int fts3InitVtab( + int isCreate, /* True for xCreate, false for xConnect */ + sqlite3 *db, /* The SQLite database connection */ + void *pAux, /* Hash table containing tokenizers */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ + char **pzErr /* Write any error message here */ +){ + Fts3Hash *pHash = (Fts3Hash *)pAux; + Fts3Table *p = 0; /* Pointer to allocated vtab */ + int rc = SQLITE_OK; /* Return code */ + int i; /* Iterator variable */ + int nByte; /* Size of allocation used for *p */ + int iCol; /* Column index */ + int nString = 0; /* Bytes required to hold all column names */ + int nCol = 0; /* Number of columns in the FTS table */ + char *zCsr; /* Space for holding column names */ + int nDb; /* Bytes required to hold database name */ + int nName; /* Bytes required to hold table name */ + int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */ + const char **aCol; /* Array of column names */ + sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */ -struct sqlite3_tokenizer_module { + int nIndex = 0; /* Size of aIndex[] array */ + struct Fts3Index *aIndex = 0; /* Array of indexes for this table */ - /* - ** Structure version. Should always be set to 0 or 1. - */ - int iVersion; + /* The results of parsing supported FTS4 key=value options: */ + int bNoDocsize = 0; /* True to omit %_docsize table */ + int bDescIdx = 0; /* True to store descending indexes */ + char *zPrefix = 0; /* Prefix parameter value (or NULL) */ + char *zCompress = 0; /* compress=? parameter (or NULL) */ + char *zUncompress = 0; /* uncompress=? parameter (or NULL) */ + char *zContent = 0; /* content=? parameter (or NULL) */ + char *zLanguageid = 0; /* languageid=? parameter (or NULL) */ + char **azNotindexed = 0; /* The set of notindexed= columns */ + int nNotindexed = 0; /* Size of azNotindexed[] array */ - /* - ** Create a new tokenizer. The values in the argv[] array are the - ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL - ** TABLE statement that created the fts3 table. For example, if - ** the following SQL is executed: - ** - ** CREATE .. USING fts3( ... , tokenizer arg1 arg2) - ** - ** then argc is set to 2, and the argv[] array contains pointers - ** to the strings "arg1" and "arg2". - ** - ** This method should return either SQLITE_OK (0), or an SQLite error - ** code. If SQLITE_OK is returned, then *ppTokenizer should be set - ** to point at the newly created tokenizer structure. The generic - ** sqlite3_tokenizer.pModule variable should not be initialized by - ** this callback. The caller will do so. - */ - int (*xCreate)( - int argc, /* Size of argv array */ - const char *const*argv, /* Tokenizer argument strings */ - sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ + assert( strlen(argv[0])==4 ); + assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4) + || (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4) ); - /* - ** Destroy an existing tokenizer. The fts3 module calls this method - ** exactly once for each successful call to xCreate(). - */ - int (*xDestroy)(sqlite3_tokenizer *pTokenizer); - - /* - ** Create a tokenizer cursor to tokenize an input buffer. The caller - ** is responsible for ensuring that the input buffer remains valid - ** until the cursor is closed (using the xClose() method). - */ - int (*xOpen)( - sqlite3_tokenizer *pTokenizer, /* Tokenizer object */ - const char *pInput, int nBytes, /* Input buffer */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */ - ); + nDb = (int)strlen(argv[1]) + 1; + nName = (int)strlen(argv[2]) + 1; - /* - ** Destroy an existing tokenizer cursor. The fts3 module calls this - ** method exactly once for each successful call to xOpen(). - */ - int (*xClose)(sqlite3_tokenizer_cursor *pCursor); + nByte = sizeof(const char *) * (argc-2); + aCol = (const char **)sqlite3_malloc(nByte); + if( aCol ){ + memset((void*)aCol, 0, nByte); + azNotindexed = (char **)sqlite3_malloc(nByte); + } + if( azNotindexed ){ + memset(azNotindexed, 0, nByte); + } + if( !aCol || !azNotindexed ){ + rc = SQLITE_NOMEM; + goto fts3_init_out; + } - /* - ** Retrieve the next token from the tokenizer cursor pCursor. This - ** method should either return SQLITE_OK and set the values of the - ** "OUT" variables identified below, or SQLITE_DONE to indicate that - ** the end of the buffer has been reached, or an SQLite error code. + /* Loop through all of the arguments passed by the user to the FTS3/4 + ** module (i.e. all the column names and special arguments). This loop + ** does the following: ** - ** *ppToken should be set to point at a buffer containing the - ** normalized version of the token (i.e. after any case-folding and/or - ** stemming has been performed). *pnBytes should be set to the length - ** of this buffer in bytes. The input text that generated the token is - ** identified by the byte offsets returned in *piStartOffset and - ** *piEndOffset. *piStartOffset should be set to the index of the first - ** byte of the token in the input buffer. *piEndOffset should be set - ** to the index of the first byte just past the end of the token in - ** the input buffer. + ** + Figures out the number of columns the FTSX table will have, and + ** the number of bytes of space that must be allocated to store copies + ** of the column names. ** - ** The buffer *ppToken is set to point at is managed by the tokenizer - ** implementation. It is only required to be valid until the next call - ** to xNext() or xClose(). - */ - /* TODO(shess) current implementation requires pInput to be - ** nul-terminated. This should either be fixed, or pInput/nBytes - ** should be converted to zInput. - */ - int (*xNext)( - sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */ - const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */ - int *piStartOffset, /* OUT: Byte offset of token in input buffer */ - int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */ - int *piPosition /* OUT: Number of tokens returned before this one */ - ); - - /*********************************************************************** - ** Methods below this point are only available if iVersion>=1. - */ - - /* - ** Configure the language id of a tokenizer cursor. + ** + If there is a tokenizer specification included in the arguments, + ** initializes the tokenizer pTokenizer. */ - int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid); -}; - -struct sqlite3_tokenizer { - const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */ - /* Tokenizer implementations will typically add additional fields */ -}; - -struct sqlite3_tokenizer_cursor { - sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */ - /* Tokenizer implementations will typically add additional fields */ -}; + for(i=3; rc==SQLITE_OK && i8 + && 0==sqlite3_strnicmp(z, "tokenize", 8) + && 0==sqlite3Fts3IsIdChar(z[8]) + ){ + rc = sqlite3Fts3InitTokenizer(pHash, &z[9], &pTokenizer, pzErr); + } + /* Check if it is an FTS4 special argument. */ + else if( isFts4 && fts3IsSpecialColumn(z, &nKey, &zVal) ){ + struct Fts4Option { + const char *zOpt; + int nOpt; + } aFts4Opt[] = { + { "matchinfo", 9 }, /* 0 -> MATCHINFO */ + { "prefix", 6 }, /* 1 -> PREFIX */ + { "compress", 8 }, /* 2 -> COMPRESS */ + { "uncompress", 10 }, /* 3 -> UNCOMPRESS */ + { "order", 5 }, /* 4 -> ORDER */ + { "content", 7 }, /* 5 -> CONTENT */ + { "languageid", 10 }, /* 6 -> LANGUAGEID */ + { "notindexed", 10 } /* 7 -> NOTINDEXED */ + }; -#endif /* _FTS3_TOKENIZER_H_ */ + int iOpt; + if( !zVal ){ + rc = SQLITE_NOMEM; + }else{ + for(iOpt=0; iOptnOpt && !sqlite3_strnicmp(z, pOp->zOpt, pOp->nOpt) ){ + break; + } + } + if( iOpt==SizeofArray(aFts4Opt) ){ + sqlite3Fts3ErrMsg(pzErr, "unrecognized parameter: %s", z); + rc = SQLITE_ERROR; + }else{ + switch( iOpt ){ + case 0: /* MATCHINFO */ + if( strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "fts3", 4) ){ + sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo: %s", zVal); + rc = SQLITE_ERROR; + } + bNoDocsize = 1; + break; -/************** End of fts3_tokenizer.h **************************************/ -/************** Continuing where we left off in fts3Int.h ********************/ -/************** Include fts3_hash.h in the middle of fts3Int.h ***************/ -/************** Begin file fts3_hash.h ***************************************/ -/* -** 2001 September 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This is the header file for the generic hash-table implementation -** used in SQLite. We've modified it slightly to serve as a standalone -** hash table implementation for the full-text indexing module. -** -*/ -#ifndef _FTS3_HASH_H_ -#define _FTS3_HASH_H_ + case 1: /* PREFIX */ + sqlite3_free(zPrefix); + zPrefix = zVal; + zVal = 0; + break; -/* Forward declarations of structures. */ -typedef struct Fts3Hash Fts3Hash; -typedef struct Fts3HashElem Fts3HashElem; + case 2: /* COMPRESS */ + sqlite3_free(zCompress); + zCompress = zVal; + zVal = 0; + break; -/* A complete hash table is an instance of the following structure. -** The internals of this structure are intended to be opaque -- client -** code should not attempt to access or modify the fields of this structure -** directly. Change this structure only by using the routines below. -** However, many of the "procedures" and "functions" for modifying and -** accessing this structure are really macros, so we can't really make -** this structure opaque. -*/ -struct Fts3Hash { - char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */ - char copyKey; /* True if copy of key made on insert */ - int count; /* Number of entries in this table */ - Fts3HashElem *first; /* The first element of the array */ - int htsize; /* Number of buckets in the hash table */ - struct _fts3ht { /* the hash table */ - int count; /* Number of entries with this hash */ - Fts3HashElem *chain; /* Pointer to first entry with this hash */ - } *ht; -}; + case 3: /* UNCOMPRESS */ + sqlite3_free(zUncompress); + zUncompress = zVal; + zVal = 0; + break; -/* Each element in the hash table is an instance of the following -** structure. All elements are stored on a single doubly-linked list. -** -** Again, this structure is intended to be opaque, but it can't really -** be opaque because it is used by macros. -*/ -struct Fts3HashElem { - Fts3HashElem *next, *prev; /* Next and previous elements in the table */ - void *data; /* Data associated with this element */ - void *pKey; int nKey; /* Key associated with this element */ -}; + case 4: /* ORDER */ + if( (strlen(zVal)!=3 || sqlite3_strnicmp(zVal, "asc", 3)) + && (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 4)) + ){ + sqlite3Fts3ErrMsg(pzErr, "unrecognized order: %s", zVal); + rc = SQLITE_ERROR; + } + bDescIdx = (zVal[0]=='d' || zVal[0]=='D'); + break; -/* -** There are 2 different modes of operation for a hash table: -** -** FTS3_HASH_STRING pKey points to a string that is nKey bytes long -** (including the null-terminator, if any). Case -** is respected in comparisons. -** -** FTS3_HASH_BINARY pKey points to binary data nKey bytes long. -** memcmp() is used to compare keys. -** -** A copy of the key is made if the copyKey parameter to fts3HashInit is 1. -*/ -#define FTS3_HASH_STRING 1 -#define FTS3_HASH_BINARY 2 + case 5: /* CONTENT */ + sqlite3_free(zContent); + zContent = zVal; + zVal = 0; + break; -/* -** Access routines. To delete, insert a NULL pointer. -*/ -SQLITE_PRIVATE void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey); -SQLITE_PRIVATE void *sqlite3Fts3HashInsert(Fts3Hash*, const void *pKey, int nKey, void *pData); -SQLITE_PRIVATE void *sqlite3Fts3HashFind(const Fts3Hash*, const void *pKey, int nKey); -SQLITE_PRIVATE void sqlite3Fts3HashClear(Fts3Hash*); -SQLITE_PRIVATE Fts3HashElem *sqlite3Fts3HashFindElem(const Fts3Hash *, const void *, int); + case 6: /* LANGUAGEID */ + assert( iOpt==6 ); + sqlite3_free(zLanguageid); + zLanguageid = zVal; + zVal = 0; + break; -/* -** Shorthand for the functions above -*/ -#define fts3HashInit sqlite3Fts3HashInit -#define fts3HashInsert sqlite3Fts3HashInsert -#define fts3HashFind sqlite3Fts3HashFind -#define fts3HashClear sqlite3Fts3HashClear -#define fts3HashFindElem sqlite3Fts3HashFindElem + case 7: /* NOTINDEXED */ + azNotindexed[nNotindexed++] = zVal; + zVal = 0; + break; + } + } + sqlite3_free(zVal); + } + } -/* -** Macros for looping over all elements of a hash table. The idiom is -** like this: -** -** Fts3Hash h; -** Fts3HashElem *p; -** ... -** for(p=fts3HashFirst(&h); p; p=fts3HashNext(p)){ -** SomeStructure *pData = fts3HashData(p); -** // do something with pData -** } -*/ -#define fts3HashFirst(H) ((H)->first) -#define fts3HashNext(E) ((E)->next) -#define fts3HashData(E) ((E)->data) -#define fts3HashKey(E) ((E)->pKey) -#define fts3HashKeysize(E) ((E)->nKey) + /* Otherwise, the argument is a column name. */ + else { + nString += (int)(strlen(z) + 1); + aCol[nCol++] = z; + } + } -/* -** Number of entries in a hash table -*/ -#define fts3HashCount(H) ((H)->count) + /* If a content=xxx option was specified, the following: + ** + ** 1. Ignore any compress= and uncompress= options. + ** + ** 2. If no column names were specified as part of the CREATE VIRTUAL + ** TABLE statement, use all columns from the content table. + */ + if( rc==SQLITE_OK && zContent ){ + sqlite3_free(zCompress); + sqlite3_free(zUncompress); + zCompress = 0; + zUncompress = 0; + if( nCol==0 ){ + sqlite3_free((void*)aCol); + aCol = 0; + rc = fts3ContentColumns(db, argv[1], zContent,&aCol,&nCol,&nString,pzErr); -#endif /* _FTS3_HASH_H_ */ + /* If a languageid= option was specified, remove the language id + ** column from the aCol[] array. */ + if( rc==SQLITE_OK && zLanguageid ){ + int j; + for(j=0; jdb = db; + p->nColumn = nCol; + p->nPendingData = 0; + p->azColumn = (char **)&p[1]; + p->pTokenizer = pTokenizer; + p->nMaxPendingData = FTS3_MAX_PENDING_DATA; + p->bHasDocsize = (isFts4 && bNoDocsize==0); + p->bHasStat = isFts4; + p->bFts4 = isFts4; + p->bDescIdx = bDescIdx; + p->nAutoincrmerge = 0xff; /* 0xff means setting unknown */ + p->zContentTbl = zContent; + p->zLanguageid = zLanguageid; + zContent = 0; + zLanguageid = 0; + TESTONLY( p->inTransaction = -1 ); + TESTONLY( p->mxSavepoint = -1 ); -/* -** This is the maximum amount of data (in bytes) to store in the -** Fts3Table.pendingTerms hash table. Normally, the hash table is -** populated as documents are inserted/updated/deleted in a transaction -** and used to create a new segment when the transaction is committed. -** However if this limit is reached midway through a transaction, a new -** segment is created and the hash table cleared immediately. -*/ -#define FTS3_MAX_PENDING_DATA (1*1024*1024) + p->aIndex = (struct Fts3Index *)&p->azColumn[nCol]; + memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex); + p->nIndex = nIndex; + for(i=0; iaIndex[i].hPending, FTS3_HASH_STRING, 1); + } + p->abNotindexed = (u8 *)&p->aIndex[nIndex]; -/* -** Macro to return the number of elements in an array. SQLite has a -** similar macro called ArraySize(). Use a different name to avoid -** a collision when building an amalgamation with built-in FTS3. -*/ -#define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0]))) + /* Fill in the zName and zDb fields of the vtab structure. */ + zCsr = (char *)&p->abNotindexed[nCol]; + p->zName = zCsr; + memcpy(zCsr, argv[2], nName); + zCsr += nName; + p->zDb = zCsr; + memcpy(zCsr, argv[1], nDb); + zCsr += nDb; + /* Fill in the azColumn array */ + for(iCol=0; iColazColumn[iCol] = zCsr; + zCsr += n+1; + assert( zCsr <= &((char *)p)[nByte] ); + } -#ifndef MIN -# define MIN(x,y) ((x)<(y)?(x):(y)) -#endif -#ifndef MAX -# define MAX(x,y) ((x)>(y)?(x):(y)) -#endif + /* Fill in the abNotindexed array */ + for(iCol=0; iColazColumn[iCol]); + for(i=0; iazColumn[iCol], zNot, n) + ){ + p->abNotindexed[iCol] = 1; + sqlite3_free(zNot); + azNotindexed[i] = 0; + } + } + } + for(i=0; izReadExprlist = fts3ReadExprList(p, zUncompress, &rc); + p->zWriteExprlist = fts3WriteExprList(p, zCompress, &rc); + if( rc!=SQLITE_OK ) goto fts3_init_out; -/* -** FTS4 virtual tables may maintain multiple indexes - one index of all terms -** in the document set and zero or more prefix indexes. All indexes are stored -** as one or more b+-trees in the %_segments and %_segdir tables. -** -** It is possible to determine which index a b+-tree belongs to based on the -** value stored in the "%_segdir.level" column. Given this value L, the index -** that the b+-tree belongs to is (L<<10). In other words, all b+-trees with -** level values between 0 and 1023 (inclusive) belong to index 0, all levels -** between 1024 and 2047 to index 1, and so on. -** -** It is considered impossible for an index to use more than 1024 levels. In -** theory though this may happen, but only after at least -** (FTS3_MERGE_COUNT^1024) separate flushes of the pending-terms tables. -*/ -#define FTS3_SEGDIR_MAXLEVEL 1024 -#define FTS3_SEGDIR_MAXLEVEL_STR "1024" + /* If this is an xCreate call, create the underlying tables in the + ** database. TODO: For xConnect(), it could verify that said tables exist. + */ + if( isCreate ){ + rc = fts3CreateTables(p); + } -/* -** The testcase() macro is only used by the amalgamation. If undefined, -** make it a no-op. -*/ -#ifndef testcase -# define testcase(X) -#endif + /* Check to see if a legacy fts3 table has been "upgraded" by the + ** addition of a %_stat table so that it can use incremental merge. + */ + if( !isFts4 && !isCreate ){ + p->bHasStat = 2; + } -/* -** Terminator values for position-lists and column-lists. -*/ -#define POS_COLUMN (1) /* Column-list terminator */ -#define POS_END (0) /* Position-list terminator */ + /* Figure out the page-size for the database. This is required in order to + ** estimate the cost of loading large doclists from the database. */ + fts3DatabasePageSize(&rc, p); + p->nNodeSize = p->nPgsz-35; -/* -** This section provides definitions to allow the -** FTS3 extension to be compiled outside of the -** amalgamation. -*/ -#ifndef SQLITE_AMALGAMATION -/* -** Macros indicating that conditional expressions are always true or -** false. -*/ -#ifdef SQLITE_COVERAGE_TEST -# define ALWAYS(x) (1) -# define NEVER(X) (0) -#else -# define ALWAYS(x) (x) -# define NEVER(x) (x) -#endif + /* Declare the table schema to SQLite. */ + fts3DeclareVtab(&rc, p); -/* -** Internal types used by SQLite. -*/ -typedef unsigned char u8; /* 1-byte (or larger) unsigned integer */ -typedef short int i16; /* 2-byte (or larger) signed integer */ -typedef unsigned int u32; /* 4-byte unsigned integer */ -typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */ -typedef sqlite3_int64 i64; /* 8-byte signed integer */ +fts3_init_out: + sqlite3_free(zPrefix); + sqlite3_free(aIndex); + sqlite3_free(zCompress); + sqlite3_free(zUncompress); + sqlite3_free(zContent); + sqlite3_free(zLanguageid); + for(i=0; ipModule->xDestroy(pTokenizer); + } + }else{ + assert( p->pSegments==0 ); + *ppVTab = &p->base; + } + return rc; +} /* -** Macro used to suppress compiler warnings for unused parameters. +** The xConnect() and xCreate() methods for the virtual table. All the +** work is done in function fts3InitVtab(). */ -#define UNUSED_PARAMETER(x) (void)(x) +static int fts3ConnectMethod( + sqlite3 *db, /* Database connection */ + void *pAux, /* Pointer to tokenizer hash table */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ + char **pzErr /* OUT: sqlite3_malloc'd error message */ +){ + return fts3InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr); +} +static int fts3CreateMethod( + sqlite3 *db, /* Database connection */ + void *pAux, /* Pointer to tokenizer hash table */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ + char **pzErr /* OUT: sqlite3_malloc'd error message */ +){ + return fts3InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr); +} /* -** Activate assert() only if SQLITE_TEST is enabled. +** Set the pIdxInfo->estimatedRows variable to nRow. Unless this +** extension is currently being used by a version of SQLite too old to +** support estimatedRows. In that case this function is a no-op. */ -#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) -# define NDEBUG 1 +static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){ +#if SQLITE_VERSION_NUMBER>=3008002 + if( sqlite3_libversion_number()>=3008002 ){ + pIdxInfo->estimatedRows = nRow; + } #endif +} -/* -** The TESTONLY macro is used to enclose variable declarations or -** other bits of code that are needed to support the arguments -** within testcase() and assert() macros. +/* +** Implementation of the xBestIndex method for FTS3 tables. There +** are three possible strategies, in order of preference: +** +** 1. Direct lookup by rowid or docid. +** 2. Full-text search using a MATCH operator on a non-docid column. +** 3. Linear scan of %_content table. */ -#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) -# define TESTONLY(X) X -#else -# define TESTONLY(X) -#endif +static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ + Fts3Table *p = (Fts3Table *)pVTab; + int i; /* Iterator variable */ + int iCons = -1; /* Index of constraint to use */ -#endif /* SQLITE_AMALGAMATION */ + int iLangidCons = -1; /* Index of langid=x constraint, if present */ + int iDocidGe = -1; /* Index of docid>=x constraint, if present */ + int iDocidLe = -1; /* Index of docid<=x constraint, if present */ + int iIdx; -#ifdef SQLITE_DEBUG -SQLITE_PRIVATE int sqlite3Fts3Corrupt(void); -# define FTS_CORRUPT_VTAB sqlite3Fts3Corrupt() -#else -# define FTS_CORRUPT_VTAB SQLITE_CORRUPT_VTAB -#endif + /* By default use a full table scan. This is an expensive option, + ** so search through the constraints to see if a more efficient + ** strategy is possible. + */ + pInfo->idxNum = FTS3_FULLSCAN_SEARCH; + pInfo->estimatedCost = 5000000; + for(i=0; inConstraint; i++){ + int bDocid; /* True if this constraint is on docid */ + struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i]; + if( pCons->usable==0 ){ + if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ + /* There exists an unusable MATCH constraint. This means that if + ** the planner does elect to use the results of this call as part + ** of the overall query plan the user will see an "unable to use + ** function MATCH in the requested context" error. To discourage + ** this, return a very high cost here. */ + pInfo->idxNum = FTS3_FULLSCAN_SEARCH; + pInfo->estimatedCost = 1e50; + fts3SetEstimatedRows(pInfo, ((sqlite3_int64)1) << 50); + return SQLITE_OK; + } + continue; + } -typedef struct Fts3Table Fts3Table; -typedef struct Fts3Cursor Fts3Cursor; -typedef struct Fts3Expr Fts3Expr; -typedef struct Fts3Phrase Fts3Phrase; -typedef struct Fts3PhraseToken Fts3PhraseToken; + bDocid = (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1); -typedef struct Fts3Doclist Fts3Doclist; -typedef struct Fts3SegFilter Fts3SegFilter; -typedef struct Fts3DeferredToken Fts3DeferredToken; -typedef struct Fts3SegReader Fts3SegReader; -typedef struct Fts3MultiSegReader Fts3MultiSegReader; + /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */ + if( iCons<0 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && bDocid ){ + pInfo->idxNum = FTS3_DOCID_SEARCH; + pInfo->estimatedCost = 1.0; + iCons = i; + } -/* -** A connection to a fulltext index is an instance of the following -** structure. The xCreate and xConnect methods create an instance -** of this structure and xDestroy and xDisconnect free that instance. -** All other methods receive a pointer to the structure as one of their -** arguments. -*/ -struct Fts3Table { - sqlite3_vtab base; /* Base class used by SQLite core */ - sqlite3 *db; /* The database connection */ - const char *zDb; /* logical database name */ - const char *zName; /* virtual table name */ - int nColumn; /* number of named columns in virtual table */ - char **azColumn; /* column names. malloced */ - u8 *abNotindexed; /* True for 'notindexed' columns */ - sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ - char *zContentTbl; /* content=xxx option, or NULL */ - char *zLanguageid; /* languageid=xxx option, or NULL */ - int nAutoincrmerge; /* Value configured by 'automerge' */ - u32 nLeafAdd; /* Number of leaf blocks added this trans */ + /* A MATCH constraint. Use a full-text search. + ** + ** If there is more than one MATCH constraint available, use the first + ** one encountered. If there is both a MATCH constraint and a direct + ** rowid/docid lookup, prefer the MATCH strategy. This is done even + ** though the rowid/docid lookup is faster than a MATCH query, selecting + ** it would lead to an "unable to use function MATCH in the requested + ** context" error. + */ + if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH + && pCons->iColumn>=0 && pCons->iColumn<=p->nColumn + ){ + pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn; + pInfo->estimatedCost = 2.0; + iCons = i; + } - /* Precompiled statements used by the implementation. Each of these - ** statements is run and reset within a single virtual table API call. - */ - sqlite3_stmt *aStmt[40]; + /* Equality constraint on the langid column */ + if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ + && pCons->iColumn==p->nColumn + 2 + ){ + iLangidCons = i; + } - char *zReadExprlist; - char *zWriteExprlist; + if( bDocid ){ + switch( pCons->op ){ + case SQLITE_INDEX_CONSTRAINT_GE: + case SQLITE_INDEX_CONSTRAINT_GT: + iDocidGe = i; + break; - int nNodeSize; /* Soft limit for node size */ - u8 bFts4; /* True for FTS4, false for FTS3 */ - u8 bHasStat; /* True if %_stat table exists (2==unknown) */ - u8 bHasDocsize; /* True if %_docsize table exists */ - u8 bDescIdx; /* True if doclists are in reverse order */ - u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */ - int nPgsz; /* Page size for host database */ - char *zSegmentsTbl; /* Name of %_segments table */ - sqlite3_blob *pSegments; /* Blob handle open on %_segments table */ + case SQLITE_INDEX_CONSTRAINT_LE: + case SQLITE_INDEX_CONSTRAINT_LT: + iDocidLe = i; + break; + } + } + } - /* - ** The following array of hash tables is used to buffer pending index - ** updates during transactions. All pending updates buffered at any one - ** time must share a common language-id (see the FTS4 langid= feature). - ** The current language id is stored in variable iPrevLangid. - ** - ** A single FTS4 table may have multiple full-text indexes. For each index - ** there is an entry in the aIndex[] array. Index 0 is an index of all the - ** terms that appear in the document set. Each subsequent index in aIndex[] - ** is an index of prefixes of a specific length. - ** - ** Variable nPendingData contains an estimate the memory consumed by the - ** pending data structures, including hash table overhead, but not including - ** malloc overhead. When nPendingData exceeds nMaxPendingData, all hash - ** tables are flushed to disk. Variable iPrevDocid is the docid of the most - ** recently inserted record. - */ - int nIndex; /* Size of aIndex[] */ - struct Fts3Index { - int nPrefix; /* Prefix length (0 for main terms index) */ - Fts3Hash hPending; /* Pending terms table for this index */ - } *aIndex; - int nMaxPendingData; /* Max pending data before flush to disk */ - int nPendingData; /* Current bytes of pending data */ - sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */ - int iPrevLangid; /* Langid of recently inserted document */ + iIdx = 1; + if( iCons>=0 ){ + pInfo->aConstraintUsage[iCons].argvIndex = iIdx++; + pInfo->aConstraintUsage[iCons].omit = 1; + } + if( iLangidCons>=0 ){ + pInfo->idxNum |= FTS3_HAVE_LANGID; + pInfo->aConstraintUsage[iLangidCons].argvIndex = iIdx++; + } + if( iDocidGe>=0 ){ + pInfo->idxNum |= FTS3_HAVE_DOCID_GE; + pInfo->aConstraintUsage[iDocidGe].argvIndex = iIdx++; + } + if( iDocidLe>=0 ){ + pInfo->idxNum |= FTS3_HAVE_DOCID_LE; + pInfo->aConstraintUsage[iDocidLe].argvIndex = iIdx++; + } -#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) - /* State variables used for validating that the transaction control - ** methods of the virtual table are called at appropriate times. These - ** values do not contribute to FTS functionality; they are used for - ** verifying the operation of the SQLite core. + /* Regardless of the strategy selected, FTS can deliver rows in rowid (or + ** docid) order. Both ascending and descending are possible. */ - int inTransaction; /* True after xBegin but before xCommit/xRollback */ - int mxSavepoint; /* Largest valid xSavepoint integer */ -#endif - -#ifdef SQLITE_TEST - /* True to disable the incremental doclist optimization. This is controled - ** by special insert command 'test-no-incr-doclist'. */ - int bNoIncrDoclist; -#endif -}; - -/* -** When the core wants to read from the virtual table, it creates a -** virtual table cursor (an instance of the following structure) using -** the xOpen method. Cursors are destroyed using the xClose method. -*/ -struct Fts3Cursor { - sqlite3_vtab_cursor base; /* Base class used by SQLite core */ - i16 eSearch; /* Search strategy (see below) */ - u8 isEof; /* True if at End Of Results */ - u8 isRequireSeek; /* True if must seek pStmt to %_content row */ - sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ - Fts3Expr *pExpr; /* Parsed MATCH query string */ - int iLangid; /* Language being queried for */ - int nPhrase; /* Number of matchable phrases in query */ - Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ - sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ - char *pNextId; /* Pointer into the body of aDoclist */ - char *aDoclist; /* List of docids for full-text queries */ - int nDoclist; /* Size of buffer at aDoclist */ - u8 bDesc; /* True to sort in descending order */ - int eEvalmode; /* An FTS3_EVAL_XX constant */ - int nRowAvg; /* Average size of database rows, in pages */ - sqlite3_int64 nDoc; /* Documents in table */ - i64 iMinDocid; /* Minimum docid to return */ - i64 iMaxDocid; /* Maximum docid to return */ - int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ - u32 *aMatchinfo; /* Information about most recent match */ - int nMatchinfo; /* Number of elements in aMatchinfo[] */ - char *zMatchinfo; /* Matchinfo specification */ -}; - -#define FTS3_EVAL_FILTER 0 -#define FTS3_EVAL_NEXT 1 -#define FTS3_EVAL_MATCHINFO 2 + if( pInfo->nOrderBy==1 ){ + struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0]; + if( pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1 ){ + if( pOrder->desc ){ + pInfo->idxStr = "DESC"; + }else{ + pInfo->idxStr = "ASC"; + } + pInfo->orderByConsumed = 1; + } + } -/* -** The Fts3Cursor.eSearch member is always set to one of the following. -** Actualy, Fts3Cursor.eSearch can be greater than or equal to -** FTS3_FULLTEXT_SEARCH. If so, then Fts3Cursor.eSearch - 2 is the index -** of the column to be searched. For example, in -** -** CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d); -** SELECT docid FROM ex1 WHERE b MATCH 'one two three'; -** -** Because the LHS of the MATCH operator is 2nd column "b", -** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1. (+0 for a, -** +1 for b, +2 for c, +3 for d.) If the LHS of MATCH were "ex1" -** indicating that all columns should be searched, -** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4. -*/ -#define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */ -#define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */ -#define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */ + assert( p->pSegments==0 ); + return SQLITE_OK; +} /* -** The lower 16-bits of the sqlite3_index_info.idxNum value set by -** the xBestIndex() method contains the Fts3Cursor.eSearch value described -** above. The upper 16-bits contain a combination of the following -** bits, used to describe extra constraints on full-text searches. +** Implementation of xOpen method. */ -#define FTS3_HAVE_LANGID 0x00010000 /* languageid=? */ -#define FTS3_HAVE_DOCID_GE 0x00020000 /* docid>=? */ -#define FTS3_HAVE_DOCID_LE 0x00040000 /* docid<=? */ +static int fts3OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ + sqlite3_vtab_cursor *pCsr; /* Allocated cursor */ -struct Fts3Doclist { - char *aAll; /* Array containing doclist (or NULL) */ - int nAll; /* Size of a[] in bytes */ - char *pNextDocid; /* Pointer to next docid */ + UNUSED_PARAMETER(pVTab); - sqlite3_int64 iDocid; /* Current docid (if pList!=0) */ - int bFreeList; /* True if pList should be sqlite3_free()d */ - char *pList; /* Pointer to position list following iDocid */ - int nList; /* Length of position list */ -}; + /* Allocate a buffer large enough for an Fts3Cursor structure. If the + ** allocation succeeds, zero it and return SQLITE_OK. Otherwise, + ** if the allocation fails, return SQLITE_NOMEM. + */ + *ppCsr = pCsr = (sqlite3_vtab_cursor *)sqlite3_malloc(sizeof(Fts3Cursor)); + if( !pCsr ){ + return SQLITE_NOMEM; + } + memset(pCsr, 0, sizeof(Fts3Cursor)); + return SQLITE_OK; +} /* -** A "phrase" is a sequence of one or more tokens that must match in -** sequence. A single token is the base case and the most common case. -** For a sequence of tokens contained in double-quotes (i.e. "one two three") -** nToken will be the number of tokens in the string. +** Close the cursor. For additional information see the documentation +** on the xClose method of the virtual table interface. */ -struct Fts3PhraseToken { - char *z; /* Text of the token */ - int n; /* Number of bytes in buffer z */ - int isPrefix; /* True if token ends with a "*" character */ - int bFirst; /* True if token must appear at position 0 */ - - /* Variables above this point are populated when the expression is - ** parsed (by code in fts3_expr.c). Below this point the variables are - ** used when evaluating the expression. */ - Fts3DeferredToken *pDeferred; /* Deferred token object for this token */ - Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */ -}; - -struct Fts3Phrase { - /* Cache of doclist for this phrase. */ - Fts3Doclist doclist; - int bIncr; /* True if doclist is loaded incrementally */ - int iDoclistToken; - - /* Used by sqlite3Fts3EvalPhrasePoslist() if this is a descendent of an - ** OR condition. */ - char *pOrPoslist; - i64 iOrDocid; - - /* Variables below this point are populated by fts3_expr.c when parsing - ** a MATCH expression. Everything above is part of the evaluation phase. - */ - int nToken; /* Number of tokens in the phrase */ - int iColumn; /* Index of column this phrase must match */ - Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */ -}; +static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){ + Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; + assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); + sqlite3_finalize(pCsr->pStmt); + sqlite3Fts3ExprFree(pCsr->pExpr); + sqlite3Fts3FreeDeferredTokens(pCsr); + sqlite3_free(pCsr->aDoclist); + sqlite3Fts3MIBufferFree(pCsr->pMIBuffer); + assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); + sqlite3_free(pCsr); + return SQLITE_OK; +} /* -** A tree of these objects forms the RHS of a MATCH operator. -** -** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist -** points to a malloced buffer, size nDoclist bytes, containing the results -** of this phrase query in FTS3 doclist format. As usual, the initial -** "Length" field found in doclists stored on disk is omitted from this -** buffer. +** If pCsr->pStmt has not been prepared (i.e. if pCsr->pStmt==0), then +** compose and prepare an SQL statement of the form: ** -** Variable aMI is used only for FTSQUERY_NEAR nodes to store the global -** matchinfo data. If it is not NULL, it points to an array of size nCol*3, -** where nCol is the number of columns in the queried FTS table. The array -** is populated as follows: +** "SELECT FROM %_content WHERE rowid = ?" ** -** aMI[iCol*3 + 0] = Undefined -** aMI[iCol*3 + 1] = Number of occurrences -** aMI[iCol*3 + 2] = Number of rows containing at least one instance +** (or the equivalent for a content=xxx table) and set pCsr->pStmt to +** it. If an error occurs, return an SQLite error code. ** -** The aMI array is allocated using sqlite3_malloc(). It should be freed -** when the expression node is. +** Otherwise, set *ppStmt to point to pCsr->pStmt and return SQLITE_OK. */ -struct Fts3Expr { - int eType; /* One of the FTSQUERY_XXX values defined below */ - int nNear; /* Valid if eType==FTSQUERY_NEAR */ - Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */ - Fts3Expr *pLeft; /* Left operand */ - Fts3Expr *pRight; /* Right operand */ - Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ +static int fts3CursorSeekStmt(Fts3Cursor *pCsr, sqlite3_stmt **ppStmt){ + int rc = SQLITE_OK; + if( pCsr->pStmt==0 ){ + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + char *zSql; + zSql = sqlite3_mprintf("SELECT %s WHERE rowid = ?", p->zReadExprlist); + if( !zSql ) return SQLITE_NOMEM; + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); + sqlite3_free(zSql); + } + *ppStmt = pCsr->pStmt; + return rc; +} - /* The following are used by the fts3_eval.c module. */ - sqlite3_int64 iDocid; /* Current docid */ - u8 bEof; /* True this expression is at EOF already */ - u8 bStart; /* True if iDocid is valid */ - u8 bDeferred; /* True if this expression is entirely deferred */ +/* +** Position the pCsr->pStmt statement so that it is on the row +** of the %_content table that contains the last match. Return +** SQLITE_OK on success. +*/ +static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){ + int rc = SQLITE_OK; + if( pCsr->isRequireSeek ){ + sqlite3_stmt *pStmt = 0; - u32 *aMI; -}; + rc = fts3CursorSeekStmt(pCsr, &pStmt); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId); + pCsr->isRequireSeek = 0; + if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){ + return SQLITE_OK; + }else{ + rc = sqlite3_reset(pCsr->pStmt); + if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){ + /* If no row was found and no error has occurred, then the %_content + ** table is missing a row that is present in the full-text index. + ** The data structures are corrupt. */ + rc = FTS_CORRUPT_VTAB; + pCsr->isEof = 1; + } + } + } + } + + if( rc!=SQLITE_OK && pContext ){ + sqlite3_result_error_code(pContext, rc); + } + return rc; +} /* -** Candidate values for Fts3Query.eType. Note that the order of the first -** four values is in order of precedence when parsing expressions. For -** example, the following: +** This function is used to process a single interior node when searching +** a b-tree for a term or term prefix. The node data is passed to this +** function via the zNode/nNode parameters. The term to search for is +** passed in zTerm/nTerm. ** -** "a OR b AND c NOT d NEAR e" +** If piFirst is not NULL, then this function sets *piFirst to the blockid +** of the child node that heads the sub-tree that may contain the term. ** -** is equivalent to: +** If piLast is not NULL, then *piLast is set to the right-most child node +** that heads a sub-tree that may contain a term for which zTerm/nTerm is +** a prefix. ** -** "a OR (b AND (c NOT (d NEAR e)))" +** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK. */ -#define FTSQUERY_NEAR 1 -#define FTSQUERY_NOT 2 -#define FTSQUERY_AND 3 -#define FTSQUERY_OR 4 -#define FTSQUERY_PHRASE 5 - - -/* fts3_write.c */ -SQLITE_PRIVATE int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*); -SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *); -SQLITE_PRIVATE void sqlite3Fts3PendingTermsClear(Fts3Table *); -SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *); -SQLITE_PRIVATE int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64, - sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**); -SQLITE_PRIVATE int sqlite3Fts3SegReaderPending( - Fts3Table*,int,const char*,int,int,Fts3SegReader**); -SQLITE_PRIVATE void sqlite3Fts3SegReaderFree(Fts3SegReader *); -SQLITE_PRIVATE int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **); -SQLITE_PRIVATE int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*); - -SQLITE_PRIVATE int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **); -SQLITE_PRIVATE int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **); - -#ifndef SQLITE_DISABLE_FTS4_DEFERRED -SQLITE_PRIVATE void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *); -SQLITE_PRIVATE int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int); -SQLITE_PRIVATE int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *); -SQLITE_PRIVATE void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *); -SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *); -#else -# define sqlite3Fts3FreeDeferredTokens(x) -# define sqlite3Fts3DeferToken(x,y,z) SQLITE_OK -# define sqlite3Fts3CacheDeferredDoclists(x) SQLITE_OK -# define sqlite3Fts3FreeDeferredDoclists(x) -# define sqlite3Fts3DeferredTokenList(x,y,z) SQLITE_OK -#endif - -SQLITE_PRIVATE void sqlite3Fts3SegmentsClose(Fts3Table *); -SQLITE_PRIVATE int sqlite3Fts3MaxLevel(Fts3Table *, int *); - -/* Special values interpreted by sqlite3SegReaderCursor() */ -#define FTS3_SEGCURSOR_PENDING -1 -#define FTS3_SEGCURSOR_ALL -2 - -SQLITE_PRIVATE int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*); -SQLITE_PRIVATE int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *); -SQLITE_PRIVATE void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *); - -SQLITE_PRIVATE int sqlite3Fts3SegReaderCursor(Fts3Table *, - int, int, int, const char *, int, int, int, Fts3MultiSegReader *); - -/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ -#define FTS3_SEGMENT_REQUIRE_POS 0x00000001 -#define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 -#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004 -#define FTS3_SEGMENT_PREFIX 0x00000008 -#define FTS3_SEGMENT_SCAN 0x00000010 -#define FTS3_SEGMENT_FIRST 0x00000020 - -/* Type passed as 4th argument to SegmentReaderIterate() */ -struct Fts3SegFilter { - const char *zTerm; - int nTerm; - int iCol; - int flags; -}; - -struct Fts3MultiSegReader { - /* Used internally by sqlite3Fts3SegReaderXXX() calls */ - Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */ - int nSegment; /* Size of apSegment array */ - int nAdvance; /* How many seg-readers to advance */ - Fts3SegFilter *pFilter; /* Pointer to filter object */ - char *aBuffer; /* Buffer to merge doclists in */ - int nBuffer; /* Allocated size of aBuffer[] in bytes */ - - int iColFilter; /* If >=0, filter for this column */ - int bRestart; - - /* Used by fts3.c only. */ - int nCost; /* Cost of running iterator */ - int bLookup; /* True if a lookup of a single entry. */ - - /* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */ - char *zTerm; /* Pointer to term buffer */ - int nTerm; /* Size of zTerm in bytes */ - char *aDoclist; /* Pointer to doclist buffer */ - int nDoclist; /* Size of aDoclist[] in bytes */ -}; - -SQLITE_PRIVATE int sqlite3Fts3Incrmerge(Fts3Table*,int,int); - -#define fts3GetVarint32(p, piVal) ( \ - (*(u8*)(p)&0x80) ? sqlite3Fts3GetVarint32(p, piVal) : (*piVal=*(u8*)(p), 1) \ -) - -/* fts3.c */ -SQLITE_PRIVATE int sqlite3Fts3PutVarint(char *, sqlite3_int64); -SQLITE_PRIVATE int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); -SQLITE_PRIVATE int sqlite3Fts3GetVarint32(const char *, int *); -SQLITE_PRIVATE int sqlite3Fts3VarintLen(sqlite3_uint64); -SQLITE_PRIVATE void sqlite3Fts3Dequote(char *); -SQLITE_PRIVATE void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*); -SQLITE_PRIVATE int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *); -SQLITE_PRIVATE int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *); -SQLITE_PRIVATE void sqlite3Fts3CreateStatTable(int*, Fts3Table*); - -/* fts3_tokenizer.c */ -SQLITE_PRIVATE const char *sqlite3Fts3NextToken(const char *, int *); -SQLITE_PRIVATE int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *); -SQLITE_PRIVATE int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, - sqlite3_tokenizer **, char ** -); -SQLITE_PRIVATE int sqlite3Fts3IsIdChar(char); - -/* fts3_snippet.c */ -SQLITE_PRIVATE void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*); -SQLITE_PRIVATE void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *, - const char *, const char *, int, int -); -SQLITE_PRIVATE void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *); - -/* fts3_expr.c */ -SQLITE_PRIVATE int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int, - char **, int, int, int, const char *, int, Fts3Expr **, char ** -); -SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *); -#ifdef SQLITE_TEST -SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3 *db); -SQLITE_PRIVATE int sqlite3Fts3InitTerm(sqlite3 *db); -#endif - -SQLITE_PRIVATE int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int, - sqlite3_tokenizer_cursor ** -); - -/* fts3_aux.c */ -SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db); - -SQLITE_PRIVATE void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *); - -SQLITE_PRIVATE int sqlite3Fts3MsrIncrStart( - Fts3Table*, Fts3MultiSegReader*, int, const char*, int); -SQLITE_PRIVATE int sqlite3Fts3MsrIncrNext( - Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *); -SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **); -SQLITE_PRIVATE int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *); -SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr); +static int fts3ScanInteriorNode( + const char *zTerm, /* Term to select leaves for */ + int nTerm, /* Size of term zTerm in bytes */ + const char *zNode, /* Buffer containing segment interior node */ + int nNode, /* Size of buffer at zNode */ + sqlite3_int64 *piFirst, /* OUT: Selected child node */ + sqlite3_int64 *piLast /* OUT: Selected child node */ +){ + int rc = SQLITE_OK; /* Return code */ + const char *zCsr = zNode; /* Cursor to iterate through node */ + const char *zEnd = &zCsr[nNode];/* End of interior node buffer */ + char *zBuffer = 0; /* Buffer to load terms into */ + int nAlloc = 0; /* Size of allocated buffer */ + int isFirstTerm = 1; /* True when processing first term on page */ + sqlite3_int64 iChild; /* Block id of child node to descend to */ -/* fts3_tokenize_vtab.c */ -SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *); + /* Skip over the 'height' varint that occurs at the start of every + ** interior node. Then load the blockid of the left-child of the b-tree + ** node into variable iChild. + ** + ** Even if the data structure on disk is corrupted, this (reading two + ** varints from the buffer) does not risk an overread. If zNode is a + ** root node, then the buffer comes from a SELECT statement. SQLite does + ** not make this guarantee explicitly, but in practice there are always + ** either more than 20 bytes of allocated space following the nNode bytes of + ** contents, or two zero bytes. Or, if the node is read from the %_segments + ** table, then there are always 20 bytes of zeroed padding following the + ** nNode bytes of content (see sqlite3Fts3ReadBlock() for details). + */ + zCsr += sqlite3Fts3GetVarint(zCsr, &iChild); + zCsr += sqlite3Fts3GetVarint(zCsr, &iChild); + if( zCsr>zEnd ){ + return FTS_CORRUPT_VTAB; + } + + while( zCsrzEnd ){ + rc = FTS_CORRUPT_VTAB; + goto finish_scan; + } + if( nPrefix+nSuffix>nAlloc ){ + char *zNew; + nAlloc = (nPrefix+nSuffix) * 2; + zNew = (char *)sqlite3_realloc(zBuffer, nAlloc); + if( !zNew ){ + rc = SQLITE_NOMEM; + goto finish_scan; + } + zBuffer = zNew; + } + assert( zBuffer ); + memcpy(&zBuffer[nPrefix], zCsr, nSuffix); + nBuffer = nPrefix + nSuffix; + zCsr += nSuffix; -/* fts3_unicode2.c (functions generated by parsing unicode text files) */ -#ifndef SQLITE_DISABLE_FTS3_UNICODE -SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int, int); -SQLITE_PRIVATE int sqlite3FtsUnicodeIsalnum(int); -SQLITE_PRIVATE int sqlite3FtsUnicodeIsdiacritic(int); -#endif + /* Compare the term we are searching for with the term just loaded from + ** the interior node. If the specified term is greater than or equal + ** to the term from the interior node, then all terms on the sub-tree + ** headed by node iChild are smaller than zTerm. No need to search + ** iChild. + ** + ** If the interior node term is larger than the specified term, then + ** the tree headed by iChild may contain the specified term. + */ + cmp = memcmp(zTerm, zBuffer, (nBuffer>nTerm ? nTerm : nBuffer)); + if( piFirst && (cmp<0 || (cmp==0 && nBuffer>nTerm)) ){ + *piFirst = iChild; + piFirst = 0; + } -#endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */ -#endif /* _FTSINT_H */ + if( piLast && cmp<0 ){ + *piLast = iChild; + piLast = 0; + } -/************** End of fts3Int.h *********************************************/ -/************** Continuing where we left off in fts3.c ***********************/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + iChild++; + }; -#if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_CORE) -# define SQLITE_CORE 1 -#endif + if( piFirst ) *piFirst = iChild; + if( piLast ) *piLast = iChild; -/* #include */ -/* #include */ -/* #include */ -/* #include */ -/* #include */ -/* #include */ + finish_scan: + sqlite3_free(zBuffer); + return rc; +} -#ifndef SQLITE_CORE - SQLITE_EXTENSION_INIT1 -#endif -static int fts3EvalNext(Fts3Cursor *pCsr); -static int fts3EvalStart(Fts3Cursor *pCsr); -static int fts3TermSegReaderCursor( - Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **); +/* +** The buffer pointed to by argument zNode (size nNode bytes) contains an +** interior node of a b-tree segment. The zTerm buffer (size nTerm bytes) +** contains a term. This function searches the sub-tree headed by the zNode +** node for the range of leaf nodes that may contain the specified term +** or terms for which the specified term is a prefix. +** +** If piLeaf is not NULL, then *piLeaf is set to the blockid of the +** left-most leaf node in the tree that may contain the specified term. +** If piLeaf2 is not NULL, then *piLeaf2 is set to the blockid of the +** right-most leaf node that may contain a term for which the specified +** term is a prefix. +** +** It is possible that the range of returned leaf nodes does not contain +** the specified term or any terms for which it is a prefix. However, if the +** segment does contain any such terms, they are stored within the identified +** range. Because this function only inspects interior segment nodes (and +** never loads leaf nodes into memory), it is not possible to be sure. +** +** If an error occurs, an error code other than SQLITE_OK is returned. +*/ +static int fts3SelectLeaf( + Fts3Table *p, /* Virtual table handle */ + const char *zTerm, /* Term to select leaves for */ + int nTerm, /* Size of term zTerm in bytes */ + const char *zNode, /* Buffer containing segment interior node */ + int nNode, /* Size of buffer at zNode */ + sqlite3_int64 *piLeaf, /* Selected leaf node */ + sqlite3_int64 *piLeaf2 /* Selected leaf node */ +){ + int rc = SQLITE_OK; /* Return code */ + int iHeight; /* Height of this node in tree */ -/* -** Write a 64-bit variable-length integer to memory starting at p[0]. -** The length of data written will be between 1 and FTS3_VARINT_MAX bytes. -** The number of bytes written is returned. -*/ -SQLITE_PRIVATE int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){ - unsigned char *q = (unsigned char *) p; - sqlite_uint64 vu = v; - do{ - *q++ = (unsigned char) ((vu & 0x7f) | 0x80); - vu >>= 7; - }while( vu!=0 ); - q[-1] &= 0x7f; /* turn off high bit in final byte */ - assert( q - (unsigned char *)p <= FTS3_VARINT_MAX ); - return (int) (q - (unsigned char *)p); -} + assert( piLeaf || piLeaf2 ); -#define GETVARINT_STEP(v, ptr, shift, mask1, mask2, var, ret) \ - v = (v & mask1) | ( (*ptr++) << shift ); \ - if( (v & mask2)==0 ){ var = v; return ret; } -#define GETVARINT_INIT(v, ptr, shift, mask1, mask2, var, ret) \ - v = (*ptr++); \ - if( (v & mask2)==0 ){ var = v; return ret; } + fts3GetVarint32(zNode, &iHeight); + rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2); + assert( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) ); -/* -** Read a 64-bit variable-length integer from memory starting at p[0]. -** Return the number of bytes read, or 0 on error. -** The value is stored in *v. -*/ -SQLITE_PRIVATE int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){ - const char *pStart = p; - u32 a; - u64 b; - int shift; + if( rc==SQLITE_OK && iHeight>1 ){ + char *zBlob = 0; /* Blob read from %_segments table */ + int nBlob = 0; /* Size of zBlob in bytes */ - GETVARINT_INIT(a, p, 0, 0x00, 0x80, *v, 1); - GETVARINT_STEP(a, p, 7, 0x7F, 0x4000, *v, 2); - GETVARINT_STEP(a, p, 14, 0x3FFF, 0x200000, *v, 3); - GETVARINT_STEP(a, p, 21, 0x1FFFFF, 0x10000000, *v, 4); - b = (a & 0x0FFFFFFF ); + if( piLeaf && piLeaf2 && (*piLeaf!=*piLeaf2) ){ + rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob, 0); + if( rc==SQLITE_OK ){ + rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, 0); + } + sqlite3_free(zBlob); + piLeaf = 0; + zBlob = 0; + } - for(shift=28; shift<=63; shift+=7){ - u64 c = *p++; - b += (c&0x7F) << shift; - if( (c & 0x80)==0 ) break; + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3ReadBlock(p, piLeaf?*piLeaf:*piLeaf2, &zBlob, &nBlob, 0); + } + if( rc==SQLITE_OK ){ + rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, piLeaf2); + } + sqlite3_free(zBlob); } - *v = b; - return (int)(p - pStart); + + return rc; } /* -** Similar to sqlite3Fts3GetVarint(), except that the output is truncated to a -** 32-bit integer before it is returned. +** This function is used to create delta-encoded serialized lists of FTS3 +** varints. Each call to this function appends a single varint to a list. */ -SQLITE_PRIVATE int sqlite3Fts3GetVarint32(const char *p, int *pi){ - u32 a; - -#ifndef fts3GetVarint32 - GETVARINT_INIT(a, p, 0, 0x00, 0x80, *pi, 1); -#else - a = (*p++); - assert( a & 0x80 ); -#endif - - GETVARINT_STEP(a, p, 7, 0x7F, 0x4000, *pi, 2); - GETVARINT_STEP(a, p, 14, 0x3FFF, 0x200000, *pi, 3); - GETVARINT_STEP(a, p, 21, 0x1FFFFF, 0x10000000, *pi, 4); - a = (a & 0x0FFFFFFF ); - *pi = (int)(a | ((u32)(*p & 0x0F) << 28)); - return 5; +static void fts3PutDeltaVarint( + char **pp, /* IN/OUT: Output pointer */ + sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */ + sqlite3_int64 iVal /* Write this value to the list */ +){ + assert( iVal-*piPrev > 0 || (*piPrev==0 && iVal==0) ); + *pp += sqlite3Fts3PutVarint(*pp, iVal-*piPrev); + *piPrev = iVal; } /* -** Return the number of bytes required to encode v as a varint +** When this function is called, *ppPoslist is assumed to point to the +** start of a position-list. After it returns, *ppPoslist points to the +** first byte after the position-list. +** +** A position list is list of positions (delta encoded) and columns for +** a single document record of a doclist. So, in other words, this +** routine advances *ppPoslist so that it points to the next docid in +** the doclist, or to the first byte past the end of the doclist. +** +** If pp is not NULL, then the contents of the position list are copied +** to *pp. *pp is set to point to the first byte past the last byte copied +** before this function returns. */ -SQLITE_PRIVATE int sqlite3Fts3VarintLen(sqlite3_uint64 v){ - int i = 0; - do{ - i++; - v >>= 7; - }while( v!=0 ); - return i; +static void fts3PoslistCopy(char **pp, char **ppPoslist){ + char *pEnd = *ppPoslist; + char c = 0; + + /* The end of a position list is marked by a zero encoded as an FTS3 + ** varint. A single POS_END (0) byte. Except, if the 0 byte is preceded by + ** a byte with the 0x80 bit set, then it is not a varint 0, but the tail + ** of some other, multi-byte, value. + ** + ** The following while-loop moves pEnd to point to the first byte that is not + ** immediately preceded by a byte with the 0x80 bit set. Then increments + ** pEnd once more so that it points to the byte immediately following the + ** last byte in the position-list. + */ + while( *pEnd | c ){ + c = *pEnd++ & 0x80; + testcase( c!=0 && (*pEnd)==0 ); + } + pEnd++; /* Advance past the POS_END terminator byte */ + + if( pp ){ + int n = (int)(pEnd - *ppPoslist); + char *p = *pp; + memcpy(p, *ppPoslist, n); + p += n; + *pp = p; + } + *ppPoslist = pEnd; } /* -** Convert an SQL-style quoted string into a normal string by removing -** the quote characters. The conversion is done in-place. If the -** input does not begin with a quote character, then this routine -** is a no-op. +** When this function is called, *ppPoslist is assumed to point to the +** start of a column-list. After it returns, *ppPoslist points to the +** to the terminator (POS_COLUMN or POS_END) byte of the column-list. ** -** Examples: +** A column-list is list of delta-encoded positions for a single column +** within a single document within a doclist. ** -** "abc" becomes abc -** 'xyz' becomes xyz -** [pqr] becomes pqr -** `mno` becomes mno +** The column-list is terminated either by a POS_COLUMN varint (1) or +** a POS_END varint (0). This routine leaves *ppPoslist pointing to +** the POS_COLUMN or POS_END that terminates the column-list. ** +** If pp is not NULL, then the contents of the column-list are copied +** to *pp. *pp is set to point to the first byte past the last byte copied +** before this function returns. The POS_COLUMN or POS_END terminator +** is not copied into *pp. */ -SQLITE_PRIVATE void sqlite3Fts3Dequote(char *z){ - char quote; /* Quote character (if any ) */ - - quote = z[0]; - if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){ - int iIn = 1; /* Index of next byte to read from input */ - int iOut = 0; /* Index of next byte to write to output */ - - /* If the first byte was a '[', then the close-quote character is a ']' */ - if( quote=='[' ) quote = ']'; +static void fts3ColumnlistCopy(char **pp, char **ppPoslist){ + char *pEnd = *ppPoslist; + char c = 0; - while( ALWAYS(z[iIn]) ){ - if( z[iIn]==quote ){ - if( z[iIn+1]!=quote ) break; - z[iOut++] = quote; - iIn += 2; - }else{ - z[iOut++] = z[iIn++]; - } - } - z[iOut] = '\0'; + /* A column-list is terminated by either a 0x01 or 0x00 byte that is + ** not part of a multi-byte varint. + */ + while( 0xFE & (*pEnd | c) ){ + c = *pEnd++ & 0x80; + testcase( c!=0 && ((*pEnd)&0xfe)==0 ); + } + if( pp ){ + int n = (int)(pEnd - *ppPoslist); + char *p = *pp; + memcpy(p, *ppPoslist, n); + p += n; + *pp = p; } + *ppPoslist = pEnd; } /* -** Read a single varint from the doclist at *pp and advance *pp to point -** to the first byte past the end of the varint. Add the value of the varint -** to *pVal. +** Value used to signify the end of an position-list. This is safe because +** it is not possible to have a document with 2^31 terms. */ -static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){ - sqlite3_int64 iVal; - *pp += sqlite3Fts3GetVarint(*pp, &iVal); - *pVal += iVal; -} +#define POSITION_LIST_END 0x7fffffff /* -** When this function is called, *pp points to the first byte following a -** varint that is part of a doclist (or position-list, or any other list -** of varints). This function moves *pp to point to the start of that varint, -** and sets *pVal by the varint value. +** This function is used to help parse position-lists. When this function is +** called, *pp may point to the start of the next varint in the position-list +** being parsed, or it may point to 1 byte past the end of the position-list +** (in which case **pp will be a terminator bytes POS_END (0) or +** (1)). ** -** Argument pStart points to the first byte of the doclist that the -** varint is part of. +** If *pp points past the end of the current position-list, set *pi to +** POSITION_LIST_END and return. Otherwise, read the next varint from *pp, +** increment the current value of *pi by the value read, and set *pp to +** point to the next value before returning. +** +** Before calling this routine *pi must be initialized to the value of +** the previous position, or zero if we are reading the first position +** in the position-list. Because positions are delta-encoded, the value +** of the previous position is needed in order to compute the value of +** the next position. */ -static void fts3GetReverseVarint( - char **pp, - char *pStart, - sqlite3_int64 *pVal +static void fts3ReadNextPos( + char **pp, /* IN/OUT: Pointer into position-list buffer */ + sqlite3_int64 *pi /* IN/OUT: Value read from position-list */ ){ - sqlite3_int64 iVal; - char *p; - - /* Pointer p now points at the first byte past the varint we are - ** interested in. So, unless the doclist is corrupt, the 0x80 bit is - ** clear on character p[-1]. */ - for(p = (*pp)-2; p>=pStart && *p&0x80; p--); - p++; - *pp = p; + if( (**pp)&0xFE ){ + fts3GetDeltaVarint(pp, pi); + *pi -= 2; + }else{ + *pi = POSITION_LIST_END; + } +} - sqlite3Fts3GetVarint(p, &iVal); - *pVal = iVal; +/* +** If parameter iCol is not 0, write an POS_COLUMN (1) byte followed by +** the value of iCol encoded as a varint to *pp. This will start a new +** column list. +** +** Set *pp to point to the byte just after the last byte written before +** returning (do not modify it if iCol==0). Return the total number of bytes +** written (0 if iCol==0). +*/ +static int fts3PutColNumber(char **pp, int iCol){ + int n = 0; /* Number of bytes written */ + if( iCol ){ + char *p = *pp; /* Output pointer */ + n = 1 + sqlite3Fts3PutVarint(&p[1], iCol); + *p = 0x01; + *pp = &p[n]; + } + return n; } /* -** The xDisconnect() virtual table method. +** Compute the union of two position lists. The output written +** into *pp contains all positions of both *pp1 and *pp2 in sorted +** order and with any duplicates removed. All pointers are +** updated appropriately. The caller is responsible for insuring +** that there is enough space in *pp to hold the complete output. */ -static int fts3DisconnectMethod(sqlite3_vtab *pVtab){ - Fts3Table *p = (Fts3Table *)pVtab; - int i; +static void fts3PoslistMerge( + char **pp, /* Output buffer */ + char **pp1, /* Left input list */ + char **pp2 /* Right input list */ +){ + char *p = *pp; + char *p1 = *pp1; + char *p2 = *pp2; - assert( p->nPendingData==0 ); - assert( p->pSegments==0 ); + while( *p1 || *p2 ){ + int iCol1; /* The current column index in pp1 */ + int iCol2; /* The current column index in pp2 */ - /* Free any prepared statements held */ - for(i=0; iaStmt); i++){ - sqlite3_finalize(p->aStmt[i]); - } - sqlite3_free(p->zSegmentsTbl); - sqlite3_free(p->zReadExprlist); - sqlite3_free(p->zWriteExprlist); - sqlite3_free(p->zContentTbl); - sqlite3_free(p->zLanguageid); + if( *p1==POS_COLUMN ) fts3GetVarint32(&p1[1], &iCol1); + else if( *p1==POS_END ) iCol1 = POSITION_LIST_END; + else iCol1 = 0; - /* Invoke the tokenizer destructor to free the tokenizer. */ - p->pTokenizer->pModule->xDestroy(p->pTokenizer); + if( *p2==POS_COLUMN ) fts3GetVarint32(&p2[1], &iCol2); + else if( *p2==POS_END ) iCol2 = POSITION_LIST_END; + else iCol2 = 0; - sqlite3_free(p); - return SQLITE_OK; + if( iCol1==iCol2 ){ + sqlite3_int64 i1 = 0; /* Last position from pp1 */ + sqlite3_int64 i2 = 0; /* Last position from pp2 */ + sqlite3_int64 iPrev = 0; + int n = fts3PutColNumber(&p, iCol1); + p1 += n; + p2 += n; + + /* At this point, both p1 and p2 point to the start of column-lists + ** for the same column (the column with index iCol1 and iCol2). + ** A column-list is a list of non-negative delta-encoded varints, each + ** incremented by 2 before being stored. Each list is terminated by a + ** POS_END (0) or POS_COLUMN (1). The following block merges the two lists + ** and writes the results to buffer p. p is left pointing to the byte + ** after the list written. No terminator (POS_END or POS_COLUMN) is + ** written to the output. + */ + fts3GetDeltaVarint(&p1, &i1); + fts3GetDeltaVarint(&p2, &i2); + do { + fts3PutDeltaVarint(&p, &iPrev, (i1pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e. +** when the *pp1 token appears before the *pp2 token, but not more than nToken +** slots before it. +** +** e.g. nToken==1 searches for adjacent positions. */ -static void fts3DbExec( - int *pRc, /* Success code */ - sqlite3 *db, /* Database in which to run SQL */ - const char *zFormat, /* Format string for SQL */ - ... /* Arguments to the format string */ +static int fts3PoslistPhraseMerge( + char **pp, /* IN/OUT: Preallocated output buffer */ + int nToken, /* Maximum difference in token positions */ + int isSaveLeft, /* Save the left position */ + int isExact, /* If *pp1 is exactly nTokens before *pp2 */ + char **pp1, /* IN/OUT: Left input list */ + char **pp2 /* IN/OUT: Right input list */ ){ - va_list ap; - char *zSql; - if( *pRc ) return; - va_start(ap, zFormat); - zSql = sqlite3_vmprintf(zFormat, ap); - va_end(ap); - if( zSql==0 ){ - *pRc = SQLITE_NOMEM; - }else{ - *pRc = sqlite3_exec(db, zSql, 0, 0, 0); - sqlite3_free(zSql); - } -} + char *p = *pp; + char *p1 = *pp1; + char *p2 = *pp2; + int iCol1 = 0; + int iCol2 = 0; -/* -** The xDestroy() virtual table method. -*/ -static int fts3DestroyMethod(sqlite3_vtab *pVtab){ - Fts3Table *p = (Fts3Table *)pVtab; - int rc = SQLITE_OK; /* Return code */ - const char *zDb = p->zDb; /* Name of database (e.g. "main", "temp") */ - sqlite3 *db = p->db; /* Database handle */ + /* Never set both isSaveLeft and isExact for the same invocation. */ + assert( isSaveLeft==0 || isExact==0 ); - /* Drop the shadow tables */ - if( p->zContentTbl==0 ){ - fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", zDb, p->zName); + assert( p!=0 && *p1!=0 && *p2!=0 ); + if( *p1==POS_COLUMN ){ + p1++; + p1 += fts3GetVarint32(p1, &iCol1); + } + if( *p2==POS_COLUMN ){ + p2++; + p2 += fts3GetVarint32(p2, &iCol2); } - fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", zDb,p->zName); - fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", zDb, p->zName); - fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", zDb, p->zName); - fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", zDb, p->zName); - /* If everything has worked, invoke fts3DisconnectMethod() to free the - ** memory associated with the Fts3Table structure and return SQLITE_OK. - ** Otherwise, return an SQLite error code. - */ - return (rc==SQLITE_OK ? fts3DisconnectMethod(pVtab) : rc); -} + while( 1 ){ + if( iCol1==iCol2 ){ + char *pSave = p; + sqlite3_int64 iPrev = 0; + sqlite3_int64 iPos1 = 0; + sqlite3_int64 iPos2 = 0; + if( iCol1 ){ + *p++ = POS_COLUMN; + p += sqlite3Fts3PutVarint(p, iCol1); + } -/* -** Invoke sqlite3_declare_vtab() to declare the schema for the FTS3 table -** passed as the first argument. This is done as part of the xConnect() -** and xCreate() methods. -** -** If *pRc is non-zero when this function is called, it is a no-op. -** Otherwise, if an error occurs, an SQLite error code is stored in *pRc -** before returning. -*/ -static void fts3DeclareVtab(int *pRc, Fts3Table *p){ - if( *pRc==SQLITE_OK ){ - int i; /* Iterator variable */ - int rc; /* Return code */ - char *zSql; /* SQL statement passed to declare_vtab() */ - char *zCols; /* List of user defined columns */ - const char *zLanguageid; + assert( *p1!=POS_END && *p1!=POS_COLUMN ); + assert( *p2!=POS_END && *p2!=POS_COLUMN ); + fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2; + fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2; - zLanguageid = (p->zLanguageid ? p->zLanguageid : "__langid"); - sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); + while( 1 ){ + if( iPos2==iPos1+nToken + || (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken) + ){ + sqlite3_int64 iSave; + iSave = isSaveLeft ? iPos1 : iPos2; + fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2; + pSave = 0; + assert( p ); + } + if( (!isSaveLeft && iPos2<=(iPos1+nToken)) || iPos2<=iPos1 ){ + if( (*p2&0xFE)==0 ) break; + fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2; + }else{ + if( (*p1&0xFE)==0 ) break; + fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2; + } + } - /* Create a list of user columns for the virtual table */ - zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]); - for(i=1; zCols && inColumn; i++){ - zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]); + if( pSave ){ + assert( pp && p ); + p = pSave; + } + + fts3ColumnlistCopy(0, &p1); + fts3ColumnlistCopy(0, &p2); + assert( (*p1&0xFE)==0 && (*p2&0xFE)==0 ); + if( 0==*p1 || 0==*p2 ) break; + + p1++; + p1 += fts3GetVarint32(p1, &iCol1); + p2++; + p2 += fts3GetVarint32(p2, &iCol2); } - /* Create the whole "CREATE TABLE" statement to pass to SQLite */ - zSql = sqlite3_mprintf( - "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)", - zCols, p->zName, zLanguageid - ); - if( !zCols || !zSql ){ - rc = SQLITE_NOMEM; + /* Advance pointer p1 or p2 (whichever corresponds to the smaller of + ** iCol1 and iCol2) so that it points to either the 0x00 that marks the + ** end of the position list, or the 0x01 that precedes the next + ** column-number in the position list. + */ + else if( iCol1db, zSql); + fts3ColumnlistCopy(0, &p2); + if( 0==*p2 ) break; + p2++; + p2 += fts3GetVarint32(p2, &iCol2); } - - sqlite3_free(zSql); - sqlite3_free(zCols); - *pRc = rc; } -} -/* -** Create the %_stat table if it does not already exist. -*/ -SQLITE_PRIVATE void sqlite3Fts3CreateStatTable(int *pRc, Fts3Table *p){ - fts3DbExec(pRc, p->db, - "CREATE TABLE IF NOT EXISTS %Q.'%q_stat'" - "(id INTEGER PRIMARY KEY, value BLOB);", - p->zDb, p->zName - ); - if( (*pRc)==SQLITE_OK ) p->bHasStat = 1; + fts3PoslistCopy(0, &p2); + fts3PoslistCopy(0, &p1); + *pp1 = p1; + *pp2 = p2; + if( *pp==p ){ + return 0; + } + *p++ = 0x00; + *pp = p; + return 1; } /* -** Create the backing store tables (%_content, %_segments and %_segdir) -** required by the FTS3 table passed as the only argument. This is done -** as part of the vtab xCreate() method. +** Merge two position-lists as required by the NEAR operator. The argument +** position lists correspond to the left and right phrases of an expression +** like: ** -** If the p->bHasDocsize boolean is true (indicating that this is an -** FTS4 table, not an FTS3 table) then also create the %_docsize and -** %_stat tables required by FTS4. +** "phrase 1" NEAR "phrase number 2" +** +** Position list *pp1 corresponds to the left-hand side of the NEAR +** expression and *pp2 to the right. As usual, the indexes in the position +** lists are the offsets of the last token in each phrase (tokens "1" and "2" +** in the example above). +** +** The output position list - written to *pp - is a copy of *pp2 with those +** entries that are not sufficiently NEAR entries in *pp1 removed. */ -static int fts3CreateTables(Fts3Table *p){ - int rc = SQLITE_OK; /* Return code */ - int i; /* Iterator variable */ - sqlite3 *db = p->db; /* The database connection */ +static int fts3PoslistNearMerge( + char **pp, /* Output buffer */ + char *aTmp, /* Temporary buffer space */ + int nRight, /* Maximum difference in token positions */ + int nLeft, /* Maximum difference in token positions */ + char **pp1, /* IN/OUT: Left input list */ + char **pp2 /* IN/OUT: Right input list */ +){ + char *p1 = *pp1; + char *p2 = *pp2; - if( p->zContentTbl==0 ){ - const char *zLanguageid = p->zLanguageid; - char *zContentCols; /* Columns of %_content table */ + char *pTmp1 = aTmp; + char *pTmp2; + char *aTmp2; + int res = 1; - /* Create a list of user columns for the content table */ - zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY"); - for(i=0; zContentCols && inColumn; i++){ - char *z = p->azColumn[i]; - zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z); - } - if( zLanguageid && zContentCols ){ - zContentCols = sqlite3_mprintf("%z, langid", zContentCols, zLanguageid); - } - if( zContentCols==0 ) rc = SQLITE_NOMEM; - - /* Create the content table */ - fts3DbExec(&rc, db, - "CREATE TABLE %Q.'%q_content'(%s)", - p->zDb, p->zName, zContentCols - ); - sqlite3_free(zContentCols); + fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2); + aTmp2 = pTmp2 = pTmp1; + *pp1 = p1; + *pp2 = p2; + fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1); + if( pTmp1!=aTmp && pTmp2!=aTmp2 ){ + fts3PoslistMerge(pp, &aTmp, &aTmp2); + }else if( pTmp1!=aTmp ){ + fts3PoslistCopy(pp, &aTmp); + }else if( pTmp2!=aTmp2 ){ + fts3PoslistCopy(pp, &aTmp2); + }else{ + res = 0; } - /* Create other tables */ - fts3DbExec(&rc, db, - "CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);", - p->zDb, p->zName - ); - fts3DbExec(&rc, db, - "CREATE TABLE %Q.'%q_segdir'(" - "level INTEGER," - "idx INTEGER," - "start_block INTEGER," - "leaves_end_block INTEGER," - "end_block INTEGER," - "root BLOB," - "PRIMARY KEY(level, idx)" - ");", - p->zDb, p->zName - ); - if( p->bHasDocsize ){ - fts3DbExec(&rc, db, - "CREATE TABLE %Q.'%q_docsize'(docid INTEGER PRIMARY KEY, size BLOB);", - p->zDb, p->zName - ); - } - assert( p->bHasStat==p->bFts4 ); - if( p->bHasStat ){ - sqlite3Fts3CreateStatTable(&rc, p); - } - return rc; + return res; } +/* +** An instance of this function is used to merge together the (potentially +** large number of) doclists for each term that matches a prefix query. +** See function fts3TermSelectMerge() for details. +*/ +typedef struct TermSelect TermSelect; +struct TermSelect { + char *aaOutput[16]; /* Malloc'd output buffers */ + int anOutput[16]; /* Size each output buffer in bytes */ +}; + /* -** Store the current database page-size in bytes in p->nPgsz. +** This function is used to read a single varint from a buffer. Parameter +** pEnd points 1 byte past the end of the buffer. When this function is +** called, if *pp points to pEnd or greater, then the end of the buffer +** has been reached. In this case *pp is set to 0 and the function returns. ** -** If *pRc is non-zero when this function is called, it is a no-op. -** Otherwise, if an error occurs, an SQLite error code is stored in *pRc -** before returning. +** If *pp does not point to or past pEnd, then a single varint is read +** from *pp. *pp is then set to point 1 byte past the end of the read varint. +** +** If bDescIdx is false, the value read is added to *pVal before returning. +** If it is true, the value read is subtracted from *pVal before this +** function returns. */ -static void fts3DatabasePageSize(int *pRc, Fts3Table *p){ - if( *pRc==SQLITE_OK ){ - int rc; /* Return code */ - char *zSql; /* SQL text "PRAGMA %Q.page_size" */ - sqlite3_stmt *pStmt; /* Compiled "PRAGMA %Q.page_size" statement */ - - zSql = sqlite3_mprintf("PRAGMA %Q.page_size", p->zDb); - if( !zSql ){ - rc = SQLITE_NOMEM; +static void fts3GetDeltaVarint3( + char **pp, /* IN/OUT: Point to read varint from */ + char *pEnd, /* End of buffer */ + int bDescIdx, /* True if docids are descending */ + sqlite3_int64 *pVal /* IN/OUT: Integer value */ +){ + if( *pp>=pEnd ){ + *pp = 0; + }else{ + sqlite3_int64 iVal; + *pp += sqlite3Fts3GetVarint(*pp, &iVal); + if( bDescIdx ){ + *pVal -= iVal; }else{ - rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0); - if( rc==SQLITE_OK ){ - sqlite3_step(pStmt); - p->nPgsz = sqlite3_column_int(pStmt, 0); - rc = sqlite3_finalize(pStmt); - }else if( rc==SQLITE_AUTH ){ - p->nPgsz = 1024; - rc = SQLITE_OK; - } + *pVal += iVal; } - assert( p->nPgsz>0 || rc!=SQLITE_OK ); - sqlite3_free(zSql); - *pRc = rc; } } /* -** "Special" FTS4 arguments are column specifications of the following form: +** This function is used to write a single varint to a buffer. The varint +** is written to *pp. Before returning, *pp is set to point 1 byte past the +** end of the value written. ** -** = +** If *pbFirst is zero when this function is called, the value written to +** the buffer is that of parameter iVal. ** -** There may not be whitespace surrounding the "=" character. The -** term may be quoted, but the may not. +** If *pbFirst is non-zero when this function is called, then the value +** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal) +** (if bDescIdx is non-zero). +** +** Before returning, this function always sets *pbFirst to 1 and *piPrev +** to the value of parameter iVal. */ -static int fts3IsSpecialColumn( - const char *z, - int *pnKey, - char **pzValue +static void fts3PutDeltaVarint3( + char **pp, /* IN/OUT: Output pointer */ + int bDescIdx, /* True for descending docids */ + sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */ + int *pbFirst, /* IN/OUT: True after first int written */ + sqlite3_int64 iVal /* Write this value to the list */ ){ - char *zValue; - const char *zCsr = z; - - while( *zCsr!='=' ){ - if( *zCsr=='\0' ) return 0; - zCsr++; - } - - *pnKey = (int)(zCsr-z); - zValue = sqlite3_mprintf("%s", &zCsr[1]); - if( zValue ){ - sqlite3Fts3Dequote(zValue); + sqlite3_int64 iWrite; + if( bDescIdx==0 || *pbFirst==0 ){ + iWrite = iVal - *piPrev; + }else{ + iWrite = *piPrev - iVal; } - *pzValue = zValue; - return 1; + assert( *pbFirst || *piPrev==0 ); + assert( *pbFirst==0 || iWrite>0 ); + *pp += sqlite3Fts3PutVarint(*pp, iWrite); + *piPrev = iVal; + *pbFirst = 1; } + /* -** Append the output of a printf() style formatting to an existing string. +** This macro is used by various functions that merge doclists. The two +** arguments are 64-bit docid values. If the value of the stack variable +** bDescDoclist is 0 when this macro is invoked, then it returns (i1-i2). +** Otherwise, (i2-i1). +** +** Using this makes it easier to write code that can merge doclists that are +** sorted in either ascending or descending order. */ -static void fts3Appendf( - int *pRc, /* IN/OUT: Error code */ - char **pz, /* IN/OUT: Pointer to string buffer */ - const char *zFormat, /* Printf format string to append */ - ... /* Arguments for printf format string */ -){ - if( *pRc==SQLITE_OK ){ - va_list ap; - char *z; - va_start(ap, zFormat); - z = sqlite3_vmprintf(zFormat, ap); - va_end(ap); - if( z && *pz ){ - char *z2 = sqlite3_mprintf("%s%s", *pz, z); - sqlite3_free(z); - z = z2; - } - if( z==0 ) *pRc = SQLITE_NOMEM; - sqlite3_free(*pz); - *pz = z; - } -} +#define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1-i2)) /* -** Return a copy of input string zInput enclosed in double-quotes (") and -** with all double quote characters escaped. For example: +** This function does an "OR" merge of two doclists (output contains all +** positions contained in either argument doclist). If the docids in the +** input doclists are sorted in ascending order, parameter bDescDoclist +** should be false. If they are sorted in ascending order, it should be +** passed a non-zero value. ** -** fts3QuoteId("un \"zip\"") -> "un \"\"zip\"\"" +** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer +** containing the output doclist and SQLITE_OK is returned. In this case +** *pnOut is set to the number of bytes in the output doclist. ** -** The pointer returned points to memory obtained from sqlite3_malloc(). It -** is the callers responsibility to call sqlite3_free() to release this -** memory. +** If an error occurs, an SQLite error code is returned. The output values +** are undefined in this case. */ -static char *fts3QuoteId(char const *zInput){ - int nRet; - char *zRet; - nRet = 2 + (int)strlen(zInput)*2 + 1; - zRet = sqlite3_malloc(nRet); - if( zRet ){ - int i; - char *z = zRet; - *(z++) = '"'; - for(i=0; zInput[i]; i++){ - if( zInput[i]=='"' ) *(z++) = '"'; - *(z++) = zInput[i]; +static int fts3DoclistOrMerge( + int bDescDoclist, /* True if arguments are desc */ + char *a1, int n1, /* First doclist */ + char *a2, int n2, /* Second doclist */ + char **paOut, int *pnOut /* OUT: Malloc'd doclist */ +){ + sqlite3_int64 i1 = 0; + sqlite3_int64 i2 = 0; + sqlite3_int64 iPrev = 0; + char *pEnd1 = &a1[n1]; + char *pEnd2 = &a2[n2]; + char *p1 = a1; + char *p2 = a2; + char *p; + char *aOut; + int bFirstOut = 0; + + *paOut = 0; + *pnOut = 0; + + /* Allocate space for the output. Both the input and output doclists + ** are delta encoded. If they are in ascending order (bDescDoclist==0), + ** then the first docid in each list is simply encoded as a varint. For + ** each subsequent docid, the varint stored is the difference between the + ** current and previous docid (a positive number - since the list is in + ** ascending order). + ** + ** The first docid written to the output is therefore encoded using the + ** same number of bytes as it is in whichever of the input lists it is + ** read from. And each subsequent docid read from the same input list + ** consumes either the same or less bytes as it did in the input (since + ** the difference between it and the previous value in the output must + ** be a positive value less than or equal to the delta value read from + ** the input list). The same argument applies to all but the first docid + ** read from the 'other' list. And to the contents of all position lists + ** that will be copied and merged from the input to the output. + ** + ** However, if the first docid copied to the output is a negative number, + ** then the encoding of the first docid from the 'other' input list may + ** be larger in the output than it was in the input (since the delta value + ** may be a larger positive integer than the actual docid). + ** + ** The space required to store the output is therefore the sum of the + ** sizes of the two inputs, plus enough space for exactly one of the input + ** docids to grow. + ** + ** A symetric argument may be made if the doclists are in descending + ** order. + */ + aOut = sqlite3_malloc(n1+n2+FTS3_VARINT_MAX-1); + if( !aOut ) return SQLITE_NOMEM; + + p = aOut; + fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); + while( p1 || p2 ){ + sqlite3_int64 iDiff = DOCID_CMP(i1, i2); + + if( p2 && p1 && iDiff==0 ){ + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); + fts3PoslistMerge(&p, &p1, &p2); + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); + }else if( !p2 || (p1 && iDiff<0) ){ + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); + fts3PoslistCopy(&p, &p1); + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + }else{ + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2); + fts3PoslistCopy(&p, &p2); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); } - *(z++) = '"'; - *(z++) = '\0'; } - return zRet; + + *paOut = aOut; + *pnOut = (int)(p-aOut); + assert( *pnOut<=n1+n2+FTS3_VARINT_MAX-1 ); + return SQLITE_OK; } /* -** Return a list of comma separated SQL expressions and a FROM clause that -** could be used in a SELECT statement such as the following: -** -** SELECT FROM %_content AS x ... -** -** to return the docid, followed by each column of text data in order -** from left to write. If parameter zFunc is not NULL, then instead of -** being returned directly each column of text data is passed to an SQL -** function named zFunc first. For example, if zFunc is "unzip" and the -** table has the three user-defined columns "a", "b", and "c", the following -** string is returned: -** -** "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c') FROM %_content AS x" +** This function does a "phrase" merge of two doclists. In a phrase merge, +** the output contains a copy of each position from the right-hand input +** doclist for which there is a position in the left-hand input doclist +** exactly nDist tokens before it. ** -** The pointer returned points to a buffer allocated by sqlite3_malloc(). It -** is the responsibility of the caller to eventually free it. +** If the docids in the input doclists are sorted in ascending order, +** parameter bDescDoclist should be false. If they are sorted in ascending +** order, it should be passed a non-zero value. ** -** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and -** a NULL pointer is returned). Otherwise, if an OOM error is encountered -** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If -** no error occurs, *pRc is left unmodified. +** The right-hand input doclist is overwritten by this function. */ -static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){ - char *zRet = 0; - char *zFree = 0; - char *zFunction; - int i; +static int fts3DoclistPhraseMerge( + int bDescDoclist, /* True if arguments are desc */ + int nDist, /* Distance from left to right (1=adjacent) */ + char *aLeft, int nLeft, /* Left doclist */ + char **paRight, int *pnRight /* IN/OUT: Right/output doclist */ +){ + sqlite3_int64 i1 = 0; + sqlite3_int64 i2 = 0; + sqlite3_int64 iPrev = 0; + char *aRight = *paRight; + char *pEnd1 = &aLeft[nLeft]; + char *pEnd2 = &aRight[*pnRight]; + char *p1 = aLeft; + char *p2 = aRight; + char *p; + int bFirstOut = 0; + char *aOut; - if( p->zContentTbl==0 ){ - if( !zFunc ){ - zFunction = ""; - }else{ - zFree = zFunction = fts3QuoteId(zFunc); - } - fts3Appendf(pRc, &zRet, "docid"); - for(i=0; inColumn; i++){ - fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]); - } - if( p->zLanguageid ){ - fts3Appendf(pRc, &zRet, ", x.%Q", "langid"); - } - sqlite3_free(zFree); + assert( nDist>0 ); + if( bDescDoclist ){ + aOut = sqlite3_malloc(*pnRight + FTS3_VARINT_MAX); + if( aOut==0 ) return SQLITE_NOMEM; }else{ - fts3Appendf(pRc, &zRet, "rowid"); - for(i=0; inColumn; i++){ - fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]); - } - if( p->zLanguageid ){ - fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid); + aOut = aRight; + } + p = aOut; + + fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); + + while( p1 && p2 ){ + sqlite3_int64 iDiff = DOCID_CMP(i1, i2); + if( iDiff==0 ){ + char *pSave = p; + sqlite3_int64 iPrevSave = iPrev; + int bFirstOutSave = bFirstOut; + + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); + if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){ + p = pSave; + iPrev = iPrevSave; + bFirstOut = bFirstOutSave; + } + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); + }else if( iDiff<0 ){ + fts3PoslistCopy(0, &p1); + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + }else{ + fts3PoslistCopy(0, &p2); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); } } - fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x", - p->zDb, - (p->zContentTbl ? p->zContentTbl : p->zName), - (p->zContentTbl ? "" : "_content") - ); - return zRet; + + *pnRight = (int)(p - aOut); + if( bDescDoclist ){ + sqlite3_free(aRight); + *paRight = aOut; + } + + return SQLITE_OK; } /* -** Return a list of N comma separated question marks, where N is the number -** of columns in the %_content table (one for the docid plus one for each -** user-defined text column). -** -** If argument zFunc is not NULL, then all but the first question mark -** is preceded by zFunc and an open bracket, and followed by a closed -** bracket. For example, if zFunc is "zip" and the FTS3 table has three -** user-defined text columns, the following string is returned: -** -** "?, zip(?), zip(?), zip(?)" -** -** The pointer returned points to a buffer allocated by sqlite3_malloc(). It -** is the responsibility of the caller to eventually free it. -** -** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and -** a NULL pointer is returned). Otherwise, if an OOM error is encountered -** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If -** no error occurs, *pRc is left unmodified. +** Argument pList points to a position list nList bytes in size. This +** function checks to see if the position list contains any entries for +** a token in position 0 (of any column). If so, it writes argument iDelta +** to the output buffer pOut, followed by a position list consisting only +** of the entries from pList at position 0, and terminated by an 0x00 byte. +** The value returned is the number of bytes written to pOut (if any). */ -static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){ - char *zRet = 0; - char *zFree = 0; - char *zFunction; - int i; +SQLITE_PRIVATE int sqlite3Fts3FirstFilter( + sqlite3_int64 iDelta, /* Varint that may be written to pOut */ + char *pList, /* Position list (no 0x00 term) */ + int nList, /* Size of pList in bytes */ + char *pOut /* Write output here */ +){ + int nOut = 0; + int bWritten = 0; /* True once iDelta has been written */ + char *p = pList; + char *pEnd = &pList[nList]; - if( !zFunc ){ - zFunction = ""; - }else{ - zFree = zFunction = fts3QuoteId(zFunc); + if( *p!=0x01 ){ + if( *p==0x02 ){ + nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta); + pOut[nOut++] = 0x02; + bWritten = 1; + } + fts3ColumnlistCopy(0, &p); } - fts3Appendf(pRc, &zRet, "?"); - for(i=0; inColumn; i++){ - fts3Appendf(pRc, &zRet, ",%s(?)", zFunction); + + while( pzLanguageid ){ - fts3Appendf(pRc, &zRet, ", ?"); + if( bWritten ){ + pOut[nOut++] = 0x00; } - sqlite3_free(zFree); - return zRet; + + return nOut; } + /* -** This function interprets the string at (*pp) as a non-negative integer -** value. It reads the integer and sets *pnOut to the value read, then -** sets *pp to point to the byte immediately following the last byte of -** the integer value. -** -** Only decimal digits ('0'..'9') may be part of an integer value. -** -** If *pp does not being with a decimal digit SQLITE_ERROR is returned and -** the output value undefined. Otherwise SQLITE_OK is returned. +** Merge all doclists in the TermSelect.aaOutput[] array into a single +** doclist stored in TermSelect.aaOutput[0]. If successful, delete all +** other doclists (except the aaOutput[0] one) and return SQLITE_OK. ** -** This function is used when parsing the "prefix=" FTS4 parameter. +** If an OOM error occurs, return SQLITE_NOMEM. In this case it is +** the responsibility of the caller to free any doclists left in the +** TermSelect.aaOutput[] array. */ -static int fts3GobbleInt(const char **pp, int *pnOut){ - const int MAX_NPREFIX = 10000000; - const char *p; /* Iterator pointer */ - int nInt = 0; /* Output value */ +static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){ + char *aOut = 0; + int nOut = 0; + int i; - for(p=*pp; p[0]>='0' && p[0]<='9'; p++){ - nInt = nInt * 10 + (p[0] - '0'); - if( nInt>MAX_NPREFIX ){ - nInt = 0; - break; + /* Loop through the doclists in the aaOutput[] array. Merge them all + ** into a single doclist. + */ + for(i=0; iaaOutput); i++){ + if( pTS->aaOutput[i] ){ + if( !aOut ){ + aOut = pTS->aaOutput[i]; + nOut = pTS->anOutput[i]; + pTS->aaOutput[i] = 0; + }else{ + int nNew; + char *aNew; + + int rc = fts3DoclistOrMerge(p->bDescIdx, + pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, &aNew, &nNew + ); + if( rc!=SQLITE_OK ){ + sqlite3_free(aOut); + return rc; + } + + sqlite3_free(pTS->aaOutput[i]); + sqlite3_free(aOut); + pTS->aaOutput[i] = 0; + aOut = aNew; + nOut = nNew; + } } } - if( p==*pp ) return SQLITE_ERROR; - *pnOut = nInt; - *pp = p; + + pTS->aaOutput[0] = aOut; + pTS->anOutput[0] = nOut; return SQLITE_OK; } /* -** This function is called to allocate an array of Fts3Index structures -** representing the indexes maintained by the current FTS table. FTS tables -** always maintain the main "terms" index, but may also maintain one or -** more "prefix" indexes, depending on the value of the "prefix=" parameter -** (if any) specified as part of the CREATE VIRTUAL TABLE statement. -** -** Argument zParam is passed the value of the "prefix=" option if one was -** specified, or NULL otherwise. +** Merge the doclist aDoclist/nDoclist into the TermSelect object passed +** as the first argument. The merge is an "OR" merge (see function +** fts3DoclistOrMerge() for details). ** -** If no error occurs, SQLITE_OK is returned and *apIndex set to point to -** the allocated array. *pnIndex is set to the number of elements in the -** array. If an error does occur, an SQLite error code is returned. +** This function is called with the doclist for each term that matches +** a queried prefix. It merges all these doclists into one, the doclist +** for the specified prefix. Since there can be a very large number of +** doclists to merge, the merging is done pair-wise using the TermSelect +** object. ** -** Regardless of whether or not an error is returned, it is the responsibility -** of the caller to call sqlite3_free() on the output array to free it. +** This function returns SQLITE_OK if the merge is successful, or an +** SQLite error code (SQLITE_NOMEM) if an error occurs. */ -static int fts3PrefixParameter( - const char *zParam, /* ABC in prefix=ABC parameter to parse */ - int *pnIndex, /* OUT: size of *apIndex[] array */ - struct Fts3Index **apIndex /* OUT: Array of indexes for this table */ +static int fts3TermSelectMerge( + Fts3Table *p, /* FTS table handle */ + TermSelect *pTS, /* TermSelect object to merge into */ + char *aDoclist, /* Pointer to doclist */ + int nDoclist /* Size of aDoclist in bytes */ ){ - struct Fts3Index *aIndex; /* Allocated array */ - int nIndex = 1; /* Number of entries in array */ - - if( zParam && zParam[0] ){ - const char *p; - nIndex++; - for(p=zParam; *p; p++){ - if( *p==',' ) nIndex++; + if( pTS->aaOutput[0]==0 ){ + /* If this is the first term selected, copy the doclist to the output + ** buffer using memcpy(). + ** + ** Add FTS3_VARINT_MAX bytes of unused space to the end of the + ** allocation. This is so as to ensure that the buffer is big enough + ** to hold the current doclist AND'd with any other doclist. If the + ** doclists are stored in order=ASC order, this padding would not be + ** required (since the size of [doclistA AND doclistB] is always less + ** than or equal to the size of [doclistA] in that case). But this is + ** not true for order=DESC. For example, a doclist containing (1, -1) + ** may be smaller than (-1), as in the first example the -1 may be stored + ** as a single-byte delta, whereas in the second it must be stored as a + ** FTS3_VARINT_MAX byte varint. + ** + ** Similar padding is added in the fts3DoclistOrMerge() function. + */ + pTS->aaOutput[0] = sqlite3_malloc(nDoclist + FTS3_VARINT_MAX + 1); + pTS->anOutput[0] = nDoclist; + if( pTS->aaOutput[0] ){ + memcpy(pTS->aaOutput[0], aDoclist, nDoclist); + }else{ + return SQLITE_NOMEM; } - } - - aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex); - *apIndex = aIndex; - if( !aIndex ){ - return SQLITE_NOMEM; - } + }else{ + char *aMerge = aDoclist; + int nMerge = nDoclist; + int iOut; - memset(aIndex, 0, sizeof(struct Fts3Index) * nIndex); - if( zParam ){ - const char *p = zParam; - int i; - for(i=1; i=0 ); - if( nPrefix==0 ){ - nIndex--; - i--; + for(iOut=0; iOutaaOutput); iOut++){ + if( pTS->aaOutput[iOut]==0 ){ + assert( iOut>0 ); + pTS->aaOutput[iOut] = aMerge; + pTS->anOutput[iOut] = nMerge; + break; }else{ - aIndex[i].nPrefix = nPrefix; + char *aNew; + int nNew; + + int rc = fts3DoclistOrMerge(p->bDescIdx, aMerge, nMerge, + pTS->aaOutput[iOut], pTS->anOutput[iOut], &aNew, &nNew + ); + if( rc!=SQLITE_OK ){ + if( aMerge!=aDoclist ) sqlite3_free(aMerge); + return rc; + } + + if( aMerge!=aDoclist ) sqlite3_free(aMerge); + sqlite3_free(pTS->aaOutput[iOut]); + pTS->aaOutput[iOut] = 0; + + aMerge = aNew; + nMerge = nNew; + if( (iOut+1)==SizeofArray(pTS->aaOutput) ){ + pTS->aaOutput[iOut] = aMerge; + pTS->anOutput[iOut] = nMerge; + } } - p++; } } + return SQLITE_OK; +} - *pnIndex = nIndex; +/* +** Append SegReader object pNew to the end of the pCsr->apSegment[] array. +*/ +static int fts3SegReaderCursorAppend( + Fts3MultiSegReader *pCsr, + Fts3SegReader *pNew +){ + if( (pCsr->nSegment%16)==0 ){ + Fts3SegReader **apNew; + int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*); + apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte); + if( !apNew ){ + sqlite3Fts3SegReaderFree(pNew); + return SQLITE_NOMEM; + } + pCsr->apSegment = apNew; + } + pCsr->apSegment[pCsr->nSegment++] = pNew; return SQLITE_OK; } /* -** This function is called when initializing an FTS4 table that uses the -** content=xxx option. It determines the number of and names of the columns -** of the new FTS4 table. -** -** The third argument passed to this function is the value passed to the -** config=xxx option (i.e. "xxx"). This function queries the database for -** a table of that name. If found, the output variables are populated -** as follows: -** -** *pnCol: Set to the number of columns table xxx has, -** -** *pnStr: Set to the total amount of space required to store a copy -** of each columns name, including the nul-terminator. -** -** *pazCol: Set to point to an array of *pnCol strings. Each string is -** the name of the corresponding column in table xxx. The array -** and its contents are allocated using a single allocation. It -** is the responsibility of the caller to free this allocation -** by eventually passing the *pazCol value to sqlite3_free(). +** Add seg-reader objects to the Fts3MultiSegReader object passed as the +** 8th argument. ** -** If the table cannot be found, an error code is returned and the output -** variables are undefined. Or, if an OOM is encountered, SQLITE_NOMEM is -** returned (and the output variables are undefined). +** This function returns SQLITE_OK if successful, or an SQLite error code +** otherwise. */ -static int fts3ContentColumns( - sqlite3 *db, /* Database handle */ - const char *zDb, /* Name of db (i.e. "main", "temp" etc.) */ - const char *zTbl, /* Name of content table */ - const char ***pazCol, /* OUT: Malloc'd array of column names */ - int *pnCol, /* OUT: Size of array *pazCol */ - int *pnStr /* OUT: Bytes of string content */ +static int fts3SegReaderCursor( + Fts3Table *p, /* FTS3 table handle */ + int iLangid, /* Language id */ + int iIndex, /* Index to search (from 0 to p->nIndex-1) */ + int iLevel, /* Level of segments to scan */ + const char *zTerm, /* Term to query for */ + int nTerm, /* Size of zTerm in bytes */ + int isPrefix, /* True for a prefix search */ + int isScan, /* True to scan from zTerm to EOF */ + Fts3MultiSegReader *pCsr /* Cursor object to populate */ ){ - int rc = SQLITE_OK; /* Return code */ - char *zSql; /* "SELECT *" statement on zTbl */ - sqlite3_stmt *pStmt = 0; /* Compiled version of zSql */ + int rc = SQLITE_OK; /* Error code */ + sqlite3_stmt *pStmt = 0; /* Statement to iterate through segments */ + int rc2; /* Result of sqlite3_reset() */ - zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zTbl); - if( !zSql ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); + /* If iLevel is less than 0 and this is not a scan, include a seg-reader + ** for the pending-terms. If this is a scan, then this call must be being + ** made by an fts4aux module, not an FTS table. In this case calling + ** Fts3SegReaderPending might segfault, as the data structures used by + ** fts4aux are not completely populated. So it's easiest to filter these + ** calls out here. */ + if( iLevel<0 && p->aIndex ){ + Fts3SegReader *pSeg = 0; + rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix||isScan, &pSeg); + if( rc==SQLITE_OK && pSeg ){ + rc = fts3SegReaderCursorAppend(pCsr, pSeg); + } } - sqlite3_free(zSql); - - if( rc==SQLITE_OK ){ - const char **azCol; /* Output array */ - int nStr = 0; /* Size of all column names (incl. 0x00) */ - int nCol; /* Number of table columns */ - int i; /* Used to iterate through columns */ - /* Loop through the returned columns. Set nStr to the number of bytes of - ** space required to store a copy of each column name, including the - ** nul-terminator byte. */ - nCol = sqlite3_column_count(pStmt); - for(i=0; inSegment+1, + (isPrefix==0 && isScan==0), + iStartBlock, iLeavesEndBlock, + iEndBlock, zRoot, nRoot, &pSeg + ); + if( rc!=SQLITE_OK ) goto finished; + rc = fts3SegReaderCursorAppend(pCsr, pSeg); } - sqlite3_finalize(pStmt); - - /* Set the output variables. */ - *pnCol = nCol; - *pnStr = nStr; - *pazCol = azCol; } + finished: + rc2 = sqlite3_reset(pStmt); + if( rc==SQLITE_DONE ) rc = rc2; + return rc; } /* -** This function is the implementation of both the xConnect and xCreate -** methods of the FTS3 virtual table. -** -** The argv[] array contains the following: -** -** argv[0] -> module name ("fts3" or "fts4") -** argv[1] -> database name -** argv[2] -> table name -** argv[...] -> "column name" and other module argument fields. +** Set up a cursor object for iterating through a full-text index or a +** single level therein. */ -static int fts3InitVtab( - int isCreate, /* True for xCreate, false for xConnect */ - sqlite3 *db, /* The SQLite database connection */ - void *pAux, /* Hash table containing tokenizers */ - int argc, /* Number of elements in argv array */ - const char * const *argv, /* xCreate/xConnect argument array */ - sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ - char **pzErr /* Write any error message here */ +SQLITE_PRIVATE int sqlite3Fts3SegReaderCursor( + Fts3Table *p, /* FTS3 table handle */ + int iLangid, /* Language-id to search */ + int iIndex, /* Index to search (from 0 to p->nIndex-1) */ + int iLevel, /* Level of segments to scan */ + const char *zTerm, /* Term to query for */ + int nTerm, /* Size of zTerm in bytes */ + int isPrefix, /* True for a prefix search */ + int isScan, /* True to scan from zTerm to EOF */ + Fts3MultiSegReader *pCsr /* Cursor object to populate */ ){ - Fts3Hash *pHash = (Fts3Hash *)pAux; - Fts3Table *p = 0; /* Pointer to allocated vtab */ - int rc = SQLITE_OK; /* Return code */ - int i; /* Iterator variable */ - int nByte; /* Size of allocation used for *p */ - int iCol; /* Column index */ - int nString = 0; /* Bytes required to hold all column names */ - int nCol = 0; /* Number of columns in the FTS table */ - char *zCsr; /* Space for holding column names */ - int nDb; /* Bytes required to hold database name */ - int nName; /* Bytes required to hold table name */ - int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */ - const char **aCol; /* Array of column names */ - sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */ - - int nIndex = 0; /* Size of aIndex[] array */ - struct Fts3Index *aIndex = 0; /* Array of indexes for this table */ - - /* The results of parsing supported FTS4 key=value options: */ - int bNoDocsize = 0; /* True to omit %_docsize table */ - int bDescIdx = 0; /* True to store descending indexes */ - char *zPrefix = 0; /* Prefix parameter value (or NULL) */ - char *zCompress = 0; /* compress=? parameter (or NULL) */ - char *zUncompress = 0; /* uncompress=? parameter (or NULL) */ - char *zContent = 0; /* content=? parameter (or NULL) */ - char *zLanguageid = 0; /* languageid=? parameter (or NULL) */ - char **azNotindexed = 0; /* The set of notindexed= columns */ - int nNotindexed = 0; /* Size of azNotindexed[] array */ - - assert( strlen(argv[0])==4 ); - assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4) - || (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4) + assert( iIndex>=0 && iIndexnIndex ); + assert( iLevel==FTS3_SEGCURSOR_ALL + || iLevel==FTS3_SEGCURSOR_PENDING + || iLevel>=0 ); + assert( iLevel8 - && 0==sqlite3_strnicmp(z, "tokenize", 8) - && 0==sqlite3Fts3IsIdChar(z[8]) - ){ - rc = sqlite3Fts3InitTokenizer(pHash, &z[9], &pTokenizer, pzErr); - } + pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader)); + if( pSegcsr ){ + int i; + int bFound = 0; /* True once an index has been found */ + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; - /* Check if it is an FTS4 special argument. */ - else if( isFts4 && fts3IsSpecialColumn(z, &nKey, &zVal) ){ - struct Fts4Option { - const char *zOpt; - int nOpt; - } aFts4Opt[] = { - { "matchinfo", 9 }, /* 0 -> MATCHINFO */ - { "prefix", 6 }, /* 1 -> PREFIX */ - { "compress", 8 }, /* 2 -> COMPRESS */ - { "uncompress", 10 }, /* 3 -> UNCOMPRESS */ - { "order", 5 }, /* 4 -> ORDER */ - { "content", 7 }, /* 5 -> CONTENT */ - { "languageid", 10 }, /* 6 -> LANGUAGEID */ - { "notindexed", 10 } /* 7 -> NOTINDEXED */ - }; + if( isPrefix ){ + for(i=1; bFound==0 && inIndex; i++){ + if( p->aIndex[i].nPrefix==nTerm ){ + bFound = 1; + rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, + i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr + ); + pSegcsr->bLookup = 1; + } + } - int iOpt; - if( !zVal ){ - rc = SQLITE_NOMEM; - }else{ - for(iOpt=0; iOptnOpt && !sqlite3_strnicmp(z, pOp->zOpt, pOp->nOpt) ){ - break; + for(i=1; bFound==0 && inIndex; i++){ + if( p->aIndex[i].nPrefix==nTerm+1 ){ + bFound = 1; + rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, + i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr + ); + if( rc==SQLITE_OK ){ + rc = fts3SegReaderCursorAddZero( + p, pCsr->iLangid, zTerm, nTerm, pSegcsr + ); } } - if( iOpt==SizeofArray(aFts4Opt) ){ - *pzErr = sqlite3_mprintf("unrecognized parameter: %s", z); - rc = SQLITE_ERROR; - }else{ - switch( iOpt ){ - case 0: /* MATCHINFO */ - if( strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "fts3", 4) ){ - *pzErr = sqlite3_mprintf("unrecognized matchinfo: %s", zVal); - rc = SQLITE_ERROR; - } - bNoDocsize = 1; - break; + } + } - case 1: /* PREFIX */ - sqlite3_free(zPrefix); - zPrefix = zVal; - zVal = 0; - break; + if( bFound==0 ){ + rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, + 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr + ); + pSegcsr->bLookup = !isPrefix; + } + } - case 2: /* COMPRESS */ - sqlite3_free(zCompress); - zCompress = zVal; - zVal = 0; - break; + *ppSegcsr = pSegcsr; + return rc; +} - case 3: /* UNCOMPRESS */ - sqlite3_free(zUncompress); - zUncompress = zVal; - zVal = 0; - break; +/* +** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor(). +*/ +static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){ + sqlite3Fts3SegReaderFinish(pSegcsr); + sqlite3_free(pSegcsr); +} - case 4: /* ORDER */ - if( (strlen(zVal)!=3 || sqlite3_strnicmp(zVal, "asc", 3)) - && (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 4)) - ){ - *pzErr = sqlite3_mprintf("unrecognized order: %s", zVal); - rc = SQLITE_ERROR; - } - bDescIdx = (zVal[0]=='d' || zVal[0]=='D'); - break; +/* +** This function retrieves the doclist for the specified term (or term +** prefix) from the database. +*/ +static int fts3TermSelect( + Fts3Table *p, /* Virtual table handle */ + Fts3PhraseToken *pTok, /* Token to query for */ + int iColumn, /* Column to query (or -ve for all columns) */ + int *pnOut, /* OUT: Size of buffer at *ppOut */ + char **ppOut /* OUT: Malloced result buffer */ +){ + int rc; /* Return code */ + Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */ + TermSelect tsc; /* Object for pair-wise doclist merging */ + Fts3SegFilter filter; /* Segment term filter configuration */ - case 5: /* CONTENT */ - sqlite3_free(zContent); - zContent = zVal; - zVal = 0; - break; + pSegcsr = pTok->pSegcsr; + memset(&tsc, 0, sizeof(TermSelect)); - case 6: /* LANGUAGEID */ - assert( iOpt==6 ); - sqlite3_free(zLanguageid); - zLanguageid = zVal; - zVal = 0; - break; + filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS + | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0) + | (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0) + | (iColumnnColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0); + filter.iCol = iColumn; + filter.zTerm = pTok->z; + filter.nTerm = pTok->n; - case 7: /* NOTINDEXED */ - azNotindexed[nNotindexed++] = zVal; - zVal = 0; - break; - } - } - sqlite3_free(zVal); - } - } + rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter); + while( SQLITE_OK==rc + && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr)) + ){ + rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist); + } - /* Otherwise, the argument is a column name. */ - else { - nString += (int)(strlen(z) + 1); - aCol[nCol++] = z; + if( rc==SQLITE_OK ){ + rc = fts3TermSelectFinishMerge(p, &tsc); + } + if( rc==SQLITE_OK ){ + *ppOut = tsc.aaOutput[0]; + *pnOut = tsc.anOutput[0]; + }else{ + int i; + for(i=0; ipSegcsr = 0; + return rc; +} - /* If a languageid= option was specified, remove the language id - ** column from the aCol[] array. */ - if( rc==SQLITE_OK && zLanguageid ){ - int j; - for(j=0; jeSearch==FTS3_DOCID_SEARCH || pCsr->eSearch==FTS3_FULLSCAN_SEARCH ){ + if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ + pCsr->isEof = 1; + rc = sqlite3_reset(pCsr->pStmt); + }else{ + pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0); + rc = SQLITE_OK; + } + }else{ + rc = fts3EvalNext((Fts3Cursor *)pCursor); } - assert( pTokenizer ); + assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); + return rc; +} - rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex); - if( rc==SQLITE_ERROR ){ - assert( zPrefix ); - *pzErr = sqlite3_mprintf("error parsing prefix parameter: %s", zPrefix); - } - if( rc!=SQLITE_OK ) goto fts3_init_out; +/* +** The following are copied from sqliteInt.h. +** +** Constants for the largest and smallest possible 64-bit signed integers. +** These macros are designed to work correctly on both 32-bit and 64-bit +** compilers. +*/ +#ifndef SQLITE_AMALGAMATION +# define LARGEST_INT64 (0xffffffff|(((sqlite3_int64)0x7fffffff)<<32)) +# define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64) +#endif - /* Allocate and populate the Fts3Table structure. */ - nByte = sizeof(Fts3Table) + /* Fts3Table */ - nCol * sizeof(char *) + /* azColumn */ - nIndex * sizeof(struct Fts3Index) + /* aIndex */ - nCol * sizeof(u8) + /* abNotindexed */ - nName + /* zName */ - nDb + /* zDb */ - nString; /* Space for azColumn strings */ - p = (Fts3Table*)sqlite3_malloc(nByte); - if( p==0 ){ - rc = SQLITE_NOMEM; - goto fts3_init_out; +/* +** If the numeric type of argument pVal is "integer", then return it +** converted to a 64-bit signed integer. Otherwise, return a copy of +** the second parameter, iDefault. +*/ +static sqlite3_int64 fts3DocidRange(sqlite3_value *pVal, i64 iDefault){ + if( pVal ){ + int eType = sqlite3_value_numeric_type(pVal); + if( eType==SQLITE_INTEGER ){ + return sqlite3_value_int64(pVal); + } } - memset(p, 0, nByte); - p->db = db; - p->nColumn = nCol; - p->nPendingData = 0; - p->azColumn = (char **)&p[1]; - p->pTokenizer = pTokenizer; - p->nMaxPendingData = FTS3_MAX_PENDING_DATA; - p->bHasDocsize = (isFts4 && bNoDocsize==0); - p->bHasStat = isFts4; - p->bFts4 = isFts4; - p->bDescIdx = bDescIdx; - p->nAutoincrmerge = 0xff; /* 0xff means setting unknown */ - p->zContentTbl = zContent; - p->zLanguageid = zLanguageid; - zContent = 0; - zLanguageid = 0; - TESTONLY( p->inTransaction = -1 ); - TESTONLY( p->mxSavepoint = -1 ); + return iDefault; +} - p->aIndex = (struct Fts3Index *)&p->azColumn[nCol]; - memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex); - p->nIndex = nIndex; - for(i=0; iaIndex[i].hPending, FTS3_HASH_STRING, 1); - } - p->abNotindexed = (u8 *)&p->aIndex[nIndex]; +/* +** This is the xFilter interface for the virtual table. See +** the virtual table xFilter method documentation for additional +** information. +** +** If idxNum==FTS3_FULLSCAN_SEARCH then do a full table scan against +** the %_content table. +** +** If idxNum==FTS3_DOCID_SEARCH then do a docid lookup for a single entry +** in the %_content table. +** +** If idxNum>=FTS3_FULLTEXT_SEARCH then use the full text index. The +** column on the left-hand side of the MATCH operator is column +** number idxNum-FTS3_FULLTEXT_SEARCH, 0 indexed. argv[0] is the right-hand +** side of the MATCH operator. +*/ +static int fts3FilterMethod( + sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ + int idxNum, /* Strategy index */ + const char *idxStr, /* Unused */ + int nVal, /* Number of elements in apVal */ + sqlite3_value **apVal /* Arguments for the indexing scheme */ +){ + int rc = SQLITE_OK; + char *zSql; /* SQL statement used to access %_content */ + int eSearch; + Fts3Table *p = (Fts3Table *)pCursor->pVtab; + Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; - /* Fill in the zName and zDb fields of the vtab structure. */ - zCsr = (char *)&p->abNotindexed[nCol]; - p->zName = zCsr; - memcpy(zCsr, argv[2], nName); - zCsr += nName; - p->zDb = zCsr; - memcpy(zCsr, argv[1], nDb); - zCsr += nDb; + sqlite3_value *pCons = 0; /* The MATCH or rowid constraint, if any */ + sqlite3_value *pLangid = 0; /* The "langid = ?" constraint, if any */ + sqlite3_value *pDocidGe = 0; /* The "docid >= ?" constraint, if any */ + sqlite3_value *pDocidLe = 0; /* The "docid <= ?" constraint, if any */ + int iIdx; - /* Fill in the azColumn array */ - for(iCol=0; iColazColumn[iCol] = zCsr; - zCsr += n+1; - assert( zCsr <= &((char *)p)[nByte] ); - } + UNUSED_PARAMETER(idxStr); + UNUSED_PARAMETER(nVal); - /* Fill in the abNotindexed array */ - for(iCol=0; iColazColumn[iCol]); - for(i=0; iazColumn[iCol], zNot, n) - ){ - p->abNotindexed[iCol] = 1; - sqlite3_free(zNot); - azNotindexed[i] = 0; - } - } - } - for(i=0; i=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); + assert( p->pSegments==0 ); - if( rc==SQLITE_OK && (zCompress==0)!=(zUncompress==0) ){ - char const *zMiss = (zCompress==0 ? "compress" : "uncompress"); - rc = SQLITE_ERROR; - *pzErr = sqlite3_mprintf("missing %s parameter in fts4 constructor", zMiss); - } - p->zReadExprlist = fts3ReadExprList(p, zUncompress, &rc); - p->zWriteExprlist = fts3WriteExprList(p, zCompress, &rc); - if( rc!=SQLITE_OK ) goto fts3_init_out; + /* Collect arguments into local variables */ + iIdx = 0; + if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++]; + assert( iIdx==nVal ); - /* If this is an xCreate call, create the underlying tables in the - ** database. TODO: For xConnect(), it could verify that said tables exist. - */ - if( isCreate ){ - rc = fts3CreateTables(p); - } + /* In case the cursor has been used before, clear it now. */ + sqlite3_finalize(pCsr->pStmt); + sqlite3_free(pCsr->aDoclist); + sqlite3Fts3MIBufferFree(pCsr->pMIBuffer); + sqlite3Fts3ExprFree(pCsr->pExpr); + memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor)); - /* Check to see if a legacy fts3 table has been "upgraded" by the - ** addition of a %_stat table so that it can use incremental merge. - */ - if( !isFts4 && !isCreate ){ - p->bHasStat = 2; + /* Set the lower and upper bounds on docids to return */ + pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64); + pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64); + + if( idxStr ){ + pCsr->bDesc = (idxStr[0]=='D'); + }else{ + pCsr->bDesc = p->bDescIdx; } + pCsr->eSearch = (i16)eSearch; - /* Figure out the page-size for the database. This is required in order to - ** estimate the cost of loading large doclists from the database. */ - fts3DatabasePageSize(&rc, p); - p->nNodeSize = p->nPgsz-35; + if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){ + int iCol = eSearch-FTS3_FULLTEXT_SEARCH; + const char *zQuery = (const char *)sqlite3_value_text(pCons); - /* Declare the table schema to SQLite. */ - fts3DeclareVtab(&rc, p); + if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){ + return SQLITE_NOMEM; + } -fts3_init_out: - sqlite3_free(zPrefix); - sqlite3_free(aIndex); - sqlite3_free(zCompress); - sqlite3_free(zUncompress); - sqlite3_free(zContent); - sqlite3_free(zLanguageid); - for(i=0; ipModule->xDestroy(pTokenizer); + pCsr->iLangid = 0; + if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid); + + assert( p->base.zErrMsg==0 ); + rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid, + p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr, + &p->base.zErrMsg + ); + if( rc!=SQLITE_OK ){ + return rc; } - }else{ - assert( p->pSegments==0 ); - *ppVTab = &p->base; + + rc = fts3EvalStart(pCsr); + sqlite3Fts3SegmentsClose(p); + if( rc!=SQLITE_OK ) return rc; + pCsr->pNextId = pCsr->aDoclist; + pCsr->iPrevId = 0; } - return rc; + + /* Compile a SELECT statement for this cursor. For a full-table-scan, the + ** statement loops through all rows of the %_content table. For a + ** full-text query or docid lookup, the statement retrieves a single + ** row by docid. + */ + if( eSearch==FTS3_FULLSCAN_SEARCH ){ + if( pDocidGe || pDocidLe ){ + zSql = sqlite3_mprintf( + "SELECT %s WHERE rowid BETWEEN %lld AND %lld ORDER BY rowid %s", + p->zReadExprlist, pCsr->iMinDocid, pCsr->iMaxDocid, + (pCsr->bDesc ? "DESC" : "ASC") + ); + }else{ + zSql = sqlite3_mprintf("SELECT %s ORDER BY rowid %s", + p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC") + ); + } + if( zSql ){ + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); + sqlite3_free(zSql); + }else{ + rc = SQLITE_NOMEM; + } + }else if( eSearch==FTS3_DOCID_SEARCH ){ + rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt); + if( rc==SQLITE_OK ){ + rc = sqlite3_bind_value(pCsr->pStmt, 1, pCons); + } + } + if( rc!=SQLITE_OK ) return rc; + + return fts3NextMethod(pCursor); } -/* -** The xConnect() and xCreate() methods for the virtual table. All the -** work is done in function fts3InitVtab(). +/* +** This is the xEof method of the virtual table. SQLite calls this +** routine to find out if it has reached the end of a result set. */ -static int fts3ConnectMethod( - sqlite3 *db, /* Database connection */ - void *pAux, /* Pointer to tokenizer hash table */ - int argc, /* Number of elements in argv array */ - const char * const *argv, /* xCreate/xConnect argument array */ - sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ - char **pzErr /* OUT: sqlite3_malloc'd error message */ -){ - return fts3InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr); -} -static int fts3CreateMethod( - sqlite3 *db, /* Database connection */ - void *pAux, /* Pointer to tokenizer hash table */ - int argc, /* Number of elements in argv array */ - const char * const *argv, /* xCreate/xConnect argument array */ - sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ - char **pzErr /* OUT: sqlite3_malloc'd error message */ -){ - return fts3InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr); +static int fts3EofMethod(sqlite3_vtab_cursor *pCursor){ + return ((Fts3Cursor *)pCursor)->isEof; } -/* -** Set the pIdxInfo->estimatedRows variable to nRow. Unless this -** extension is currently being used by a version of SQLite too old to -** support estimatedRows. In that case this function is a no-op. +/* +** This is the xRowid method. The SQLite core calls this routine to +** retrieve the rowid for the current row of the result set. fts3 +** exposes %_content.docid as the rowid for the virtual table. The +** rowid should be written to *pRowid. */ -static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){ -#if SQLITE_VERSION_NUMBER>=3008002 - if( sqlite3_libversion_number()>=3008002 ){ - pIdxInfo->estimatedRows = nRow; - } -#endif +static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ + Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; + *pRowid = pCsr->iPrevId; + return SQLITE_OK; } /* -** Implementation of the xBestIndex method for FTS3 tables. There -** are three possible strategies, in order of preference: +** This is the xColumn method, called by SQLite to request a value from +** the row that the supplied cursor currently points to. ** -** 1. Direct lookup by rowid or docid. -** 2. Full-text search using a MATCH operator on a non-docid column. -** 3. Linear scan of %_content table. +** If: +** +** (iCol < p->nColumn) -> The value of the iCol'th user column. +** (iCol == p->nColumn) -> Magic column with the same name as the table. +** (iCol == p->nColumn+1) -> Docid column +** (iCol == p->nColumn+2) -> Langid column */ -static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ - Fts3Table *p = (Fts3Table *)pVTab; - int i; /* Iterator variable */ - int iCons = -1; /* Index of constraint to use */ +static int fts3ColumnMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ + int iCol /* Index of column to read value from */ +){ + int rc = SQLITE_OK; /* Return Code */ + Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; + Fts3Table *p = (Fts3Table *)pCursor->pVtab; - int iLangidCons = -1; /* Index of langid=x constraint, if present */ - int iDocidGe = -1; /* Index of docid>=x constraint, if present */ - int iDocidLe = -1; /* Index of docid<=x constraint, if present */ - int iIdx; + /* The column value supplied by SQLite must be in range. */ + assert( iCol>=0 && iCol<=p->nColumn+2 ); - /* By default use a full table scan. This is an expensive option, - ** so search through the constraints to see if a more efficient - ** strategy is possible. - */ - pInfo->idxNum = FTS3_FULLSCAN_SEARCH; - pInfo->estimatedCost = 5000000; - for(i=0; inConstraint; i++){ - int bDocid; /* True if this constraint is on docid */ - struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i]; - if( pCons->usable==0 ){ - if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ - /* There exists an unusable MATCH constraint. This means that if - ** the planner does elect to use the results of this call as part - ** of the overall query plan the user will see an "unable to use - ** function MATCH in the requested context" error. To discourage - ** this, return a very high cost here. */ - pInfo->idxNum = FTS3_FULLSCAN_SEARCH; - pInfo->estimatedCost = 1e50; - fts3SetEstimatedRows(pInfo, ((sqlite3_int64)1) << 50); - return SQLITE_OK; + if( iCol==p->nColumn+1 ){ + /* This call is a request for the "docid" column. Since "docid" is an + ** alias for "rowid", use the xRowid() method to obtain the value. + */ + sqlite3_result_int64(pCtx, pCsr->iPrevId); + }else if( iCol==p->nColumn ){ + /* The extra column whose name is the same as the table. + ** Return a blob which is a pointer to the cursor. */ + sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT); + }else if( iCol==p->nColumn+2 && pCsr->pExpr ){ + sqlite3_result_int64(pCtx, pCsr->iLangid); + }else{ + /* The requested column is either a user column (one that contains + ** indexed data), or the language-id column. */ + rc = fts3CursorSeek(0, pCsr); + + if( rc==SQLITE_OK ){ + if( iCol==p->nColumn+2 ){ + int iLangid = 0; + if( p->zLanguageid ){ + iLangid = sqlite3_column_int(pCsr->pStmt, p->nColumn+1); + } + sqlite3_result_int(pCtx, iLangid); + }else if( sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){ + sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); } - continue; } + } - bDocid = (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1); + assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); + return rc; +} - /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */ - if( iCons<0 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && bDocid ){ - pInfo->idxNum = FTS3_DOCID_SEARCH; - pInfo->estimatedCost = 1.0; - iCons = i; - } +/* +** This function is the implementation of the xUpdate callback used by +** FTS3 virtual tables. It is invoked by SQLite each time a row is to be +** inserted, updated or deleted. +*/ +static int fts3UpdateMethod( + sqlite3_vtab *pVtab, /* Virtual table handle */ + int nArg, /* Size of argument array */ + sqlite3_value **apVal, /* Array of arguments */ + sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ +){ + return sqlite3Fts3UpdateMethod(pVtab, nArg, apVal, pRowid); +} - /* A MATCH constraint. Use a full-text search. - ** - ** If there is more than one MATCH constraint available, use the first - ** one encountered. If there is both a MATCH constraint and a direct - ** rowid/docid lookup, prefer the MATCH strategy. This is done even - ** though the rowid/docid lookup is faster than a MATCH query, selecting - ** it would lead to an "unable to use function MATCH in the requested - ** context" error. - */ - if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH - && pCons->iColumn>=0 && pCons->iColumn<=p->nColumn - ){ - pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn; - pInfo->estimatedCost = 2.0; - iCons = i; - } +/* +** Implementation of xSync() method. Flush the contents of the pending-terms +** hash-table to the database. +*/ +static int fts3SyncMethod(sqlite3_vtab *pVtab){ - /* Equality constraint on the langid column */ - if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ - && pCons->iColumn==p->nColumn + 2 - ){ - iLangidCons = i; - } + /* Following an incremental-merge operation, assuming that the input + ** segments are not completely consumed (the usual case), they are updated + ** in place to remove the entries that have already been merged. This + ** involves updating the leaf block that contains the smallest unmerged + ** entry and each block (if any) between the leaf and the root node. So + ** if the height of the input segment b-trees is N, and input segments + ** are merged eight at a time, updating the input segments at the end + ** of an incremental-merge requires writing (8*(1+N)) blocks. N is usually + ** small - often between 0 and 2. So the overhead of the incremental + ** merge is somewhere between 8 and 24 blocks. To avoid this overhead + ** dwarfing the actual productive work accomplished, the incremental merge + ** is only attempted if it will write at least 64 leaf blocks. Hence + ** nMinMerge. + ** + ** Of course, updating the input segments also involves deleting a bunch + ** of blocks from the segments table. But this is not considered overhead + ** as it would also be required by a crisis-merge that used the same input + ** segments. + */ + const u32 nMinMerge = 64; /* Minimum amount of incr-merge work to do */ - if( bDocid ){ - switch( pCons->op ){ - case SQLITE_INDEX_CONSTRAINT_GE: - case SQLITE_INDEX_CONSTRAINT_GT: - iDocidGe = i; - break; + Fts3Table *p = (Fts3Table*)pVtab; + int rc = sqlite3Fts3PendingTermsFlush(p); - case SQLITE_INDEX_CONSTRAINT_LE: - case SQLITE_INDEX_CONSTRAINT_LT: - iDocidLe = i; - break; - } - } - } + if( rc==SQLITE_OK + && p->nLeafAdd>(nMinMerge/16) + && p->nAutoincrmerge && p->nAutoincrmerge!=0xff + ){ + int mxLevel = 0; /* Maximum relative level value in db */ + int A; /* Incr-merge parameter A */ - iIdx = 1; - if( iCons>=0 ){ - pInfo->aConstraintUsage[iCons].argvIndex = iIdx++; - pInfo->aConstraintUsage[iCons].omit = 1; - } - if( iLangidCons>=0 ){ - pInfo->idxNum |= FTS3_HAVE_LANGID; - pInfo->aConstraintUsage[iLangidCons].argvIndex = iIdx++; - } - if( iDocidGe>=0 ){ - pInfo->idxNum |= FTS3_HAVE_DOCID_GE; - pInfo->aConstraintUsage[iDocidGe].argvIndex = iIdx++; - } - if( iDocidLe>=0 ){ - pInfo->idxNum |= FTS3_HAVE_DOCID_LE; - pInfo->aConstraintUsage[iDocidLe].argvIndex = iIdx++; - } + rc = sqlite3Fts3MaxLevel(p, &mxLevel); + assert( rc==SQLITE_OK || mxLevel==0 ); + A = p->nLeafAdd * mxLevel; + A += (A/2); + if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, p->nAutoincrmerge); + } + sqlite3Fts3SegmentsClose(p); + return rc; +} - /* Regardless of the strategy selected, FTS can deliver rows in rowid (or - ** docid) order. Both ascending and descending are possible. - */ - if( pInfo->nOrderBy==1 ){ - struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0]; - if( pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1 ){ - if( pOrder->desc ){ - pInfo->idxStr = "DESC"; - }else{ - pInfo->idxStr = "ASC"; +/* +** If it is currently unknown whether or not the FTS table has an %_stat +** table (if p->bHasStat==2), attempt to determine this (set p->bHasStat +** to 0 or 1). Return SQLITE_OK if successful, or an SQLite error code +** if an error occurs. +*/ +static int fts3SetHasStat(Fts3Table *p){ + int rc = SQLITE_OK; + if( p->bHasStat==2 ){ + const char *zFmt ="SELECT 1 FROM %Q.sqlite_master WHERE tbl_name='%q_stat'"; + char *zSql = sqlite3_mprintf(zFmt, p->zDb, p->zName); + if( zSql ){ + sqlite3_stmt *pStmt = 0; + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); + if( rc==SQLITE_OK ){ + int bHasStat = (sqlite3_step(pStmt)==SQLITE_ROW); + rc = sqlite3_finalize(pStmt); + if( rc==SQLITE_OK ) p->bHasStat = bHasStat; } - pInfo->orderByConsumed = 1; + sqlite3_free(zSql); + }else{ + rc = SQLITE_NOMEM; } } + return rc; +} +/* +** Implementation of xBegin() method. +*/ +static int fts3BeginMethod(sqlite3_vtab *pVtab){ + Fts3Table *p = (Fts3Table*)pVtab; + UNUSED_PARAMETER(pVtab); assert( p->pSegments==0 ); - return SQLITE_OK; + assert( p->nPendingData==0 ); + assert( p->inTransaction!=1 ); + TESTONLY( p->inTransaction = 1 ); + TESTONLY( p->mxSavepoint = -1; ); + p->nLeafAdd = 0; + return fts3SetHasStat(p); } /* -** Implementation of xOpen method. +** Implementation of xCommit() method. This is a no-op. The contents of +** the pending-terms hash-table have already been flushed into the database +** by fts3SyncMethod(). */ -static int fts3OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ - sqlite3_vtab_cursor *pCsr; /* Allocated cursor */ - - UNUSED_PARAMETER(pVTab); - - /* Allocate a buffer large enough for an Fts3Cursor structure. If the - ** allocation succeeds, zero it and return SQLITE_OK. Otherwise, - ** if the allocation fails, return SQLITE_NOMEM. - */ - *ppCsr = pCsr = (sqlite3_vtab_cursor *)sqlite3_malloc(sizeof(Fts3Cursor)); - if( !pCsr ){ - return SQLITE_NOMEM; - } - memset(pCsr, 0, sizeof(Fts3Cursor)); +static int fts3CommitMethod(sqlite3_vtab *pVtab){ + TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); + UNUSED_PARAMETER(pVtab); + assert( p->nPendingData==0 ); + assert( p->inTransaction!=0 ); + assert( p->pSegments==0 ); + TESTONLY( p->inTransaction = 0 ); + TESTONLY( p->mxSavepoint = -1; ); return SQLITE_OK; } /* -** Close the cursor. For additional information see the documentation -** on the xClose method of the virtual table interface. +** Implementation of xRollback(). Discard the contents of the pending-terms +** hash-table. Any changes made to the database are reverted by SQLite. */ -static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){ - Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; - assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); - sqlite3_finalize(pCsr->pStmt); - sqlite3Fts3ExprFree(pCsr->pExpr); - sqlite3Fts3FreeDeferredTokens(pCsr); - sqlite3_free(pCsr->aDoclist); - sqlite3_free(pCsr->aMatchinfo); - assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); - sqlite3_free(pCsr); +static int fts3RollbackMethod(sqlite3_vtab *pVtab){ + Fts3Table *p = (Fts3Table*)pVtab; + sqlite3Fts3PendingTermsClear(p); + assert( p->inTransaction!=0 ); + TESTONLY( p->inTransaction = 0 ); + TESTONLY( p->mxSavepoint = -1; ); return SQLITE_OK; } /* -** If pCsr->pStmt has not been prepared (i.e. if pCsr->pStmt==0), then -** compose and prepare an SQL statement of the form: -** -** "SELECT FROM %_content WHERE rowid = ?" -** -** (or the equivalent for a content=xxx table) and set pCsr->pStmt to -** it. If an error occurs, return an SQLite error code. -** -** Otherwise, set *ppStmt to point to pCsr->pStmt and return SQLITE_OK. +** When called, *ppPoslist must point to the byte immediately following the +** end of a position-list. i.e. ( (*ppPoslist)[-1]==POS_END ). This function +** moves *ppPoslist so that it instead points to the first byte of the +** same position list. */ -static int fts3CursorSeekStmt(Fts3Cursor *pCsr, sqlite3_stmt **ppStmt){ - int rc = SQLITE_OK; - if( pCsr->pStmt==0 ){ - Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; - char *zSql; - zSql = sqlite3_mprintf("SELECT %s WHERE rowid = ?", p->zReadExprlist); - if( !zSql ) return SQLITE_NOMEM; - rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); - sqlite3_free(zSql); +static void fts3ReversePoslist(char *pStart, char **ppPoslist){ + char *p = &(*ppPoslist)[-2]; + char c = 0; + + /* Skip backwards passed any trailing 0x00 bytes added by NearTrim() */ + while( p>pStart && (c=*p--)==0 ); + + /* Search backwards for a varint with value zero (the end of the previous + ** poslist). This is an 0x00 byte preceded by some byte that does not + ** have the 0x80 bit set. */ + while( p>pStart && (*p & 0x80) | c ){ + c = *p--; } - *ppStmt = pCsr->pStmt; - return rc; + assert( p==pStart || c==0 ); + + /* At this point p points to that preceding byte without the 0x80 bit + ** set. So to find the start of the poslist, skip forward 2 bytes then + ** over a varint. + ** + ** Normally. The other case is that p==pStart and the poslist to return + ** is the first in the doclist. In this case do not skip forward 2 bytes. + ** The second part of the if condition (c==0 && *ppPoslist>&p[2]) + ** is required for cases where the first byte of a doclist and the + ** doclist is empty. For example, if the first docid is 10, a doclist + ** that begins with: + ** + ** 0x0A 0x00 + */ + if( p>pStart || (c==0 && *ppPoslist>&p[2]) ){ p = &p[2]; } + while( *p++&0x80 ); + *ppPoslist = p; } /* -** Position the pCsr->pStmt statement so that it is on the row -** of the %_content table that contains the last match. Return -** SQLITE_OK on success. +** Helper function used by the implementation of the overloaded snippet(), +** offsets() and optimize() SQL functions. +** +** If the value passed as the third argument is a blob of size +** sizeof(Fts3Cursor*), then the blob contents are copied to the +** output variable *ppCsr and SQLITE_OK is returned. Otherwise, an error +** message is written to context pContext and SQLITE_ERROR returned. The +** string passed via zFunc is used as part of the error message. */ -static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){ - int rc = SQLITE_OK; - if( pCsr->isRequireSeek ){ - sqlite3_stmt *pStmt = 0; - - rc = fts3CursorSeekStmt(pCsr, &pStmt); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId); - pCsr->isRequireSeek = 0; - if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){ - return SQLITE_OK; - }else{ - rc = sqlite3_reset(pCsr->pStmt); - if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){ - /* If no row was found and no error has occurred, then the %_content - ** table is missing a row that is present in the full-text index. - ** The data structures are corrupt. */ - rc = FTS_CORRUPT_VTAB; - pCsr->isEof = 1; - } - } - } - } - - if( rc!=SQLITE_OK && pContext ){ - sqlite3_result_error_code(pContext, rc); +static int fts3FunctionArg( + sqlite3_context *pContext, /* SQL function call context */ + const char *zFunc, /* Function name */ + sqlite3_value *pVal, /* argv[0] passed to function */ + Fts3Cursor **ppCsr /* OUT: Store cursor handle here */ +){ + Fts3Cursor *pRet; + if( sqlite3_value_type(pVal)!=SQLITE_BLOB + || sqlite3_value_bytes(pVal)!=sizeof(Fts3Cursor *) + ){ + char *zErr = sqlite3_mprintf("illegal first argument to %s", zFunc); + sqlite3_result_error(pContext, zErr, -1); + sqlite3_free(zErr); + return SQLITE_ERROR; } - return rc; + memcpy(&pRet, sqlite3_value_blob(pVal), sizeof(Fts3Cursor *)); + *ppCsr = pRet; + return SQLITE_OK; } /* -** This function is used to process a single interior node when searching -** a b-tree for a term or term prefix. The node data is passed to this -** function via the zNode/nNode parameters. The term to search for is -** passed in zTerm/nTerm. -** -** If piFirst is not NULL, then this function sets *piFirst to the blockid -** of the child node that heads the sub-tree that may contain the term. -** -** If piLast is not NULL, then *piLast is set to the right-most child node -** that heads a sub-tree that may contain a term for which zTerm/nTerm is -** a prefix. -** -** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK. +** Implementation of the snippet() function for FTS3 */ -static int fts3ScanInteriorNode( - const char *zTerm, /* Term to select leaves for */ - int nTerm, /* Size of term zTerm in bytes */ - const char *zNode, /* Buffer containing segment interior node */ - int nNode, /* Size of buffer at zNode */ - sqlite3_int64 *piFirst, /* OUT: Selected child node */ - sqlite3_int64 *piLast /* OUT: Selected child node */ +static void fts3SnippetFunc( + sqlite3_context *pContext, /* SQLite function call context */ + int nVal, /* Size of apVal[] array */ + sqlite3_value **apVal /* Array of arguments */ ){ - int rc = SQLITE_OK; /* Return code */ - const char *zCsr = zNode; /* Cursor to iterate through node */ - const char *zEnd = &zCsr[nNode];/* End of interior node buffer */ - char *zBuffer = 0; /* Buffer to load terms into */ - int nAlloc = 0; /* Size of allocated buffer */ - int isFirstTerm = 1; /* True when processing first term on page */ - sqlite3_int64 iChild; /* Block id of child node to descend to */ + Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ + const char *zStart = ""; + const char *zEnd = ""; + const char *zEllipsis = "..."; + int iCol = -1; + int nToken = 15; /* Default number of tokens in snippet */ - /* Skip over the 'height' varint that occurs at the start of every - ** interior node. Then load the blockid of the left-child of the b-tree - ** node into variable iChild. - ** - ** Even if the data structure on disk is corrupted, this (reading two - ** varints from the buffer) does not risk an overread. If zNode is a - ** root node, then the buffer comes from a SELECT statement. SQLite does - ** not make this guarantee explicitly, but in practice there are always - ** either more than 20 bytes of allocated space following the nNode bytes of - ** contents, or two zero bytes. Or, if the node is read from the %_segments - ** table, then there are always 20 bytes of zeroed padding following the - ** nNode bytes of content (see sqlite3Fts3ReadBlock() for details). + /* There must be at least one argument passed to this function (otherwise + ** the non-overloaded version would have been called instead of this one). */ - zCsr += sqlite3Fts3GetVarint(zCsr, &iChild); - zCsr += sqlite3Fts3GetVarint(zCsr, &iChild); - if( zCsr>zEnd ){ - return FTS_CORRUPT_VTAB; - } - - while( zCsrzEnd ){ - rc = FTS_CORRUPT_VTAB; - goto finish_scan; - } - if( nPrefix+nSuffix>nAlloc ){ - char *zNew; - nAlloc = (nPrefix+nSuffix) * 2; - zNew = (char *)sqlite3_realloc(zBuffer, nAlloc); - if( !zNew ){ - rc = SQLITE_NOMEM; - goto finish_scan; - } - zBuffer = zNew; - } - assert( zBuffer ); - memcpy(&zBuffer[nPrefix], zCsr, nSuffix); - nBuffer = nPrefix + nSuffix; - zCsr += nSuffix; + assert( nVal>=1 ); - /* Compare the term we are searching for with the term just loaded from - ** the interior node. If the specified term is greater than or equal - ** to the term from the interior node, then all terms on the sub-tree - ** headed by node iChild are smaller than zTerm. No need to search - ** iChild. - ** - ** If the interior node term is larger than the specified term, then - ** the tree headed by iChild may contain the specified term. - */ - cmp = memcmp(zTerm, zBuffer, (nBuffer>nTerm ? nTerm : nBuffer)); - if( piFirst && (cmp<0 || (cmp==0 && nBuffer>nTerm)) ){ - *piFirst = iChild; - piFirst = 0; - } + if( nVal>6 ){ + sqlite3_result_error(pContext, + "wrong number of arguments to function snippet()", -1); + return; + } + if( fts3FunctionArg(pContext, "snippet", apVal[0], &pCsr) ) return; - if( piLast && cmp<0 ){ - *piLast = iChild; - piLast = 0; - } + switch( nVal ){ + case 6: nToken = sqlite3_value_int(apVal[5]); + case 5: iCol = sqlite3_value_int(apVal[4]); + case 4: zEllipsis = (const char*)sqlite3_value_text(apVal[3]); + case 3: zEnd = (const char*)sqlite3_value_text(apVal[2]); + case 2: zStart = (const char*)sqlite3_value_text(apVal[1]); + } + if( !zEllipsis || !zEnd || !zStart ){ + sqlite3_result_error_nomem(pContext); + }else if( nToken==0 ){ + sqlite3_result_text(pContext, "", -1, SQLITE_STATIC); + }else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){ + sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken); + } +} - iChild++; - }; +/* +** Implementation of the offsets() function for FTS3 +*/ +static void fts3OffsetsFunc( + sqlite3_context *pContext, /* SQLite function call context */ + int nVal, /* Size of argument array */ + sqlite3_value **apVal /* Array of arguments */ +){ + Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ - if( piFirst ) *piFirst = iChild; - if( piLast ) *piLast = iChild; + UNUSED_PARAMETER(nVal); - finish_scan: - sqlite3_free(zBuffer); - return rc; + assert( nVal==1 ); + if( fts3FunctionArg(pContext, "offsets", apVal[0], &pCsr) ) return; + assert( pCsr ); + if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){ + sqlite3Fts3Offsets(pContext, pCsr); + } } - -/* -** The buffer pointed to by argument zNode (size nNode bytes) contains an -** interior node of a b-tree segment. The zTerm buffer (size nTerm bytes) -** contains a term. This function searches the sub-tree headed by the zNode -** node for the range of leaf nodes that may contain the specified term -** or terms for which the specified term is a prefix. -** -** If piLeaf is not NULL, then *piLeaf is set to the blockid of the -** left-most leaf node in the tree that may contain the specified term. -** If piLeaf2 is not NULL, then *piLeaf2 is set to the blockid of the -** right-most leaf node that may contain a term for which the specified -** term is a prefix. +/* +** Implementation of the special optimize() function for FTS3. This +** function merges all segments in the database to a single segment. +** Example usage is: ** -** It is possible that the range of returned leaf nodes does not contain -** the specified term or any terms for which it is a prefix. However, if the -** segment does contain any such terms, they are stored within the identified -** range. Because this function only inspects interior segment nodes (and -** never loads leaf nodes into memory), it is not possible to be sure. +** SELECT optimize(t) FROM t LIMIT 1; ** -** If an error occurs, an error code other than SQLITE_OK is returned. -*/ -static int fts3SelectLeaf( - Fts3Table *p, /* Virtual table handle */ - const char *zTerm, /* Term to select leaves for */ - int nTerm, /* Size of term zTerm in bytes */ - const char *zNode, /* Buffer containing segment interior node */ - int nNode, /* Size of buffer at zNode */ - sqlite3_int64 *piLeaf, /* Selected leaf node */ - sqlite3_int64 *piLeaf2 /* Selected leaf node */ +** where 't' is the name of an FTS3 table. +*/ +static void fts3OptimizeFunc( + sqlite3_context *pContext, /* SQLite function call context */ + int nVal, /* Size of argument array */ + sqlite3_value **apVal /* Array of arguments */ ){ - int rc = SQLITE_OK; /* Return code */ - int iHeight; /* Height of this node in tree */ + int rc; /* Return code */ + Fts3Table *p; /* Virtual table handle */ + Fts3Cursor *pCursor; /* Cursor handle passed through apVal[0] */ - assert( piLeaf || piLeaf2 ); + UNUSED_PARAMETER(nVal); - fts3GetVarint32(zNode, &iHeight); - rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2); - assert( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) ); + assert( nVal==1 ); + if( fts3FunctionArg(pContext, "optimize", apVal[0], &pCursor) ) return; + p = (Fts3Table *)pCursor->base.pVtab; + assert( p ); - if( rc==SQLITE_OK && iHeight>1 ){ - char *zBlob = 0; /* Blob read from %_segments table */ - int nBlob = 0; /* Size of zBlob in bytes */ + rc = sqlite3Fts3Optimize(p); - if( piLeaf && piLeaf2 && (*piLeaf!=*piLeaf2) ){ - rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob, 0); - if( rc==SQLITE_OK ){ - rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, 0); - } - sqlite3_free(zBlob); - piLeaf = 0; - zBlob = 0; - } + switch( rc ){ + case SQLITE_OK: + sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC); + break; + case SQLITE_DONE: + sqlite3_result_text(pContext, "Index already optimal", -1, SQLITE_STATIC); + break; + default: + sqlite3_result_error_code(pContext, rc); + break; + } +} - if( rc==SQLITE_OK ){ - rc = sqlite3Fts3ReadBlock(p, piLeaf?*piLeaf:*piLeaf2, &zBlob, &nBlob, 0); - } - if( rc==SQLITE_OK ){ - rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, piLeaf2); +/* +** Implementation of the matchinfo() function for FTS3 +*/ +static void fts3MatchinfoFunc( + sqlite3_context *pContext, /* SQLite function call context */ + int nVal, /* Size of argument array */ + sqlite3_value **apVal /* Array of arguments */ +){ + Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ + assert( nVal==1 || nVal==2 ); + if( SQLITE_OK==fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr) ){ + const char *zArg = 0; + if( nVal>1 ){ + zArg = (const char *)sqlite3_value_text(apVal[1]); } - sqlite3_free(zBlob); + sqlite3Fts3Matchinfo(pContext, pCsr, zArg); } - - return rc; } /* -** This function is used to create delta-encoded serialized lists of FTS3 -** varints. Each call to this function appends a single varint to a list. +** This routine implements the xFindFunction method for the FTS3 +** virtual table. */ -static void fts3PutDeltaVarint( - char **pp, /* IN/OUT: Output pointer */ - sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */ - sqlite3_int64 iVal /* Write this value to the list */ +static int fts3FindFunctionMethod( + sqlite3_vtab *pVtab, /* Virtual table handle */ + int nArg, /* Number of SQL function arguments */ + const char *zName, /* Name of SQL function */ + void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */ + void **ppArg /* Unused */ ){ - assert( iVal-*piPrev > 0 || (*piPrev==0 && iVal==0) ); - *pp += sqlite3Fts3PutVarint(*pp, iVal-*piPrev); - *piPrev = iVal; + struct Overloaded { + const char *zName; + void (*xFunc)(sqlite3_context*,int,sqlite3_value**); + } aOverload[] = { + { "snippet", fts3SnippetFunc }, + { "offsets", fts3OffsetsFunc }, + { "optimize", fts3OptimizeFunc }, + { "matchinfo", fts3MatchinfoFunc }, + }; + int i; /* Iterator variable */ + + UNUSED_PARAMETER(pVtab); + UNUSED_PARAMETER(nArg); + UNUSED_PARAMETER(ppArg); + + for(i=0; idb; /* Database connection */ + int rc; /* Return Code */ - /* The end of a position list is marked by a zero encoded as an FTS3 - ** varint. A single POS_END (0) byte. Except, if the 0 byte is preceded by - ** a byte with the 0x80 bit set, then it is not a varint 0, but the tail - ** of some other, multi-byte, value. - ** - ** The following while-loop moves pEnd to point to the first byte that is not - ** immediately preceded by a byte with the 0x80 bit set. Then increments - ** pEnd once more so that it points to the byte immediately following the - ** last byte in the position-list. + /* At this point it must be known if the %_stat table exists or not. + ** So bHasStat may not be 2. */ + rc = fts3SetHasStat(p); + + /* As it happens, the pending terms table is always empty here. This is + ** because an "ALTER TABLE RENAME TABLE" statement inside a transaction + ** always opens a savepoint transaction. And the xSavepoint() method + ** flushes the pending terms table. But leave the (no-op) call to + ** PendingTermsFlush() in in case that changes. */ - while( *pEnd | c ){ - c = *pEnd++ & 0x80; - testcase( c!=0 && (*pEnd)==0 ); + assert( p->nPendingData==0 ); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3PendingTermsFlush(p); } - pEnd++; /* Advance past the POS_END terminator byte */ - if( pp ){ - int n = (int)(pEnd - *ppPoslist); - char *p = *pp; - memcpy(p, *ppPoslist, n); - p += n; - *pp = p; + if( p->zContentTbl==0 ){ + fts3DbExec(&rc, db, + "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';", + p->zDb, p->zName, zName + ); } - *ppPoslist = pEnd; + + if( p->bHasDocsize ){ + fts3DbExec(&rc, db, + "ALTER TABLE %Q.'%q_docsize' RENAME TO '%q_docsize';", + p->zDb, p->zName, zName + ); + } + if( p->bHasStat ){ + fts3DbExec(&rc, db, + "ALTER TABLE %Q.'%q_stat' RENAME TO '%q_stat';", + p->zDb, p->zName, zName + ); + } + fts3DbExec(&rc, db, + "ALTER TABLE %Q.'%q_segments' RENAME TO '%q_segments';", + p->zDb, p->zName, zName + ); + fts3DbExec(&rc, db, + "ALTER TABLE %Q.'%q_segdir' RENAME TO '%q_segdir';", + p->zDb, p->zName, zName + ); + return rc; } /* -** When this function is called, *ppPoslist is assumed to point to the -** start of a column-list. After it returns, *ppPoslist points to the -** to the terminator (POS_COLUMN or POS_END) byte of the column-list. -** -** A column-list is list of delta-encoded positions for a single column -** within a single document within a doclist. -** -** The column-list is terminated either by a POS_COLUMN varint (1) or -** a POS_END varint (0). This routine leaves *ppPoslist pointing to -** the POS_COLUMN or POS_END that terminates the column-list. +** The xSavepoint() method. ** -** If pp is not NULL, then the contents of the column-list are copied -** to *pp. *pp is set to point to the first byte past the last byte copied -** before this function returns. The POS_COLUMN or POS_END terminator -** is not copied into *pp. +** Flush the contents of the pending-terms table to disk. */ -static void fts3ColumnlistCopy(char **pp, char **ppPoslist){ - char *pEnd = *ppPoslist; - char c = 0; - - /* A column-list is terminated by either a 0x01 or 0x00 byte that is - ** not part of a multi-byte varint. - */ - while( 0xFE & (*pEnd | c) ){ - c = *pEnd++ & 0x80; - testcase( c!=0 && ((*pEnd)&0xfe)==0 ); - } - if( pp ){ - int n = (int)(pEnd - *ppPoslist); - char *p = *pp; - memcpy(p, *ppPoslist, n); - p += n; - *pp = p; +static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ + int rc = SQLITE_OK; + UNUSED_PARAMETER(iSavepoint); + assert( ((Fts3Table *)pVtab)->inTransaction ); + assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint ); + TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint ); + if( ((Fts3Table *)pVtab)->bIgnoreSavepoint==0 ){ + rc = fts3SyncMethod(pVtab); } - *ppPoslist = pEnd; + return rc; } /* -** Value used to signify the end of an position-list. This is safe because -** it is not possible to have a document with 2^31 terms. +** The xRelease() method. +** +** This is a no-op. */ -#define POSITION_LIST_END 0x7fffffff +static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ + TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); + UNUSED_PARAMETER(iSavepoint); + UNUSED_PARAMETER(pVtab); + assert( p->inTransaction ); + assert( p->mxSavepoint >= iSavepoint ); + TESTONLY( p->mxSavepoint = iSavepoint-1 ); + return SQLITE_OK; +} /* -** This function is used to help parse position-lists. When this function is -** called, *pp may point to the start of the next varint in the position-list -** being parsed, or it may point to 1 byte past the end of the position-list -** (in which case **pp will be a terminator bytes POS_END (0) or -** (1)). +** The xRollbackTo() method. ** -** If *pp points past the end of the current position-list, set *pi to -** POSITION_LIST_END and return. Otherwise, read the next varint from *pp, -** increment the current value of *pi by the value read, and set *pp to -** point to the next value before returning. +** Discard the contents of the pending terms table. +*/ +static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ + Fts3Table *p = (Fts3Table*)pVtab; + UNUSED_PARAMETER(iSavepoint); + assert( p->inTransaction ); + assert( p->mxSavepoint >= iSavepoint ); + TESTONLY( p->mxSavepoint = iSavepoint ); + sqlite3Fts3PendingTermsClear(p); + return SQLITE_OK; +} + +static const sqlite3_module fts3Module = { + /* iVersion */ 2, + /* xCreate */ fts3CreateMethod, + /* xConnect */ fts3ConnectMethod, + /* xBestIndex */ fts3BestIndexMethod, + /* xDisconnect */ fts3DisconnectMethod, + /* xDestroy */ fts3DestroyMethod, + /* xOpen */ fts3OpenMethod, + /* xClose */ fts3CloseMethod, + /* xFilter */ fts3FilterMethod, + /* xNext */ fts3NextMethod, + /* xEof */ fts3EofMethod, + /* xColumn */ fts3ColumnMethod, + /* xRowid */ fts3RowidMethod, + /* xUpdate */ fts3UpdateMethod, + /* xBegin */ fts3BeginMethod, + /* xSync */ fts3SyncMethod, + /* xCommit */ fts3CommitMethod, + /* xRollback */ fts3RollbackMethod, + /* xFindFunction */ fts3FindFunctionMethod, + /* xRename */ fts3RenameMethod, + /* xSavepoint */ fts3SavepointMethod, + /* xRelease */ fts3ReleaseMethod, + /* xRollbackTo */ fts3RollbackToMethod, +}; + +/* +** This function is registered as the module destructor (called when an +** FTS3 enabled database connection is closed). It frees the memory +** allocated for the tokenizer hash table. +*/ +static void hashDestroy(void *p){ + Fts3Hash *pHash = (Fts3Hash *)p; + sqlite3Fts3HashClear(pHash); + sqlite3_free(pHash); +} + +/* +** The fts3 built-in tokenizers - "simple", "porter" and "icu"- are +** implemented in files fts3_tokenizer1.c, fts3_porter.c and fts3_icu.c +** respectively. The following three forward declarations are for functions +** declared in these files used to retrieve the respective implementations. ** -** Before calling this routine *pi must be initialized to the value of -** the previous position, or zero if we are reading the first position -** in the position-list. Because positions are delta-encoded, the value -** of the previous position is needed in order to compute the value of -** the next position. +** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed +** to by the argument to point to the "simple" tokenizer implementation. +** And so on. */ -static void fts3ReadNextPos( - char **pp, /* IN/OUT: Pointer into position-list buffer */ - sqlite3_int64 *pi /* IN/OUT: Value read from position-list */ -){ - if( (**pp)&0xFE ){ - fts3GetDeltaVarint(pp, pi); - *pi -= 2; +SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); +SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); +#ifndef SQLITE_DISABLE_FTS3_UNICODE +SQLITE_PRIVATE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule); +#endif +#ifdef SQLITE_ENABLE_ICU +SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule); +#endif + +/* +** Initialize the fts3 extension. If this extension is built as part +** of the sqlite library, then this function is called directly by +** SQLite. If fts3 is built as a dynamically loadable extension, this +** function is called by the sqlite3_extension_init() entry point. +*/ +SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db){ + int rc = SQLITE_OK; + Fts3Hash *pHash = 0; + const sqlite3_tokenizer_module *pSimple = 0; + const sqlite3_tokenizer_module *pPorter = 0; +#ifndef SQLITE_DISABLE_FTS3_UNICODE + const sqlite3_tokenizer_module *pUnicode = 0; +#endif + +#ifdef SQLITE_ENABLE_ICU + const sqlite3_tokenizer_module *pIcu = 0; + sqlite3Fts3IcuTokenizerModule(&pIcu); +#endif + +#ifndef SQLITE_DISABLE_FTS3_UNICODE + sqlite3Fts3UnicodeTokenizer(&pUnicode); +#endif + +#ifdef SQLITE_TEST + rc = sqlite3Fts3InitTerm(db); + if( rc!=SQLITE_OK ) return rc; +#endif + + rc = sqlite3Fts3InitAux(db); + if( rc!=SQLITE_OK ) return rc; + + sqlite3Fts3SimpleTokenizerModule(&pSimple); + sqlite3Fts3PorterTokenizerModule(&pPorter); + + /* Allocate and initialize the hash-table used to store tokenizers. */ + pHash = sqlite3_malloc(sizeof(Fts3Hash)); + if( !pHash ){ + rc = SQLITE_NOMEM; }else{ - *pi = POSITION_LIST_END; + sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); + } + + /* Load the built-in tokenizers into the hash table */ + if( rc==SQLITE_OK ){ + if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple) + || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter) + +#ifndef SQLITE_DISABLE_FTS3_UNICODE + || sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode) +#endif +#ifdef SQLITE_ENABLE_ICU + || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu)) +#endif + ){ + rc = SQLITE_NOMEM; + } + } + +#ifdef SQLITE_TEST + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3ExprInitTestInterface(db); + } +#endif + + /* Create the virtual table wrapper around the hash-table and overload + ** the two scalar functions. If this is successful, register the + ** module with sqlite. + */ + if( SQLITE_OK==rc + && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer")) + && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) + && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1)) + && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 1)) + && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 2)) + && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1)) + ){ + rc = sqlite3_create_module_v2( + db, "fts3", &fts3Module, (void *)pHash, hashDestroy + ); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_module_v2( + db, "fts4", &fts3Module, (void *)pHash, 0 + ); + } + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3InitTok(db, (void *)pHash); + } + return rc; + } + + + /* An error has occurred. Delete the hash table and return the error code. */ + assert( rc!=SQLITE_OK ); + if( pHash ){ + sqlite3Fts3HashClear(pHash); + sqlite3_free(pHash); } + return rc; } /* -** If parameter iCol is not 0, write an POS_COLUMN (1) byte followed by -** the value of iCol encoded as a varint to *pp. This will start a new -** column list. +** Allocate an Fts3MultiSegReader for each token in the expression headed +** by pExpr. ** -** Set *pp to point to the byte just after the last byte written before -** returning (do not modify it if iCol==0). Return the total number of bytes -** written (0 if iCol==0). +** An Fts3SegReader object is a cursor that can seek or scan a range of +** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple +** Fts3SegReader objects internally to provide an interface to seek or scan +** within the union of all segments of a b-tree. Hence the name. +** +** If the allocated Fts3MultiSegReader just seeks to a single entry in a +** segment b-tree (if the term is not a prefix or it is a prefix for which +** there exists prefix b-tree of the right length) then it may be traversed +** and merged incrementally. Otherwise, it has to be merged into an in-memory +** doclist and then traversed. */ -static int fts3PutColNumber(char **pp, int iCol){ - int n = 0; /* Number of bytes written */ - if( iCol ){ - char *p = *pp; /* Output pointer */ - n = 1 + sqlite3Fts3PutVarint(&p[1], iCol); - *p = 0x01; - *pp = &p[n]; +static void fts3EvalAllocateReaders( + Fts3Cursor *pCsr, /* FTS cursor handle */ + Fts3Expr *pExpr, /* Allocate readers for this expression */ + int *pnToken, /* OUT: Total number of tokens in phrase. */ + int *pnOr, /* OUT: Total number of OR nodes in expr. */ + int *pRc /* IN/OUT: Error code */ +){ + if( pExpr && SQLITE_OK==*pRc ){ + if( pExpr->eType==FTSQUERY_PHRASE ){ + int i; + int nToken = pExpr->pPhrase->nToken; + *pnToken += nToken; + for(i=0; ipPhrase->aToken[i]; + int rc = fts3TermSegReaderCursor(pCsr, + pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr + ); + if( rc!=SQLITE_OK ){ + *pRc = rc; + return; + } + } + assert( pExpr->pPhrase->iDoclistToken==0 ); + pExpr->pPhrase->iDoclistToken = -1; + }else{ + *pnOr += (pExpr->eType==FTSQUERY_OR); + fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc); + fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc); + } } - return n; } /* -** Compute the union of two position lists. The output written -** into *pp contains all positions of both *pp1 and *pp2 in sorted -** order and with any duplicates removed. All pointers are -** updated appropriately. The caller is responsible for insuring -** that there is enough space in *pp to hold the complete output. +** Arguments pList/nList contain the doclist for token iToken of phrase p. +** It is merged into the main doclist stored in p->doclist.aAll/nAll. +** +** This function assumes that pList points to a buffer allocated using +** sqlite3_malloc(). This function takes responsibility for eventually +** freeing the buffer. +** +** SQLITE_OK is returned if successful, or SQLITE_NOMEM if an error occurs. */ -static void fts3PoslistMerge( - char **pp, /* Output buffer */ - char **pp1, /* Left input list */ - char **pp2 /* Right input list */ +static int fts3EvalPhraseMergeToken( + Fts3Table *pTab, /* FTS Table pointer */ + Fts3Phrase *p, /* Phrase to merge pList/nList into */ + int iToken, /* Token pList/nList corresponds to */ + char *pList, /* Pointer to doclist */ + int nList /* Number of bytes in pList */ ){ - char *p = *pp; - char *p1 = *pp1; - char *p2 = *pp2; + int rc = SQLITE_OK; + assert( iToken!=p->iDoclistToken ); - while( *p1 || *p2 ){ - int iCol1; /* The current column index in pp1 */ - int iCol2; /* The current column index in pp2 */ + if( pList==0 ){ + sqlite3_free(p->doclist.aAll); + p->doclist.aAll = 0; + p->doclist.nAll = 0; + } - if( *p1==POS_COLUMN ) fts3GetVarint32(&p1[1], &iCol1); - else if( *p1==POS_END ) iCol1 = POSITION_LIST_END; - else iCol1 = 0; + else if( p->iDoclistToken<0 ){ + p->doclist.aAll = pList; + p->doclist.nAll = nList; + } - if( *p2==POS_COLUMN ) fts3GetVarint32(&p2[1], &iCol2); - else if( *p2==POS_END ) iCol2 = POSITION_LIST_END; - else iCol2 = 0; + else if( p->doclist.aAll==0 ){ + sqlite3_free(pList); + } - if( iCol1==iCol2 ){ - sqlite3_int64 i1 = 0; /* Last position from pp1 */ - sqlite3_int64 i2 = 0; /* Last position from pp2 */ - sqlite3_int64 iPrev = 0; - int n = fts3PutColNumber(&p, iCol1); - p1 += n; - p2 += n; + else { + char *pLeft; + char *pRight; + int nLeft; + int nRight; + int nDiff; - /* At this point, both p1 and p2 point to the start of column-lists - ** for the same column (the column with index iCol1 and iCol2). - ** A column-list is a list of non-negative delta-encoded varints, each - ** incremented by 2 before being stored. Each list is terminated by a - ** POS_END (0) or POS_COLUMN (1). The following block merges the two lists - ** and writes the results to buffer p. p is left pointing to the byte - ** after the list written. No terminator (POS_END or POS_COLUMN) is - ** written to the output. - */ - fts3GetDeltaVarint(&p1, &i1); - fts3GetDeltaVarint(&p2, &i2); - do { - fts3PutDeltaVarint(&p, &iPrev, (i1iDoclistTokendoclist.aAll; + nLeft = p->doclist.nAll; + pRight = pList; + nRight = nList; + nDiff = iToken - p->iDoclistToken; }else{ - p2 += fts3PutColNumber(&p, iCol2); - fts3ColumnlistCopy(&p, &p2); + pRight = p->doclist.aAll; + nRight = p->doclist.nAll; + pLeft = pList; + nLeft = nList; + nDiff = p->iDoclistToken - iToken; } + + rc = fts3DoclistPhraseMerge( + pTab->bDescIdx, nDiff, pLeft, nLeft, &pRight, &nRight + ); + sqlite3_free(pLeft); + p->doclist.aAll = pRight; + p->doclist.nAll = nRight; } - *p++ = POS_END; - *pp = p; - *pp1 = p1 + 1; - *pp2 = p2 + 1; + if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken; + return rc; } /* -** This function is used to merge two position lists into one. When it is -** called, *pp1 and *pp2 must both point to position lists. A position-list is -** the part of a doclist that follows each document id. For example, if a row -** contains: -** -** 'a b c'|'x y z'|'a b b a' -** -** Then the position list for this row for token 'b' would consist of: -** -** 0x02 0x01 0x02 0x03 0x03 0x00 -** -** When this function returns, both *pp1 and *pp2 are left pointing to the -** byte following the 0x00 terminator of their respective position lists. -** -** If isSaveLeft is 0, an entry is added to the output position list for -** each position in *pp2 for which there exists one or more positions in -** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e. -** when the *pp1 token appears before the *pp2 token, but not more than nToken -** slots before it. +** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist +** does not take deferred tokens into account. ** -** e.g. nToken==1 searches for adjacent positions. +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. */ -static int fts3PoslistPhraseMerge( - char **pp, /* IN/OUT: Preallocated output buffer */ - int nToken, /* Maximum difference in token positions */ - int isSaveLeft, /* Save the left position */ - int isExact, /* If *pp1 is exactly nTokens before *pp2 */ - char **pp1, /* IN/OUT: Left input list */ - char **pp2 /* IN/OUT: Right input list */ +static int fts3EvalPhraseLoad( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Phrase *p /* Phrase object */ ){ - char *p = *pp; - char *p1 = *pp1; - char *p2 = *pp2; - int iCol1 = 0; - int iCol2 = 0; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int iToken; + int rc = SQLITE_OK; - /* Never set both isSaveLeft and isExact for the same invocation. */ - assert( isSaveLeft==0 || isExact==0 ); + for(iToken=0; rc==SQLITE_OK && iTokennToken; iToken++){ + Fts3PhraseToken *pToken = &p->aToken[iToken]; + assert( pToken->pDeferred==0 || pToken->pSegcsr==0 ); - assert( p!=0 && *p1!=0 && *p2!=0 ); - if( *p1==POS_COLUMN ){ - p1++; - p1 += fts3GetVarint32(p1, &iCol1); - } - if( *p2==POS_COLUMN ){ - p2++; - p2 += fts3GetVarint32(p2, &iCol2); + if( pToken->pSegcsr ){ + int nThis = 0; + char *pThis = 0; + rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis); + if( rc==SQLITE_OK ){ + rc = fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis); + } + } + assert( pToken->pSegcsr==0 ); } - while( 1 ){ - if( iCol1==iCol2 ){ - char *pSave = p; - sqlite3_int64 iPrev = 0; - sqlite3_int64 iPos1 = 0; - sqlite3_int64 iPos2 = 0; + return rc; +} - if( iCol1 ){ - *p++ = POS_COLUMN; - p += sqlite3Fts3PutVarint(p, iCol1); - } +/* +** This function is called on each phrase after the position lists for +** any deferred tokens have been loaded into memory. It updates the phrases +** current position list to include only those positions that are really +** instances of the phrase (after considering deferred tokens). If this +** means that the phrase does not appear in the current row, doclist.pList +** and doclist.nList are both zeroed. +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +*/ +static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ + int iToken; /* Used to iterate through phrase tokens */ + char *aPoslist = 0; /* Position list for deferred tokens */ + int nPoslist = 0; /* Number of bytes in aPoslist */ + int iPrev = -1; /* Token number of previous deferred token */ - assert( *p1!=POS_END && *p1!=POS_COLUMN ); - assert( *p2!=POS_END && *p2!=POS_COLUMN ); - fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2; - fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2; + assert( pPhrase->doclist.bFreeList==0 ); - while( 1 ){ - if( iPos2==iPos1+nToken - || (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken) - ){ - sqlite3_int64 iSave; - iSave = isSaveLeft ? iPos1 : iPos2; - fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2; - pSave = 0; - assert( p ); - } - if( (!isSaveLeft && iPos2<=(iPos1+nToken)) || iPos2<=iPos1 ){ - if( (*p2&0xFE)==0 ) break; - fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2; - }else{ - if( (*p1&0xFE)==0 ) break; - fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2; - } - } + for(iToken=0; iTokennToken; iToken++){ + Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; + Fts3DeferredToken *pDeferred = pToken->pDeferred; - if( pSave ){ - assert( pp && p ); - p = pSave; - } + if( pDeferred ){ + char *pList; + int nList; + int rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList); + if( rc!=SQLITE_OK ) return rc; - fts3ColumnlistCopy(0, &p1); - fts3ColumnlistCopy(0, &p2); - assert( (*p1&0xFE)==0 && (*p2&0xFE)==0 ); - if( 0==*p1 || 0==*p2 ) break; + if( pList==0 ){ + sqlite3_free(aPoslist); + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + return SQLITE_OK; - p1++; - p1 += fts3GetVarint32(p1, &iCol1); - p2++; - p2 += fts3GetVarint32(p2, &iCol2); + }else if( aPoslist==0 ){ + aPoslist = pList; + nPoslist = nList; + + }else{ + char *aOut = pList; + char *p1 = aPoslist; + char *p2 = aOut; + + assert( iPrev>=0 ); + fts3PoslistPhraseMerge(&aOut, iToken-iPrev, 0, 1, &p1, &p2); + sqlite3_free(aPoslist); + aPoslist = pList; + nPoslist = (int)(aOut - aPoslist); + if( nPoslist==0 ){ + sqlite3_free(aPoslist); + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + return SQLITE_OK; + } + } + iPrev = iToken; } + } - /* Advance pointer p1 or p2 (whichever corresponds to the smaller of - ** iCol1 and iCol2) so that it points to either the 0x00 that marks the - ** end of the position list, or the 0x01 that precedes the next - ** column-number in the position list. - */ - else if( iCol1=0 ){ + int nMaxUndeferred = pPhrase->iDoclistToken; + if( nMaxUndeferred<0 ){ + pPhrase->doclist.pList = aPoslist; + pPhrase->doclist.nList = nPoslist; + pPhrase->doclist.iDocid = pCsr->iPrevId; + pPhrase->doclist.bFreeList = 1; }else{ - fts3ColumnlistCopy(0, &p2); - if( 0==*p2 ) break; - p2++; - p2 += fts3GetVarint32(p2, &iCol2); + int nDistance; + char *p1; + char *p2; + char *aOut; + + if( nMaxUndeferred>iPrev ){ + p1 = aPoslist; + p2 = pPhrase->doclist.pList; + nDistance = nMaxUndeferred - iPrev; + }else{ + p1 = pPhrase->doclist.pList; + p2 = aPoslist; + nDistance = iPrev - nMaxUndeferred; + } + + aOut = (char *)sqlite3_malloc(nPoslist+8); + if( !aOut ){ + sqlite3_free(aPoslist); + return SQLITE_NOMEM; + } + + pPhrase->doclist.pList = aOut; + if( fts3PoslistPhraseMerge(&aOut, nDistance, 0, 1, &p1, &p2) ){ + pPhrase->doclist.bFreeList = 1; + pPhrase->doclist.nList = (int)(aOut - pPhrase->doclist.pList); + }else{ + sqlite3_free(aOut); + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + } + sqlite3_free(aPoslist); } } - fts3PoslistCopy(0, &p2); - fts3PoslistCopy(0, &p1); - *pp1 = p1; - *pp2 = p2; - if( *pp==p ){ - return 0; - } - *p++ = 0x00; - *pp = p; - return 1; + return SQLITE_OK; } /* -** Merge two position-lists as required by the NEAR operator. The argument -** position lists correspond to the left and right phrases of an expression -** like: -** -** "phrase 1" NEAR "phrase number 2" +** Maximum number of tokens a phrase may have to be considered for the +** incremental doclists strategy. +*/ +#define MAX_INCR_PHRASE_TOKENS 4 + +/* +** This function is called for each Fts3Phrase in a full-text query +** expression to initialize the mechanism for returning rows. Once this +** function has been called successfully on an Fts3Phrase, it may be +** used with fts3EvalPhraseNext() to iterate through the matching docids. ** -** Position list *pp1 corresponds to the left-hand side of the NEAR -** expression and *pp2 to the right. As usual, the indexes in the position -** lists are the offsets of the last token in each phrase (tokens "1" and "2" -** in the example above). +** If parameter bOptOk is true, then the phrase may (or may not) use the +** incremental loading strategy. Otherwise, the entire doclist is loaded into +** memory within this call. ** -** The output position list - written to *pp - is a copy of *pp2 with those -** entries that are not sufficiently NEAR entries in *pp1 removed. +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. */ -static int fts3PoslistNearMerge( - char **pp, /* Output buffer */ - char *aTmp, /* Temporary buffer space */ - int nRight, /* Maximum difference in token positions */ - int nLeft, /* Maximum difference in token positions */ - char **pp1, /* IN/OUT: Left input list */ - char **pp2 /* IN/OUT: Right input list */ -){ - char *p1 = *pp1; - char *p2 = *pp2; +static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; /* Error code */ + int i; - char *pTmp1 = aTmp; - char *pTmp2; - char *aTmp2; - int res = 1; + /* Determine if doclists may be loaded from disk incrementally. This is + ** possible if the bOptOk argument is true, the FTS doclists will be + ** scanned in forward order, and the phrase consists of + ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first" + ** tokens or prefix tokens that cannot use a prefix-index. */ + int bHaveIncr = 0; + int bIncrOk = (bOptOk + && pCsr->bDesc==pTab->bDescIdx + && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 +#ifdef SQLITE_TEST + && pTab->bNoIncrDoclist==0 +#endif + ); + for(i=0; bIncrOk==1 && inToken; i++){ + Fts3PhraseToken *pToken = &p->aToken[i]; + if( pToken->bFirst || (pToken->pSegcsr!=0 && !pToken->pSegcsr->bLookup) ){ + bIncrOk = 0; + } + if( pToken->pSegcsr ) bHaveIncr = 1; + } - fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2); - aTmp2 = pTmp2 = pTmp1; - *pp1 = p1; - *pp2 = p2; - fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1); - if( pTmp1!=aTmp && pTmp2!=aTmp2 ){ - fts3PoslistMerge(pp, &aTmp, &aTmp2); - }else if( pTmp1!=aTmp ){ - fts3PoslistCopy(pp, &aTmp); - }else if( pTmp2!=aTmp2 ){ - fts3PoslistCopy(pp, &aTmp2); + if( bIncrOk && bHaveIncr ){ + /* Use the incremental approach. */ + int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); + for(i=0; rc==SQLITE_OK && inToken; i++){ + Fts3PhraseToken *pToken = &p->aToken[i]; + Fts3MultiSegReader *pSegcsr = pToken->pSegcsr; + if( pSegcsr ){ + rc = sqlite3Fts3MsrIncrStart(pTab, pSegcsr, iCol, pToken->z, pToken->n); + } + } + p->bIncr = 1; }else{ - res = 0; + /* Load the full doclist for the phrase into memory. */ + rc = fts3EvalPhraseLoad(pCsr, p); + p->bIncr = 0; } - return res; + assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr ); + return rc; } -/* -** An instance of this function is used to merge together the (potentially -** large number of) doclists for each term that matches a prefix query. -** See function fts3TermSelectMerge() for details. -*/ -typedef struct TermSelect TermSelect; -struct TermSelect { - char *aaOutput[16]; /* Malloc'd output buffers */ - int anOutput[16]; /* Size each output buffer in bytes */ -}; - /* -** This function is used to read a single varint from a buffer. Parameter -** pEnd points 1 byte past the end of the buffer. When this function is -** called, if *pp points to pEnd or greater, then the end of the buffer -** has been reached. In this case *pp is set to 0 and the function returns. -** -** If *pp does not point to or past pEnd, then a single varint is read -** from *pp. *pp is then set to point 1 byte past the end of the read varint. +** This function is used to iterate backwards (from the end to start) +** through doclists. It is used by this module to iterate through phrase +** doclists in reverse and by the fts3_write.c module to iterate through +** pending-terms lists when writing to databases with "order=desc". ** -** If bDescIdx is false, the value read is added to *pVal before returning. -** If it is true, the value read is subtracted from *pVal before this -** function returns. +** The doclist may be sorted in ascending (parameter bDescIdx==0) or +** descending (parameter bDescIdx==1) order of docid. Regardless, this +** function iterates from the end of the doclist to the beginning. */ -static void fts3GetDeltaVarint3( - char **pp, /* IN/OUT: Point to read varint from */ - char *pEnd, /* End of buffer */ - int bDescIdx, /* True if docids are descending */ - sqlite3_int64 *pVal /* IN/OUT: Integer value */ +SQLITE_PRIVATE void sqlite3Fts3DoclistPrev( + int bDescIdx, /* True if the doclist is desc */ + char *aDoclist, /* Pointer to entire doclist */ + int nDoclist, /* Length of aDoclist in bytes */ + char **ppIter, /* IN/OUT: Iterator pointer */ + sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */ + int *pnList, /* OUT: List length pointer */ + u8 *pbEof /* OUT: End-of-file flag */ ){ - if( *pp>=pEnd ){ - *pp = 0; - }else{ - sqlite3_int64 iVal; - *pp += sqlite3Fts3GetVarint(*pp, &iVal); - if( bDescIdx ){ - *pVal -= iVal; - }else{ - *pVal += iVal; + char *p = *ppIter; + + assert( nDoclist>0 ); + assert( *pbEof==0 ); + assert( p || *piDocid==0 ); + assert( !p || (p>aDoclist && p<&aDoclist[nDoclist]) ); + + if( p==0 ){ + sqlite3_int64 iDocid = 0; + char *pNext = 0; + char *pDocid = aDoclist; + char *pEnd = &aDoclist[nDoclist]; + int iMul = 1; + + while( pDocid0 ); - *pp += sqlite3Fts3PutVarint(*pp, iWrite); - *piPrev = iVal; - *pbFirst = 1; } - -/* -** This macro is used by various functions that merge doclists. The two -** arguments are 64-bit docid values. If the value of the stack variable -** bDescDoclist is 0 when this macro is invoked, then it returns (i1-i2). -** Otherwise, (i2-i1). -** -** Using this makes it easier to write code that can merge doclists that are -** sorted in either ascending or descending order. -*/ -#define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1-i2)) - /* -** This function does an "OR" merge of two doclists (output contains all -** positions contained in either argument doclist). If the docids in the -** input doclists are sorted in ascending order, parameter bDescDoclist -** should be false. If they are sorted in ascending order, it should be -** passed a non-zero value. -** -** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer -** containing the output doclist and SQLITE_OK is returned. In this case -** *pnOut is set to the number of bytes in the output doclist. -** -** If an error occurs, an SQLite error code is returned. The output values -** are undefined in this case. +** Iterate forwards through a doclist. */ -static int fts3DoclistOrMerge( - int bDescDoclist, /* True if arguments are desc */ - char *a1, int n1, /* First doclist */ - char *a2, int n2, /* Second doclist */ - char **paOut, int *pnOut /* OUT: Malloc'd doclist */ +SQLITE_PRIVATE void sqlite3Fts3DoclistNext( + int bDescIdx, /* True if the doclist is desc */ + char *aDoclist, /* Pointer to entire doclist */ + int nDoclist, /* Length of aDoclist in bytes */ + char **ppIter, /* IN/OUT: Iterator pointer */ + sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */ + u8 *pbEof /* OUT: End-of-file flag */ ){ - sqlite3_int64 i1 = 0; - sqlite3_int64 i2 = 0; - sqlite3_int64 iPrev = 0; - char *pEnd1 = &a1[n1]; - char *pEnd2 = &a2[n2]; - char *p1 = a1; - char *p2 = a2; - char *p; - char *aOut; - int bFirstOut = 0; - - *paOut = 0; - *pnOut = 0; - - /* Allocate space for the output. Both the input and output doclists - ** are delta encoded. If they are in ascending order (bDescDoclist==0), - ** then the first docid in each list is simply encoded as a varint. For - ** each subsequent docid, the varint stored is the difference between the - ** current and previous docid (a positive number - since the list is in - ** ascending order). - ** - ** The first docid written to the output is therefore encoded using the - ** same number of bytes as it is in whichever of the input lists it is - ** read from. And each subsequent docid read from the same input list - ** consumes either the same or less bytes as it did in the input (since - ** the difference between it and the previous value in the output must - ** be a positive value less than or equal to the delta value read from - ** the input list). The same argument applies to all but the first docid - ** read from the 'other' list. And to the contents of all position lists - ** that will be copied and merged from the input to the output. - ** - ** However, if the first docid copied to the output is a negative number, - ** then the encoding of the first docid from the 'other' input list may - ** be larger in the output than it was in the input (since the delta value - ** may be a larger positive integer than the actual docid). - ** - ** The space required to store the output is therefore the sum of the - ** sizes of the two inputs, plus enough space for exactly one of the input - ** docids to grow. - ** - ** A symetric argument may be made if the doclists are in descending - ** order. - */ - aOut = sqlite3_malloc(n1+n2+FTS3_VARINT_MAX-1); - if( !aOut ) return SQLITE_NOMEM; + char *p = *ppIter; - p = aOut; - fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); - fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); - while( p1 || p2 ){ - sqlite3_int64 iDiff = DOCID_CMP(i1, i2); + assert( nDoclist>0 ); + assert( *pbEof==0 ); + assert( p || *piDocid==0 ); + assert( !p || (p>=aDoclist && p<=&aDoclist[nDoclist]) ); - if( p2 && p1 && iDiff==0 ){ - fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); - fts3PoslistMerge(&p, &p1, &p2); - fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); - fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); - }else if( !p2 || (p1 && iDiff<0) ){ - fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); - fts3PoslistCopy(&p, &p1); - fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + if( p==0 ){ + p = aDoclist; + p += sqlite3Fts3GetVarint(p, piDocid); + }else{ + fts3PoslistCopy(0, &p); + while( p<&aDoclist[nDoclist] && *p==0 ) p++; + if( p>=&aDoclist[nDoclist] ){ + *pbEof = 1; }else{ - fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2); - fts3PoslistCopy(&p, &p2); - fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); + sqlite3_int64 iVar; + p += sqlite3Fts3GetVarint(p, &iVar); + *piDocid += ((bDescIdx ? -1 : 1) * iVar); } } - *paOut = aOut; - *pnOut = (int)(p-aOut); - assert( *pnOut<=n1+n2+FTS3_VARINT_MAX-1 ); - return SQLITE_OK; + *ppIter = p; } /* -** This function does a "phrase" merge of two doclists. In a phrase merge, -** the output contains a copy of each position from the right-hand input -** doclist for which there is a position in the left-hand input doclist -** exactly nDist tokens before it. -** -** If the docids in the input doclists are sorted in ascending order, -** parameter bDescDoclist should be false. If they are sorted in ascending -** order, it should be passed a non-zero value. -** -** The right-hand input doclist is overwritten by this function. +** Advance the iterator pDL to the next entry in pDL->aAll/nAll. Set *pbEof +** to true if EOF is reached. */ -static int fts3DoclistPhraseMerge( - int bDescDoclist, /* True if arguments are desc */ - int nDist, /* Distance from left to right (1=adjacent) */ - char *aLeft, int nLeft, /* Left doclist */ - char **paRight, int *pnRight /* IN/OUT: Right/output doclist */ +static void fts3EvalDlPhraseNext( + Fts3Table *pTab, + Fts3Doclist *pDL, + u8 *pbEof ){ - sqlite3_int64 i1 = 0; - sqlite3_int64 i2 = 0; - sqlite3_int64 iPrev = 0; - char *aRight = *paRight; - char *pEnd1 = &aLeft[nLeft]; - char *pEnd2 = &aRight[*pnRight]; - char *p1 = aLeft; - char *p2 = aRight; - char *p; - int bFirstOut = 0; - char *aOut; - - assert( nDist>0 ); - if( bDescDoclist ){ - aOut = sqlite3_malloc(*pnRight + FTS3_VARINT_MAX); - if( aOut==0 ) return SQLITE_NOMEM; + char *pIter; /* Used to iterate through aAll */ + char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */ + + if( pDL->pNextDocid ){ + pIter = pDL->pNextDocid; }else{ - aOut = aRight; + pIter = pDL->aAll; } - p = aOut; - - fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); - fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); - while( p1 && p2 ){ - sqlite3_int64 iDiff = DOCID_CMP(i1, i2); - if( iDiff==0 ){ - char *pSave = p; - sqlite3_int64 iPrevSave = iPrev; - int bFirstOutSave = bFirstOut; - - fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); - if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){ - p = pSave; - iPrev = iPrevSave; - bFirstOut = bFirstOutSave; - } - fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); - fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); - }else if( iDiff<0 ){ - fts3PoslistCopy(0, &p1); - fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + if( pIter>=pEnd ){ + /* We have already reached the end of this doclist. EOF. */ + *pbEof = 1; + }else{ + sqlite3_int64 iDelta; + pIter += sqlite3Fts3GetVarint(pIter, &iDelta); + if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ + pDL->iDocid += iDelta; }else{ - fts3PoslistCopy(0, &p2); - fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); + pDL->iDocid -= iDelta; } - } + pDL->pList = pIter; + fts3PoslistCopy(0, &pIter); + pDL->nList = (int)(pIter - pDL->pList); - *pnRight = (int)(p - aOut); - if( bDescDoclist ){ - sqlite3_free(aRight); - *paRight = aOut; - } + /* pIter now points just past the 0x00 that terminates the position- + ** list for document pDL->iDocid. However, if this position-list was + ** edited in place by fts3EvalNearTrim(), then pIter may not actually + ** point to the start of the next docid value. The following line deals + ** with this case by advancing pIter past the zero-padding added by + ** fts3EvalNearTrim(). */ + while( pIterpNextDocid = pIter; + assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter ); + *pbEof = 0; + } } /* -** Argument pList points to a position list nList bytes in size. This -** function checks to see if the position list contains any entries for -** a token in position 0 (of any column). If so, it writes argument iDelta -** to the output buffer pOut, followed by a position list consisting only -** of the entries from pList at position 0, and terminated by an 0x00 byte. -** The value returned is the number of bytes written to pOut (if any). +** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext(). */ -SQLITE_PRIVATE int sqlite3Fts3FirstFilter( - sqlite3_int64 iDelta, /* Varint that may be written to pOut */ - char *pList, /* Position list (no 0x00 term) */ - int nList, /* Size of pList in bytes */ - char *pOut /* Write output here */ -){ - int nOut = 0; - int bWritten = 0; /* True once iDelta has been written */ - char *p = pList; - char *pEnd = &pList[nList]; +typedef struct TokenDoclist TokenDoclist; +struct TokenDoclist { + int bIgnore; + sqlite3_int64 iDocid; + char *pList; + int nList; +}; - if( *p!=0x01 ){ - if( *p==0x02 ){ - nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta); - pOut[nOut++] = 0x02; - bWritten = 1; - } - fts3ColumnlistCopy(0, &p); - } +/* +** Token pToken is an incrementally loaded token that is part of a +** multi-token phrase. Advance it to the next matching document in the +** database and populate output variable *p with the details of the new +** entry. Or, if the iterator has reached EOF, set *pbEof to true. +** +** If an error occurs, return an SQLite error code. Otherwise, return +** SQLITE_OK. +*/ +static int incrPhraseTokenNext( + Fts3Table *pTab, /* Virtual table handle */ + Fts3Phrase *pPhrase, /* Phrase to advance token of */ + int iToken, /* Specific token to advance */ + TokenDoclist *p, /* OUT: Docid and doclist for new entry */ + u8 *pbEof /* OUT: True if iterator is at EOF */ +){ + int rc = SQLITE_OK; - while( piDoclistToken==iToken ){ + assert( p->bIgnore==0 ); + assert( pPhrase->aToken[iToken].pSegcsr==0 ); + fts3EvalDlPhraseNext(pTab, &pPhrase->doclist, pbEof); + p->pList = pPhrase->doclist.pList; + p->nList = pPhrase->doclist.nList; + p->iDocid = pPhrase->doclist.iDocid; + }else{ + Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; + assert( pToken->pDeferred==0 ); + assert( pToken->pSegcsr || pPhrase->iDoclistToken>=0 ); + if( pToken->pSegcsr ){ + assert( p->bIgnore==0 ); + rc = sqlite3Fts3MsrIncrNext( + pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList + ); + if( p->pList==0 ) *pbEof = 1; + }else{ + p->bIgnore = 1; } - fts3ColumnlistCopy(0, &p); - } - if( bWritten ){ - pOut[nOut++] = 0x00; } - return nOut; + return rc; } /* -** Merge all doclists in the TermSelect.aaOutput[] array into a single -** doclist stored in TermSelect.aaOutput[0]. If successful, delete all -** other doclists (except the aaOutput[0] one) and return SQLITE_OK. +** The phrase iterator passed as the second argument: ** -** If an OOM error occurs, return SQLITE_NOMEM. In this case it is -** the responsibility of the caller to free any doclists left in the -** TermSelect.aaOutput[] array. +** * features at least one token that uses an incremental doclist, and +** +** * does not contain any deferred tokens. +** +** Advance it to the next matching documnent in the database and populate +** the Fts3Doclist.pList and nList fields. +** +** If there is no "next" entry and no error occurs, then *pbEof is set to +** 1 before returning. Otherwise, if no error occurs and the iterator is +** successfully advanced, *pbEof is set to 0. +** +** If an error occurs, return an SQLite error code. Otherwise, return +** SQLITE_OK. */ -static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){ - char *aOut = 0; - int nOut = 0; - int i; +static int fts3EvalIncrPhraseNext( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Phrase *p, /* Phrase object to advance to next docid */ + u8 *pbEof /* OUT: Set to 1 if EOF */ +){ + int rc = SQLITE_OK; + Fts3Doclist *pDL = &p->doclist; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + u8 bEof = 0; - /* Loop through the doclists in the aaOutput[] array. Merge them all - ** into a single doclist. - */ - for(i=0; iaaOutput); i++){ - if( pTS->aaOutput[i] ){ - if( !aOut ){ - aOut = pTS->aaOutput[i]; - nOut = pTS->anOutput[i]; - pTS->aaOutput[i] = 0; - }else{ - int nNew; - char *aNew; + /* This is only called if it is guaranteed that the phrase has at least + ** one incremental token. In which case the bIncr flag is set. */ + assert( p->bIncr==1 ); - int rc = fts3DoclistOrMerge(p->bDescIdx, - pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, &aNew, &nNew - ); - if( rc!=SQLITE_OK ){ - sqlite3_free(aOut); - return rc; + if( p->nToken==1 && p->bIncr ){ + rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, + &pDL->iDocid, &pDL->pList, &pDL->nList + ); + if( pDL->pList==0 ) bEof = 1; + }else{ + int bDescDoclist = pCsr->bDesc; + struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS]; + + memset(a, 0, sizeof(a)); + assert( p->nToken<=MAX_INCR_PHRASE_TOKENS ); + assert( p->iDoclistTokennToken && bEof==0; i++){ + rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); + if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){ + iMax = a[i].iDocid; + bMaxSet = 1; } + } + assert( rc!=SQLITE_OK || (p->nToken>=1 && a[p->nToken-1].bIgnore==0) ); + assert( rc!=SQLITE_OK || bMaxSet ); - sqlite3_free(pTS->aaOutput[i]); - sqlite3_free(aOut); - pTS->aaOutput[i] = 0; - aOut = aNew; - nOut = nNew; + /* Keep advancing iterators until they all point to the same document */ + for(i=0; inToken; i++){ + while( rc==SQLITE_OK && bEof==0 + && a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0 + ){ + rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); + if( DOCID_CMP(a[i].iDocid, iMax)>0 ){ + iMax = a[i].iDocid; + i = 0; + } + } + } + + /* Check if the current entries really are a phrase match */ + if( bEof==0 ){ + int nList = 0; + int nByte = a[p->nToken-1].nList; + char *aDoclist = sqlite3_malloc(nByte+1); + if( !aDoclist ) return SQLITE_NOMEM; + memcpy(aDoclist, a[p->nToken-1].pList, nByte+1); + + for(i=0; i<(p->nToken-1); i++){ + if( a[i].bIgnore==0 ){ + char *pL = a[i].pList; + char *pR = aDoclist; + char *pOut = aDoclist; + int nDist = p->nToken-1-i; + int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR); + if( res==0 ) break; + nList = (int)(pOut - aDoclist); + } + } + if( i==(p->nToken-1) ){ + pDL->iDocid = iMax; + pDL->pList = aDoclist; + pDL->nList = nList; + pDL->bFreeList = 1; + break; + } + sqlite3_free(aDoclist); } } } - pTS->aaOutput[0] = aOut; - pTS->anOutput[0] = nOut; - return SQLITE_OK; + *pbEof = bEof; + return rc; } /* -** Merge the doclist aDoclist/nDoclist into the TermSelect object passed -** as the first argument. The merge is an "OR" merge (see function -** fts3DoclistOrMerge() for details). -** -** This function is called with the doclist for each term that matches -** a queried prefix. It merges all these doclists into one, the doclist -** for the specified prefix. Since there can be a very large number of -** doclists to merge, the merging is done pair-wise using the TermSelect -** object. +** Attempt to move the phrase iterator to point to the next matching docid. +** If an error occurs, return an SQLite error code. Otherwise, return +** SQLITE_OK. ** -** This function returns SQLITE_OK if the merge is successful, or an -** SQLite error code (SQLITE_NOMEM) if an error occurs. +** If there is no "next" entry and no error occurs, then *pbEof is set to +** 1 before returning. Otherwise, if no error occurs and the iterator is +** successfully advanced, *pbEof is set to 0. */ -static int fts3TermSelectMerge( - Fts3Table *p, /* FTS table handle */ - TermSelect *pTS, /* TermSelect object to merge into */ - char *aDoclist, /* Pointer to doclist */ - int nDoclist /* Size of aDoclist in bytes */ +static int fts3EvalPhraseNext( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Phrase *p, /* Phrase object to advance to next docid */ + u8 *pbEof /* OUT: Set to 1 if EOF */ ){ - if( pTS->aaOutput[0]==0 ){ - /* If this is the first term selected, copy the doclist to the output - ** buffer using memcpy(). - ** - ** Add FTS3_VARINT_MAX bytes of unused space to the end of the - ** allocation. This is so as to ensure that the buffer is big enough - ** to hold the current doclist AND'd with any other doclist. If the - ** doclists are stored in order=ASC order, this padding would not be - ** required (since the size of [doclistA AND doclistB] is always less - ** than or equal to the size of [doclistA] in that case). But this is - ** not true for order=DESC. For example, a doclist containing (1, -1) - ** may be smaller than (-1), as in the first example the -1 may be stored - ** as a single-byte delta, whereas in the second it must be stored as a - ** FTS3_VARINT_MAX byte varint. - ** - ** Similar padding is added in the fts3DoclistOrMerge() function. - */ - pTS->aaOutput[0] = sqlite3_malloc(nDoclist + FTS3_VARINT_MAX + 1); - pTS->anOutput[0] = nDoclist; - if( pTS->aaOutput[0] ){ - memcpy(pTS->aaOutput[0], aDoclist, nDoclist); - }else{ - return SQLITE_NOMEM; - } - }else{ - char *aMerge = aDoclist; - int nMerge = nDoclist; - int iOut; + int rc = SQLITE_OK; + Fts3Doclist *pDL = &p->doclist; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - for(iOut=0; iOutaaOutput); iOut++){ - if( pTS->aaOutput[iOut]==0 ){ - assert( iOut>0 ); - pTS->aaOutput[iOut] = aMerge; - pTS->anOutput[iOut] = nMerge; - break; - }else{ - char *aNew; - int nNew; + if( p->bIncr ){ + rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof); + }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ + sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, + &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof + ); + pDL->pList = pDL->pNextDocid; + }else{ + fts3EvalDlPhraseNext(pTab, pDL, pbEof); + } - int rc = fts3DoclistOrMerge(p->bDescIdx, aMerge, nMerge, - pTS->aaOutput[iOut], pTS->anOutput[iOut], &aNew, &nNew - ); - if( rc!=SQLITE_OK ){ - if( aMerge!=aDoclist ) sqlite3_free(aMerge); - return rc; - } + return rc; +} - if( aMerge!=aDoclist ) sqlite3_free(aMerge); - sqlite3_free(pTS->aaOutput[iOut]); - pTS->aaOutput[iOut] = 0; - - aMerge = aNew; - nMerge = nNew; - if( (iOut+1)==SizeofArray(pTS->aaOutput) ){ - pTS->aaOutput[iOut] = aMerge; - pTS->anOutput[iOut] = nMerge; +/* +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** Otherwise, fts3EvalPhraseStart() is called on all phrases within the +** expression. Also the Fts3Expr.bDeferred variable is set to true for any +** expressions for which all descendent tokens are deferred. +** +** If parameter bOptOk is zero, then it is guaranteed that the +** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for +** each phrase in the expression (subject to deferred token processing). +** Or, if bOptOk is non-zero, then one or more tokens within the expression +** may be loaded incrementally, meaning doclist.aAll/nAll is not available. +** +** If an error occurs within this function, *pRc is set to an SQLite error +** code before returning. +*/ +static void fts3EvalStartReaders( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pExpr, /* Expression to initialize phrases in */ + int *pRc /* IN/OUT: Error code */ +){ + if( pExpr && SQLITE_OK==*pRc ){ + if( pExpr->eType==FTSQUERY_PHRASE ){ + int nToken = pExpr->pPhrase->nToken; + if( nToken ){ + int i; + for(i=0; ipPhrase->aToken[i].pDeferred==0 ) break; } + pExpr->bDeferred = (i==nToken); } + *pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase); + }else{ + fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc); + fts3EvalStartReaders(pCsr, pExpr->pRight, pRc); + pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); } } - return SQLITE_OK; } /* -** Append SegReader object pNew to the end of the pCsr->apSegment[] array. +** An array of the following structures is assembled as part of the process +** of selecting tokens to defer before the query starts executing (as part +** of the xFilter() method). There is one element in the array for each +** token in the FTS expression. +** +** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong +** to phrases that are connected only by AND and NEAR operators (not OR or +** NOT). When determining tokens to defer, each AND/NEAR cluster is considered +** separately. The root of a tokens AND/NEAR cluster is stored in +** Fts3TokenAndCost.pRoot. */ -static int fts3SegReaderCursorAppend( - Fts3MultiSegReader *pCsr, - Fts3SegReader *pNew -){ - if( (pCsr->nSegment%16)==0 ){ - Fts3SegReader **apNew; - int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*); - apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte); - if( !apNew ){ - sqlite3Fts3SegReaderFree(pNew); - return SQLITE_NOMEM; - } - pCsr->apSegment = apNew; - } - pCsr->apSegment[pCsr->nSegment++] = pNew; - return SQLITE_OK; -} +typedef struct Fts3TokenAndCost Fts3TokenAndCost; +struct Fts3TokenAndCost { + Fts3Phrase *pPhrase; /* The phrase the token belongs to */ + int iToken; /* Position of token in phrase */ + Fts3PhraseToken *pToken; /* The token itself */ + Fts3Expr *pRoot; /* Root of NEAR/AND cluster */ + int nOvfl; /* Number of overflow pages to load doclist */ + int iCol; /* The column the token must match */ +}; /* -** Add seg-reader objects to the Fts3MultiSegReader object passed as the -** 8th argument. +** This function is used to populate an allocated Fts3TokenAndCost array. ** -** This function returns SQLITE_OK if successful, or an SQLite error code -** otherwise. +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** Otherwise, if an error occurs during execution, *pRc is set to an +** SQLite error code. */ -static int fts3SegReaderCursor( - Fts3Table *p, /* FTS3 table handle */ - int iLangid, /* Language id */ - int iIndex, /* Index to search (from 0 to p->nIndex-1) */ - int iLevel, /* Level of segments to scan */ - const char *zTerm, /* Term to query for */ - int nTerm, /* Size of zTerm in bytes */ - int isPrefix, /* True for a prefix search */ - int isScan, /* True to scan from zTerm to EOF */ - Fts3MultiSegReader *pCsr /* Cursor object to populate */ +static void fts3EvalTokenCosts( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pRoot, /* Root of current AND/NEAR cluster */ + Fts3Expr *pExpr, /* Expression to consider */ + Fts3TokenAndCost **ppTC, /* Write new entries to *(*ppTC)++ */ + Fts3Expr ***ppOr, /* Write new OR root to *(*ppOr)++ */ + int *pRc /* IN/OUT: Error code */ ){ - int rc = SQLITE_OK; /* Error code */ - sqlite3_stmt *pStmt = 0; /* Statement to iterate through segments */ - int rc2; /* Result of sqlite3_reset() */ - - /* If iLevel is less than 0 and this is not a scan, include a seg-reader - ** for the pending-terms. If this is a scan, then this call must be being - ** made by an fts4aux module, not an FTS table. In this case calling - ** Fts3SegReaderPending might segfault, as the data structures used by - ** fts4aux are not completely populated. So it's easiest to filter these - ** calls out here. */ - if( iLevel<0 && p->aIndex ){ - Fts3SegReader *pSeg = 0; - rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix, &pSeg); - if( rc==SQLITE_OK && pSeg ){ - rc = fts3SegReaderCursorAppend(pCsr, pSeg); - } - } - - if( iLevel!=FTS3_SEGCURSOR_PENDING ){ - if( rc==SQLITE_OK ){ - rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pStmt); - } - - while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){ - Fts3SegReader *pSeg = 0; - - /* Read the values returned by the SELECT into local variables. */ - sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1); - sqlite3_int64 iLeavesEndBlock = sqlite3_column_int64(pStmt, 2); - sqlite3_int64 iEndBlock = sqlite3_column_int64(pStmt, 3); - int nRoot = sqlite3_column_bytes(pStmt, 4); - char const *zRoot = sqlite3_column_blob(pStmt, 4); - - /* If zTerm is not NULL, and this segment is not stored entirely on its - ** root node, the range of leaves scanned can be reduced. Do this. */ - if( iStartBlock && zTerm ){ - sqlite3_int64 *pi = (isPrefix ? &iLeavesEndBlock : 0); - rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &iStartBlock, pi); - if( rc!=SQLITE_OK ) goto finished; - if( isPrefix==0 && isScan==0 ) iLeavesEndBlock = iStartBlock; + if( *pRc==SQLITE_OK ){ + if( pExpr->eType==FTSQUERY_PHRASE ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + int i; + for(i=0; *pRc==SQLITE_OK && inToken; i++){ + Fts3TokenAndCost *pTC = (*ppTC)++; + pTC->pPhrase = pPhrase; + pTC->iToken = i; + pTC->pRoot = pRoot; + pTC->pToken = &pPhrase->aToken[i]; + pTC->iCol = pPhrase->iColumn; + *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl); } - - rc = sqlite3Fts3SegReaderNew(pCsr->nSegment+1, - (isPrefix==0 && isScan==0), - iStartBlock, iLeavesEndBlock, - iEndBlock, zRoot, nRoot, &pSeg + }else if( pExpr->eType!=FTSQUERY_NOT ){ + assert( pExpr->eType==FTSQUERY_OR + || pExpr->eType==FTSQUERY_AND + || pExpr->eType==FTSQUERY_NEAR ); - if( rc!=SQLITE_OK ) goto finished; - rc = fts3SegReaderCursorAppend(pCsr, pSeg); + assert( pExpr->pLeft && pExpr->pRight ); + if( pExpr->eType==FTSQUERY_OR ){ + pRoot = pExpr->pLeft; + **ppOr = pRoot; + (*ppOr)++; + } + fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc); + if( pExpr->eType==FTSQUERY_OR ){ + pRoot = pExpr->pRight; + **ppOr = pRoot; + (*ppOr)++; + } + fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc); } } - - finished: - rc2 = sqlite3_reset(pStmt); - if( rc==SQLITE_DONE ) rc = rc2; - - return rc; } /* -** Set up a cursor object for iterating through a full-text index or a -** single level therein. +** Determine the average document (row) size in pages. If successful, +** write this value to *pnPage and return SQLITE_OK. Otherwise, return +** an SQLite error code. +** +** The average document size in pages is calculated by first calculating +** determining the average size in bytes, B. If B is less than the amount +** of data that will fit on a single leaf page of an intkey table in +** this database, then the average docsize is 1. Otherwise, it is 1 plus +** the number of overflow pages consumed by a record B bytes in size. */ -SQLITE_PRIVATE int sqlite3Fts3SegReaderCursor( - Fts3Table *p, /* FTS3 table handle */ - int iLangid, /* Language-id to search */ - int iIndex, /* Index to search (from 0 to p->nIndex-1) */ - int iLevel, /* Level of segments to scan */ - const char *zTerm, /* Term to query for */ - int nTerm, /* Size of zTerm in bytes */ - int isPrefix, /* True for a prefix search */ - int isScan, /* True to scan from zTerm to EOF */ - Fts3MultiSegReader *pCsr /* Cursor object to populate */ -){ - assert( iIndex>=0 && iIndexnIndex ); - assert( iLevel==FTS3_SEGCURSOR_ALL - || iLevel==FTS3_SEGCURSOR_PENDING - || iLevel>=0 - ); - assert( iLevelnRowAvg==0 ){ + /* The average document size, which is required to calculate the cost + ** of each doclist, has not yet been determined. Read the required + ** data from the %_stat table to calculate it. + ** + ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 + ** varints, where nCol is the number of columns in the FTS3 table. + ** The first varint is the number of documents currently stored in + ** the table. The following nCol varints contain the total amount of + ** data stored in all rows of each column of the table, from left + ** to right. + */ + int rc; + Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; + sqlite3_stmt *pStmt; + sqlite3_int64 nDoc = 0; + sqlite3_int64 nByte = 0; + const char *pEnd; + const char *a; - memset(pCsr, 0, sizeof(Fts3MultiSegReader)); - return fts3SegReaderCursor( - p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr - ); -} + rc = sqlite3Fts3SelectDoctotal(p, &pStmt); + if( rc!=SQLITE_OK ) return rc; + a = sqlite3_column_blob(pStmt, 0); + assert( a ); -/* -** In addition to its current configuration, have the Fts3MultiSegReader -** passed as the 4th argument also scan the doclist for term zTerm/nTerm. -** -** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. -*/ -static int fts3SegReaderCursorAddZero( - Fts3Table *p, /* FTS virtual table handle */ - int iLangid, - const char *zTerm, /* Term to scan doclist of */ - int nTerm, /* Number of bytes in zTerm */ - Fts3MultiSegReader *pCsr /* Fts3MultiSegReader to modify */ -){ - return fts3SegReaderCursor(p, - iLangid, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr - ); + pEnd = &a[sqlite3_column_bytes(pStmt, 0)]; + a += sqlite3Fts3GetVarint(a, &nDoc); + while( anDoc = nDoc; + pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz); + assert( pCsr->nRowAvg>0 ); + rc = sqlite3_reset(pStmt); + if( rc!=SQLITE_OK ) return rc; + } + + *pnPage = pCsr->nRowAvg; + return SQLITE_OK; } /* -** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or, -** if isPrefix is true, to scan the doclist for all terms for which -** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write -** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return -** an SQLite error code. +** This function is called to select the tokens (if any) that will be +** deferred. The array aTC[] has already been populated when this is +** called. ** -** It is the responsibility of the caller to free this object by eventually -** passing it to fts3SegReaderCursorFree() +** This function is called once for each AND/NEAR cluster in the +** expression. Each invocation determines which tokens to defer within +** the cluster with root node pRoot. See comments above the definition +** of struct Fts3TokenAndCost for more details. ** -** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. -** Output parameter *ppSegcsr is set to 0 if an error occurs. +** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken() +** called on each token to defer. Otherwise, an SQLite error code is +** returned. */ -static int fts3TermSegReaderCursor( - Fts3Cursor *pCsr, /* Virtual table cursor handle */ - const char *zTerm, /* Term to query for */ - int nTerm, /* Size of zTerm in bytes */ - int isPrefix, /* True for a prefix search */ - Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */ +static int fts3EvalSelectDeferred( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pRoot, /* Consider tokens with this root node */ + Fts3TokenAndCost *aTC, /* Array of expression tokens and costs */ + int nTC /* Number of entries in aTC[] */ ){ - Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */ - int rc = SQLITE_NOMEM; /* Return code */ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int nDocSize = 0; /* Number of pages per doc loaded */ + int rc = SQLITE_OK; /* Return code */ + int ii; /* Iterator variable for various purposes */ + int nOvfl = 0; /* Total overflow pages used by doclists */ + int nToken = 0; /* Total number of tokens in cluster */ - pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader)); - if( pSegcsr ){ - int i; - int bFound = 0; /* True once an index has been found */ - Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + int nMinEst = 0; /* The minimum count for any phrase so far. */ + int nLoad4 = 1; /* (Phrases that will be loaded)^4. */ - if( isPrefix ){ - for(i=1; bFound==0 && inIndex; i++){ - if( p->aIndex[i].nPrefix==nTerm ){ - bFound = 1; - rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, - i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr - ); - pSegcsr->bLookup = 1; - } + /* Tokens are never deferred for FTS tables created using the content=xxx + ** option. The reason being that it is not guaranteed that the content + ** table actually contains the same data as the index. To prevent this from + ** causing any problems, the deferred token optimization is completely + ** disabled for content=xxx tables. */ + if( pTab->zContentTbl ){ + return SQLITE_OK; + } + + /* Count the tokens in this AND/NEAR cluster. If none of the doclists + ** associated with the tokens spill onto overflow pages, or if there is + ** only 1 token, exit early. No tokens to defer in this case. */ + for(ii=0; ii0 ); + + + /* Iterate through all tokens in this AND/NEAR cluster, in ascending order + ** of the number of overflow pages that will be loaded by the pager layer + ** to retrieve the entire doclist for the token from the full-text index. + ** Load the doclists for tokens that are either: + ** + ** a. The cheapest token in the entire query (i.e. the one visited by the + ** first iteration of this loop), or + ** + ** b. Part of a multi-token phrase. + ** + ** After each token doclist is loaded, merge it with the others from the + ** same phrase and count the number of documents that the merged doclist + ** contains. Set variable "nMinEst" to the smallest number of documents in + ** any phrase doclist for which 1 or more token doclists have been loaded. + ** Let nOther be the number of other phrases for which it is certain that + ** one or more tokens will not be deferred. + ** + ** Then, for each token, defer it if loading the doclist would result in + ** loading N or more overflow pages into memory, where N is computed as: + ** + ** (nMinEst + 4^nOther - 1) / (4^nOther) + */ + for(ii=0; iinOvfl) + ){ + pTC = &aTC[iTC]; } + } + assert( pTC ); - for(i=1; bFound==0 && inIndex; i++){ - if( p->aIndex[i].nPrefix==nTerm+1 ){ - bFound = 1; - rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, - i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr + if( ii && pTC->nOvfl>=((nMinEst+(nLoad4/4)-1)/(nLoad4/4))*nDocSize ){ + /* The number of overflow pages to load for this (and therefore all + ** subsequent) tokens is greater than the estimated number of pages + ** that will be loaded if all subsequent tokens are deferred. + */ + Fts3PhraseToken *pToken = pTC->pToken; + rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol); + fts3SegReaderCursorFree(pToken->pSegcsr); + pToken->pSegcsr = 0; + }else{ + /* Set nLoad4 to the value of (4^nOther) for the next iteration of the + ** for-loop. Except, limit the value to 2^24 to prevent it from + ** overflowing the 32-bit integer it is stored in. */ + if( ii<12 ) nLoad4 = nLoad4*4; + + if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){ + /* Either this is the cheapest token in the entire query, or it is + ** part of a multi-token phrase. Either way, the entire doclist will + ** (eventually) be loaded into memory. It may as well be now. */ + Fts3PhraseToken *pToken = pTC->pToken; + int nList = 0; + char *pList = 0; + rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList); + assert( rc==SQLITE_OK || pList==0 ); + if( rc==SQLITE_OK ){ + rc = fts3EvalPhraseMergeToken( + pTab, pTC->pPhrase, pTC->iToken,pList,nList ); - if( rc==SQLITE_OK ){ - rc = fts3SegReaderCursorAddZero( - p, pCsr->iLangid, zTerm, nTerm, pSegcsr - ); - } + } + if( rc==SQLITE_OK ){ + int nCount; + nCount = fts3DoclistCountDocids( + pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll + ); + if( ii==0 || nCountiLangid, - 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr - ); - pSegcsr->bLookup = !isPrefix; - } + pTC->pToken = 0; } - *ppSegcsr = pSegcsr; return rc; } /* -** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor(). +** This function is called from within the xFilter method. It initializes +** the full-text query currently stored in pCsr->pExpr. To iterate through +** the results of a query, the caller does: +** +** fts3EvalStart(pCsr); +** while( 1 ){ +** fts3EvalNext(pCsr); +** if( pCsr->bEof ) break; +** ... return row pCsr->iPrevId to the caller ... +** } */ -static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){ - sqlite3Fts3SegReaderFinish(pSegcsr); - sqlite3_free(pSegcsr); -} +static int fts3EvalStart(Fts3Cursor *pCsr){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int nToken = 0; + int nOr = 0; -/* -** This function retrieves the doclist for the specified term (or term -** prefix) from the database. -*/ -static int fts3TermSelect( - Fts3Table *p, /* Virtual table handle */ - Fts3PhraseToken *pTok, /* Token to query for */ - int iColumn, /* Column to query (or -ve for all columns) */ - int *pnOut, /* OUT: Size of buffer at *ppOut */ - char **ppOut /* OUT: Malloced result buffer */ -){ - int rc; /* Return code */ - Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */ - TermSelect tsc; /* Object for pair-wise doclist merging */ - Fts3SegFilter filter; /* Segment term filter configuration */ + /* Allocate a MultiSegReader for each token in the expression. */ + fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc); - pSegcsr = pTok->pSegcsr; - memset(&tsc, 0, sizeof(TermSelect)); + /* Determine which, if any, tokens in the expression should be deferred. */ +#ifndef SQLITE_DISABLE_FTS4_DEFERRED + if( rc==SQLITE_OK && nToken>1 && pTab->bFts4 ){ + Fts3TokenAndCost *aTC; + Fts3Expr **apOr; + aTC = (Fts3TokenAndCost *)sqlite3_malloc( + sizeof(Fts3TokenAndCost) * nToken + + sizeof(Fts3Expr *) * nOr * 2 + ); + apOr = (Fts3Expr **)&aTC[nToken]; - filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS - | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0) - | (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0) - | (iColumnnColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0); - filter.iCol = iColumn; - filter.zTerm = pTok->z; - filter.nTerm = pTok->n; + if( !aTC ){ + rc = SQLITE_NOMEM; + }else{ + int ii; + Fts3TokenAndCost *pTC = aTC; + Fts3Expr **ppOr = apOr; - rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter); - while( SQLITE_OK==rc - && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr)) - ){ - rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist); - } + fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc); + nToken = (int)(pTC-aTC); + nOr = (int)(ppOr-apOr); - if( rc==SQLITE_OK ){ - rc = fts3TermSelectFinishMerge(p, &tsc); - } - if( rc==SQLITE_OK ){ - *ppOut = tsc.aaOutput[0]; - *pnOut = tsc.anOutput[0]; - }else{ - int i; - for(i=0; ipSegcsr = 0; + fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc); return rc; } /* -** This function counts the total number of docids in the doclist stored -** in buffer aList[], size nList bytes. -** -** If the isPoslist argument is true, then it is assumed that the doclist -** contains a position-list following each docid. Otherwise, it is assumed -** that the doclist is simply a list of docids stored as delta encoded -** varints. +** Invalidate the current position list for phrase pPhrase. */ -static int fts3DoclistCountDocids(char *aList, int nList){ - int nDoc = 0; /* Return value */ - if( aList ){ - char *aEnd = &aList[nList]; /* Pointer to one byte after EOF */ - char *p = aList; /* Cursor */ - while( pdoclist.bFreeList ){ + sqlite3_free(pPhrase->doclist.pList); } - - return nDoc; + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + pPhrase->doclist.bFreeList = 0; } /* -** Advance the cursor to the next row in the %_content table that -** matches the search criteria. For a MATCH search, this will be -** the next row that matches. For a full-table scan, this will be -** simply the next row in the %_content table. For a docid lookup, -** this routine simply sets the EOF flag. +** This function is called to edit the position list associated with +** the phrase object passed as the fifth argument according to a NEAR +** condition. For example: ** -** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned -** even if we reach end-of-file. The fts3EofMethod() will be called -** subsequently to determine whether or not an EOF was hit. -*/ -static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ - int rc; - Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; - if( pCsr->eSearch==FTS3_DOCID_SEARCH || pCsr->eSearch==FTS3_FULLSCAN_SEARCH ){ - if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ - pCsr->isEof = 1; - rc = sqlite3_reset(pCsr->pStmt); - }else{ - pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0); - rc = SQLITE_OK; - } - }else{ - rc = fts3EvalNext((Fts3Cursor *)pCursor); - } - assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); - return rc; -} - -/* -** The following are copied from sqliteInt.h. +** abc NEAR/5 "def ghi" ** -** Constants for the largest and smallest possible 64-bit signed integers. -** These macros are designed to work correctly on both 32-bit and 64-bit -** compilers. +** Parameter nNear is passed the NEAR distance of the expression (5 in +** the example above). When this function is called, *paPoslist points to +** the position list, and *pnToken is the number of phrase tokens in, the +** phrase on the other side of the NEAR operator to pPhrase. For example, +** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to +** the position list associated with phrase "abc". +** +** All positions in the pPhrase position list that are not sufficiently +** close to a position in the *paPoslist position list are removed. If this +** leaves 0 positions, zero is returned. Otherwise, non-zero. +** +** Before returning, *paPoslist is set to point to the position lsit +** associated with pPhrase. And *pnToken is set to the number of tokens in +** pPhrase. */ -#ifndef SQLITE_AMALGAMATION -# define LARGEST_INT64 (0xffffffff|(((sqlite3_int64)0x7fffffff)<<32)) -# define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64) -#endif +static int fts3EvalNearTrim( + int nNear, /* NEAR distance. As in "NEAR/nNear". */ + char *aTmp, /* Temporary space to use */ + char **paPoslist, /* IN/OUT: Position list */ + int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */ + Fts3Phrase *pPhrase /* The phrase object to trim the doclist of */ +){ + int nParam1 = nNear + pPhrase->nToken; + int nParam2 = nNear + *pnToken; + int nNew; + char *p2; + char *pOut; + int res; -/* -** If the numeric type of argument pVal is "integer", then return it -** converted to a 64-bit signed integer. Otherwise, return a copy of -** the second parameter, iDefault. -*/ -static sqlite3_int64 fts3DocidRange(sqlite3_value *pVal, i64 iDefault){ - if( pVal ){ - int eType = sqlite3_value_numeric_type(pVal); - if( eType==SQLITE_INTEGER ){ - return sqlite3_value_int64(pVal); - } + assert( pPhrase->doclist.pList ); + + p2 = pOut = pPhrase->doclist.pList; + res = fts3PoslistNearMerge( + &pOut, aTmp, nParam1, nParam2, paPoslist, &p2 + ); + if( res ){ + nNew = (int)(pOut - pPhrase->doclist.pList) - 1; + assert( pPhrase->doclist.pList[nNew]=='\0' ); + assert( nNew<=pPhrase->doclist.nList && nNew>0 ); + memset(&pPhrase->doclist.pList[nNew], 0, pPhrase->doclist.nList - nNew); + pPhrase->doclist.nList = nNew; + *paPoslist = pPhrase->doclist.pList; + *pnToken = pPhrase->nToken; } - return iDefault; + + return res; } /* -** This is the xFilter interface for the virtual table. See -** the virtual table xFilter method documentation for additional -** information. +** This function is a no-op if *pRc is other than SQLITE_OK when it is called. +** Otherwise, it advances the expression passed as the second argument to +** point to the next matching row in the database. Expressions iterate through +** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero, +** or descending if it is non-zero. ** -** If idxNum==FTS3_FULLSCAN_SEARCH then do a full table scan against -** the %_content table. +** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if +** successful, the following variables in pExpr are set: ** -** If idxNum==FTS3_DOCID_SEARCH then do a docid lookup for a single entry -** in the %_content table. +** Fts3Expr.bEof (non-zero if EOF - there is no next row) +** Fts3Expr.iDocid (valid if bEof==0. The docid of the next row) ** -** If idxNum>=FTS3_FULLTEXT_SEARCH then use the full text index. The -** column on the left-hand side of the MATCH operator is column -** number idxNum-FTS3_FULLTEXT_SEARCH, 0 indexed. argv[0] is the right-hand -** side of the MATCH operator. +** If the expression is of type FTSQUERY_PHRASE, and the expression is not +** at EOF, then the following variables are populated with the position list +** for the phrase for the visited row: +** +** FTs3Expr.pPhrase->doclist.nList (length of pList in bytes) +** FTs3Expr.pPhrase->doclist.pList (pointer to position list) +** +** It says above that this function advances the expression to the next +** matching row. This is usually true, but there are the following exceptions: +** +** 1. Deferred tokens are not taken into account. If a phrase consists +** entirely of deferred tokens, it is assumed to match every row in +** the db. In this case the position-list is not populated at all. +** +** Or, if a phrase contains one or more deferred tokens and one or +** more non-deferred tokens, then the expression is advanced to the +** next possible match, considering only non-deferred tokens. In other +** words, if the phrase is "A B C", and "B" is deferred, the expression +** is advanced to the next row that contains an instance of "A * C", +** where "*" may match any single token. The position list in this case +** is populated as for "A * C" before returning. +** +** 2. NEAR is treated as AND. If the expression is "x NEAR y", it is +** advanced to point to the next row that matches "x AND y". +** +** See sqlite3Fts3EvalTestDeferred() for details on testing if a row is +** really a match, taking into account deferred tokens and NEAR operators. */ -static int fts3FilterMethod( - sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ - int idxNum, /* Strategy index */ - const char *idxStr, /* Unused */ - int nVal, /* Number of elements in apVal */ - sqlite3_value **apVal /* Arguments for the indexing scheme */ +static void fts3EvalNextRow( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pExpr, /* Expr. to advance to next matching row */ + int *pRc /* IN/OUT: Error code */ ){ - int rc = SQLITE_OK; - char *zSql; /* SQL statement used to access %_content */ - int eSearch; - Fts3Table *p = (Fts3Table *)pCursor->pVtab; - Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; - - sqlite3_value *pCons = 0; /* The MATCH or rowid constraint, if any */ - sqlite3_value *pLangid = 0; /* The "langid = ?" constraint, if any */ - sqlite3_value *pDocidGe = 0; /* The "docid >= ?" constraint, if any */ - sqlite3_value *pDocidLe = 0; /* The "docid <= ?" constraint, if any */ - int iIdx; - - UNUSED_PARAMETER(idxStr); - UNUSED_PARAMETER(nVal); - - eSearch = (idxNum & 0x0000FFFF); - assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); - assert( p->pSegments==0 ); - - /* Collect arguments into local variables */ - iIdx = 0; - if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++]; - if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++]; - if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++]; - if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++]; - assert( iIdx==nVal ); - - /* In case the cursor has been used before, clear it now. */ - sqlite3_finalize(pCsr->pStmt); - sqlite3_free(pCsr->aDoclist); - sqlite3_free(pCsr->aMatchinfo); - sqlite3Fts3ExprFree(pCsr->pExpr); - memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor)); - - /* Set the lower and upper bounds on docids to return */ - pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64); - pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64); + if( *pRc==SQLITE_OK ){ + int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */ + assert( pExpr->bEof==0 ); + pExpr->bStart = 1; - if( idxStr ){ - pCsr->bDesc = (idxStr[0]=='D'); - }else{ - pCsr->bDesc = p->bDescIdx; - } - pCsr->eSearch = (i16)eSearch; + switch( pExpr->eType ){ + case FTSQUERY_NEAR: + case FTSQUERY_AND: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + assert( !pLeft->bDeferred || !pRight->bDeferred ); - if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){ - int iCol = eSearch-FTS3_FULLTEXT_SEARCH; - const char *zQuery = (const char *)sqlite3_value_text(pCons); + if( pLeft->bDeferred ){ + /* LHS is entirely deferred. So we assume it matches every row. + ** Advance the RHS iterator to find the next row visited. */ + fts3EvalNextRow(pCsr, pRight, pRc); + pExpr->iDocid = pRight->iDocid; + pExpr->bEof = pRight->bEof; + }else if( pRight->bDeferred ){ + /* RHS is entirely deferred. So we assume it matches every row. + ** Advance the LHS iterator to find the next row visited. */ + fts3EvalNextRow(pCsr, pLeft, pRc); + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = pLeft->bEof; + }else{ + /* Neither the RHS or LHS are deferred. */ + fts3EvalNextRow(pCsr, pLeft, pRc); + fts3EvalNextRow(pCsr, pRight, pRc); + while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){ + sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + if( iDiff==0 ) break; + if( iDiff<0 ){ + fts3EvalNextRow(pCsr, pLeft, pRc); + }else{ + fts3EvalNextRow(pCsr, pRight, pRc); + } + } + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = (pLeft->bEof || pRight->bEof); + if( pExpr->eType==FTSQUERY_NEAR && pExpr->bEof ){ + if( pRight->pPhrase && pRight->pPhrase->doclist.aAll ){ + Fts3Doclist *pDl = &pRight->pPhrase->doclist; + while( *pRc==SQLITE_OK && pRight->bEof==0 ){ + memset(pDl->pList, 0, pDl->nList); + fts3EvalNextRow(pCsr, pRight, pRc); + } + } + if( pLeft->pPhrase && pLeft->pPhrase->doclist.aAll ){ + Fts3Doclist *pDl = &pLeft->pPhrase->doclist; + while( *pRc==SQLITE_OK && pLeft->bEof==0 ){ + memset(pDl->pList, 0, pDl->nList); + fts3EvalNextRow(pCsr, pLeft, pRc); + } + } + } + } + break; + } + + case FTSQUERY_OR: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); - if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){ - return SQLITE_NOMEM; - } + assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid ); + assert( pRight->bStart || pLeft->iDocid==pRight->iDocid ); - pCsr->iLangid = 0; - if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid); + if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ + fts3EvalNextRow(pCsr, pLeft, pRc); + }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){ + fts3EvalNextRow(pCsr, pRight, pRc); + }else{ + fts3EvalNextRow(pCsr, pLeft, pRc); + fts3EvalNextRow(pCsr, pRight, pRc); + } - assert( p->base.zErrMsg==0 ); - rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid, - p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr, - &p->base.zErrMsg - ); - if( rc!=SQLITE_OK ){ - return rc; - } + pExpr->bEof = (pLeft->bEof && pRight->bEof); + iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ + pExpr->iDocid = pLeft->iDocid; + }else{ + pExpr->iDocid = pRight->iDocid; + } - rc = fts3EvalStart(pCsr); - sqlite3Fts3SegmentsClose(p); - if( rc!=SQLITE_OK ) return rc; - pCsr->pNextId = pCsr->aDoclist; - pCsr->iPrevId = 0; - } + break; + } - /* Compile a SELECT statement for this cursor. For a full-table-scan, the - ** statement loops through all rows of the %_content table. For a - ** full-text query or docid lookup, the statement retrieves a single - ** row by docid. - */ - if( eSearch==FTS3_FULLSCAN_SEARCH ){ - if( pDocidGe || pDocidLe ){ - zSql = sqlite3_mprintf( - "SELECT %s WHERE rowid BETWEEN %lld AND %lld ORDER BY rowid %s", - p->zReadExprlist, pCsr->iMinDocid, pCsr->iMaxDocid, - (pCsr->bDesc ? "DESC" : "ASC") - ); - }else{ - zSql = sqlite3_mprintf("SELECT %s ORDER BY rowid %s", - p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC") - ); - } - if( zSql ){ - rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); - sqlite3_free(zSql); - }else{ - rc = SQLITE_NOMEM; - } - }else if( eSearch==FTS3_DOCID_SEARCH ){ - rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt); - if( rc==SQLITE_OK ){ - rc = sqlite3_bind_value(pCsr->pStmt, 1, pCons); - } - } - if( rc!=SQLITE_OK ) return rc; + case FTSQUERY_NOT: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; - return fts3NextMethod(pCursor); -} + if( pRight->bStart==0 ){ + fts3EvalNextRow(pCsr, pRight, pRc); + assert( *pRc!=SQLITE_OK || pRight->bStart ); + } -/* -** This is the xEof method of the virtual table. SQLite calls this -** routine to find out if it has reached the end of a result set. -*/ -static int fts3EofMethod(sqlite3_vtab_cursor *pCursor){ - return ((Fts3Cursor *)pCursor)->isEof; -} + fts3EvalNextRow(pCsr, pLeft, pRc); + if( pLeft->bEof==0 ){ + while( !*pRc + && !pRight->bEof + && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 + ){ + fts3EvalNextRow(pCsr, pRight, pRc); + } + } + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = pLeft->bEof; + break; + } -/* -** This is the xRowid method. The SQLite core calls this routine to -** retrieve the rowid for the current row of the result set. fts3 -** exposes %_content.docid as the rowid for the virtual table. The -** rowid should be written to *pRowid. -*/ -static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ - Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; - *pRowid = pCsr->iPrevId; - return SQLITE_OK; + default: { + Fts3Phrase *pPhrase = pExpr->pPhrase; + fts3EvalInvalidatePoslist(pPhrase); + *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof); + pExpr->iDocid = pPhrase->doclist.iDocid; + break; + } + } + } } -/* -** This is the xColumn method, called by SQLite to request a value from -** the row that the supplied cursor currently points to. +/* +** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR +** cluster, then this function returns 1 immediately. ** -** If: +** Otherwise, it checks if the current row really does match the NEAR +** expression, using the data currently stored in the position lists +** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression. ** -** (iCol < p->nColumn) -> The value of the iCol'th user column. -** (iCol == p->nColumn) -> Magic column with the same name as the table. -** (iCol == p->nColumn+1) -> Docid column -** (iCol == p->nColumn+2) -> Langid column +** If the current row is a match, the position list associated with each +** phrase in the NEAR expression is edited in place to contain only those +** phrase instances sufficiently close to their peers to satisfy all NEAR +** constraints. In this case it returns 1. If the NEAR expression does not +** match the current row, 0 is returned. The position lists may or may not +** be edited if 0 is returned. */ -static int fts3ColumnMethod( - sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ - sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ - int iCol /* Index of column to read value from */ -){ - int rc = SQLITE_OK; /* Return Code */ - Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; - Fts3Table *p = (Fts3Table *)pCursor->pVtab; +static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ + int res = 1; - /* The column value supplied by SQLite must be in range. */ - assert( iCol>=0 && iCol<=p->nColumn+2 ); + /* The following block runs if pExpr is the root of a NEAR query. + ** For example, the query: + ** + ** "w" NEAR "x" NEAR "y" NEAR "z" + ** + ** which is represented in tree form as: + ** + ** | + ** +--NEAR--+ <-- root of NEAR query + ** | | + ** +--NEAR--+ "z" + ** | | + ** +--NEAR--+ "y" + ** | | + ** "w" "x" + ** + ** The right-hand child of a NEAR node is always a phrase. The + ** left-hand child may be either a phrase or a NEAR node. There are + ** no exceptions to this - it's the way the parser in fts3_expr.c works. + */ + if( *pRc==SQLITE_OK + && pExpr->eType==FTSQUERY_NEAR + && pExpr->bEof==0 + && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) + ){ + Fts3Expr *p; + int nTmp = 0; /* Bytes of temp space */ + char *aTmp; /* Temp space for PoslistNearMerge() */ - if( iCol==p->nColumn+1 ){ - /* This call is a request for the "docid" column. Since "docid" is an - ** alias for "rowid", use the xRowid() method to obtain the value. - */ - sqlite3_result_int64(pCtx, pCsr->iPrevId); - }else if( iCol==p->nColumn ){ - /* The extra column whose name is the same as the table. - ** Return a blob which is a pointer to the cursor. */ - sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT); - }else if( iCol==p->nColumn+2 && pCsr->pExpr ){ - sqlite3_result_int64(pCtx, pCsr->iLangid); - }else{ - /* The requested column is either a user column (one that contains - ** indexed data), or the language-id column. */ - rc = fts3CursorSeek(0, pCsr); + /* Allocate temporary working space. */ + for(p=pExpr; p->pLeft; p=p->pLeft){ + nTmp += p->pRight->pPhrase->doclist.nList; + } + nTmp += p->pPhrase->doclist.nList; + if( nTmp==0 ){ + res = 0; + }else{ + aTmp = sqlite3_malloc(nTmp*2); + if( !aTmp ){ + *pRc = SQLITE_NOMEM; + res = 0; + }else{ + char *aPoslist = p->pPhrase->doclist.pList; + int nToken = p->pPhrase->nToken; - if( rc==SQLITE_OK ){ - if( iCol==p->nColumn+2 ){ - int iLangid = 0; - if( p->zLanguageid ){ - iLangid = sqlite3_column_int(pCsr->pStmt, p->nColumn+1); + for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){ + Fts3Phrase *pPhrase = p->pRight->pPhrase; + int nNear = p->nNear; + res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); + } + + aPoslist = pExpr->pRight->pPhrase->doclist.pList; + nToken = pExpr->pRight->pPhrase->nToken; + for(p=pExpr->pLeft; p && res; p=p->pLeft){ + int nNear; + Fts3Phrase *pPhrase; + assert( p->pParent && p->pParent->pLeft==p ); + nNear = p->pParent->nNear; + pPhrase = ( + p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase + ); + res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); } - sqlite3_result_int(pCtx, iLangid); - }else if( sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){ - sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); } + + sqlite3_free(aTmp); } } - assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); - return rc; -} - -/* -** This function is the implementation of the xUpdate callback used by -** FTS3 virtual tables. It is invoked by SQLite each time a row is to be -** inserted, updated or deleted. -*/ -static int fts3UpdateMethod( - sqlite3_vtab *pVtab, /* Virtual table handle */ - int nArg, /* Size of argument array */ - sqlite3_value **apVal, /* Array of arguments */ - sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ -){ - return sqlite3Fts3UpdateMethod(pVtab, nArg, apVal, pRowid); + return res; } /* -** Implementation of xSync() method. Flush the contents of the pending-terms -** hash-table to the database. +** This function is a helper function for sqlite3Fts3EvalTestDeferred(). +** Assuming no error occurs or has occurred, It returns non-zero if the +** expression passed as the second argument matches the row that pCsr +** currently points to, or zero if it does not. +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** If an error occurs during execution of this function, *pRc is set to +** the appropriate SQLite error code. In this case the returned value is +** undefined. */ -static int fts3SyncMethod(sqlite3_vtab *pVtab){ +static int fts3EvalTestExpr( + Fts3Cursor *pCsr, /* FTS cursor handle */ + Fts3Expr *pExpr, /* Expr to test. May or may not be root. */ + int *pRc /* IN/OUT: Error code */ +){ + int bHit = 1; /* Return value */ + if( *pRc==SQLITE_OK ){ + switch( pExpr->eType ){ + case FTSQUERY_NEAR: + case FTSQUERY_AND: + bHit = ( + fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc) + && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc) + && fts3EvalNearTest(pExpr, pRc) + ); - /* Following an incremental-merge operation, assuming that the input - ** segments are not completely consumed (the usual case), they are updated - ** in place to remove the entries that have already been merged. This - ** involves updating the leaf block that contains the smallest unmerged - ** entry and each block (if any) between the leaf and the root node. So - ** if the height of the input segment b-trees is N, and input segments - ** are merged eight at a time, updating the input segments at the end - ** of an incremental-merge requires writing (8*(1+N)) blocks. N is usually - ** small - often between 0 and 2. So the overhead of the incremental - ** merge is somewhere between 8 and 24 blocks. To avoid this overhead - ** dwarfing the actual productive work accomplished, the incremental merge - ** is only attempted if it will write at least 64 leaf blocks. Hence - ** nMinMerge. - ** - ** Of course, updating the input segments also involves deleting a bunch - ** of blocks from the segments table. But this is not considered overhead - ** as it would also be required by a crisis-merge that used the same input - ** segments. - */ - const u32 nMinMerge = 64; /* Minimum amount of incr-merge work to do */ + /* If the NEAR expression does not match any rows, zero the doclist for + ** all phrases involved in the NEAR. This is because the snippet(), + ** offsets() and matchinfo() functions are not supposed to recognize + ** any instances of phrases that are part of unmatched NEAR queries. + ** For example if this expression: + ** + ** ... MATCH 'a OR (b NEAR c)' + ** + ** is matched against a row containing: + ** + ** 'a b d e' + ** + ** then any snippet() should ony highlight the "a" term, not the "b" + ** (as "b" is part of a non-matching NEAR clause). + */ + if( bHit==0 + && pExpr->eType==FTSQUERY_NEAR + && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) + ){ + Fts3Expr *p; + for(p=pExpr; p->pPhrase==0; p=p->pLeft){ + if( p->pRight->iDocid==pCsr->iPrevId ){ + fts3EvalInvalidatePoslist(p->pRight->pPhrase); + } + } + if( p->iDocid==pCsr->iPrevId ){ + fts3EvalInvalidatePoslist(p->pPhrase); + } + } - Fts3Table *p = (Fts3Table*)pVtab; - int rc = sqlite3Fts3PendingTermsFlush(p); + break; - if( rc==SQLITE_OK - && p->nLeafAdd>(nMinMerge/16) - && p->nAutoincrmerge && p->nAutoincrmerge!=0xff - ){ - int mxLevel = 0; /* Maximum relative level value in db */ - int A; /* Incr-merge parameter A */ + case FTSQUERY_OR: { + int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc); + int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc); + bHit = bHit1 || bHit2; + break; + } - rc = sqlite3Fts3MaxLevel(p, &mxLevel); - assert( rc==SQLITE_OK || mxLevel==0 ); - A = p->nLeafAdd * mxLevel; - A += (A/2); - if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, p->nAutoincrmerge); - } - sqlite3Fts3SegmentsClose(p); - return rc; -} + case FTSQUERY_NOT: + bHit = ( + fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc) + && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc) + ); + break; -/* -** If it is currently unknown whether or not the FTS table has an %_stat -** table (if p->bHasStat==2), attempt to determine this (set p->bHasStat -** to 0 or 1). Return SQLITE_OK if successful, or an SQLite error code -** if an error occurs. -*/ -static int fts3SetHasStat(Fts3Table *p){ - int rc = SQLITE_OK; - if( p->bHasStat==2 ){ - const char *zFmt ="SELECT 1 FROM %Q.sqlite_master WHERE tbl_name='%q_stat'"; - char *zSql = sqlite3_mprintf(zFmt, p->zDb, p->zName); - if( zSql ){ - sqlite3_stmt *pStmt = 0; - rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); - if( rc==SQLITE_OK ){ - int bHasStat = (sqlite3_step(pStmt)==SQLITE_ROW); - rc = sqlite3_finalize(pStmt); - if( rc==SQLITE_OK ) p->bHasStat = bHasStat; + default: { +#ifndef SQLITE_DISABLE_FTS4_DEFERRED + if( pCsr->pDeferred + && (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred) + ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 ); + if( pExpr->bDeferred ){ + fts3EvalInvalidatePoslist(pPhrase); + } + *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase); + bHit = (pPhrase->doclist.pList!=0); + pExpr->iDocid = pCsr->iPrevId; + }else +#endif + { + bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId); + } + break; } - sqlite3_free(zSql); - }else{ - rc = SQLITE_NOMEM; } } - return rc; + return bHit; } /* -** Implementation of xBegin() method. +** This function is called as the second part of each xNext operation when +** iterating through the results of a full-text query. At this point the +** cursor points to a row that matches the query expression, with the +** following caveats: +** +** * Up until this point, "NEAR" operators in the expression have been +** treated as "AND". +** +** * Deferred tokens have not yet been considered. +** +** If *pRc is not SQLITE_OK when this function is called, it immediately +** returns 0. Otherwise, it tests whether or not after considering NEAR +** operators and deferred tokens the current row is still a match for the +** expression. It returns 1 if both of the following are true: +** +** 1. *pRc is SQLITE_OK when this function returns, and +** +** 2. After scanning the current FTS table row for the deferred tokens, +** it is determined that the row does *not* match the query. +** +** Or, if no error occurs and it seems the current row does match the FTS +** query, return 0. */ -static int fts3BeginMethod(sqlite3_vtab *pVtab){ - Fts3Table *p = (Fts3Table*)pVtab; - UNUSED_PARAMETER(pVtab); - assert( p->pSegments==0 ); - assert( p->nPendingData==0 ); - assert( p->inTransaction!=1 ); - TESTONLY( p->inTransaction = 1 ); - TESTONLY( p->mxSavepoint = -1; ); - p->nLeafAdd = 0; - return fts3SetHasStat(p); -} +SQLITE_PRIVATE int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc){ + int rc = *pRc; + int bMiss = 0; + if( rc==SQLITE_OK ){ -/* -** Implementation of xCommit() method. This is a no-op. The contents of -** the pending-terms hash-table have already been flushed into the database -** by fts3SyncMethod(). -*/ -static int fts3CommitMethod(sqlite3_vtab *pVtab){ - TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); - UNUSED_PARAMETER(pVtab); - assert( p->nPendingData==0 ); - assert( p->inTransaction!=0 ); - assert( p->pSegments==0 ); - TESTONLY( p->inTransaction = 0 ); - TESTONLY( p->mxSavepoint = -1; ); - return SQLITE_OK; -} + /* If there are one or more deferred tokens, load the current row into + ** memory and scan it to determine the position list for each deferred + ** token. Then, see if this row is really a match, considering deferred + ** tokens and NEAR operators (neither of which were taken into account + ** earlier, by fts3EvalNextRow()). + */ + if( pCsr->pDeferred ){ + rc = fts3CursorSeek(0, pCsr); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3CacheDeferredDoclists(pCsr); + } + } + bMiss = (0==fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc)); -/* -** Implementation of xRollback(). Discard the contents of the pending-terms -** hash-table. Any changes made to the database are reverted by SQLite. -*/ -static int fts3RollbackMethod(sqlite3_vtab *pVtab){ - Fts3Table *p = (Fts3Table*)pVtab; - sqlite3Fts3PendingTermsClear(p); - assert( p->inTransaction!=0 ); - TESTONLY( p->inTransaction = 0 ); - TESTONLY( p->mxSavepoint = -1; ); - return SQLITE_OK; + /* Free the position-lists accumulated for each deferred token above. */ + sqlite3Fts3FreeDeferredDoclists(pCsr); + *pRc = rc; + } + return (rc==SQLITE_OK && bMiss); } /* -** When called, *ppPoslist must point to the byte immediately following the -** end of a position-list. i.e. ( (*ppPoslist)[-1]==POS_END ). This function -** moves *ppPoslist so that it instead points to the first byte of the -** same position list. +** Advance to the next document that matches the FTS expression in +** Fts3Cursor.pExpr. */ -static void fts3ReversePoslist(char *pStart, char **ppPoslist){ - char *p = &(*ppPoslist)[-2]; - char c = 0; - - while( p>pStart && (c=*p--)==0 ); - while( p>pStart && (*p & 0x80) | c ){ - c = *p--; +static int fts3EvalNext(Fts3Cursor *pCsr){ + int rc = SQLITE_OK; /* Return Code */ + Fts3Expr *pExpr = pCsr->pExpr; + assert( pCsr->isEof==0 ); + if( pExpr==0 ){ + pCsr->isEof = 1; + }else{ + do { + if( pCsr->isRequireSeek==0 ){ + sqlite3_reset(pCsr->pStmt); + } + assert( sqlite3_data_count(pCsr->pStmt)==0 ); + fts3EvalNextRow(pCsr, pExpr, &rc); + pCsr->isEof = pExpr->bEof; + pCsr->isRequireSeek = 1; + pCsr->isMatchinfoNeeded = 1; + pCsr->iPrevId = pExpr->iDocid; + }while( pCsr->isEof==0 && sqlite3Fts3EvalTestDeferred(pCsr, &rc) ); } - if( p>pStart ){ p = &p[2]; } - while( *p++&0x80 ); - *ppPoslist = p; -} -/* -** Helper function used by the implementation of the overloaded snippet(), -** offsets() and optimize() SQL functions. -** -** If the value passed as the third argument is a blob of size -** sizeof(Fts3Cursor*), then the blob contents are copied to the -** output variable *ppCsr and SQLITE_OK is returned. Otherwise, an error -** message is written to context pContext and SQLITE_ERROR returned. The -** string passed via zFunc is used as part of the error message. -*/ -static int fts3FunctionArg( - sqlite3_context *pContext, /* SQL function call context */ - const char *zFunc, /* Function name */ - sqlite3_value *pVal, /* argv[0] passed to function */ - Fts3Cursor **ppCsr /* OUT: Store cursor handle here */ -){ - Fts3Cursor *pRet; - if( sqlite3_value_type(pVal)!=SQLITE_BLOB - || sqlite3_value_bytes(pVal)!=sizeof(Fts3Cursor *) - ){ - char *zErr = sqlite3_mprintf("illegal first argument to %s", zFunc); - sqlite3_result_error(pContext, zErr, -1); - sqlite3_free(zErr); - return SQLITE_ERROR; + /* Check if the cursor is past the end of the docid range specified + ** by Fts3Cursor.iMinDocid/iMaxDocid. If so, set the EOF flag. */ + if( rc==SQLITE_OK && ( + (pCsr->bDesc==0 && pCsr->iPrevId>pCsr->iMaxDocid) + || (pCsr->bDesc!=0 && pCsr->iPrevIdiMinDocid) + )){ + pCsr->isEof = 1; } - memcpy(&pRet, sqlite3_value_blob(pVal), sizeof(Fts3Cursor *)); - *ppCsr = pRet; - return SQLITE_OK; + + return rc; } /* -** Implementation of the snippet() function for FTS3 +** Restart interation for expression pExpr so that the next call to +** fts3EvalNext() visits the first row. Do not allow incremental +** loading or merging of phrase doclists for this iteration. +** +** If *pRc is other than SQLITE_OK when this function is called, it is +** a no-op. If an error occurs within this function, *pRc is set to an +** SQLite error code before returning. */ -static void fts3SnippetFunc( - sqlite3_context *pContext, /* SQLite function call context */ - int nVal, /* Size of apVal[] array */ - sqlite3_value **apVal /* Array of arguments */ +static void fts3EvalRestart( + Fts3Cursor *pCsr, + Fts3Expr *pExpr, + int *pRc ){ - Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ - const char *zStart = ""; - const char *zEnd = ""; - const char *zEllipsis = "..."; - int iCol = -1; - int nToken = 15; /* Default number of tokens in snippet */ + if( pExpr && *pRc==SQLITE_OK ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; - /* There must be at least one argument passed to this function (otherwise - ** the non-overloaded version would have been called instead of this one). - */ - assert( nVal>=1 ); + if( pPhrase ){ + fts3EvalInvalidatePoslist(pPhrase); + if( pPhrase->bIncr ){ + int i; + for(i=0; inToken; i++){ + Fts3PhraseToken *pToken = &pPhrase->aToken[i]; + assert( pToken->pDeferred==0 ); + if( pToken->pSegcsr ){ + sqlite3Fts3MsrIncrRestart(pToken->pSegcsr); + } + } + *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase); + } + pPhrase->doclist.pNextDocid = 0; + pPhrase->doclist.iDocid = 0; + pPhrase->pOrPoslist = 0; + } - if( nVal>6 ){ - sqlite3_result_error(pContext, - "wrong number of arguments to function snippet()", -1); - return; - } - if( fts3FunctionArg(pContext, "snippet", apVal[0], &pCsr) ) return; + pExpr->iDocid = 0; + pExpr->bEof = 0; + pExpr->bStart = 0; - switch( nVal ){ - case 6: nToken = sqlite3_value_int(apVal[5]); - case 5: iCol = sqlite3_value_int(apVal[4]); - case 4: zEllipsis = (const char*)sqlite3_value_text(apVal[3]); - case 3: zEnd = (const char*)sqlite3_value_text(apVal[2]); - case 2: zStart = (const char*)sqlite3_value_text(apVal[1]); - } - if( !zEllipsis || !zEnd || !zStart ){ - sqlite3_result_error_nomem(pContext); - }else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){ - sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken); + fts3EvalRestart(pCsr, pExpr->pLeft, pRc); + fts3EvalRestart(pCsr, pExpr->pRight, pRc); } } /* -** Implementation of the offsets() function for FTS3 +** After allocating the Fts3Expr.aMI[] array for each phrase in the +** expression rooted at pExpr, the cursor iterates through all rows matched +** by pExpr, calling this function for each row. This function increments +** the values in Fts3Expr.aMI[] according to the position-list currently +** found in Fts3Expr.pPhrase->doclist.pList for each of the phrase +** expression nodes. */ -static void fts3OffsetsFunc( - sqlite3_context *pContext, /* SQLite function call context */ - int nVal, /* Size of argument array */ - sqlite3_value **apVal /* Array of arguments */ -){ - Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ +static void fts3EvalUpdateCounts(Fts3Expr *pExpr){ + if( pExpr ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + if( pPhrase && pPhrase->doclist.pList ){ + int iCol = 0; + char *p = pPhrase->doclist.pList; - UNUSED_PARAMETER(nVal); + assert( *p ); + while( 1 ){ + u8 c = 0; + int iCnt = 0; + while( 0xFE & (*p | c) ){ + if( (c&0x80)==0 ) iCnt++; + c = *p++ & 0x80; + } - assert( nVal==1 ); - if( fts3FunctionArg(pContext, "offsets", apVal[0], &pCsr) ) return; - assert( pCsr ); - if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){ - sqlite3Fts3Offsets(pContext, pCsr); + /* aMI[iCol*3 + 1] = Number of occurrences + ** aMI[iCol*3 + 2] = Number of rows containing at least one instance + */ + pExpr->aMI[iCol*3 + 1] += iCnt; + pExpr->aMI[iCol*3 + 2] += (iCnt>0); + if( *p==0x00 ) break; + p++; + p += fts3GetVarint32(p, &iCol); + } + } + + fts3EvalUpdateCounts(pExpr->pLeft); + fts3EvalUpdateCounts(pExpr->pRight); } } -/* -** Implementation of the special optimize() function for FTS3. This -** function merges all segments in the database to a single segment. -** Example usage is: +/* +** Expression pExpr must be of type FTSQUERY_PHRASE. ** -** SELECT optimize(t) FROM t LIMIT 1; +** If it is not already allocated and populated, this function allocates and +** populates the Fts3Expr.aMI[] array for expression pExpr. If pExpr is part +** of a NEAR expression, then it also allocates and populates the same array +** for all other phrases that are part of the NEAR expression. ** -** where 't' is the name of an FTS3 table. +** SQLITE_OK is returned if the aMI[] array is successfully allocated and +** populated. Otherwise, if an error occurs, an SQLite error code is returned. */ -static void fts3OptimizeFunc( - sqlite3_context *pContext, /* SQLite function call context */ - int nVal, /* Size of argument array */ - sqlite3_value **apVal /* Array of arguments */ +static int fts3EvalGatherStats( + Fts3Cursor *pCsr, /* Cursor object */ + Fts3Expr *pExpr /* FTSQUERY_PHRASE expression */ ){ - int rc; /* Return code */ - Fts3Table *p; /* Virtual table handle */ - Fts3Cursor *pCursor; /* Cursor handle passed through apVal[0] */ + int rc = SQLITE_OK; /* Return code */ - UNUSED_PARAMETER(nVal); + assert( pExpr->eType==FTSQUERY_PHRASE ); + if( pExpr->aMI==0 ){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + Fts3Expr *pRoot; /* Root of NEAR expression */ + Fts3Expr *p; /* Iterator used for several purposes */ - assert( nVal==1 ); - if( fts3FunctionArg(pContext, "optimize", apVal[0], &pCursor) ) return; - p = (Fts3Table *)pCursor->base.pVtab; - assert( p ); + sqlite3_int64 iPrevId = pCsr->iPrevId; + sqlite3_int64 iDocid; + u8 bEof; - rc = sqlite3Fts3Optimize(p); + /* Find the root of the NEAR expression */ + pRoot = pExpr; + while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){ + pRoot = pRoot->pParent; + } + iDocid = pRoot->iDocid; + bEof = pRoot->bEof; + assert( pRoot->bStart ); - switch( rc ){ - case SQLITE_OK: - sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC); - break; - case SQLITE_DONE: - sqlite3_result_text(pContext, "Index already optimal", -1, SQLITE_STATIC); - break; - default: - sqlite3_result_error_code(pContext, rc); - break; - } -} + /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */ + for(p=pRoot; p; p=p->pLeft){ + Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight); + assert( pE->aMI==0 ); + pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32)); + if( !pE->aMI ) return SQLITE_NOMEM; + memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32)); + } -/* -** Implementation of the matchinfo() function for FTS3 -*/ -static void fts3MatchinfoFunc( - sqlite3_context *pContext, /* SQLite function call context */ - int nVal, /* Size of argument array */ - sqlite3_value **apVal /* Array of arguments */ -){ - Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ - assert( nVal==1 || nVal==2 ); - if( SQLITE_OK==fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr) ){ - const char *zArg = 0; - if( nVal>1 ){ - zArg = (const char *)sqlite3_value_text(apVal[1]); + fts3EvalRestart(pCsr, pRoot, &rc); + + while( pCsr->isEof==0 && rc==SQLITE_OK ){ + + do { + /* Ensure the %_content statement is reset. */ + if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt); + assert( sqlite3_data_count(pCsr->pStmt)==0 ); + + /* Advance to the next document */ + fts3EvalNextRow(pCsr, pRoot, &rc); + pCsr->isEof = pRoot->bEof; + pCsr->isRequireSeek = 1; + pCsr->isMatchinfoNeeded = 1; + pCsr->iPrevId = pRoot->iDocid; + }while( pCsr->isEof==0 + && pRoot->eType==FTSQUERY_NEAR + && sqlite3Fts3EvalTestDeferred(pCsr, &rc) + ); + + if( rc==SQLITE_OK && pCsr->isEof==0 ){ + fts3EvalUpdateCounts(pRoot); + } + } + + pCsr->isEof = 0; + pCsr->iPrevId = iPrevId; + + if( bEof ){ + pRoot->bEof = bEof; + }else{ + /* Caution: pRoot may iterate through docids in ascending or descending + ** order. For this reason, even though it seems more defensive, the + ** do loop can not be written: + ** + ** do {...} while( pRoot->iDocidbEof==0 ); + }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK ); } - sqlite3Fts3Matchinfo(pContext, pCsr, zArg); } + return rc; } /* -** This routine implements the xFindFunction method for the FTS3 -** virtual table. +** This function is used by the matchinfo() module to query a phrase +** expression node for the following information: +** +** 1. The total number of occurrences of the phrase in each column of +** the FTS table (considering all rows), and +** +** 2. For each column, the number of rows in the table for which the +** column contains at least one instance of the phrase. +** +** If no error occurs, SQLITE_OK is returned and the values for each column +** written into the array aiOut as follows: +** +** aiOut[iCol*3 + 1] = Number of occurrences +** aiOut[iCol*3 + 2] = Number of rows containing at least one instance +** +** Caveats: +** +** * If a phrase consists entirely of deferred tokens, then all output +** values are set to the number of documents in the table. In other +** words we assume that very common tokens occur exactly once in each +** column of each row of the table. +** +** * If a phrase contains some deferred tokens (and some non-deferred +** tokens), count the potential occurrence identified by considering +** the non-deferred tokens instead of actual phrase occurrences. +** +** * If the phrase is part of a NEAR expression, then only phrase instances +** that meet the NEAR constraint are included in the counts. */ -static int fts3FindFunctionMethod( - sqlite3_vtab *pVtab, /* Virtual table handle */ - int nArg, /* Number of SQL function arguments */ - const char *zName, /* Name of SQL function */ - void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */ - void **ppArg /* Unused */ +SQLITE_PRIVATE int sqlite3Fts3EvalPhraseStats( + Fts3Cursor *pCsr, /* FTS cursor handle */ + Fts3Expr *pExpr, /* Phrase expression */ + u32 *aiOut /* Array to write results into (see above) */ ){ - struct Overloaded { - const char *zName; - void (*xFunc)(sqlite3_context*,int,sqlite3_value**); - } aOverload[] = { - { "snippet", fts3SnippetFunc }, - { "offsets", fts3OffsetsFunc }, - { "optimize", fts3OptimizeFunc }, - { "matchinfo", fts3MatchinfoFunc }, - }; - int i; /* Iterator variable */ - - UNUSED_PARAMETER(pVtab); - UNUSED_PARAMETER(nArg); - UNUSED_PARAMETER(ppArg); + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int iCol; - for(i=0; ibDeferred && pExpr->pParent->eType!=FTSQUERY_NEAR ){ + assert( pCsr->nDoc>0 ); + for(iCol=0; iColnColumn; iCol++){ + aiOut[iCol*3 + 1] = (u32)pCsr->nDoc; + aiOut[iCol*3 + 2] = (u32)pCsr->nDoc; + } + }else{ + rc = fts3EvalGatherStats(pCsr, pExpr); + if( rc==SQLITE_OK ){ + assert( pExpr->aMI ); + for(iCol=0; iColnColumn; iCol++){ + aiOut[iCol*3 + 1] = pExpr->aMI[iCol*3 + 1]; + aiOut[iCol*3 + 2] = pExpr->aMI[iCol*3 + 2]; + } } } - /* No function of the specified name was found. Return 0. */ - return 0; + return rc; } /* -** Implementation of FTS3 xRename method. Rename an fts3 table. +** The expression pExpr passed as the second argument to this function +** must be of type FTSQUERY_PHRASE. +** +** The returned value is either NULL or a pointer to a buffer containing +** a position-list indicating the occurrences of the phrase in column iCol +** of the current row. +** +** More specifically, the returned buffer contains 1 varint for each +** occurrence of the phrase in the column, stored using the normal (delta+2) +** compression and is terminated by either an 0x01 or 0x00 byte. For example, +** if the requested column contains "a b X c d X X" and the position-list +** for 'X' is requested, the buffer returned may contain: +** +** 0x04 0x05 0x03 0x01 or 0x04 0x05 0x03 0x00 +** +** This function works regardless of whether or not the phrase is deferred, +** incremental, or neither. */ -static int fts3RenameMethod( - sqlite3_vtab *pVtab, /* Virtual table handle */ - const char *zName /* New name of table */ +SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist( + Fts3Cursor *pCsr, /* FTS3 cursor object */ + Fts3Expr *pExpr, /* Phrase to return doclist for */ + int iCol, /* Column to return position list for */ + char **ppOut /* OUT: Pointer to position list */ ){ - Fts3Table *p = (Fts3Table *)pVtab; - sqlite3 *db = p->db; /* Database connection */ - int rc; /* Return Code */ + Fts3Phrase *pPhrase = pExpr->pPhrase; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + char *pIter; + int iThis; + sqlite3_int64 iDocid; - /* At this point it must be known if the %_stat table exists or not. - ** So bHasStat may not be 2. */ - rc = fts3SetHasStat(p); - - /* As it happens, the pending terms table is always empty here. This is - ** because an "ALTER TABLE RENAME TABLE" statement inside a transaction - ** always opens a savepoint transaction. And the xSavepoint() method - ** flushes the pending terms table. But leave the (no-op) call to - ** PendingTermsFlush() in in case that changes. - */ - assert( p->nPendingData==0 ); - if( rc==SQLITE_OK ){ - rc = sqlite3Fts3PendingTermsFlush(p); + /* If this phrase is applies specifically to some column other than + ** column iCol, return a NULL pointer. */ + *ppOut = 0; + assert( iCol>=0 && iColnColumn ); + if( (pPhrase->iColumnnColumn && pPhrase->iColumn!=iCol) ){ + return SQLITE_OK; } - if( p->zContentTbl==0 ){ - fts3DbExec(&rc, db, - "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';", - p->zDb, p->zName, zName - ); + iDocid = pExpr->iDocid; + pIter = pPhrase->doclist.pList; + if( iDocid!=pCsr->iPrevId || pExpr->bEof ){ + int rc = SQLITE_OK; + int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */ + int bOr = 0; + u8 bTreeEof = 0; + Fts3Expr *p; /* Used to iterate from pExpr to root */ + Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */ + int bMatch; + + /* Check if this phrase descends from an OR expression node. If not, + ** return NULL. Otherwise, the entry that corresponds to docid + ** pCsr->iPrevId may lie earlier in the doclist buffer. Or, if the + ** tree that the node is part of has been marked as EOF, but the node + ** itself is not EOF, then it may point to an earlier entry. */ + pNear = pExpr; + for(p=pExpr->pParent; p; p=p->pParent){ + if( p->eType==FTSQUERY_OR ) bOr = 1; + if( p->eType==FTSQUERY_NEAR ) pNear = p; + if( p->bEof ) bTreeEof = 1; + } + if( bOr==0 ) return SQLITE_OK; + + /* This is the descendent of an OR node. In this case we cannot use + ** an incremental phrase. Load the entire doclist for the phrase + ** into memory in this case. */ + if( pPhrase->bIncr ){ + int bEofSave = pNear->bEof; + fts3EvalRestart(pCsr, pNear, &rc); + while( rc==SQLITE_OK && !pNear->bEof ){ + fts3EvalNextRow(pCsr, pNear, &rc); + if( bEofSave==0 && pNear->iDocid==iDocid ) break; + } + assert( rc!=SQLITE_OK || pPhrase->bIncr==0 ); + } + if( bTreeEof ){ + while( rc==SQLITE_OK && !pNear->bEof ){ + fts3EvalNextRow(pCsr, pNear, &rc); + } + } + if( rc!=SQLITE_OK ) return rc; + + bMatch = 1; + for(p=pNear; p; p=p->pLeft){ + u8 bEof = 0; + Fts3Expr *pTest = p; + Fts3Phrase *pPh; + assert( pTest->eType==FTSQUERY_NEAR || pTest->eType==FTSQUERY_PHRASE ); + if( pTest->eType==FTSQUERY_NEAR ) pTest = pTest->pRight; + assert( pTest->eType==FTSQUERY_PHRASE ); + pPh = pTest->pPhrase; + + pIter = pPh->pOrPoslist; + iDocid = pPh->iOrDocid; + if( pCsr->bDesc==bDescDoclist ){ + bEof = !pPh->doclist.nAll || + (pIter >= (pPh->doclist.aAll + pPh->doclist.nAll)); + while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ + sqlite3Fts3DoclistNext( + bDescDoclist, pPh->doclist.aAll, pPh->doclist.nAll, + &pIter, &iDocid, &bEof + ); + } + }else{ + bEof = !pPh->doclist.nAll || (pIter && pIter<=pPh->doclist.aAll); + while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ + int dummy; + sqlite3Fts3DoclistPrev( + bDescDoclist, pPh->doclist.aAll, pPh->doclist.nAll, + &pIter, &iDocid, &dummy, &bEof + ); + } + } + pPh->pOrPoslist = pIter; + pPh->iOrDocid = iDocid; + if( bEof || iDocid!=pCsr->iPrevId ) bMatch = 0; + } + + if( bMatch ){ + pIter = pPhrase->pOrPoslist; + }else{ + pIter = 0; + } } + if( pIter==0 ) return SQLITE_OK; - if( p->bHasDocsize ){ - fts3DbExec(&rc, db, - "ALTER TABLE %Q.'%q_docsize' RENAME TO '%q_docsize';", - p->zDb, p->zName, zName - ); + if( *pIter==0x01 ){ + pIter++; + pIter += fts3GetVarint32(pIter, &iThis); + }else{ + iThis = 0; } - if( p->bHasStat ){ - fts3DbExec(&rc, db, - "ALTER TABLE %Q.'%q_stat' RENAME TO '%q_stat';", - p->zDb, p->zName, zName - ); + while( iThiszDb, p->zName, zName - ); - fts3DbExec(&rc, db, - "ALTER TABLE %Q.'%q_segdir' RENAME TO '%q_segdir';", - p->zDb, p->zName, zName - ); - return rc; + if( *pIter==0x00 ){ + pIter = 0; + } + + *ppOut = ((iCol==iThis)?pIter:0); + return SQLITE_OK; } /* -** The xSavepoint() method. +** Free all components of the Fts3Phrase structure that were allocated by +** the eval module. Specifically, this means to free: ** -** Flush the contents of the pending-terms table to disk. +** * the contents of pPhrase->doclist, and +** * any Fts3MultiSegReader objects held by phrase tokens. */ -static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ - int rc = SQLITE_OK; - UNUSED_PARAMETER(iSavepoint); - assert( ((Fts3Table *)pVtab)->inTransaction ); - assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint ); - TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint ); - if( ((Fts3Table *)pVtab)->bIgnoreSavepoint==0 ){ - rc = fts3SyncMethod(pVtab); +SQLITE_PRIVATE void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){ + if( pPhrase ){ + int i; + sqlite3_free(pPhrase->doclist.aAll); + fts3EvalInvalidatePoslist(pPhrase); + memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist)); + for(i=0; inToken; i++){ + fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr); + pPhrase->aToken[i].pSegcsr = 0; + } } - return rc; } + /* -** The xRelease() method. -** -** This is a no-op. +** Return SQLITE_CORRUPT_VTAB. */ -static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ - TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); - UNUSED_PARAMETER(iSavepoint); - UNUSED_PARAMETER(pVtab); - assert( p->inTransaction ); - assert( p->mxSavepoint >= iSavepoint ); - TESTONLY( p->mxSavepoint = iSavepoint-1 ); - return SQLITE_OK; +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3Fts3Corrupt(){ + return SQLITE_CORRUPT_VTAB; } +#endif +#if !SQLITE_CORE /* -** The xRollbackTo() method. -** -** Discard the contents of the pending terms table. +** Initialize API pointer table, if required. */ -static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ - Fts3Table *p = (Fts3Table*)pVtab; - UNUSED_PARAMETER(iSavepoint); - assert( p->inTransaction ); - assert( p->mxSavepoint >= iSavepoint ); - TESTONLY( p->mxSavepoint = iSavepoint ); - sqlite3Fts3PendingTermsClear(p); - return SQLITE_OK; +#ifdef _WIN32 +__declspec(dllexport) +#endif +SQLITE_API int SQLITE_STDCALL sqlite3_fts3_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi +){ + SQLITE_EXTENSION_INIT2(pApi) + return sqlite3Fts3Init(db); } +#endif -static const sqlite3_module fts3Module = { - /* iVersion */ 2, - /* xCreate */ fts3CreateMethod, - /* xConnect */ fts3ConnectMethod, - /* xBestIndex */ fts3BestIndexMethod, - /* xDisconnect */ fts3DisconnectMethod, - /* xDestroy */ fts3DestroyMethod, - /* xOpen */ fts3OpenMethod, - /* xClose */ fts3CloseMethod, - /* xFilter */ fts3FilterMethod, - /* xNext */ fts3NextMethod, - /* xEof */ fts3EofMethod, - /* xColumn */ fts3ColumnMethod, - /* xRowid */ fts3RowidMethod, - /* xUpdate */ fts3UpdateMethod, - /* xBegin */ fts3BeginMethod, - /* xSync */ fts3SyncMethod, - /* xCommit */ fts3CommitMethod, - /* xRollback */ fts3RollbackMethod, - /* xFindFunction */ fts3FindFunctionMethod, - /* xRename */ fts3RenameMethod, - /* xSavepoint */ fts3SavepointMethod, - /* xRelease */ fts3ReleaseMethod, - /* xRollbackTo */ fts3RollbackToMethod, +#endif + +/************** End of fts3.c ************************************************/ +/************** Begin file fts3_aux.c ****************************************/ +/* +** 2011 Jan 27 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +*/ +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include */ +/* #include */ + +typedef struct Fts3auxTable Fts3auxTable; +typedef struct Fts3auxCursor Fts3auxCursor; + +struct Fts3auxTable { + sqlite3_vtab base; /* Base class used by SQLite core */ + Fts3Table *pFts3Tab; }; -/* -** This function is registered as the module destructor (called when an -** FTS3 enabled database connection is closed). It frees the memory -** allocated for the tokenizer hash table. -*/ -static void hashDestroy(void *p){ - Fts3Hash *pHash = (Fts3Hash *)p; - sqlite3Fts3HashClear(pHash); - sqlite3_free(pHash); -} +struct Fts3auxCursor { + sqlite3_vtab_cursor base; /* Base class used by SQLite core */ + Fts3MultiSegReader csr; /* Must be right after "base" */ + Fts3SegFilter filter; + char *zStop; + int nStop; /* Byte-length of string zStop */ + int iLangid; /* Language id to query */ + int isEof; /* True if cursor is at EOF */ + sqlite3_int64 iRowid; /* Current rowid */ + + int iCol; /* Current value of 'col' column */ + int nStat; /* Size of aStat[] array */ + struct Fts3auxColstats { + sqlite3_int64 nDoc; /* 'documents' values for current csr row */ + sqlite3_int64 nOcc; /* 'occurrences' values for current csr row */ + } *aStat; +}; /* -** The fts3 built-in tokenizers - "simple", "porter" and "icu"- are -** implemented in files fts3_tokenizer1.c, fts3_porter.c and fts3_icu.c -** respectively. The following three forward declarations are for functions -** declared in these files used to retrieve the respective implementations. -** -** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed -** to by the argument to point to the "simple" tokenizer implementation. -** And so on. +** Schema of the terms table. */ -SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); -SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); -#ifndef SQLITE_DISABLE_FTS3_UNICODE -SQLITE_PRIVATE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule); -#endif -#ifdef SQLITE_ENABLE_ICU -SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule); -#endif +#define FTS3_AUX_SCHEMA \ + "CREATE TABLE x(term, col, documents, occurrences, languageid HIDDEN)" /* -** Initialize the fts3 extension. If this extension is built as part -** of the sqlite library, then this function is called directly by -** SQLite. If fts3 is built as a dynamically loadable extension, this -** function is called by the sqlite3_extension_init() entry point. +** This function does all the work for both the xConnect and xCreate methods. +** These tables have no persistent representation of their own, so xConnect +** and xCreate are identical operations. */ -SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db){ - int rc = SQLITE_OK; - Fts3Hash *pHash = 0; - const sqlite3_tokenizer_module *pSimple = 0; - const sqlite3_tokenizer_module *pPorter = 0; -#ifndef SQLITE_DISABLE_FTS3_UNICODE - const sqlite3_tokenizer_module *pUnicode = 0; -#endif - -#ifdef SQLITE_ENABLE_ICU - const sqlite3_tokenizer_module *pIcu = 0; - sqlite3Fts3IcuTokenizerModule(&pIcu); -#endif - -#ifndef SQLITE_DISABLE_FTS3_UNICODE - sqlite3Fts3UnicodeTokenizer(&pUnicode); -#endif - -#ifdef SQLITE_TEST - rc = sqlite3Fts3InitTerm(db); - if( rc!=SQLITE_OK ) return rc; -#endif +static int fts3auxConnectMethod( + sqlite3 *db, /* Database connection */ + void *pUnused, /* Unused */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ + char **pzErr /* OUT: sqlite3_malloc'd error message */ +){ + char const *zDb; /* Name of database (e.g. "main") */ + char const *zFts3; /* Name of fts3 table */ + int nDb; /* Result of strlen(zDb) */ + int nFts3; /* Result of strlen(zFts3) */ + int nByte; /* Bytes of space to allocate here */ + int rc; /* value returned by declare_vtab() */ + Fts3auxTable *p; /* Virtual table object to return */ - rc = sqlite3Fts3InitAux(db); - if( rc!=SQLITE_OK ) return rc; + UNUSED_PARAMETER(pUnused); - sqlite3Fts3SimpleTokenizerModule(&pSimple); - sqlite3Fts3PorterTokenizerModule(&pPorter); + /* The user should invoke this in one of two forms: + ** + ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table); + ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table); + */ + if( argc!=4 && argc!=5 ) goto bad_args; - /* Allocate and initialize the hash-table used to store tokenizers. */ - pHash = sqlite3_malloc(sizeof(Fts3Hash)); - if( !pHash ){ - rc = SQLITE_NOMEM; + zDb = argv[1]; + nDb = (int)strlen(zDb); + if( argc==5 ){ + if( nDb==4 && 0==sqlite3_strnicmp("temp", zDb, 4) ){ + zDb = argv[3]; + nDb = (int)strlen(zDb); + zFts3 = argv[4]; + }else{ + goto bad_args; + } }else{ - sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); + zFts3 = argv[3]; } + nFts3 = (int)strlen(zFts3); - /* Load the built-in tokenizers into the hash table */ - if( rc==SQLITE_OK ){ - if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple) - || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter) + rc = sqlite3_declare_vtab(db, FTS3_AUX_SCHEMA); + if( rc!=SQLITE_OK ) return rc; -#ifndef SQLITE_DISABLE_FTS3_UNICODE - || sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode) -#endif -#ifdef SQLITE_ENABLE_ICU - || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu)) -#endif - ){ - rc = SQLITE_NOMEM; - } - } + nByte = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nDb + nFts3 + 2; + p = (Fts3auxTable *)sqlite3_malloc(nByte); + if( !p ) return SQLITE_NOMEM; + memset(p, 0, nByte); -#ifdef SQLITE_TEST - if( rc==SQLITE_OK ){ - rc = sqlite3Fts3ExprInitTestInterface(db); - } -#endif + p->pFts3Tab = (Fts3Table *)&p[1]; + p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1]; + p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1]; + p->pFts3Tab->db = db; + p->pFts3Tab->nIndex = 1; - /* Create the virtual table wrapper around the hash-table and overload - ** the two scalar functions. If this is successful, register the - ** module with sqlite. - */ - if( SQLITE_OK==rc - && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer")) - && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) - && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1)) - && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 1)) - && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 2)) - && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1)) - ){ - rc = sqlite3_create_module_v2( - db, "fts3", &fts3Module, (void *)pHash, hashDestroy - ); - if( rc==SQLITE_OK ){ - rc = sqlite3_create_module_v2( - db, "fts4", &fts3Module, (void *)pHash, 0 - ); - } - if( rc==SQLITE_OK ){ - rc = sqlite3Fts3InitTok(db, (void *)pHash); - } - return rc; - } + memcpy((char *)p->pFts3Tab->zDb, zDb, nDb); + memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3); + sqlite3Fts3Dequote((char *)p->pFts3Tab->zName); + *ppVtab = (sqlite3_vtab *)p; + return SQLITE_OK; - /* An error has occurred. Delete the hash table and return the error code. */ - assert( rc!=SQLITE_OK ); - if( pHash ){ - sqlite3Fts3HashClear(pHash); - sqlite3_free(pHash); - } - return rc; + bad_args: + sqlite3Fts3ErrMsg(pzErr, "invalid arguments to fts4aux constructor"); + return SQLITE_ERROR; } /* -** Allocate an Fts3MultiSegReader for each token in the expression headed -** by pExpr. -** -** An Fts3SegReader object is a cursor that can seek or scan a range of -** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple -** Fts3SegReader objects internally to provide an interface to seek or scan -** within the union of all segments of a b-tree. Hence the name. -** -** If the allocated Fts3MultiSegReader just seeks to a single entry in a -** segment b-tree (if the term is not a prefix or it is a prefix for which -** there exists prefix b-tree of the right length) then it may be traversed -** and merged incrementally. Otherwise, it has to be merged into an in-memory -** doclist and then traversed. +** This function does the work for both the xDisconnect and xDestroy methods. +** These tables have no persistent representation of their own, so xDisconnect +** and xDestroy are identical operations. */ -static void fts3EvalAllocateReaders( - Fts3Cursor *pCsr, /* FTS cursor handle */ - Fts3Expr *pExpr, /* Allocate readers for this expression */ - int *pnToken, /* OUT: Total number of tokens in phrase. */ - int *pnOr, /* OUT: Total number of OR nodes in expr. */ - int *pRc /* IN/OUT: Error code */ -){ - if( pExpr && SQLITE_OK==*pRc ){ - if( pExpr->eType==FTSQUERY_PHRASE ){ - int i; - int nToken = pExpr->pPhrase->nToken; - *pnToken += nToken; - for(i=0; ipPhrase->aToken[i]; - int rc = fts3TermSegReaderCursor(pCsr, - pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr - ); - if( rc!=SQLITE_OK ){ - *pRc = rc; - return; - } - } - assert( pExpr->pPhrase->iDoclistToken==0 ); - pExpr->pPhrase->iDoclistToken = -1; - }else{ - *pnOr += (pExpr->eType==FTSQUERY_OR); - fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc); - fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc); - } +static int fts3auxDisconnectMethod(sqlite3_vtab *pVtab){ + Fts3auxTable *p = (Fts3auxTable *)pVtab; + Fts3Table *pFts3 = p->pFts3Tab; + int i; + + /* Free any prepared statements held */ + for(i=0; iaStmt); i++){ + sqlite3_finalize(pFts3->aStmt[i]); } + sqlite3_free(pFts3->zSegmentsTbl); + sqlite3_free(p); + return SQLITE_OK; } +#define FTS4AUX_EQ_CONSTRAINT 1 +#define FTS4AUX_GE_CONSTRAINT 2 +#define FTS4AUX_LE_CONSTRAINT 4 + /* -** Arguments pList/nList contain the doclist for token iToken of phrase p. -** It is merged into the main doclist stored in p->doclist.aAll/nAll. -** -** This function assumes that pList points to a buffer allocated using -** sqlite3_malloc(). This function takes responsibility for eventually -** freeing the buffer. -** -** SQLITE_OK is returned if successful, or SQLITE_NOMEM if an error occurs. +** xBestIndex - Analyze a WHERE and ORDER BY clause. */ -static int fts3EvalPhraseMergeToken( - Fts3Table *pTab, /* FTS Table pointer */ - Fts3Phrase *p, /* Phrase to merge pList/nList into */ - int iToken, /* Token pList/nList corresponds to */ - char *pList, /* Pointer to doclist */ - int nList /* Number of bytes in pList */ +static int fts3auxBestIndexMethod( + sqlite3_vtab *pVTab, + sqlite3_index_info *pInfo ){ - int rc = SQLITE_OK; - assert( iToken!=p->iDoclistToken ); - - if( pList==0 ){ - sqlite3_free(p->doclist.aAll); - p->doclist.aAll = 0; - p->doclist.nAll = 0; - } + int i; + int iEq = -1; + int iGe = -1; + int iLe = -1; + int iLangid = -1; + int iNext = 1; /* Next free argvIndex value */ - else if( p->iDoclistToken<0 ){ - p->doclist.aAll = pList; - p->doclist.nAll = nList; - } + UNUSED_PARAMETER(pVTab); - else if( p->doclist.aAll==0 ){ - sqlite3_free(pList); + /* This vtab delivers always results in "ORDER BY term ASC" order. */ + if( pInfo->nOrderBy==1 + && pInfo->aOrderBy[0].iColumn==0 + && pInfo->aOrderBy[0].desc==0 + ){ + pInfo->orderByConsumed = 1; } - else { - char *pLeft; - char *pRight; - int nLeft; - int nRight; - int nDiff; + /* Search for equality and range constraints on the "term" column. + ** And equality constraints on the hidden "languageid" column. */ + for(i=0; inConstraint; i++){ + if( pInfo->aConstraint[i].usable ){ + int op = pInfo->aConstraint[i].op; + int iCol = pInfo->aConstraint[i].iColumn; - if( p->iDoclistTokendoclist.aAll; - nLeft = p->doclist.nAll; - pRight = pList; - nRight = nList; - nDiff = iToken - p->iDoclistToken; - }else{ - pRight = p->doclist.aAll; - nRight = p->doclist.nAll; - pLeft = pList; - nLeft = nList; - nDiff = p->iDoclistToken - iToken; + if( iCol==0 ){ + if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i; + if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i; + if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i; + if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i; + if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i; + } + if( iCol==4 ){ + if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iLangid = i; + } } + } - rc = fts3DoclistPhraseMerge( - pTab->bDescIdx, nDiff, pLeft, nLeft, &pRight, &nRight - ); - sqlite3_free(pLeft); - p->doclist.aAll = pRight; - p->doclist.nAll = nRight; + if( iEq>=0 ){ + pInfo->idxNum = FTS4AUX_EQ_CONSTRAINT; + pInfo->aConstraintUsage[iEq].argvIndex = iNext++; + pInfo->estimatedCost = 5; + }else{ + pInfo->idxNum = 0; + pInfo->estimatedCost = 20000; + if( iGe>=0 ){ + pInfo->idxNum += FTS4AUX_GE_CONSTRAINT; + pInfo->aConstraintUsage[iGe].argvIndex = iNext++; + pInfo->estimatedCost /= 2; + } + if( iLe>=0 ){ + pInfo->idxNum += FTS4AUX_LE_CONSTRAINT; + pInfo->aConstraintUsage[iLe].argvIndex = iNext++; + pInfo->estimatedCost /= 2; + } + } + if( iLangid>=0 ){ + pInfo->aConstraintUsage[iLangid].argvIndex = iNext++; + pInfo->estimatedCost--; } - if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken; - return rc; + return SQLITE_OK; } /* -** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist -** does not take deferred tokens into account. -** -** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +** xOpen - Open a cursor. */ -static int fts3EvalPhraseLoad( - Fts3Cursor *pCsr, /* FTS Cursor handle */ - Fts3Phrase *p /* Phrase object */ -){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int iToken; - int rc = SQLITE_OK; +static int fts3auxOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ + Fts3auxCursor *pCsr; /* Pointer to cursor object to return */ - for(iToken=0; rc==SQLITE_OK && iTokennToken; iToken++){ - Fts3PhraseToken *pToken = &p->aToken[iToken]; - assert( pToken->pDeferred==0 || pToken->pSegcsr==0 ); + UNUSED_PARAMETER(pVTab); - if( pToken->pSegcsr ){ - int nThis = 0; - char *pThis = 0; - rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis); - if( rc==SQLITE_OK ){ - rc = fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis); - } - } - assert( pToken->pSegcsr==0 ); - } + pCsr = (Fts3auxCursor *)sqlite3_malloc(sizeof(Fts3auxCursor)); + if( !pCsr ) return SQLITE_NOMEM; + memset(pCsr, 0, sizeof(Fts3auxCursor)); - return rc; + *ppCsr = (sqlite3_vtab_cursor *)pCsr; + return SQLITE_OK; } /* -** This function is called on each phrase after the position lists for -** any deferred tokens have been loaded into memory. It updates the phrases -** current position list to include only those positions that are really -** instances of the phrase (after considering deferred tokens). If this -** means that the phrase does not appear in the current row, doclist.pList -** and doclist.nList are both zeroed. -** -** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +** xClose - Close a cursor. */ -static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ - int iToken; /* Used to iterate through phrase tokens */ - char *aPoslist = 0; /* Position list for deferred tokens */ - int nPoslist = 0; /* Number of bytes in aPoslist */ - int iPrev = -1; /* Token number of previous deferred token */ +static int fts3auxCloseMethod(sqlite3_vtab_cursor *pCursor){ + Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab; + Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; - assert( pPhrase->doclist.bFreeList==0 ); + sqlite3Fts3SegmentsClose(pFts3); + sqlite3Fts3SegReaderFinish(&pCsr->csr); + sqlite3_free((void *)pCsr->filter.zTerm); + sqlite3_free(pCsr->zStop); + sqlite3_free(pCsr->aStat); + sqlite3_free(pCsr); + return SQLITE_OK; +} - for(iToken=0; iTokennToken; iToken++){ - Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; - Fts3DeferredToken *pDeferred = pToken->pDeferred; +static int fts3auxGrowStatArray(Fts3auxCursor *pCsr, int nSize){ + if( nSize>pCsr->nStat ){ + struct Fts3auxColstats *aNew; + aNew = (struct Fts3auxColstats *)sqlite3_realloc(pCsr->aStat, + sizeof(struct Fts3auxColstats) * nSize + ); + if( aNew==0 ) return SQLITE_NOMEM; + memset(&aNew[pCsr->nStat], 0, + sizeof(struct Fts3auxColstats) * (nSize - pCsr->nStat) + ); + pCsr->aStat = aNew; + pCsr->nStat = nSize; + } + return SQLITE_OK; +} - if( pDeferred ){ - char *pList; - int nList; - int rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList); - if( rc!=SQLITE_OK ) return rc; +/* +** xNext - Advance the cursor to the next row, if any. +*/ +static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){ + Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; + Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab; + int rc; - if( pList==0 ){ - sqlite3_free(aPoslist); - pPhrase->doclist.pList = 0; - pPhrase->doclist.nList = 0; - return SQLITE_OK; + /* Increment our pretend rowid value. */ + pCsr->iRowid++; - }else if( aPoslist==0 ){ - aPoslist = pList; - nPoslist = nList; + for(pCsr->iCol++; pCsr->iColnStat; pCsr->iCol++){ + if( pCsr->aStat[pCsr->iCol].nDoc>0 ) return SQLITE_OK; + } - }else{ - char *aOut = pList; - char *p1 = aPoslist; - char *p2 = aOut; + rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr); + if( rc==SQLITE_ROW ){ + int i = 0; + int nDoclist = pCsr->csr.nDoclist; + char *aDoclist = pCsr->csr.aDoclist; + int iCol; - assert( iPrev>=0 ); - fts3PoslistPhraseMerge(&aOut, iToken-iPrev, 0, 1, &p1, &p2); - sqlite3_free(aPoslist); - aPoslist = pList; - nPoslist = (int)(aOut - aPoslist); - if( nPoslist==0 ){ - sqlite3_free(aPoslist); - pPhrase->doclist.pList = 0; - pPhrase->doclist.nList = 0; - return SQLITE_OK; - } + int eState = 0; + + if( pCsr->zStop ){ + int n = (pCsr->nStopcsr.nTerm) ? pCsr->nStop : pCsr->csr.nTerm; + int mc = memcmp(pCsr->zStop, pCsr->csr.zTerm, n); + if( mc<0 || (mc==0 && pCsr->csr.nTerm>pCsr->nStop) ){ + pCsr->isEof = 1; + return SQLITE_OK; } - iPrev = iToken; } - } - if( iPrev>=0 ){ - int nMaxUndeferred = pPhrase->iDoclistToken; - if( nMaxUndeferred<0 ){ - pPhrase->doclist.pList = aPoslist; - pPhrase->doclist.nList = nPoslist; - pPhrase->doclist.iDocid = pCsr->iPrevId; - pPhrase->doclist.bFreeList = 1; - }else{ - int nDistance; - char *p1; - char *p2; - char *aOut; + if( fts3auxGrowStatArray(pCsr, 2) ) return SQLITE_NOMEM; + memset(pCsr->aStat, 0, sizeof(struct Fts3auxColstats) * pCsr->nStat); + iCol = 0; - if( nMaxUndeferred>iPrev ){ - p1 = aPoslist; - p2 = pPhrase->doclist.pList; - nDistance = nMaxUndeferred - iPrev; - }else{ - p1 = pPhrase->doclist.pList; - p2 = aPoslist; - nDistance = iPrev - nMaxUndeferred; - } + while( idoclist.pList = aOut; - if( fts3PoslistPhraseMerge(&aOut, nDistance, 0, 1, &p1, &p2) ){ - pPhrase->doclist.bFreeList = 1; - pPhrase->doclist.nList = (int)(aOut - pPhrase->doclist.pList); - }else{ - sqlite3_free(aOut); - pPhrase->doclist.pList = 0; - pPhrase->doclist.nList = 0; + i += sqlite3Fts3GetVarint(&aDoclist[i], &v); + switch( eState ){ + /* State 0. In this state the integer just read was a docid. */ + case 0: + pCsr->aStat[0].nDoc++; + eState = 1; + iCol = 0; + break; + + /* State 1. In this state we are expecting either a 1, indicating + ** that the following integer will be a column number, or the + ** start of a position list for column 0. + ** + ** The only difference between state 1 and state 2 is that if the + ** integer encountered in state 1 is not 0 or 1, then we need to + ** increment the column 0 "nDoc" count for this term. + */ + case 1: + assert( iCol==0 ); + if( v>1 ){ + pCsr->aStat[1].nDoc++; + } + eState = 2; + /* fall through */ + + case 2: + if( v==0 ){ /* 0x00. Next integer will be a docid. */ + eState = 0; + }else if( v==1 ){ /* 0x01. Next integer will be a column number. */ + eState = 3; + }else{ /* 2 or greater. A position. */ + pCsr->aStat[iCol+1].nOcc++; + pCsr->aStat[0].nOcc++; + } + break; + + /* State 3. The integer just read is a column number. */ + default: assert( eState==3 ); + iCol = (int)v; + if( fts3auxGrowStatArray(pCsr, iCol+2) ) return SQLITE_NOMEM; + pCsr->aStat[iCol+1].nDoc++; + eState = 2; + break; } - sqlite3_free(aPoslist); } - } - return SQLITE_OK; + pCsr->iCol = 0; + rc = SQLITE_OK; + }else{ + pCsr->isEof = 1; + } + return rc; } /* -** Maximum number of tokens a phrase may have to be considered for the -** incremental doclists strategy. +** xFilter - Initialize a cursor to point at the start of its data. */ -#define MAX_INCR_PHRASE_TOKENS 4 +static int fts3auxFilterMethod( + sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ + int idxNum, /* Strategy index */ + const char *idxStr, /* Unused */ + int nVal, /* Number of elements in apVal */ + sqlite3_value **apVal /* Arguments for the indexing scheme */ +){ + Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; + Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab; + int rc; + int isScan = 0; + int iLangVal = 0; /* Language id to query */ -/* -** This function is called for each Fts3Phrase in a full-text query -** expression to initialize the mechanism for returning rows. Once this -** function has been called successfully on an Fts3Phrase, it may be -** used with fts3EvalPhraseNext() to iterate through the matching docids. -** -** If parameter bOptOk is true, then the phrase may (or may not) use the -** incremental loading strategy. Otherwise, the entire doclist is loaded into -** memory within this call. -** -** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. -*/ -static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int rc = SQLITE_OK; /* Error code */ - int i; + int iEq = -1; /* Index of term=? value in apVal */ + int iGe = -1; /* Index of term>=? value in apVal */ + int iLe = -1; /* Index of term<=? value in apVal */ + int iLangid = -1; /* Index of languageid=? value in apVal */ + int iNext = 0; - /* Determine if doclists may be loaded from disk incrementally. This is - ** possible if the bOptOk argument is true, the FTS doclists will be - ** scanned in forward order, and the phrase consists of - ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first" - ** tokens or prefix tokens that cannot use a prefix-index. */ - int bHaveIncr = 0; - int bIncrOk = (bOptOk - && pCsr->bDesc==pTab->bDescIdx - && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 - && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 -#ifdef SQLITE_TEST - && pTab->bNoIncrDoclist==0 -#endif + UNUSED_PARAMETER(nVal); + UNUSED_PARAMETER(idxStr); + + assert( idxStr==0 ); + assert( idxNum==FTS4AUX_EQ_CONSTRAINT || idxNum==0 + || idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT + || idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT) ); - for(i=0; bIncrOk==1 && inToken; i++){ - Fts3PhraseToken *pToken = &p->aToken[i]; - if( pToken->bFirst || (pToken->pSegcsr!=0 && !pToken->pSegcsr->bLookup) ){ - bIncrOk = 0; + + if( idxNum==FTS4AUX_EQ_CONSTRAINT ){ + iEq = iNext++; + }else{ + isScan = 1; + if( idxNum & FTS4AUX_GE_CONSTRAINT ){ + iGe = iNext++; } - if( pToken->pSegcsr ) bHaveIncr = 1; + if( idxNum & FTS4AUX_LE_CONSTRAINT ){ + iLe = iNext++; + } + } + if( iNextiColumn >= pTab->nColumn ? -1 : p->iColumn); - for(i=0; rc==SQLITE_OK && inToken; i++){ - Fts3PhraseToken *pToken = &p->aToken[i]; - Fts3MultiSegReader *pSegcsr = pToken->pSegcsr; - if( pSegcsr ){ - rc = sqlite3Fts3MsrIncrStart(pTab, pSegcsr, iCol, pToken->z, pToken->n); - } + /* In case this cursor is being reused, close and zero it. */ + testcase(pCsr->filter.zTerm); + sqlite3Fts3SegReaderFinish(&pCsr->csr); + sqlite3_free((void *)pCsr->filter.zTerm); + sqlite3_free(pCsr->aStat); + memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr); + + pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; + if( isScan ) pCsr->filter.flags |= FTS3_SEGMENT_SCAN; + + if( iEq>=0 || iGe>=0 ){ + const unsigned char *zStr = sqlite3_value_text(apVal[0]); + assert( (iEq==0 && iGe==-1) || (iEq==-1 && iGe==0) ); + if( zStr ){ + pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr); + pCsr->filter.nTerm = sqlite3_value_bytes(apVal[0]); + if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM; } - p->bIncr = 1; - }else{ - /* Load the full doclist for the phrase into memory. */ - rc = fts3EvalPhraseLoad(pCsr, p); - p->bIncr = 0; } - assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr ); + if( iLe>=0 ){ + pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe])); + pCsr->nStop = sqlite3_value_bytes(apVal[iLe]); + if( pCsr->zStop==0 ) return SQLITE_NOMEM; + } + + if( iLangid>=0 ){ + iLangVal = sqlite3_value_int(apVal[iLangid]); + + /* If the user specified a negative value for the languageid, use zero + ** instead. This works, as the "languageid=?" constraint will also + ** be tested by the VDBE layer. The test will always be false (since + ** this module will not return a row with a negative languageid), and + ** so the overall query will return zero rows. */ + if( iLangVal<0 ) iLangVal = 0; + } + pCsr->iLangid = iLangVal; + + rc = sqlite3Fts3SegReaderCursor(pFts3, iLangVal, 0, FTS3_SEGCURSOR_ALL, + pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr + ); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter); + } + + if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor); return rc; } /* -** This function is used to iterate backwards (from the end to start) -** through doclists. It is used by this module to iterate through phrase -** doclists in reverse and by the fts3_write.c module to iterate through -** pending-terms lists when writing to databases with "order=desc". -** -** The doclist may be sorted in ascending (parameter bDescIdx==0) or -** descending (parameter bDescIdx==1) order of docid. Regardless, this -** function iterates from the end of the doclist to the beginning. +** xEof - Return true if the cursor is at EOF, or false otherwise. */ -SQLITE_PRIVATE void sqlite3Fts3DoclistPrev( - int bDescIdx, /* True if the doclist is desc */ - char *aDoclist, /* Pointer to entire doclist */ - int nDoclist, /* Length of aDoclist in bytes */ - char **ppIter, /* IN/OUT: Iterator pointer */ - sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */ - int *pnList, /* OUT: List length pointer */ - u8 *pbEof /* OUT: End-of-file flag */ +static int fts3auxEofMethod(sqlite3_vtab_cursor *pCursor){ + Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; + return pCsr->isEof; +} + +/* +** xColumn - Return a column value. +*/ +static int fts3auxColumnMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ + int iCol /* Index of column to read value from */ ){ - char *p = *ppIter; + Fts3auxCursor *p = (Fts3auxCursor *)pCursor; - assert( nDoclist>0 ); - assert( *pbEof==0 ); - assert( p || *piDocid==0 ); - assert( !p || (p>aDoclist && p<&aDoclist[nDoclist]) ); + assert( p->isEof==0 ); + switch( iCol ){ + case 0: /* term */ + sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT); + break; - if( p==0 ){ - sqlite3_int64 iDocid = 0; - char *pNext = 0; - char *pDocid = aDoclist; - char *pEnd = &aDoclist[nDoclist]; - int iMul = 1; + case 1: /* col */ + if( p->iCol ){ + sqlite3_result_int(pCtx, p->iCol-1); + }else{ + sqlite3_result_text(pCtx, "*", -1, SQLITE_STATIC); + } + break; - while( pDocidaStat[p->iCol].nDoc); + break; - *pnList = (int)(pEnd - pNext); - *ppIter = pNext; - *piDocid = iDocid; - }else{ - int iMul = (bDescIdx ? -1 : 1); - sqlite3_int64 iDelta; - fts3GetReverseVarint(&p, aDoclist, &iDelta); - *piDocid -= (iMul * iDelta); + case 3: /* occurrences */ + sqlite3_result_int64(pCtx, p->aStat[p->iCol].nOcc); + break; - if( p==aDoclist ){ - *pbEof = 1; - }else{ - char *pSave = p; - fts3ReversePoslist(aDoclist, &p); - *pnList = (int)(pSave - p); - } - *ppIter = p; + default: /* languageid */ + assert( iCol==4 ); + sqlite3_result_int(pCtx, p->iLangid); + break; } + + return SQLITE_OK; } /* -** Iterate forwards through a doclist. +** xRowid - Return the current rowid for the cursor. */ -SQLITE_PRIVATE void sqlite3Fts3DoclistNext( - int bDescIdx, /* True if the doclist is desc */ - char *aDoclist, /* Pointer to entire doclist */ - int nDoclist, /* Length of aDoclist in bytes */ - char **ppIter, /* IN/OUT: Iterator pointer */ - sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */ - u8 *pbEof /* OUT: End-of-file flag */ +static int fts3auxRowidMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite_int64 *pRowid /* OUT: Rowid value */ ){ - char *p = *ppIter; - - assert( nDoclist>0 ); - assert( *pbEof==0 ); - assert( p || *piDocid==0 ); - assert( !p || (p>=aDoclist && p<=&aDoclist[nDoclist]) ); + Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; + *pRowid = pCsr->iRowid; + return SQLITE_OK; +} - if( p==0 ){ - p = aDoclist; - p += sqlite3Fts3GetVarint(p, piDocid); - }else{ - fts3PoslistCopy(0, &p); - if( p>=&aDoclist[nDoclist] ){ - *pbEof = 1; - }else{ - sqlite3_int64 iVar; - p += sqlite3Fts3GetVarint(p, &iVar); - *piDocid += ((bDescIdx ? -1 : 1) * iVar); - } - } +/* +** Register the fts3aux module with database connection db. Return SQLITE_OK +** if successful or an error code if sqlite3_create_module() fails. +*/ +SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db){ + static const sqlite3_module fts3aux_module = { + 0, /* iVersion */ + fts3auxConnectMethod, /* xCreate */ + fts3auxConnectMethod, /* xConnect */ + fts3auxBestIndexMethod, /* xBestIndex */ + fts3auxDisconnectMethod, /* xDisconnect */ + fts3auxDisconnectMethod, /* xDestroy */ + fts3auxOpenMethod, /* xOpen */ + fts3auxCloseMethod, /* xClose */ + fts3auxFilterMethod, /* xFilter */ + fts3auxNextMethod, /* xNext */ + fts3auxEofMethod, /* xEof */ + fts3auxColumnMethod, /* xColumn */ + fts3auxRowidMethod, /* xRowid */ + 0, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindFunction */ + 0, /* xRename */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0 /* xRollbackTo */ + }; + int rc; /* Return code */ - *ppIter = p; + rc = sqlite3_create_module(db, "fts4aux", &fts3aux_module, 0); + return rc; } +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + +/************** End of fts3_aux.c ********************************************/ +/************** Begin file fts3_expr.c ***************************************/ /* -** Advance the iterator pDL to the next entry in pDL->aAll/nAll. Set *pbEof -** to true if EOF is reached. +** 2008 Nov 28 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This module contains code that implements a parser for fts3 query strings +** (the right-hand argument to the MATCH operator). Because the supported +** syntax is relatively simple, the whole tokenizer/parser system is +** hand-coded. */ -static void fts3EvalDlPhraseNext( - Fts3Table *pTab, - Fts3Doclist *pDL, - u8 *pbEof -){ - char *pIter; /* Used to iterate through aAll */ - char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */ - - if( pDL->pNextDocid ){ - pIter = pDL->pNextDocid; - }else{ - pIter = pDL->aAll; - } +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) - if( pIter>=pEnd ){ - /* We have already reached the end of this doclist. EOF. */ - *pbEof = 1; - }else{ - sqlite3_int64 iDelta; - pIter += sqlite3Fts3GetVarint(pIter, &iDelta); - if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ - pDL->iDocid += iDelta; - }else{ - pDL->iDocid -= iDelta; - } - pDL->pList = pIter; - fts3PoslistCopy(0, &pIter); - pDL->nList = (int)(pIter - pDL->pList); +/* +** By default, this module parses the legacy syntax that has been +** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS +** is defined, then it uses the new syntax. The differences between +** the new and the old syntaxes are: +** +** a) The new syntax supports parenthesis. The old does not. +** +** b) The new syntax supports the AND and NOT operators. The old does not. +** +** c) The old syntax supports the "-" token qualifier. This is not +** supported by the new syntax (it is replaced by the NOT operator). +** +** d) When using the old syntax, the OR operator has a greater precedence +** than an implicit AND. When using the new, both implicity and explicit +** AND operators have a higher precedence than OR. +** +** If compiled with SQLITE_TEST defined, then this module exports the +** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable +** to zero causes the module to use the old syntax. If it is set to +** non-zero the new syntax is activated. This is so both syntaxes can +** be tested using a single build of testfixture. +** +** The following describes the syntax supported by the fts3 MATCH +** operator in a similar format to that used by the lemon parser +** generator. This module does not use actually lemon, it uses a +** custom parser. +** +** query ::= andexpr (OR andexpr)*. +** +** andexpr ::= notexpr (AND? notexpr)*. +** +** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*. +** notexpr ::= LP query RP. +** +** nearexpr ::= phrase (NEAR distance_opt nearexpr)*. +** +** distance_opt ::= . +** distance_opt ::= / INTEGER. +** +** phrase ::= TOKEN. +** phrase ::= COLUMN:TOKEN. +** phrase ::= "TOKEN TOKEN TOKEN...". +*/ - /* pIter now points just past the 0x00 that terminates the position- - ** list for document pDL->iDocid. However, if this position-list was - ** edited in place by fts3EvalNearTrim(), then pIter may not actually - ** point to the start of the next docid value. The following line deals - ** with this case by advancing pIter past the zero-padding added by - ** fts3EvalNearTrim(). */ - while( pIterpNextDocid = pIter; - assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter ); - *pbEof = 0; - } -} +/* +** Default span for NEAR operators. +*/ +#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10 + +/* #include */ +/* #include */ /* -** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext(). +** isNot: +** This variable is used by function getNextNode(). When getNextNode() is +** called, it sets ParseContext.isNot to true if the 'next node' is a +** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the +** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to +** zero. */ -typedef struct TokenDoclist TokenDoclist; -struct TokenDoclist { - int bIgnore; - sqlite3_int64 iDocid; - char *pList; - int nList; +typedef struct ParseContext ParseContext; +struct ParseContext { + sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ + int iLangid; /* Language id used with tokenizer */ + const char **azCol; /* Array of column names for fts3 table */ + int bFts4; /* True to allow FTS4-only syntax */ + int nCol; /* Number of entries in azCol[] */ + int iDefaultCol; /* Default column to query */ + int isNot; /* True if getNextNode() sees a unary - */ + sqlite3_context *pCtx; /* Write error message here */ + int nNest; /* Number of nested brackets */ }; /* -** Token pToken is an incrementally loaded token that is part of a -** multi-token phrase. Advance it to the next matching document in the -** database and populate output variable *p with the details of the new -** entry. Or, if the iterator has reached EOF, set *pbEof to true. -** -** If an error occurs, return an SQLite error code. Otherwise, return -** SQLITE_OK. +** This function is equivalent to the standard isspace() function. +** +** The standard isspace() can be awkward to use safely, because although it +** is defined to accept an argument of type int, its behavior when passed +** an integer that falls outside of the range of the unsigned char type +** is undefined (and sometimes, "undefined" means segfault). This wrapper +** is defined to accept an argument of type char, and always returns 0 for +** any values that fall outside of the range of the unsigned char type (i.e. +** negative values). +*/ +static int fts3isspace(char c){ + return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; +} + +/* +** Allocate nByte bytes of memory using sqlite3_malloc(). If successful, +** zero the memory before returning a pointer to it. If unsuccessful, +** return NULL. */ -static int incrPhraseTokenNext( - Fts3Table *pTab, /* Virtual table handle */ - Fts3Phrase *pPhrase, /* Phrase to advance token of */ - int iToken, /* Specific token to advance */ - TokenDoclist *p, /* OUT: Docid and doclist for new entry */ - u8 *pbEof /* OUT: True if iterator is at EOF */ +static void *fts3MallocZero(int nByte){ + void *pRet = sqlite3_malloc(nByte); + if( pRet ) memset(pRet, 0, nByte); + return pRet; +} + +SQLITE_PRIVATE int sqlite3Fts3OpenTokenizer( + sqlite3_tokenizer *pTokenizer, + int iLangid, + const char *z, + int n, + sqlite3_tokenizer_cursor **ppCsr ){ - int rc = SQLITE_OK; + sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; + sqlite3_tokenizer_cursor *pCsr = 0; + int rc; - if( pPhrase->iDoclistToken==iToken ){ - assert( p->bIgnore==0 ); - assert( pPhrase->aToken[iToken].pSegcsr==0 ); - fts3EvalDlPhraseNext(pTab, &pPhrase->doclist, pbEof); - p->pList = pPhrase->doclist.pList; - p->nList = pPhrase->doclist.nList; - p->iDocid = pPhrase->doclist.iDocid; - }else{ - Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; - assert( pToken->pDeferred==0 ); - assert( pToken->pSegcsr || pPhrase->iDoclistToken>=0 ); - if( pToken->pSegcsr ){ - assert( p->bIgnore==0 ); - rc = sqlite3Fts3MsrIncrNext( - pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList - ); - if( p->pList==0 ) *pbEof = 1; - }else{ - p->bIgnore = 1; + rc = pModule->xOpen(pTokenizer, z, n, &pCsr); + assert( rc==SQLITE_OK || pCsr==0 ); + if( rc==SQLITE_OK ){ + pCsr->pTokenizer = pTokenizer; + if( pModule->iVersion>=1 ){ + rc = pModule->xLanguageid(pCsr, iLangid); + if( rc!=SQLITE_OK ){ + pModule->xClose(pCsr); + pCsr = 0; + } } } - + *ppCsr = pCsr; return rc; } +/* +** Function getNextNode(), which is called by fts3ExprParse(), may itself +** call fts3ExprParse(). So this forward declaration is required. +*/ +static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); /* -** The phrase iterator passed as the second argument: -** -** * features at least one token that uses an incremental doclist, and -** -** * does not contain any deferred tokens. -** -** Advance it to the next matching documnent in the database and populate -** the Fts3Doclist.pList and nList fields. -** -** If there is no "next" entry and no error occurs, then *pbEof is set to -** 1 before returning. Otherwise, if no error occurs and the iterator is -** successfully advanced, *pbEof is set to 0. +** Extract the next token from buffer z (length n) using the tokenizer +** and other information (column names etc.) in pParse. Create an Fts3Expr +** structure of type FTSQUERY_PHRASE containing a phrase consisting of this +** single token and set *ppExpr to point to it. If the end of the buffer is +** reached before a token is found, set *ppExpr to zero. It is the +** responsibility of the caller to eventually deallocate the allocated +** Fts3Expr structure (if any) by passing it to sqlite3_free(). ** -** If an error occurs, return an SQLite error code. Otherwise, return -** SQLITE_OK. +** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation +** fails. */ -static int fts3EvalIncrPhraseNext( - Fts3Cursor *pCsr, /* FTS Cursor handle */ - Fts3Phrase *p, /* Phrase object to advance to next docid */ - u8 *pbEof /* OUT: Set to 1 if EOF */ +static int getNextToken( + ParseContext *pParse, /* fts3 query parse context */ + int iCol, /* Value for Fts3Phrase.iColumn */ + const char *z, int n, /* Input string */ + Fts3Expr **ppExpr, /* OUT: expression */ + int *pnConsumed /* OUT: Number of bytes consumed */ ){ - int rc = SQLITE_OK; - Fts3Doclist *pDL = &p->doclist; - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - u8 bEof = 0; - - /* This is only called if it is guaranteed that the phrase has at least - ** one incremental token. In which case the bIncr flag is set. */ - assert( p->bIncr==1 ); + sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; + sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; + int rc; + sqlite3_tokenizer_cursor *pCursor; + Fts3Expr *pRet = 0; + int i = 0; - if( p->nToken==1 && p->bIncr ){ - rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, - &pDL->iDocid, &pDL->pList, &pDL->nList - ); - if( pDL->pList==0 ) bEof = 1; - }else{ - int bDescDoclist = pCsr->bDesc; - struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS]; + /* Set variable i to the maximum number of bytes of input to tokenize. */ + for(i=0; inToken<=MAX_INCR_PHRASE_TOKENS ); - assert( p->iDoclistTokeniLangid, z, i, &pCursor); + if( rc==SQLITE_OK ){ + const char *zToken; + int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; + int nByte; /* total space to allocate */ - while( bEof==0 ){ - int bMaxSet = 0; - sqlite3_int64 iMax = 0; /* Largest docid for all iterators */ - int i; /* Used to iterate through tokens */ + rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); + if( rc==SQLITE_OK ){ + nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; + pRet = (Fts3Expr *)fts3MallocZero(nByte); + if( !pRet ){ + rc = SQLITE_NOMEM; + }else{ + pRet->eType = FTSQUERY_PHRASE; + pRet->pPhrase = (Fts3Phrase *)&pRet[1]; + pRet->pPhrase->nToken = 1; + pRet->pPhrase->iColumn = iCol; + pRet->pPhrase->aToken[0].n = nToken; + pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; + memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); - /* Advance the iterator for each token in the phrase once. */ - for(i=0; rc==SQLITE_OK && inToken && bEof==0; i++){ - rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); - if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){ - iMax = a[i].iDocid; - bMaxSet = 1; + if( iEndpPhrase->aToken[0].isPrefix = 1; + iEnd++; } - } - assert( rc!=SQLITE_OK || (p->nToken>=1 && a[p->nToken-1].bIgnore==0) ); - assert( rc!=SQLITE_OK || bMaxSet ); - /* Keep advancing iterators until they all point to the same document */ - for(i=0; inToken; i++){ - while( rc==SQLITE_OK && bEof==0 - && a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0 - ){ - rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); - if( DOCID_CMP(a[i].iDocid, iMax)>0 ){ - iMax = a[i].iDocid; - i = 0; + while( 1 ){ + if( !sqlite3_fts3_enable_parentheses + && iStart>0 && z[iStart-1]=='-' + ){ + pParse->isNot = 1; + iStart--; + }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){ + pRet->pPhrase->aToken[0].bFirst = 1; + iStart--; + }else{ + break; } } - } - - /* Check if the current entries really are a phrase match */ - if( bEof==0 ){ - int nList = 0; - int nByte = a[p->nToken-1].nList; - char *aDoclist = sqlite3_malloc(nByte+1); - if( !aDoclist ) return SQLITE_NOMEM; - memcpy(aDoclist, a[p->nToken-1].pList, nByte+1); - for(i=0; i<(p->nToken-1); i++){ - if( a[i].bIgnore==0 ){ - char *pL = a[i].pList; - char *pR = aDoclist; - char *pOut = aDoclist; - int nDist = p->nToken-1-i; - int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR); - if( res==0 ) break; - nList = (int)(pOut - aDoclist); - } - } - if( i==(p->nToken-1) ){ - pDL->iDocid = iMax; - pDL->pList = aDoclist; - pDL->nList = nList; - pDL->bFreeList = 1; - break; - } - sqlite3_free(aDoclist); } + *pnConsumed = iEnd; + }else if( i && rc==SQLITE_DONE ){ + rc = SQLITE_OK; } - } - *pbEof = bEof; + pModule->xClose(pCursor); + } + + *ppExpr = pRet; return rc; } + /* -** Attempt to move the phrase iterator to point to the next matching docid. -** If an error occurs, return an SQLite error code. Otherwise, return -** SQLITE_OK. -** -** If there is no "next" entry and no error occurs, then *pbEof is set to -** 1 before returning. Otherwise, if no error occurs and the iterator is -** successfully advanced, *pbEof is set to 0. +** Enlarge a memory allocation. If an out-of-memory allocation occurs, +** then free the old allocation. */ -static int fts3EvalPhraseNext( - Fts3Cursor *pCsr, /* FTS Cursor handle */ - Fts3Phrase *p, /* Phrase object to advance to next docid */ - u8 *pbEof /* OUT: Set to 1 if EOF */ -){ - int rc = SQLITE_OK; - Fts3Doclist *pDL = &p->doclist; - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - - if( p->bIncr ){ - rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof); - }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ - sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, - &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof - ); - pDL->pList = pDL->pNextDocid; - }else{ - fts3EvalDlPhraseNext(pTab, pDL, pbEof); +static void *fts3ReallocOrFree(void *pOrig, int nNew){ + void *pRet = sqlite3_realloc(pOrig, nNew); + if( !pRet ){ + sqlite3_free(pOrig); } - - return rc; + return pRet; } /* +** Buffer zInput, length nInput, contains the contents of a quoted string +** that appeared as part of an fts3 query expression. Neither quote character +** is included in the buffer. This function attempts to tokenize the entire +** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE +** containing the results. ** -** If *pRc is not SQLITE_OK when this function is called, it is a no-op. -** Otherwise, fts3EvalPhraseStart() is called on all phrases within the -** expression. Also the Fts3Expr.bDeferred variable is set to true for any -** expressions for which all descendent tokens are deferred. -** -** If parameter bOptOk is zero, then it is guaranteed that the -** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for -** each phrase in the expression (subject to deferred token processing). -** Or, if bOptOk is non-zero, then one or more tokens within the expression -** may be loaded incrementally, meaning doclist.aAll/nAll is not available. -** -** If an error occurs within this function, *pRc is set to an SQLite error -** code before returning. +** If successful, SQLITE_OK is returned and *ppExpr set to point at the +** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory +** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set +** to 0. */ -static void fts3EvalStartReaders( - Fts3Cursor *pCsr, /* FTS Cursor handle */ - Fts3Expr *pExpr, /* Expression to initialize phrases in */ - int *pRc /* IN/OUT: Error code */ +static int getNextString( + ParseContext *pParse, /* fts3 query parse context */ + const char *zInput, int nInput, /* Input string */ + Fts3Expr **ppExpr /* OUT: expression */ ){ - if( pExpr && SQLITE_OK==*pRc ){ - if( pExpr->eType==FTSQUERY_PHRASE ){ - int i; - int nToken = pExpr->pPhrase->nToken; - for(i=0; ipPhrase->aToken[i].pDeferred==0 ) break; - } - pExpr->bDeferred = (i==nToken); - *pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase); - }else{ - fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc); - fts3EvalStartReaders(pCsr, pExpr->pRight, pRc); - pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); - } - } -} + sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; + sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; + int rc; + Fts3Expr *p = 0; + sqlite3_tokenizer_cursor *pCursor = 0; + char *zTemp = 0; + int nTemp = 0; -/* -** An array of the following structures is assembled as part of the process -** of selecting tokens to defer before the query starts executing (as part -** of the xFilter() method). There is one element in the array for each -** token in the FTS expression. -** -** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong -** to phrases that are connected only by AND and NEAR operators (not OR or -** NOT). When determining tokens to defer, each AND/NEAR cluster is considered -** separately. The root of a tokens AND/NEAR cluster is stored in -** Fts3TokenAndCost.pRoot. -*/ -typedef struct Fts3TokenAndCost Fts3TokenAndCost; -struct Fts3TokenAndCost { - Fts3Phrase *pPhrase; /* The phrase the token belongs to */ - int iToken; /* Position of token in phrase */ - Fts3PhraseToken *pToken; /* The token itself */ - Fts3Expr *pRoot; /* Root of NEAR/AND cluster */ - int nOvfl; /* Number of overflow pages to load doclist */ - int iCol; /* The column the token must match */ -}; + const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase); + int nToken = 0; -/* -** This function is used to populate an allocated Fts3TokenAndCost array. -** -** If *pRc is not SQLITE_OK when this function is called, it is a no-op. -** Otherwise, if an error occurs during execution, *pRc is set to an -** SQLite error code. -*/ -static void fts3EvalTokenCosts( - Fts3Cursor *pCsr, /* FTS Cursor handle */ - Fts3Expr *pRoot, /* Root of current AND/NEAR cluster */ - Fts3Expr *pExpr, /* Expression to consider */ - Fts3TokenAndCost **ppTC, /* Write new entries to *(*ppTC)++ */ - Fts3Expr ***ppOr, /* Write new OR root to *(*ppOr)++ */ - int *pRc /* IN/OUT: Error code */ -){ - if( *pRc==SQLITE_OK ){ - if( pExpr->eType==FTSQUERY_PHRASE ){ - Fts3Phrase *pPhrase = pExpr->pPhrase; - int i; - for(i=0; *pRc==SQLITE_OK && inToken; i++){ - Fts3TokenAndCost *pTC = (*ppTC)++; - pTC->pPhrase = pPhrase; - pTC->iToken = i; - pTC->pRoot = pRoot; - pTC->pToken = &pPhrase->aToken[i]; - pTC->iCol = pPhrase->iColumn; - *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl); - } - }else if( pExpr->eType!=FTSQUERY_NOT ){ - assert( pExpr->eType==FTSQUERY_OR - || pExpr->eType==FTSQUERY_AND - || pExpr->eType==FTSQUERY_NEAR - ); - assert( pExpr->pLeft && pExpr->pRight ); - if( pExpr->eType==FTSQUERY_OR ){ - pRoot = pExpr->pLeft; - **ppOr = pRoot; - (*ppOr)++; - } - fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc); - if( pExpr->eType==FTSQUERY_OR ){ - pRoot = pExpr->pRight; - **ppOr = pRoot; - (*ppOr)++; + /* The final Fts3Expr data structure, including the Fts3Phrase, + ** Fts3PhraseToken structures token buffers are all stored as a single + ** allocation so that the expression can be freed with a single call to + ** sqlite3_free(). Setting this up requires a two pass approach. + ** + ** The first pass, in the block below, uses a tokenizer cursor to iterate + ** through the tokens in the expression. This pass uses fts3ReallocOrFree() + ** to assemble data in two dynamic buffers: + ** + ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase + ** structure, followed by the array of Fts3PhraseToken + ** structures. This pass only populates the Fts3PhraseToken array. + ** + ** Buffer zTemp: Contains copies of all tokens. + ** + ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below, + ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase + ** structures. + */ + rc = sqlite3Fts3OpenTokenizer( + pTokenizer, pParse->iLangid, zInput, nInput, &pCursor); + if( rc==SQLITE_OK ){ + int ii; + for(ii=0; rc==SQLITE_OK; ii++){ + const char *zByte; + int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0; + rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos); + if( rc==SQLITE_OK ){ + Fts3PhraseToken *pToken; + + p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken)); + if( !p ) goto no_mem; + + zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte); + if( !zTemp ) goto no_mem; + + assert( nToken==ii ); + pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii]; + memset(pToken, 0, sizeof(Fts3PhraseToken)); + + memcpy(&zTemp[nTemp], zByte, nByte); + nTemp += nByte; + + pToken->n = nByte; + pToken->isPrefix = (iEndbFirst = (iBegin>0 && zInput[iBegin-1]=='^'); + nToken = ii+1; } - fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc); } + + pModule->xClose(pCursor); + pCursor = 0; } -} -/* -** Determine the average document (row) size in pages. If successful, -** write this value to *pnPage and return SQLITE_OK. Otherwise, return -** an SQLite error code. -** -** The average document size in pages is calculated by first calculating -** determining the average size in bytes, B. If B is less than the amount -** of data that will fit on a single leaf page of an intkey table in -** this database, then the average docsize is 1. Otherwise, it is 1 plus -** the number of overflow pages consumed by a record B bytes in size. -*/ -static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){ - if( pCsr->nRowAvg==0 ){ - /* The average document size, which is required to calculate the cost - ** of each doclist, has not yet been determined. Read the required - ** data from the %_stat table to calculate it. - ** - ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 - ** varints, where nCol is the number of columns in the FTS3 table. - ** The first varint is the number of documents currently stored in - ** the table. The following nCol varints contain the total amount of - ** data stored in all rows of each column of the table, from left - ** to right. - */ - int rc; - Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; - sqlite3_stmt *pStmt; - sqlite3_int64 nDoc = 0; - sqlite3_int64 nByte = 0; - const char *pEnd; - const char *a; + if( rc==SQLITE_DONE ){ + int jj; + char *zBuf = 0; - rc = sqlite3Fts3SelectDoctotal(p, &pStmt); - if( rc!=SQLITE_OK ) return rc; - a = sqlite3_column_blob(pStmt, 0); - assert( a ); + p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp); + if( !p ) goto no_mem; + memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p); + p->eType = FTSQUERY_PHRASE; + p->pPhrase = (Fts3Phrase *)&p[1]; + p->pPhrase->iColumn = pParse->iDefaultCol; + p->pPhrase->nToken = nToken; - pEnd = &a[sqlite3_column_bytes(pStmt, 0)]; - a += sqlite3Fts3GetVarint(a, &nDoc); - while( apPhrase->aToken[nToken]; + if( zTemp ){ + memcpy(zBuf, zTemp, nTemp); + sqlite3_free(zTemp); + }else{ + assert( nTemp==0 ); } - pCsr->nDoc = nDoc; - pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz); - assert( pCsr->nRowAvg>0 ); - rc = sqlite3_reset(pStmt); - if( rc!=SQLITE_OK ) return rc; + for(jj=0; jjpPhrase->nToken; jj++){ + p->pPhrase->aToken[jj].z = zBuf; + zBuf += p->pPhrase->aToken[jj].n; + } + rc = SQLITE_OK; } - *pnPage = pCsr->nRowAvg; - return SQLITE_OK; + *ppExpr = p; + return rc; +no_mem: + + if( pCursor ){ + pModule->xClose(pCursor); + } + sqlite3_free(zTemp); + sqlite3_free(p); + *ppExpr = 0; + return SQLITE_NOMEM; } /* -** This function is called to select the tokens (if any) that will be -** deferred. The array aTC[] has already been populated when this is -** called. -** -** This function is called once for each AND/NEAR cluster in the -** expression. Each invocation determines which tokens to defer within -** the cluster with root node pRoot. See comments above the definition -** of struct Fts3TokenAndCost for more details. +** The output variable *ppExpr is populated with an allocated Fts3Expr +** structure, or set to 0 if the end of the input buffer is reached. ** -** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken() -** called on each token to defer. Otherwise, an SQLite error code is -** returned. +** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM +** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered. +** If SQLITE_ERROR is returned, pContext is populated with an error message. */ -static int fts3EvalSelectDeferred( - Fts3Cursor *pCsr, /* FTS Cursor handle */ - Fts3Expr *pRoot, /* Consider tokens with this root node */ - Fts3TokenAndCost *aTC, /* Array of expression tokens and costs */ - int nTC /* Number of entries in aTC[] */ +static int getNextNode( + ParseContext *pParse, /* fts3 query parse context */ + const char *z, int n, /* Input string */ + Fts3Expr **ppExpr, /* OUT: expression */ + int *pnConsumed /* OUT: Number of bytes consumed */ ){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int nDocSize = 0; /* Number of pages per doc loaded */ - int rc = SQLITE_OK; /* Return code */ - int ii; /* Iterator variable for various purposes */ - int nOvfl = 0; /* Total overflow pages used by doclists */ - int nToken = 0; /* Total number of tokens in cluster */ + static const struct Fts3Keyword { + char *z; /* Keyword text */ + unsigned char n; /* Length of the keyword */ + unsigned char parenOnly; /* Only valid in paren mode */ + unsigned char eType; /* Keyword code */ + } aKeyword[] = { + { "OR" , 2, 0, FTSQUERY_OR }, + { "AND", 3, 1, FTSQUERY_AND }, + { "NOT", 3, 1, FTSQUERY_NOT }, + { "NEAR", 4, 0, FTSQUERY_NEAR } + }; + int ii; + int iCol; + int iColLen; + int rc; + Fts3Expr *pRet = 0; - int nMinEst = 0; /* The minimum count for any phrase so far. */ - int nLoad4 = 1; /* (Phrases that will be loaded)^4. */ + const char *zInput = z; + int nInput = n; - /* Tokens are never deferred for FTS tables created using the content=xxx - ** option. The reason being that it is not guaranteed that the content - ** table actually contains the same data as the index. To prevent this from - ** causing any problems, the deferred token optimization is completely - ** disabled for content=xxx tables. */ - if( pTab->zContentTbl ){ - return SQLITE_OK; - } + pParse->isNot = 0; - /* Count the tokens in this AND/NEAR cluster. If none of the doclists - ** associated with the tokens spill onto overflow pages, or if there is - ** only 1 token, exit early. No tokens to defer in this case. */ - for(ii=0; ii0 && fts3isspace(*zInput) ){ + nInput--; + zInput++; + } + if( nInput==0 ){ + return SQLITE_DONE; } - if( nOvfl==0 || nToken<2 ) return SQLITE_OK; - /* Obtain the average docsize (in pages). */ - rc = fts3EvalAverageDocsize(pCsr, &nDocSize); - assert( rc!=SQLITE_OK || nDocSize>0 ); + /* See if we are dealing with a keyword. */ + for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){ + const struct Fts3Keyword *pKey = &aKeyword[ii]; + if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){ + continue; + } - /* Iterate through all tokens in this AND/NEAR cluster, in ascending order - ** of the number of overflow pages that will be loaded by the pager layer - ** to retrieve the entire doclist for the token from the full-text index. - ** Load the doclists for tokens that are either: - ** - ** a. The cheapest token in the entire query (i.e. the one visited by the - ** first iteration of this loop), or - ** - ** b. Part of a multi-token phrase. - ** - ** After each token doclist is loaded, merge it with the others from the - ** same phrase and count the number of documents that the merged doclist - ** contains. Set variable "nMinEst" to the smallest number of documents in - ** any phrase doclist for which 1 or more token doclists have been loaded. - ** Let nOther be the number of other phrases for which it is certain that - ** one or more tokens will not be deferred. - ** - ** Then, for each token, defer it if loading the doclist would result in - ** loading N or more overflow pages into memory, where N is computed as: - ** - ** (nMinEst + 4^nOther - 1) / (4^nOther) - */ - for(ii=0; ii=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){ + int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM; + int nKey = pKey->n; + char cNext; - /* Set pTC to point to the cheapest remaining token. */ - for(iTC=0; iTCnOvfl) - ){ - pTC = &aTC[iTC]; + /* If this is a "NEAR" keyword, check for an explicit nearness. */ + if( pKey->eType==FTSQUERY_NEAR ){ + assert( nKey==4 ); + if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){ + nNear = 0; + for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){ + nNear = nNear * 10 + (zInput[nKey] - '0'); + } + } } - } - assert( pTC ); - if( ii && pTC->nOvfl>=((nMinEst+(nLoad4/4)-1)/(nLoad4/4))*nDocSize ){ - /* The number of overflow pages to load for this (and therefore all - ** subsequent) tokens is greater than the estimated number of pages - ** that will be loaded if all subsequent tokens are deferred. + /* At this point this is probably a keyword. But for that to be true, + ** the next byte must contain either whitespace, an open or close + ** parenthesis, a quote character, or EOF. */ - Fts3PhraseToken *pToken = pTC->pToken; - rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol); - fts3SegReaderCursorFree(pToken->pSegcsr); - pToken->pSegcsr = 0; - }else{ - /* Set nLoad4 to the value of (4^nOther) for the next iteration of the - ** for-loop. Except, limit the value to 2^24 to prevent it from - ** overflowing the 32-bit integer it is stored in. */ - if( ii<12 ) nLoad4 = nLoad4*4; - - if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){ - /* Either this is the cheapest token in the entire query, or it is - ** part of a multi-token phrase. Either way, the entire doclist will - ** (eventually) be loaded into memory. It may as well be now. */ - Fts3PhraseToken *pToken = pTC->pToken; - int nList = 0; - char *pList = 0; - rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList); - assert( rc==SQLITE_OK || pList==0 ); - if( rc==SQLITE_OK ){ - rc = fts3EvalPhraseMergeToken( - pTab, pTC->pPhrase, pTC->iToken,pList,nList - ); - } - if( rc==SQLITE_OK ){ - int nCount; - nCount = fts3DoclistCountDocids( - pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll - ); - if( ii==0 || nCounteType = pKey->eType; + pRet->nNear = nNear; + *ppExpr = pRet; + *pnConsumed = (int)((zInput - z) + nKey); + return SQLITE_OK; } + + /* Turns out that wasn't a keyword after all. This happens if the + ** user has supplied a token such as "ORacle". Continue. + */ } - pTC->pToken = 0; } - return rc; -} - -/* -** This function is called from within the xFilter method. It initializes -** the full-text query currently stored in pCsr->pExpr. To iterate through -** the results of a query, the caller does: -** -** fts3EvalStart(pCsr); -** while( 1 ){ -** fts3EvalNext(pCsr); -** if( pCsr->bEof ) break; -** ... return row pCsr->iPrevId to the caller ... -** } -*/ -static int fts3EvalStart(Fts3Cursor *pCsr){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int rc = SQLITE_OK; - int nToken = 0; - int nOr = 0; - - /* Allocate a MultiSegReader for each token in the expression. */ - fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc); - - /* Determine which, if any, tokens in the expression should be deferred. */ -#ifndef SQLITE_DISABLE_FTS4_DEFERRED - if( rc==SQLITE_OK && nToken>1 && pTab->bFts4 ){ - Fts3TokenAndCost *aTC; - Fts3Expr **apOr; - aTC = (Fts3TokenAndCost *)sqlite3_malloc( - sizeof(Fts3TokenAndCost) * nToken - + sizeof(Fts3Expr *) * nOr * 2 - ); - apOr = (Fts3Expr **)&aTC[nToken]; - - if( !aTC ){ - rc = SQLITE_NOMEM; - }else{ - int ii; - Fts3TokenAndCost *pTC = aTC; - Fts3Expr **ppOr = apOr; - - fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc); - nToken = (int)(pTC-aTC); - nOr = (int)(ppOr-apOr); - - if( rc==SQLITE_OK ){ - rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken); - for(ii=0; rc==SQLITE_OK && iinNest++; + rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed); + if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } + *pnConsumed = (int)(zInput - z) + 1 + nConsumed; + return rc; + }else if( *zInput==')' ){ + pParse->nNest--; + *pnConsumed = (int)((zInput - z) + 1); + *ppExpr = 0; + return SQLITE_DONE; } } -#endif - fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc); + /* If control flows to this point, this must be a regular token, or + ** the end of the input. Read a regular token using the sqlite3_tokenizer + ** interface. Before doing so, figure out if there is an explicit + ** column specifier for the token. + ** + ** TODO: Strangely, it is not possible to associate a column specifier + ** with a quoted phrase, only with a single token. Not sure if this was + ** an implementation artifact or an intentional decision when fts3 was + ** first implemented. Whichever it was, this module duplicates the + ** limitation. + */ + iCol = pParse->iDefaultCol; + iColLen = 0; + for(ii=0; iinCol; ii++){ + const char *zStr = pParse->azCol[ii]; + int nStr = (int)strlen(zStr); + if( nInput>nStr && zInput[nStr]==':' + && sqlite3_strnicmp(zStr, zInput, nStr)==0 + ){ + iCol = ii; + iColLen = (int)((zInput - z) + nStr + 1); + break; + } + } + rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed); + *pnConsumed += iColLen; return rc; } /* -** Invalidate the current position list for phrase pPhrase. +** The argument is an Fts3Expr structure for a binary operator (any type +** except an FTSQUERY_PHRASE). Return an integer value representing the +** precedence of the operator. Lower values have a higher precedence (i.e. +** group more tightly). For example, in the C language, the == operator +** groups more tightly than ||, and would therefore have a higher precedence. +** +** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS +** is defined), the order of the operators in precedence from highest to +** lowest is: +** +** NEAR +** NOT +** AND (including implicit ANDs) +** OR +** +** Note that when using the old query syntax, the OR operator has a higher +** precedence than the AND operator. */ -static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){ - if( pPhrase->doclist.bFreeList ){ - sqlite3_free(pPhrase->doclist.pList); +static int opPrecedence(Fts3Expr *p){ + assert( p->eType!=FTSQUERY_PHRASE ); + if( sqlite3_fts3_enable_parentheses ){ + return p->eType; + }else if( p->eType==FTSQUERY_NEAR ){ + return 1; + }else if( p->eType==FTSQUERY_OR ){ + return 2; } - pPhrase->doclist.pList = 0; - pPhrase->doclist.nList = 0; - pPhrase->doclist.bFreeList = 0; + assert( p->eType==FTSQUERY_AND ); + return 3; } /* -** This function is called to edit the position list associated with -** the phrase object passed as the fifth argument according to a NEAR -** condition. For example: -** -** abc NEAR/5 "def ghi" -** -** Parameter nNear is passed the NEAR distance of the expression (5 in -** the example above). When this function is called, *paPoslist points to -** the position list, and *pnToken is the number of phrase tokens in, the -** phrase on the other side of the NEAR operator to pPhrase. For example, -** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to -** the position list associated with phrase "abc". -** -** All positions in the pPhrase position list that are not sufficiently -** close to a position in the *paPoslist position list are removed. If this -** leaves 0 positions, zero is returned. Otherwise, non-zero. -** -** Before returning, *paPoslist is set to point to the position lsit -** associated with pPhrase. And *pnToken is set to the number of tokens in -** pPhrase. +** Argument ppHead contains a pointer to the current head of a query +** expression tree being parsed. pPrev is the expression node most recently +** inserted into the tree. This function adds pNew, which is always a binary +** operator node, into the expression tree based on the relative precedence +** of pNew and the existing nodes of the tree. This may result in the head +** of the tree changing, in which case *ppHead is set to the new root node. */ -static int fts3EvalNearTrim( - int nNear, /* NEAR distance. As in "NEAR/nNear". */ - char *aTmp, /* Temporary space to use */ - char **paPoslist, /* IN/OUT: Position list */ - int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */ - Fts3Phrase *pPhrase /* The phrase object to trim the doclist of */ +static void insertBinaryOperator( + Fts3Expr **ppHead, /* Pointer to the root node of a tree */ + Fts3Expr *pPrev, /* Node most recently inserted into the tree */ + Fts3Expr *pNew /* New binary node to insert into expression tree */ ){ - int nParam1 = nNear + pPhrase->nToken; - int nParam2 = nNear + *pnToken; - int nNew; - char *p2; - char *pOut; - int res; - - assert( pPhrase->doclist.pList ); - - p2 = pOut = pPhrase->doclist.pList; - res = fts3PoslistNearMerge( - &pOut, aTmp, nParam1, nParam2, paPoslist, &p2 - ); - if( res ){ - nNew = (int)(pOut - pPhrase->doclist.pList) - 1; - assert( pPhrase->doclist.pList[nNew]=='\0' ); - assert( nNew<=pPhrase->doclist.nList && nNew>0 ); - memset(&pPhrase->doclist.pList[nNew], 0, pPhrase->doclist.nList - nNew); - pPhrase->doclist.nList = nNew; - *paPoslist = pPhrase->doclist.pList; - *pnToken = pPhrase->nToken; + Fts3Expr *pSplit = pPrev; + while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){ + pSplit = pSplit->pParent; } - return res; + if( pSplit->pParent ){ + assert( pSplit->pParent->pRight==pSplit ); + pSplit->pParent->pRight = pNew; + pNew->pParent = pSplit->pParent; + }else{ + *ppHead = pNew; + } + pNew->pLeft = pSplit; + pSplit->pParent = pNew; } /* -** This function is a no-op if *pRc is other than SQLITE_OK when it is called. -** Otherwise, it advances the expression passed as the second argument to -** point to the next matching row in the database. Expressions iterate through -** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero, -** or descending if it is non-zero. -** -** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if -** successful, the following variables in pExpr are set: -** -** Fts3Expr.bEof (non-zero if EOF - there is no next row) -** Fts3Expr.iDocid (valid if bEof==0. The docid of the next row) -** -** If the expression is of type FTSQUERY_PHRASE, and the expression is not -** at EOF, then the following variables are populated with the position list -** for the phrase for the visited row: -** -** FTs3Expr.pPhrase->doclist.nList (length of pList in bytes) -** FTs3Expr.pPhrase->doclist.pList (pointer to position list) -** -** It says above that this function advances the expression to the next -** matching row. This is usually true, but there are the following exceptions: -** -** 1. Deferred tokens are not taken into account. If a phrase consists -** entirely of deferred tokens, it is assumed to match every row in -** the db. In this case the position-list is not populated at all. -** -** Or, if a phrase contains one or more deferred tokens and one or -** more non-deferred tokens, then the expression is advanced to the -** next possible match, considering only non-deferred tokens. In other -** words, if the phrase is "A B C", and "B" is deferred, the expression -** is advanced to the next row that contains an instance of "A * C", -** where "*" may match any single token. The position list in this case -** is populated as for "A * C" before returning. +** Parse the fts3 query expression found in buffer z, length n. This function +** returns either when the end of the buffer is reached or an unmatched +** closing bracket - ')' - is encountered. ** -** 2. NEAR is treated as AND. If the expression is "x NEAR y", it is -** advanced to point to the next row that matches "x AND y". -** -** See fts3EvalTestDeferredAndNear() for details on testing if a row is -** really a match, taking into account deferred tokens and NEAR operators. +** If successful, SQLITE_OK is returned, *ppExpr is set to point to the +** parsed form of the expression and *pnConsumed is set to the number of +** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM +** (out of memory error) or SQLITE_ERROR (parse error) is returned. */ -static void fts3EvalNextRow( - Fts3Cursor *pCsr, /* FTS Cursor handle */ - Fts3Expr *pExpr, /* Expr. to advance to next matching row */ - int *pRc /* IN/OUT: Error code */ +static int fts3ExprParse( + ParseContext *pParse, /* fts3 query parse context */ + const char *z, int n, /* Text of MATCH query */ + Fts3Expr **ppExpr, /* OUT: Parsed query structure */ + int *pnConsumed /* OUT: Number of bytes consumed */ ){ - if( *pRc==SQLITE_OK ){ - int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */ - assert( pExpr->bEof==0 ); - pExpr->bStart = 1; + Fts3Expr *pRet = 0; + Fts3Expr *pPrev = 0; + Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */ + int nIn = n; + const char *zIn = z; + int rc = SQLITE_OK; + int isRequirePhrase = 1; - switch( pExpr->eType ){ - case FTSQUERY_NEAR: - case FTSQUERY_AND: { - Fts3Expr *pLeft = pExpr->pLeft; - Fts3Expr *pRight = pExpr->pRight; - assert( !pLeft->bDeferred || !pRight->bDeferred ); + while( rc==SQLITE_OK ){ + Fts3Expr *p = 0; + int nByte = 0; - if( pLeft->bDeferred ){ - /* LHS is entirely deferred. So we assume it matches every row. - ** Advance the RHS iterator to find the next row visited. */ - fts3EvalNextRow(pCsr, pRight, pRc); - pExpr->iDocid = pRight->iDocid; - pExpr->bEof = pRight->bEof; - }else if( pRight->bDeferred ){ - /* RHS is entirely deferred. So we assume it matches every row. - ** Advance the LHS iterator to find the next row visited. */ - fts3EvalNextRow(pCsr, pLeft, pRc); - pExpr->iDocid = pLeft->iDocid; - pExpr->bEof = pLeft->bEof; + rc = getNextNode(pParse, zIn, nIn, &p, &nByte); + assert( nByte>0 || (rc!=SQLITE_OK && p==0) ); + if( rc==SQLITE_OK ){ + if( p ){ + int isPhrase; + + if( !sqlite3_fts3_enable_parentheses + && p->eType==FTSQUERY_PHRASE && pParse->isNot + ){ + /* Create an implicit NOT operator. */ + Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); + if( !pNot ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_NOMEM; + goto exprparse_out; + } + pNot->eType = FTSQUERY_NOT; + pNot->pRight = p; + p->pParent = pNot; + if( pNotBranch ){ + pNot->pLeft = pNotBranch; + pNotBranch->pParent = pNot; + } + pNotBranch = pNot; + p = pPrev; }else{ - /* Neither the RHS or LHS are deferred. */ - fts3EvalNextRow(pCsr, pLeft, pRc); - fts3EvalNextRow(pCsr, pRight, pRc); - while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){ - sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid); - if( iDiff==0 ) break; - if( iDiff<0 ){ - fts3EvalNextRow(pCsr, pLeft, pRc); - }else{ - fts3EvalNextRow(pCsr, pRight, pRc); - } + int eType = p->eType; + isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); + + /* The isRequirePhrase variable is set to true if a phrase or + ** an expression contained in parenthesis is required. If a + ** binary operator (AND, OR, NOT or NEAR) is encounted when + ** isRequirePhrase is set, this is a syntax error. + */ + if( !isPhrase && isRequirePhrase ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_ERROR; + goto exprparse_out; } - pExpr->iDocid = pLeft->iDocid; - pExpr->bEof = (pLeft->bEof || pRight->bEof); - if( pExpr->eType==FTSQUERY_NEAR && pExpr->bEof ){ - if( pRight->pPhrase && pRight->pPhrase->doclist.aAll ){ - Fts3Doclist *pDl = &pRight->pPhrase->doclist; - while( *pRc==SQLITE_OK && pRight->bEof==0 ){ - memset(pDl->pList, 0, pDl->nList); - fts3EvalNextRow(pCsr, pRight, pRc); - } - } - if( pLeft->pPhrase && pLeft->pPhrase->doclist.aAll ){ - Fts3Doclist *pDl = &pLeft->pPhrase->doclist; - while( *pRc==SQLITE_OK && pLeft->bEof==0 ){ - memset(pDl->pList, 0, pDl->nList); - fts3EvalNextRow(pCsr, pLeft, pRc); - } + + if( isPhrase && !isRequirePhrase ){ + /* Insert an implicit AND operator. */ + Fts3Expr *pAnd; + assert( pRet && pPrev ); + pAnd = fts3MallocZero(sizeof(Fts3Expr)); + if( !pAnd ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_NOMEM; + goto exprparse_out; } + pAnd->eType = FTSQUERY_AND; + insertBinaryOperator(&pRet, pPrev, pAnd); + pPrev = pAnd; } - } - break; - } - - case FTSQUERY_OR: { - Fts3Expr *pLeft = pExpr->pLeft; - Fts3Expr *pRight = pExpr->pRight; - sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); - - assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid ); - assert( pRight->bStart || pLeft->iDocid==pRight->iDocid ); - if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ - fts3EvalNextRow(pCsr, pLeft, pRc); - }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){ - fts3EvalNextRow(pCsr, pRight, pRc); - }else{ - fts3EvalNextRow(pCsr, pLeft, pRc); - fts3EvalNextRow(pCsr, pRight, pRc); - } + /* This test catches attempts to make either operand of a NEAR + ** operator something other than a phrase. For example, either of + ** the following: + ** + ** (bracketed expression) NEAR phrase + ** phrase NEAR (bracketed expression) + ** + ** Return an error in either case. + */ + if( pPrev && ( + (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) + || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) + )){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_ERROR; + goto exprparse_out; + } - pExpr->bEof = (pLeft->bEof && pRight->bEof); - iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); - if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ - pExpr->iDocid = pLeft->iDocid; - }else{ - pExpr->iDocid = pRight->iDocid; + if( isPhrase ){ + if( pRet ){ + assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); + pPrev->pRight = p; + p->pParent = pPrev; + }else{ + pRet = p; + } + }else{ + insertBinaryOperator(&pRet, pPrev, p); + } + isRequirePhrase = !isPhrase; } - - break; + pPrev = p; } + assert( nByte>0 ); + } + assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); + nIn -= nByte; + zIn += nByte; + } - case FTSQUERY_NOT: { - Fts3Expr *pLeft = pExpr->pLeft; - Fts3Expr *pRight = pExpr->pRight; - - if( pRight->bStart==0 ){ - fts3EvalNextRow(pCsr, pRight, pRc); - assert( *pRc!=SQLITE_OK || pRight->bStart ); - } + if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ + rc = SQLITE_ERROR; + } - fts3EvalNextRow(pCsr, pLeft, pRc); - if( pLeft->bEof==0 ){ - while( !*pRc - && !pRight->bEof - && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 - ){ - fts3EvalNextRow(pCsr, pRight, pRc); - } + if( rc==SQLITE_DONE ){ + rc = SQLITE_OK; + if( !sqlite3_fts3_enable_parentheses && pNotBranch ){ + if( !pRet ){ + rc = SQLITE_ERROR; + }else{ + Fts3Expr *pIter = pNotBranch; + while( pIter->pLeft ){ + pIter = pIter->pLeft; } - pExpr->iDocid = pLeft->iDocid; - pExpr->bEof = pLeft->bEof; - break; + pIter->pLeft = pRet; + pRet->pParent = pIter; + pRet = pNotBranch; } + } + } + *pnConsumed = n - nIn; - default: { - Fts3Phrase *pPhrase = pExpr->pPhrase; - fts3EvalInvalidatePoslist(pPhrase); - *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof); - pExpr->iDocid = pPhrase->doclist.iDocid; - break; +exprparse_out: + if( rc!=SQLITE_OK ){ + sqlite3Fts3ExprFree(pRet); + sqlite3Fts3ExprFree(pNotBranch); + pRet = 0; + } + *ppExpr = pRet; + return rc; +} + +/* +** Return SQLITE_ERROR if the maximum depth of the expression tree passed +** as the only argument is more than nMaxDepth. +*/ +static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){ + int rc = SQLITE_OK; + if( p ){ + if( nMaxDepth<0 ){ + rc = SQLITE_TOOBIG; + }else{ + rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1); + if( rc==SQLITE_OK ){ + rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1); } } } + return rc; } /* -** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR -** cluster, then this function returns 1 immediately. +** This function attempts to transform the expression tree at (*pp) to +** an equivalent but more balanced form. The tree is modified in place. +** If successful, SQLITE_OK is returned and (*pp) set to point to the +** new root expression node. ** -** Otherwise, it checks if the current row really does match the NEAR -** expression, using the data currently stored in the position lists -** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression. +** nMaxDepth is the maximum allowable depth of the balanced sub-tree. ** -** If the current row is a match, the position list associated with each -** phrase in the NEAR expression is edited in place to contain only those -** phrase instances sufficiently close to their peers to satisfy all NEAR -** constraints. In this case it returns 1. If the NEAR expression does not -** match the current row, 0 is returned. The position lists may or may not -** be edited if 0 is returned. +** Otherwise, if an error occurs, an SQLite error code is returned and +** expression (*pp) freed. */ -static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ - int res = 1; +static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ + int rc = SQLITE_OK; /* Return code */ + Fts3Expr *pRoot = *pp; /* Initial root node */ + Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */ + int eType = pRoot->eType; /* Type of node in this tree */ - /* The following block runs if pExpr is the root of a NEAR query. - ** For example, the query: - ** - ** "w" NEAR "x" NEAR "y" NEAR "z" - ** - ** which is represented in tree form as: - ** - ** | - ** +--NEAR--+ <-- root of NEAR query - ** | | - ** +--NEAR--+ "z" - ** | | - ** +--NEAR--+ "y" - ** | | - ** "w" "x" - ** - ** The right-hand child of a NEAR node is always a phrase. The - ** left-hand child may be either a phrase or a NEAR node. There are - ** no exceptions to this - it's the way the parser in fts3_expr.c works. - */ - if( *pRc==SQLITE_OK - && pExpr->eType==FTSQUERY_NEAR - && pExpr->bEof==0 - && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) - ){ - Fts3Expr *p; - int nTmp = 0; /* Bytes of temp space */ - char *aTmp; /* Temp space for PoslistNearMerge() */ + if( nMaxDepth==0 ){ + rc = SQLITE_ERROR; + } - /* Allocate temporary working space. */ - for(p=pExpr; p->pLeft; p=p->pLeft){ - nTmp += p->pRight->pPhrase->doclist.nList; - } - nTmp += p->pPhrase->doclist.nList; - if( nTmp==0 ){ - res = 0; + if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ + Fts3Expr **apLeaf; + apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); + if( 0==apLeaf ){ + rc = SQLITE_NOMEM; }else{ - aTmp = sqlite3_malloc(nTmp*2); - if( !aTmp ){ - *pRc = SQLITE_NOMEM; - res = 0; - }else{ - char *aPoslist = p->pPhrase->doclist.pList; - int nToken = p->pPhrase->nToken; + memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); + } - for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){ - Fts3Phrase *pPhrase = p->pRight->pPhrase; - int nNear = p->nNear; - res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); - } + if( rc==SQLITE_OK ){ + int i; + Fts3Expr *p; - aPoslist = pExpr->pRight->pPhrase->doclist.pList; - nToken = pExpr->pRight->pPhrase->nToken; - for(p=pExpr->pLeft; p && res; p=p->pLeft){ - int nNear; - Fts3Phrase *pPhrase; - assert( p->pParent && p->pParent->pLeft==p ); - nNear = p->pParent->nNear; - pPhrase = ( - p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase - ); - res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); - } + /* Set $p to point to the left-most leaf in the tree of eType nodes. */ + for(p=pRoot; p->eType==eType; p=p->pLeft){ + assert( p->pParent==0 || p->pParent->pLeft==p ); + assert( p->pLeft && p->pRight ); } - sqlite3_free(aTmp); - } - } + /* This loop runs once for each leaf in the tree of eType nodes. */ + while( 1 ){ + int iLvl; + Fts3Expr *pParent = p->pParent; /* Current parent of p */ - return res; -} + assert( pParent==0 || pParent->pLeft==p ); + p->pParent = 0; + if( pParent ){ + pParent->pLeft = 0; + }else{ + pRoot = 0; + } + rc = fts3ExprBalance(&p, nMaxDepth-1); + if( rc!=SQLITE_OK ) break; -/* -** This function is a helper function for fts3EvalTestDeferredAndNear(). -** Assuming no error occurs or has occurred, It returns non-zero if the -** expression passed as the second argument matches the row that pCsr -** currently points to, or zero if it does not. -** -** If *pRc is not SQLITE_OK when this function is called, it is a no-op. -** If an error occurs during execution of this function, *pRc is set to -** the appropriate SQLite error code. In this case the returned value is -** undefined. -*/ -static int fts3EvalTestExpr( - Fts3Cursor *pCsr, /* FTS cursor handle */ - Fts3Expr *pExpr, /* Expr to test. May or may not be root. */ - int *pRc /* IN/OUT: Error code */ -){ - int bHit = 1; /* Return value */ - if( *pRc==SQLITE_OK ){ - switch( pExpr->eType ){ - case FTSQUERY_NEAR: - case FTSQUERY_AND: - bHit = ( - fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc) - && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc) - && fts3EvalNearTest(pExpr, pRc) - ); + for(iLvl=0; p && iLvlpLeft = apLeaf[iLvl]; + pFree->pRight = p; + pFree->pLeft->pParent = pFree; + pFree->pRight->pParent = pFree; - /* If the NEAR expression does not match any rows, zero the doclist for - ** all phrases involved in the NEAR. This is because the snippet(), - ** offsets() and matchinfo() functions are not supposed to recognize - ** any instances of phrases that are part of unmatched NEAR queries. - ** For example if this expression: - ** - ** ... MATCH 'a OR (b NEAR c)' - ** - ** is matched against a row containing: - ** - ** 'a b d e' - ** - ** then any snippet() should ony highlight the "a" term, not the "b" - ** (as "b" is part of a non-matching NEAR clause). - */ - if( bHit==0 - && pExpr->eType==FTSQUERY_NEAR - && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) - ){ - Fts3Expr *p; - for(p=pExpr; p->pPhrase==0; p=p->pLeft){ - if( p->pRight->iDocid==pCsr->iPrevId ){ - fts3EvalInvalidatePoslist(p->pRight->pPhrase); - } - } - if( p->iDocid==pCsr->iPrevId ){ - fts3EvalInvalidatePoslist(p->pPhrase); + p = pFree; + pFree = pFree->pParent; + p->pParent = 0; + apLeaf[iLvl] = 0; } } + if( p ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_TOOBIG; + break; + } - break; + /* If that was the last leaf node, break out of the loop */ + if( pParent==0 ) break; - case FTSQUERY_OR: { - int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc); - int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc); - bHit = bHit1 || bHit2; - break; + /* Set $p to point to the next leaf in the tree of eType nodes */ + for(p=pParent->pRight; p->eType==eType; p=p->pLeft); + + /* Remove pParent from the original tree. */ + assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); + pParent->pRight->pParent = pParent->pParent; + if( pParent->pParent ){ + pParent->pParent->pLeft = pParent->pRight; + }else{ + assert( pParent==pRoot ); + pRoot = pParent->pRight; + } + + /* Link pParent into the free node list. It will be used as an + ** internal node of the new tree. */ + pParent->pParent = pFree; + pFree = pParent; } - case FTSQUERY_NOT: - bHit = ( - fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc) - && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc) - ); - break; + if( rc==SQLITE_OK ){ + p = 0; + for(i=0; ipParent = 0; + }else{ + assert( pFree!=0 ); + pFree->pRight = p; + pFree->pLeft = apLeaf[i]; + pFree->pLeft->pParent = pFree; + pFree->pRight->pParent = pFree; - default: { -#ifndef SQLITE_DISABLE_FTS4_DEFERRED - if( pCsr->pDeferred - && (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred) - ){ - Fts3Phrase *pPhrase = pExpr->pPhrase; - assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 ); - if( pExpr->bDeferred ){ - fts3EvalInvalidatePoslist(pPhrase); + p = pFree; + pFree = pFree->pParent; + p->pParent = 0; + } } - *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase); - bHit = (pPhrase->doclist.pList!=0); - pExpr->iDocid = pCsr->iPrevId; - }else -#endif - { - bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId); } - break; + pRoot = p; + }else{ + /* An error occurred. Delete the contents of the apLeaf[] array + ** and pFree list. Everything else is cleaned up by the call to + ** sqlite3Fts3ExprFree(pRoot) below. */ + Fts3Expr *pDel; + for(i=0; ipParent; + sqlite3_free(pDel); + } } + + assert( pFree==0 ); + sqlite3_free( apLeaf ); } } - return bHit; + + if( rc!=SQLITE_OK ){ + sqlite3Fts3ExprFree(pRoot); + pRoot = 0; + } + *pp = pRoot; + return rc; } /* -** This function is called as the second part of each xNext operation when -** iterating through the results of a full-text query. At this point the -** cursor points to a row that matches the query expression, with the -** following caveats: -** -** * Up until this point, "NEAR" operators in the expression have been -** treated as "AND". -** -** * Deferred tokens have not yet been considered. -** -** If *pRc is not SQLITE_OK when this function is called, it immediately -** returns 0. Otherwise, it tests whether or not after considering NEAR -** operators and deferred tokens the current row is still a match for the -** expression. It returns 1 if both of the following are true: -** -** 1. *pRc is SQLITE_OK when this function returns, and -** -** 2. After scanning the current FTS table row for the deferred tokens, -** it is determined that the row does *not* match the query. +** This function is similar to sqlite3Fts3ExprParse(), with the following +** differences: ** -** Or, if no error occurs and it seems the current row does match the FTS -** query, return 0. +** 1. It does not do expression rebalancing. +** 2. It does not check that the expression does not exceed the +** maximum allowable depth. +** 3. Even if it fails, *ppExpr may still be set to point to an +** expression tree. It should be deleted using sqlite3Fts3ExprFree() +** in this case. */ -static int fts3EvalTestDeferredAndNear(Fts3Cursor *pCsr, int *pRc){ - int rc = *pRc; - int bMiss = 0; - if( rc==SQLITE_OK ){ +static int fts3ExprParseUnbalanced( + sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ + int iLangid, /* Language id for tokenizer */ + char **azCol, /* Array of column names for fts3 table */ + int bFts4, /* True to allow FTS4-only syntax */ + int nCol, /* Number of entries in azCol[] */ + int iDefaultCol, /* Default column to query */ + const char *z, int n, /* Text of MATCH query */ + Fts3Expr **ppExpr /* OUT: Parsed query structure */ +){ + int nParsed; + int rc; + ParseContext sParse; - /* If there are one or more deferred tokens, load the current row into - ** memory and scan it to determine the position list for each deferred - ** token. Then, see if this row is really a match, considering deferred - ** tokens and NEAR operators (neither of which were taken into account - ** earlier, by fts3EvalNextRow()). - */ - if( pCsr->pDeferred ){ - rc = fts3CursorSeek(0, pCsr); - if( rc==SQLITE_OK ){ - rc = sqlite3Fts3CacheDeferredDoclists(pCsr); - } - } - bMiss = (0==fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc)); + memset(&sParse, 0, sizeof(ParseContext)); + sParse.pTokenizer = pTokenizer; + sParse.iLangid = iLangid; + sParse.azCol = (const char **)azCol; + sParse.nCol = nCol; + sParse.iDefaultCol = iDefaultCol; + sParse.bFts4 = bFts4; + if( z==0 ){ + *ppExpr = 0; + return SQLITE_OK; + } + if( n<0 ){ + n = (int)strlen(z); + } + rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); + assert( rc==SQLITE_OK || *ppExpr==0 ); - /* Free the position-lists accumulated for each deferred token above. */ - sqlite3Fts3FreeDeferredDoclists(pCsr); - *pRc = rc; + /* Check for mismatched parenthesis */ + if( rc==SQLITE_OK && sParse.nNest ){ + rc = SQLITE_ERROR; } - return (rc==SQLITE_OK && bMiss); + + return rc; } /* -** Advance to the next document that matches the FTS expression in -** Fts3Cursor.pExpr. +** Parameters z and n contain a pointer to and length of a buffer containing +** an fts3 query expression, respectively. This function attempts to parse the +** query expression and create a tree of Fts3Expr structures representing the +** parsed expression. If successful, *ppExpr is set to point to the head +** of the parsed expression tree and SQLITE_OK is returned. If an error +** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse +** error) is returned and *ppExpr is set to 0. +** +** If parameter n is a negative number, then z is assumed to point to a +** nul-terminated string and the length is determined using strlen(). +** +** The first parameter, pTokenizer, is passed the fts3 tokenizer module to +** use to normalize query tokens while parsing the expression. The azCol[] +** array, which is assumed to contain nCol entries, should contain the names +** of each column in the target fts3 table, in order from left to right. +** Column names must be nul-terminated strings. +** +** The iDefaultCol parameter should be passed the index of the table column +** that appears on the left-hand-side of the MATCH operator (the default +** column to match against for tokens for which a column name is not explicitly +** specified as part of the query string), or -1 if tokens may by default +** match any table column. */ -static int fts3EvalNext(Fts3Cursor *pCsr){ - int rc = SQLITE_OK; /* Return Code */ - Fts3Expr *pExpr = pCsr->pExpr; - assert( pCsr->isEof==0 ); - if( pExpr==0 ){ - pCsr->isEof = 1; - }else{ - do { - if( pCsr->isRequireSeek==0 ){ - sqlite3_reset(pCsr->pStmt); - } - assert( sqlite3_data_count(pCsr->pStmt)==0 ); - fts3EvalNextRow(pCsr, pExpr, &rc); - pCsr->isEof = pExpr->bEof; - pCsr->isRequireSeek = 1; - pCsr->isMatchinfoNeeded = 1; - pCsr->iPrevId = pExpr->iDocid; - }while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) ); +SQLITE_PRIVATE int sqlite3Fts3ExprParse( + sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ + int iLangid, /* Language id for tokenizer */ + char **azCol, /* Array of column names for fts3 table */ + int bFts4, /* True to allow FTS4-only syntax */ + int nCol, /* Number of entries in azCol[] */ + int iDefaultCol, /* Default column to query */ + const char *z, int n, /* Text of MATCH query */ + Fts3Expr **ppExpr, /* OUT: Parsed query structure */ + char **pzErr /* OUT: Error message (sqlite3_malloc) */ +){ + int rc = fts3ExprParseUnbalanced( + pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr + ); + + /* Rebalance the expression. And check that its depth does not exceed + ** SQLITE_FTS3_MAX_EXPR_DEPTH. */ + if( rc==SQLITE_OK && *ppExpr ){ + rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); + if( rc==SQLITE_OK ){ + rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); + } } - /* Check if the cursor is past the end of the docid range specified - ** by Fts3Cursor.iMinDocid/iMaxDocid. If so, set the EOF flag. */ - if( rc==SQLITE_OK && ( - (pCsr->bDesc==0 && pCsr->iPrevId>pCsr->iMaxDocid) - || (pCsr->bDesc!=0 && pCsr->iPrevIdiMinDocid) - )){ - pCsr->isEof = 1; + if( rc!=SQLITE_OK ){ + sqlite3Fts3ExprFree(*ppExpr); + *ppExpr = 0; + if( rc==SQLITE_TOOBIG ){ + sqlite3Fts3ErrMsg(pzErr, + "FTS expression tree is too large (maximum depth %d)", + SQLITE_FTS3_MAX_EXPR_DEPTH + ); + rc = SQLITE_ERROR; + }else if( rc==SQLITE_ERROR ){ + sqlite3Fts3ErrMsg(pzErr, "malformed MATCH expression: [%s]", z); + } } return rc; } /* -** Restart interation for expression pExpr so that the next call to -** fts3EvalNext() visits the first row. Do not allow incremental -** loading or merging of phrase doclists for this iteration. -** -** If *pRc is other than SQLITE_OK when this function is called, it is -** a no-op. If an error occurs within this function, *pRc is set to an -** SQLite error code before returning. +** Free a single node of an expression tree. */ -static void fts3EvalRestart( - Fts3Cursor *pCsr, - Fts3Expr *pExpr, - int *pRc -){ - if( pExpr && *pRc==SQLITE_OK ){ - Fts3Phrase *pPhrase = pExpr->pPhrase; +static void fts3FreeExprNode(Fts3Expr *p){ + assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); + sqlite3Fts3EvalPhraseCleanup(p->pPhrase); + sqlite3_free(p->aMI); + sqlite3_free(p); +} - if( pPhrase ){ - fts3EvalInvalidatePoslist(pPhrase); - if( pPhrase->bIncr ){ - int i; - for(i=0; inToken; i++){ - Fts3PhraseToken *pToken = &pPhrase->aToken[i]; - assert( pToken->pDeferred==0 ); - if( pToken->pSegcsr ){ - sqlite3Fts3MsrIncrRestart(pToken->pSegcsr); - } - } - *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase); +/* +** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). +** +** This function would be simpler if it recursively called itself. But +** that would mean passing a sufficiently large expression to ExprParse() +** could cause a stack overflow. +*/ +SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *pDel){ + Fts3Expr *p; + assert( pDel==0 || pDel->pParent==0 ); + for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ + assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); + } + while( p ){ + Fts3Expr *pParent = p->pParent; + fts3FreeExprNode(p); + if( pParent && p==pParent->pLeft && pParent->pRight ){ + p = pParent->pRight; + while( p && (p->pLeft || p->pRight) ){ + assert( p==p->pParent->pRight || p==p->pParent->pLeft ); + p = (p->pLeft ? p->pLeft : p->pRight); } - pPhrase->doclist.pNextDocid = 0; - pPhrase->doclist.iDocid = 0; - pPhrase->pOrPoslist = 0; + }else{ + p = pParent; } - - pExpr->iDocid = 0; - pExpr->bEof = 0; - pExpr->bStart = 0; - - fts3EvalRestart(pCsr, pExpr->pLeft, pRc); - fts3EvalRestart(pCsr, pExpr->pRight, pRc); } } +/**************************************************************************** +***************************************************************************** +** Everything after this point is just test code. +*/ + +#ifdef SQLITE_TEST + +/* #include */ + /* -** After allocating the Fts3Expr.aMI[] array for each phrase in the -** expression rooted at pExpr, the cursor iterates through all rows matched -** by pExpr, calling this function for each row. This function increments -** the values in Fts3Expr.aMI[] according to the position-list currently -** found in Fts3Expr.pPhrase->doclist.pList for each of the phrase -** expression nodes. +** Function to query the hash-table of tokenizers (see README.tokenizers). */ -static void fts3EvalUpdateCounts(Fts3Expr *pExpr){ - if( pExpr ){ - Fts3Phrase *pPhrase = pExpr->pPhrase; - if( pPhrase && pPhrase->doclist.pList ){ - int iCol = 0; - char *p = pPhrase->doclist.pList; +static int queryTestTokenizer( + sqlite3 *db, + const char *zName, + const sqlite3_tokenizer_module **pp +){ + int rc; + sqlite3_stmt *pStmt; + const char zSql[] = "SELECT fts3_tokenizer(?)"; - assert( *p ); - while( 1 ){ - u8 c = 0; - int iCnt = 0; - while( 0xFE & (*p | c) ){ - if( (c&0x80)==0 ) iCnt++; - c = *p++ & 0x80; - } + *pp = 0; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc!=SQLITE_OK ){ + return rc; + } - /* aMI[iCol*3 + 1] = Number of occurrences - ** aMI[iCol*3 + 2] = Number of rows containing at least one instance - */ - pExpr->aMI[iCol*3 + 1] += iCnt; - pExpr->aMI[iCol*3 + 2] += (iCnt>0); - if( *p==0x00 ) break; - p++; - p += fts3GetVarint32(p, &iCol); + sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ + memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); + } + } + + return sqlite3_finalize(pStmt); +} + +/* +** Return a pointer to a buffer containing a text representation of the +** expression passed as the first argument. The buffer is obtained from +** sqlite3_malloc(). It is the responsibility of the caller to use +** sqlite3_free() to release the memory. If an OOM condition is encountered, +** NULL is returned. +** +** If the second argument is not NULL, then its contents are prepended to +** the returned expression text and then freed using sqlite3_free(). +*/ +static char *exprToString(Fts3Expr *pExpr, char *zBuf){ + if( pExpr==0 ){ + return sqlite3_mprintf(""); + } + switch( pExpr->eType ){ + case FTSQUERY_PHRASE: { + Fts3Phrase *pPhrase = pExpr->pPhrase; + int i; + zBuf = sqlite3_mprintf( + "%zPHRASE %d 0", zBuf, pPhrase->iColumn); + for(i=0; zBuf && inToken; i++){ + zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, + pPhrase->aToken[i].n, pPhrase->aToken[i].z, + (pPhrase->aToken[i].isPrefix?"+":"") + ); } + return zBuf; } - fts3EvalUpdateCounts(pExpr->pLeft); - fts3EvalUpdateCounts(pExpr->pRight); + case FTSQUERY_NEAR: + zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear); + break; + case FTSQUERY_NOT: + zBuf = sqlite3_mprintf("%zNOT ", zBuf); + break; + case FTSQUERY_AND: + zBuf = sqlite3_mprintf("%zAND ", zBuf); + break; + case FTSQUERY_OR: + zBuf = sqlite3_mprintf("%zOR ", zBuf); + break; } + + if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf); + if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf); + if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf); + + if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf); + if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf); + + return zBuf; } /* -** Expression pExpr must be of type FTSQUERY_PHRASE. +** This is the implementation of a scalar SQL function used to test the +** expression parser. It should be called as follows: ** -** If it is not already allocated and populated, this function allocates and -** populates the Fts3Expr.aMI[] array for expression pExpr. If pExpr is part -** of a NEAR expression, then it also allocates and populates the same array -** for all other phrases that are part of the NEAR expression. +** fts3_exprtest(, , , ...); ** -** SQLITE_OK is returned if the aMI[] array is successfully allocated and -** populated. Otherwise, if an error occurs, an SQLite error code is returned. +** The first argument, , is the name of the fts3 tokenizer used +** to parse the query expression (see README.tokenizers). The second argument +** is the query expression to parse. Each subsequent argument is the name +** of a column of the fts3 table that the query expression may refer to. +** For example: +** +** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2'); */ -static int fts3EvalGatherStats( - Fts3Cursor *pCsr, /* Cursor object */ - Fts3Expr *pExpr /* FTSQUERY_PHRASE expression */ +static void fts3ExprTest( + sqlite3_context *context, + int argc, + sqlite3_value **argv ){ - int rc = SQLITE_OK; /* Return code */ + sqlite3_tokenizer_module const *pModule = 0; + sqlite3_tokenizer *pTokenizer = 0; + int rc; + char **azCol = 0; + const char *zExpr; + int nExpr; + int nCol; + int ii; + Fts3Expr *pExpr; + char *zBuf = 0; + sqlite3 *db = sqlite3_context_db_handle(context); - assert( pExpr->eType==FTSQUERY_PHRASE ); - if( pExpr->aMI==0 ){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - Fts3Expr *pRoot; /* Root of NEAR expression */ - Fts3Expr *p; /* Iterator used for several purposes */ + if( argc<3 ){ + sqlite3_result_error(context, + "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 + ); + return; + } - sqlite3_int64 iPrevId = pCsr->iPrevId; - sqlite3_int64 iDocid; - u8 bEof; + rc = queryTestTokenizer(db, + (const char *)sqlite3_value_text(argv[0]), &pModule); + if( rc==SQLITE_NOMEM ){ + sqlite3_result_error_nomem(context); + goto exprtest_out; + }else if( !pModule ){ + sqlite3_result_error(context, "No such tokenizer module", -1); + goto exprtest_out; + } - /* Find the root of the NEAR expression */ - pRoot = pExpr; - while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){ - pRoot = pRoot->pParent; - } - iDocid = pRoot->iDocid; - bEof = pRoot->bEof; - assert( pRoot->bStart ); + rc = pModule->xCreate(0, 0, &pTokenizer); + assert( rc==SQLITE_NOMEM || rc==SQLITE_OK ); + if( rc==SQLITE_NOMEM ){ + sqlite3_result_error_nomem(context); + goto exprtest_out; + } + pTokenizer->pModule = pModule; - /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */ - for(p=pRoot; p; p=p->pLeft){ - Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight); - assert( pE->aMI==0 ); - pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32)); - if( !pE->aMI ) return SQLITE_NOMEM; - memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32)); - } + zExpr = (const char *)sqlite3_value_text(argv[1]); + nExpr = sqlite3_value_bytes(argv[1]); + nCol = argc-2; + azCol = (char **)sqlite3_malloc(nCol*sizeof(char *)); + if( !azCol ){ + sqlite3_result_error_nomem(context); + goto exprtest_out; + } + for(ii=0; iiisEof==0 && rc==SQLITE_OK ){ + if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ + sqlite3Fts3ExprFree(pExpr); + sqlite3_result_error(context, "Error parsing expression", -1); + }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ + sqlite3_result_error_nomem(context); + }else{ + sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); + sqlite3_free(zBuf); + } - do { - /* Ensure the %_content statement is reset. */ - if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt); - assert( sqlite3_data_count(pCsr->pStmt)==0 ); + sqlite3Fts3ExprFree(pExpr); - /* Advance to the next document */ - fts3EvalNextRow(pCsr, pRoot, &rc); - pCsr->isEof = pRoot->bEof; - pCsr->isRequireSeek = 1; - pCsr->isMatchinfoNeeded = 1; - pCsr->iPrevId = pRoot->iDocid; - }while( pCsr->isEof==0 - && pRoot->eType==FTSQUERY_NEAR - && fts3EvalTestDeferredAndNear(pCsr, &rc) - ); +exprtest_out: + if( pModule && pTokenizer ){ + rc = pModule->xDestroy(pTokenizer); + } + sqlite3_free(azCol); +} + +/* +** Register the query expression parser test function fts3_exprtest() +** with database connection db. +*/ +SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ + int rc = sqlite3_create_function( + db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 + ); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", + -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 + ); + } + return rc; +} + +#endif +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + +/************** End of fts3_expr.c *******************************************/ +/************** Begin file fts3_hash.c ***************************************/ +/* +** 2001 September 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This is the implementation of generic hash-tables used in SQLite. +** We've modified it slightly to serve as a standalone hash table +** implementation for the full-text indexing module. +*/ + +/* +** The code in this file is only compiled if: +** +** * The FTS3 module is being built as an extension +** (in which case SQLITE_CORE is not defined), or +** +** * The FTS3 module is being built into the core of +** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). +*/ +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include */ +/* #include */ +/* #include */ + +/* #include "fts3_hash.h" */ + +/* +** Malloc and Free functions +*/ +static void *fts3HashMalloc(int n){ + void *p = sqlite3_malloc(n); + if( p ){ + memset(p, 0, n); + } + return p; +} +static void fts3HashFree(void *p){ + sqlite3_free(p); +} + +/* Turn bulk memory into a hash table object by initializing the +** fields of the Hash structure. +** +** "pNew" is a pointer to the hash table that is to be initialized. +** keyClass is one of the constants +** FTS3_HASH_BINARY or FTS3_HASH_STRING. The value of keyClass +** determines what kind of key the hash table will use. "copyKey" is +** true if the hash table should make its own private copy of keys and +** false if it should just use the supplied pointer. +*/ +SQLITE_PRIVATE void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey){ + assert( pNew!=0 ); + assert( keyClass>=FTS3_HASH_STRING && keyClass<=FTS3_HASH_BINARY ); + pNew->keyClass = keyClass; + pNew->copyKey = copyKey; + pNew->first = 0; + pNew->count = 0; + pNew->htsize = 0; + pNew->ht = 0; +} - if( rc==SQLITE_OK && pCsr->isEof==0 ){ - fts3EvalUpdateCounts(pRoot); - } +/* Remove all entries from a hash table. Reclaim all memory. +** Call this routine to delete a hash table or to reset a hash table +** to the empty state. +*/ +SQLITE_PRIVATE void sqlite3Fts3HashClear(Fts3Hash *pH){ + Fts3HashElem *elem; /* For looping over all elements of the table */ + + assert( pH!=0 ); + elem = pH->first; + pH->first = 0; + fts3HashFree(pH->ht); + pH->ht = 0; + pH->htsize = 0; + while( elem ){ + Fts3HashElem *next_elem = elem->next; + if( pH->copyKey && elem->pKey ){ + fts3HashFree(elem->pKey); } + fts3HashFree(elem); + elem = next_elem; + } + pH->count = 0; +} - pCsr->isEof = 0; - pCsr->iPrevId = iPrevId; +/* +** Hash and comparison functions when the mode is FTS3_HASH_STRING +*/ +static int fts3StrHash(const void *pKey, int nKey){ + const char *z = (const char *)pKey; + unsigned h = 0; + if( nKey<=0 ) nKey = (int) strlen(z); + while( nKey > 0 ){ + h = (h<<3) ^ h ^ *z++; + nKey--; + } + return (int)(h & 0x7fffffff); +} +static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){ + if( n1!=n2 ) return 1; + return strncmp((const char*)pKey1,(const char*)pKey2,n1); +} - if( bEof ){ - pRoot->bEof = bEof; - }else{ - /* Caution: pRoot may iterate through docids in ascending or descending - ** order. For this reason, even though it seems more defensive, the - ** do loop can not be written: - ** - ** do {...} while( pRoot->iDocidbEof==0 ); - }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK ); - fts3EvalTestDeferredAndNear(pCsr, &rc); - } +/* +** Hash and comparison functions when the mode is FTS3_HASH_BINARY +*/ +static int fts3BinHash(const void *pKey, int nKey){ + int h = 0; + const char *z = (const char *)pKey; + while( nKey-- > 0 ){ + h = (h<<3) ^ h ^ *(z++); } - return rc; + return h & 0x7fffffff; +} +static int fts3BinCompare(const void *pKey1, int n1, const void *pKey2, int n2){ + if( n1!=n2 ) return 1; + return memcmp(pKey1,pKey2,n1); } /* -** This function is used by the matchinfo() module to query a phrase -** expression node for the following information: -** -** 1. The total number of occurrences of the phrase in each column of -** the FTS table (considering all rows), and -** -** 2. For each column, the number of rows in the table for which the -** column contains at least one instance of the phrase. -** -** If no error occurs, SQLITE_OK is returned and the values for each column -** written into the array aiOut as follows: -** -** aiOut[iCol*3 + 1] = Number of occurrences -** aiOut[iCol*3 + 2] = Number of rows containing at least one instance -** -** Caveats: -** -** * If a phrase consists entirely of deferred tokens, then all output -** values are set to the number of documents in the table. In other -** words we assume that very common tokens occur exactly once in each -** column of each row of the table. +** Return a pointer to the appropriate hash function given the key class. ** -** * If a phrase contains some deferred tokens (and some non-deferred -** tokens), count the potential occurrence identified by considering -** the non-deferred tokens instead of actual phrase occurrences. +** The C syntax in this function definition may be unfamilar to some +** programmers, so we provide the following additional explanation: ** -** * If the phrase is part of a NEAR expression, then only phrase instances -** that meet the NEAR constraint are included in the counts. +** The name of the function is "ftsHashFunction". The function takes a +** single parameter "keyClass". The return value of ftsHashFunction() +** is a pointer to another function. Specifically, the return value +** of ftsHashFunction() is a pointer to a function that takes two parameters +** with types "const void*" and "int" and returns an "int". */ -SQLITE_PRIVATE int sqlite3Fts3EvalPhraseStats( - Fts3Cursor *pCsr, /* FTS cursor handle */ - Fts3Expr *pExpr, /* Phrase expression */ - u32 *aiOut /* Array to write results into (see above) */ -){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int rc = SQLITE_OK; - int iCol; - - if( pExpr->bDeferred && pExpr->pParent->eType!=FTSQUERY_NEAR ){ - assert( pCsr->nDoc>0 ); - for(iCol=0; iColnColumn; iCol++){ - aiOut[iCol*3 + 1] = (u32)pCsr->nDoc; - aiOut[iCol*3 + 2] = (u32)pCsr->nDoc; - } +static int (*ftsHashFunction(int keyClass))(const void*,int){ + if( keyClass==FTS3_HASH_STRING ){ + return &fts3StrHash; }else{ - rc = fts3EvalGatherStats(pCsr, pExpr); - if( rc==SQLITE_OK ){ - assert( pExpr->aMI ); - for(iCol=0; iColnColumn; iCol++){ - aiOut[iCol*3 + 1] = pExpr->aMI[iCol*3 + 1]; - aiOut[iCol*3 + 2] = pExpr->aMI[iCol*3 + 2]; - } - } + assert( keyClass==FTS3_HASH_BINARY ); + return &fts3BinHash; } - - return rc; } /* -** The expression pExpr passed as the second argument to this function -** must be of type FTSQUERY_PHRASE. -** -** The returned value is either NULL or a pointer to a buffer containing -** a position-list indicating the occurrences of the phrase in column iCol -** of the current row. -** -** More specifically, the returned buffer contains 1 varint for each -** occurrence of the phrase in the column, stored using the normal (delta+2) -** compression and is terminated by either an 0x01 or 0x00 byte. For example, -** if the requested column contains "a b X c d X X" and the position-list -** for 'X' is requested, the buffer returned may contain: -** -** 0x04 0x05 0x03 0x01 or 0x04 0x05 0x03 0x00 +** Return a pointer to the appropriate hash function given the key class. ** -** This function works regardless of whether or not the phrase is deferred, -** incremental, or neither. +** For help in interpreted the obscure C code in the function definition, +** see the header comment on the previous function. */ -SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist( - Fts3Cursor *pCsr, /* FTS3 cursor object */ - Fts3Expr *pExpr, /* Phrase to return doclist for */ - int iCol, /* Column to return position list for */ - char **ppOut /* OUT: Pointer to position list */ -){ - Fts3Phrase *pPhrase = pExpr->pPhrase; - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - char *pIter; - int iThis; - sqlite3_int64 iDocid; +static int (*ftsCompareFunction(int keyClass))(const void*,int,const void*,int){ + if( keyClass==FTS3_HASH_STRING ){ + return &fts3StrCompare; + }else{ + assert( keyClass==FTS3_HASH_BINARY ); + return &fts3BinCompare; + } +} - /* If this phrase is applies specifically to some column other than - ** column iCol, return a NULL pointer. */ - *ppOut = 0; - assert( iCol>=0 && iColnColumn ); - if( (pPhrase->iColumnnColumn && pPhrase->iColumn!=iCol) ){ - return SQLITE_OK; +/* Link an element into the hash table +*/ +static void fts3HashInsertElement( + Fts3Hash *pH, /* The complete hash table */ + struct _fts3ht *pEntry, /* The entry into which pNew is inserted */ + Fts3HashElem *pNew /* The element to be inserted */ +){ + Fts3HashElem *pHead; /* First element already in pEntry */ + pHead = pEntry->chain; + if( pHead ){ + pNew->next = pHead; + pNew->prev = pHead->prev; + if( pHead->prev ){ pHead->prev->next = pNew; } + else { pH->first = pNew; } + pHead->prev = pNew; + }else{ + pNew->next = pH->first; + if( pH->first ){ pH->first->prev = pNew; } + pNew->prev = 0; + pH->first = pNew; } + pEntry->count++; + pEntry->chain = pNew; +} - iDocid = pExpr->iDocid; - pIter = pPhrase->doclist.pList; - if( iDocid!=pCsr->iPrevId || pExpr->bEof ){ - int rc = SQLITE_OK; - int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */ - int bOr = 0; - u8 bEof = 0; - u8 bTreeEof = 0; - Fts3Expr *p; /* Used to iterate from pExpr to root */ - Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */ - /* Check if this phrase descends from an OR expression node. If not, - ** return NULL. Otherwise, the entry that corresponds to docid - ** pCsr->iPrevId may lie earlier in the doclist buffer. Or, if the - ** tree that the node is part of has been marked as EOF, but the node - ** itself is not EOF, then it may point to an earlier entry. */ - pNear = pExpr; - for(p=pExpr->pParent; p; p=p->pParent){ - if( p->eType==FTSQUERY_OR ) bOr = 1; - if( p->eType==FTSQUERY_NEAR ) pNear = p; - if( p->bEof ) bTreeEof = 1; - } - if( bOr==0 ) return SQLITE_OK; +/* Resize the hash table so that it cantains "new_size" buckets. +** "new_size" must be a power of 2. The hash table might fail +** to resize if sqliteMalloc() fails. +** +** Return non-zero if a memory allocation error occurs. +*/ +static int fts3Rehash(Fts3Hash *pH, int new_size){ + struct _fts3ht *new_ht; /* The new hash table */ + Fts3HashElem *elem, *next_elem; /* For looping over existing elements */ + int (*xHash)(const void*,int); /* The hash function */ - /* This is the descendent of an OR node. In this case we cannot use - ** an incremental phrase. Load the entire doclist for the phrase - ** into memory in this case. */ - if( pPhrase->bIncr ){ - int bEofSave = pNear->bEof; - fts3EvalRestart(pCsr, pNear, &rc); - while( rc==SQLITE_OK && !pNear->bEof ){ - fts3EvalNextRow(pCsr, pNear, &rc); - if( bEofSave==0 && pNear->iDocid==iDocid ) break; - } - assert( rc!=SQLITE_OK || pPhrase->bIncr==0 ); - } - if( bTreeEof ){ - while( rc==SQLITE_OK && !pNear->bEof ){ - fts3EvalNextRow(pCsr, pNear, &rc); - } - } - if( rc!=SQLITE_OK ) return rc; + assert( (new_size & (new_size-1))==0 ); + new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) ); + if( new_ht==0 ) return 1; + fts3HashFree(pH->ht); + pH->ht = new_ht; + pH->htsize = new_size; + xHash = ftsHashFunction(pH->keyClass); + for(elem=pH->first, pH->first=0; elem; elem = next_elem){ + int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1); + next_elem = elem->next; + fts3HashInsertElement(pH, &new_ht[h], elem); + } + return 0; +} - pIter = pPhrase->pOrPoslist; - iDocid = pPhrase->iOrDocid; - if( pCsr->bDesc==bDescDoclist ){ - bEof = (pIter >= (pPhrase->doclist.aAll + pPhrase->doclist.nAll)); - while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ - sqlite3Fts3DoclistNext( - bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, - &pIter, &iDocid, &bEof - ); - } - }else{ - bEof = !pPhrase->doclist.nAll || (pIter && pIter<=pPhrase->doclist.aAll); - while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ - int dummy; - sqlite3Fts3DoclistPrev( - bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, - &pIter, &iDocid, &dummy, &bEof - ); +/* This function (for internal use only) locates an element in an +** hash table that matches the given key. The hash for this key has +** already been computed and is passed as the 4th parameter. +*/ +static Fts3HashElem *fts3FindElementByHash( + const Fts3Hash *pH, /* The pH to be searched */ + const void *pKey, /* The key we are searching for */ + int nKey, + int h /* The hash for this key. */ +){ + Fts3HashElem *elem; /* Used to loop thru the element list */ + int count; /* Number of elements left to test */ + int (*xCompare)(const void*,int,const void*,int); /* comparison function */ + + if( pH->ht ){ + struct _fts3ht *pEntry = &pH->ht[h]; + elem = pEntry->chain; + count = pEntry->count; + xCompare = ftsCompareFunction(pH->keyClass); + while( count-- && elem ){ + if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ + return elem; } + elem = elem->next; } - pPhrase->pOrPoslist = pIter; - pPhrase->iOrDocid = iDocid; - - if( bEof || iDocid!=pCsr->iPrevId ) pIter = 0; } - if( pIter==0 ) return SQLITE_OK; + return 0; +} - if( *pIter==0x01 ){ - pIter++; - pIter += fts3GetVarint32(pIter, &iThis); +/* Remove a single entry from the hash table given a pointer to that +** element and a hash on the element's key. +*/ +static void fts3RemoveElementByHash( + Fts3Hash *pH, /* The pH containing "elem" */ + Fts3HashElem* elem, /* The element to be removed from the pH */ + int h /* Hash value for the element */ +){ + struct _fts3ht *pEntry; + if( elem->prev ){ + elem->prev->next = elem->next; }else{ - iThis = 0; + pH->first = elem->next; } - while( iThisnext ){ + elem->next->prev = elem->prev; } - if( *pIter==0x00 ){ - pIter = 0; + pEntry = &pH->ht[h]; + if( pEntry->chain==elem ){ + pEntry->chain = elem->next; } - - *ppOut = ((iCol==iThis)?pIter:0); - return SQLITE_OK; -} - -/* -** Free all components of the Fts3Phrase structure that were allocated by -** the eval module. Specifically, this means to free: -** -** * the contents of pPhrase->doclist, and -** * any Fts3MultiSegReader objects held by phrase tokens. -*/ -SQLITE_PRIVATE void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){ - if( pPhrase ){ - int i; - sqlite3_free(pPhrase->doclist.aAll); - fts3EvalInvalidatePoslist(pPhrase); - memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist)); - for(i=0; inToken; i++){ - fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr); - pPhrase->aToken[i].pSegcsr = 0; - } + pEntry->count--; + if( pEntry->count<=0 ){ + pEntry->chain = 0; + } + if( pH->copyKey && elem->pKey ){ + fts3HashFree(elem->pKey); + } + fts3HashFree( elem ); + pH->count--; + if( pH->count<=0 ){ + assert( pH->first==0 ); + assert( pH->count==0 ); + fts3HashClear(pH); } } +SQLITE_PRIVATE Fts3HashElem *sqlite3Fts3HashFindElem( + const Fts3Hash *pH, + const void *pKey, + int nKey +){ + int h; /* A hash on key */ + int (*xHash)(const void*,int); /* The hash function */ + + if( pH==0 || pH->ht==0 ) return 0; + xHash = ftsHashFunction(pH->keyClass); + assert( xHash!=0 ); + h = (*xHash)(pKey,nKey); + assert( (pH->htsize & (pH->htsize-1))==0 ); + return fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1)); +} -/* -** Return SQLITE_CORRUPT_VTAB. +/* +** Attempt to locate an element of the hash table pH with a key +** that matches pKey,nKey. Return the data for this element if it is +** found, or NULL if there is no match. */ -#ifdef SQLITE_DEBUG -SQLITE_PRIVATE int sqlite3Fts3Corrupt(){ - return SQLITE_CORRUPT_VTAB; +SQLITE_PRIVATE void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, int nKey){ + Fts3HashElem *pElem; /* The element that matches key (if any) */ + + pElem = sqlite3Fts3HashFindElem(pH, pKey, nKey); + return pElem ? pElem->data : 0; } -#endif -#if !SQLITE_CORE -/* -** Initialize API pointer table, if required. +/* Insert an element into the hash table pH. The key is pKey,nKey +** and the data is "data". +** +** If no element exists with a matching key, then a new +** element is created. A copy of the key is made if the copyKey +** flag is set. NULL is returned. +** +** If another element already exists with the same key, then the +** new data replaces the old data and the old data is returned. +** The key is not copied in this instance. If a malloc fails, then +** the new data is returned and the hash table is unchanged. +** +** If the "data" parameter to this function is NULL, then the +** element corresponding to "key" is removed from the hash table. */ -#ifdef _WIN32 -__declspec(dllexport) -#endif -SQLITE_API int SQLITE_STDCALL sqlite3_fts3_init( - sqlite3 *db, - char **pzErrMsg, - const sqlite3_api_routines *pApi +SQLITE_PRIVATE void *sqlite3Fts3HashInsert( + Fts3Hash *pH, /* The hash table to insert into */ + const void *pKey, /* The key */ + int nKey, /* Number of bytes in the key */ + void *data /* The data */ ){ - SQLITE_EXTENSION_INIT2(pApi) - return sqlite3Fts3Init(db); + int hraw; /* Raw hash value of the key */ + int h; /* the hash of the key modulo hash table size */ + Fts3HashElem *elem; /* Used to loop thru the element list */ + Fts3HashElem *new_elem; /* New element added to the pH */ + int (*xHash)(const void*,int); /* The hash function */ + + assert( pH!=0 ); + xHash = ftsHashFunction(pH->keyClass); + assert( xHash!=0 ); + hraw = (*xHash)(pKey, nKey); + assert( (pH->htsize & (pH->htsize-1))==0 ); + h = hraw & (pH->htsize-1); + elem = fts3FindElementByHash(pH,pKey,nKey,h); + if( elem ){ + void *old_data = elem->data; + if( data==0 ){ + fts3RemoveElementByHash(pH,elem,h); + }else{ + elem->data = data; + } + return old_data; + } + if( data==0 ) return 0; + if( (pH->htsize==0 && fts3Rehash(pH,8)) + || (pH->count>=pH->htsize && fts3Rehash(pH, pH->htsize*2)) + ){ + pH->count = 0; + return data; + } + assert( pH->htsize>0 ); + new_elem = (Fts3HashElem*)fts3HashMalloc( sizeof(Fts3HashElem) ); + if( new_elem==0 ) return data; + if( pH->copyKey && pKey!=0 ){ + new_elem->pKey = fts3HashMalloc( nKey ); + if( new_elem->pKey==0 ){ + fts3HashFree(new_elem); + return data; + } + memcpy((void*)new_elem->pKey, pKey, nKey); + }else{ + new_elem->pKey = (void*)pKey; + } + new_elem->nKey = nKey; + pH->count++; + assert( pH->htsize>0 ); + assert( (pH->htsize & (pH->htsize-1))==0 ); + h = hraw & (pH->htsize-1); + fts3HashInsertElement(pH, &pH->ht[h], new_elem); + new_elem->data = data; + return 0; } -#endif -#endif +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ -/************** End of fts3.c ************************************************/ -/************** Begin file fts3_aux.c ****************************************/ +/************** End of fts3_hash.c *******************************************/ +/************** Begin file fts3_porter.c *************************************/ /* -** 2011 Jan 27 +** 2006 September 30 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: @@ -138379,550 +142605,663 @@ SQLITE_API int SQLITE_STDCALL sqlite3_fts3_init( ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** -****************************************************************************** +************************************************************************* +** Implementation of the full-text-search tokenizer that implements +** a Porter stemmer. +*/ + +/* +** The code in this file is only compiled if: +** +** * The FTS3 module is being built as an extension +** (in which case SQLITE_CORE is not defined), or ** +** * The FTS3 module is being built into the core of +** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). */ +/* #include "fts3Int.h" */ #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) -/* #include */ /* #include */ +/* #include */ +/* #include */ +/* #include */ -typedef struct Fts3auxTable Fts3auxTable; -typedef struct Fts3auxCursor Fts3auxCursor; - -struct Fts3auxTable { - sqlite3_vtab base; /* Base class used by SQLite core */ - Fts3Table *pFts3Tab; -}; - -struct Fts3auxCursor { - sqlite3_vtab_cursor base; /* Base class used by SQLite core */ - Fts3MultiSegReader csr; /* Must be right after "base" */ - Fts3SegFilter filter; - char *zStop; - int nStop; /* Byte-length of string zStop */ - int iLangid; /* Language id to query */ - int isEof; /* True if cursor is at EOF */ - sqlite3_int64 iRowid; /* Current rowid */ - - int iCol; /* Current value of 'col' column */ - int nStat; /* Size of aStat[] array */ - struct Fts3auxColstats { - sqlite3_int64 nDoc; /* 'documents' values for current csr row */ - sqlite3_int64 nOcc; /* 'occurrences' values for current csr row */ - } *aStat; -}; +/* #include "fts3_tokenizer.h" */ /* -** Schema of the terms table. +** Class derived from sqlite3_tokenizer */ -#define FTS3_AUX_SCHEMA \ - "CREATE TABLE x(term, col, documents, occurrences, languageid HIDDEN)" +typedef struct porter_tokenizer { + sqlite3_tokenizer base; /* Base class */ +} porter_tokenizer; /* -** This function does all the work for both the xConnect and xCreate methods. -** These tables have no persistent representation of their own, so xConnect -** and xCreate are identical operations. +** Class derived from sqlite3_tokenizer_cursor */ -static int fts3auxConnectMethod( - sqlite3 *db, /* Database connection */ - void *pUnused, /* Unused */ - int argc, /* Number of elements in argv array */ - const char * const *argv, /* xCreate/xConnect argument array */ - sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ - char **pzErr /* OUT: sqlite3_malloc'd error message */ -){ - char const *zDb; /* Name of database (e.g. "main") */ - char const *zFts3; /* Name of fts3 table */ - int nDb; /* Result of strlen(zDb) */ - int nFts3; /* Result of strlen(zFts3) */ - int nByte; /* Bytes of space to allocate here */ - int rc; /* value returned by declare_vtab() */ - Fts3auxTable *p; /* Virtual table object to return */ - - UNUSED_PARAMETER(pUnused); - - /* The user should invoke this in one of two forms: - ** - ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table); - ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table); - */ - if( argc!=4 && argc!=5 ) goto bad_args; - - zDb = argv[1]; - nDb = (int)strlen(zDb); - if( argc==5 ){ - if( nDb==4 && 0==sqlite3_strnicmp("temp", zDb, 4) ){ - zDb = argv[3]; - nDb = (int)strlen(zDb); - zFts3 = argv[4]; - }else{ - goto bad_args; - } - }else{ - zFts3 = argv[3]; - } - nFts3 = (int)strlen(zFts3); - - rc = sqlite3_declare_vtab(db, FTS3_AUX_SCHEMA); - if( rc!=SQLITE_OK ) return rc; +typedef struct porter_tokenizer_cursor { + sqlite3_tokenizer_cursor base; + const char *zInput; /* input we are tokenizing */ + int nInput; /* size of the input */ + int iOffset; /* current position in zInput */ + int iToken; /* index of next token to be returned */ + char *zToken; /* storage for current token */ + int nAllocated; /* space allocated to zToken buffer */ +} porter_tokenizer_cursor; - nByte = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nDb + nFts3 + 2; - p = (Fts3auxTable *)sqlite3_malloc(nByte); - if( !p ) return SQLITE_NOMEM; - memset(p, 0, nByte); - p->pFts3Tab = (Fts3Table *)&p[1]; - p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1]; - p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1]; - p->pFts3Tab->db = db; - p->pFts3Tab->nIndex = 1; +/* +** Create a new tokenizer instance. +*/ +static int porterCreate( + int argc, const char * const *argv, + sqlite3_tokenizer **ppTokenizer +){ + porter_tokenizer *t; - memcpy((char *)p->pFts3Tab->zDb, zDb, nDb); - memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3); - sqlite3Fts3Dequote((char *)p->pFts3Tab->zName); + UNUSED_PARAMETER(argc); + UNUSED_PARAMETER(argv); - *ppVtab = (sqlite3_vtab *)p; + t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t)); + if( t==NULL ) return SQLITE_NOMEM; + memset(t, 0, sizeof(*t)); + *ppTokenizer = &t->base; return SQLITE_OK; - - bad_args: - *pzErr = sqlite3_mprintf("invalid arguments to fts4aux constructor"); - return SQLITE_ERROR; } /* -** This function does the work for both the xDisconnect and xDestroy methods. -** These tables have no persistent representation of their own, so xDisconnect -** and xDestroy are identical operations. +** Destroy a tokenizer */ -static int fts3auxDisconnectMethod(sqlite3_vtab *pVtab){ - Fts3auxTable *p = (Fts3auxTable *)pVtab; - Fts3Table *pFts3 = p->pFts3Tab; - int i; - - /* Free any prepared statements held */ - for(i=0; iaStmt); i++){ - sqlite3_finalize(pFts3->aStmt[i]); - } - sqlite3_free(pFts3->zSegmentsTbl); - sqlite3_free(p); +static int porterDestroy(sqlite3_tokenizer *pTokenizer){ + sqlite3_free(pTokenizer); return SQLITE_OK; } -#define FTS4AUX_EQ_CONSTRAINT 1 -#define FTS4AUX_GE_CONSTRAINT 2 -#define FTS4AUX_LE_CONSTRAINT 4 - /* -** xBestIndex - Analyze a WHERE and ORDER BY clause. +** Prepare to begin tokenizing a particular string. The input +** string to be tokenized is zInput[0..nInput-1]. A cursor +** used to incrementally tokenize this string is returned in +** *ppCursor. */ -static int fts3auxBestIndexMethod( - sqlite3_vtab *pVTab, - sqlite3_index_info *pInfo +static int porterOpen( + sqlite3_tokenizer *pTokenizer, /* The tokenizer */ + const char *zInput, int nInput, /* String to be tokenized */ + sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ ){ - int i; - int iEq = -1; - int iGe = -1; - int iLe = -1; - int iLangid = -1; - int iNext = 1; /* Next free argvIndex value */ - - UNUSED_PARAMETER(pVTab); - - /* This vtab delivers always results in "ORDER BY term ASC" order. */ - if( pInfo->nOrderBy==1 - && pInfo->aOrderBy[0].iColumn==0 - && pInfo->aOrderBy[0].desc==0 - ){ - pInfo->orderByConsumed = 1; - } + porter_tokenizer_cursor *c; - /* Search for equality and range constraints on the "term" column. - ** And equality constraints on the hidden "languageid" column. */ - for(i=0; inConstraint; i++){ - if( pInfo->aConstraint[i].usable ){ - int op = pInfo->aConstraint[i].op; - int iCol = pInfo->aConstraint[i].iColumn; + UNUSED_PARAMETER(pTokenizer); - if( iCol==0 ){ - if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i; - if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i; - if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i; - if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i; - if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i; - } - if( iCol==4 ){ - if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iLangid = i; - } - } - } + c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); + if( c==NULL ) return SQLITE_NOMEM; - if( iEq>=0 ){ - pInfo->idxNum = FTS4AUX_EQ_CONSTRAINT; - pInfo->aConstraintUsage[iEq].argvIndex = iNext++; - pInfo->estimatedCost = 5; + c->zInput = zInput; + if( zInput==0 ){ + c->nInput = 0; + }else if( nInput<0 ){ + c->nInput = (int)strlen(zInput); }else{ - pInfo->idxNum = 0; - pInfo->estimatedCost = 20000; - if( iGe>=0 ){ - pInfo->idxNum += FTS4AUX_GE_CONSTRAINT; - pInfo->aConstraintUsage[iGe].argvIndex = iNext++; - pInfo->estimatedCost /= 2; - } - if( iLe>=0 ){ - pInfo->idxNum += FTS4AUX_LE_CONSTRAINT; - pInfo->aConstraintUsage[iLe].argvIndex = iNext++; - pInfo->estimatedCost /= 2; - } - } - if( iLangid>=0 ){ - pInfo->aConstraintUsage[iLangid].argvIndex = iNext++; - pInfo->estimatedCost--; + c->nInput = nInput; } + c->iOffset = 0; /* start tokenizing at the beginning */ + c->iToken = 0; + c->zToken = NULL; /* no space allocated, yet. */ + c->nAllocated = 0; + *ppCursor = &c->base; return SQLITE_OK; } /* -** xOpen - Open a cursor. +** Close a tokenization cursor previously opened by a call to +** porterOpen() above. */ -static int fts3auxOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ - Fts3auxCursor *pCsr; /* Pointer to cursor object to return */ - - UNUSED_PARAMETER(pVTab); - - pCsr = (Fts3auxCursor *)sqlite3_malloc(sizeof(Fts3auxCursor)); - if( !pCsr ) return SQLITE_NOMEM; - memset(pCsr, 0, sizeof(Fts3auxCursor)); - - *ppCsr = (sqlite3_vtab_cursor *)pCsr; +static int porterClose(sqlite3_tokenizer_cursor *pCursor){ + porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; + sqlite3_free(c->zToken); + sqlite3_free(c); return SQLITE_OK; } - /* -** xClose - Close a cursor. +** Vowel or consonant */ -static int fts3auxCloseMethod(sqlite3_vtab_cursor *pCursor){ - Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab; - Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; +static const char cType[] = { + 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 2, 1 +}; - sqlite3Fts3SegmentsClose(pFts3); - sqlite3Fts3SegReaderFinish(&pCsr->csr); - sqlite3_free((void *)pCsr->filter.zTerm); - sqlite3_free(pCsr->zStop); - sqlite3_free(pCsr->aStat); - sqlite3_free(pCsr); - return SQLITE_OK; +/* +** isConsonant() and isVowel() determine if their first character in +** the string they point to is a consonant or a vowel, according +** to Porter ruls. +** +** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'. +** 'Y' is a consonant unless it follows another consonant, +** in which case it is a vowel. +** +** In these routine, the letters are in reverse order. So the 'y' rule +** is that 'y' is a consonant unless it is followed by another +** consonent. +*/ +static int isVowel(const char*); +static int isConsonant(const char *z){ + int j; + char x = *z; + if( x==0 ) return 0; + assert( x>='a' && x<='z' ); + j = cType[x-'a']; + if( j<2 ) return j; + return z[1]==0 || isVowel(z + 1); } - -static int fts3auxGrowStatArray(Fts3auxCursor *pCsr, int nSize){ - if( nSize>pCsr->nStat ){ - struct Fts3auxColstats *aNew; - aNew = (struct Fts3auxColstats *)sqlite3_realloc(pCsr->aStat, - sizeof(struct Fts3auxColstats) * nSize - ); - if( aNew==0 ) return SQLITE_NOMEM; - memset(&aNew[pCsr->nStat], 0, - sizeof(struct Fts3auxColstats) * (nSize - pCsr->nStat) - ); - pCsr->aStat = aNew; - pCsr->nStat = nSize; - } - return SQLITE_OK; +static int isVowel(const char *z){ + int j; + char x = *z; + if( x==0 ) return 0; + assert( x>='a' && x<='z' ); + j = cType[x-'a']; + if( j<2 ) return 1-j; + return isConsonant(z + 1); } /* -** xNext - Advance the cursor to the next row, if any. +** Let any sequence of one or more vowels be represented by V and let +** C be sequence of one or more consonants. Then every word can be +** represented as: +** +** [C] (VC){m} [V] +** +** In prose: A word is an optional consonant followed by zero or +** vowel-consonant pairs followed by an optional vowel. "m" is the +** number of vowel consonant pairs. This routine computes the value +** of m for the first i bytes of a word. +** +** Return true if the m-value for z is 1 or more. In other words, +** return true if z contains at least one vowel that is followed +** by a consonant. +** +** In this routine z[] is in reverse order. So we are really looking +** for an instance of a consonant followed by a vowel. */ -static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){ - Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; - Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab; - int rc; - - /* Increment our pretend rowid value. */ - pCsr->iRowid++; - - for(pCsr->iCol++; pCsr->iColnStat; pCsr->iCol++){ - if( pCsr->aStat[pCsr->iCol].nDoc>0 ) return SQLITE_OK; - } - - rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr); - if( rc==SQLITE_ROW ){ - int i = 0; - int nDoclist = pCsr->csr.nDoclist; - char *aDoclist = pCsr->csr.aDoclist; - int iCol; - - int eState = 0; - - if( pCsr->zStop ){ - int n = (pCsr->nStopcsr.nTerm) ? pCsr->nStop : pCsr->csr.nTerm; - int mc = memcmp(pCsr->zStop, pCsr->csr.zTerm, n); - if( mc<0 || (mc==0 && pCsr->csr.nTerm>pCsr->nStop) ){ - pCsr->isEof = 1; - return SQLITE_OK; - } - } - - if( fts3auxGrowStatArray(pCsr, 2) ) return SQLITE_NOMEM; - memset(pCsr->aStat, 0, sizeof(struct Fts3auxColstats) * pCsr->nStat); - iCol = 0; +static int m_gt_0(const char *z){ + while( isVowel(z) ){ z++; } + if( *z==0 ) return 0; + while( isConsonant(z) ){ z++; } + return *z!=0; +} - while( iaStat[0].nDoc++; - eState = 1; - iCol = 0; - break; +/* Like mgt0 above except we are looking for a value of m>1 instead +** or m>0 +*/ +static int m_gt_1(const char *z){ + while( isVowel(z) ){ z++; } + if( *z==0 ) return 0; + while( isConsonant(z) ){ z++; } + if( *z==0 ) return 0; + while( isVowel(z) ){ z++; } + if( *z==0 ) return 0; + while( isConsonant(z) ){ z++; } + return *z!=0; +} - /* State 1. In this state we are expecting either a 1, indicating - ** that the following integer will be a column number, or the - ** start of a position list for column 0. - ** - ** The only difference between state 1 and state 2 is that if the - ** integer encountered in state 1 is not 0 or 1, then we need to - ** increment the column 0 "nDoc" count for this term. - */ - case 1: - assert( iCol==0 ); - if( v>1 ){ - pCsr->aStat[1].nDoc++; - } - eState = 2; - /* fall through */ +/* +** Return TRUE if there is a vowel anywhere within z[0..n-1] +*/ +static int hasVowel(const char *z){ + while( isConsonant(z) ){ z++; } + return *z!=0; +} - case 2: - if( v==0 ){ /* 0x00. Next integer will be a docid. */ - eState = 0; - }else if( v==1 ){ /* 0x01. Next integer will be a column number. */ - eState = 3; - }else{ /* 2 or greater. A position. */ - pCsr->aStat[iCol+1].nOcc++; - pCsr->aStat[0].nOcc++; - } - break; +/* +** Return TRUE if the word ends in a double consonant. +** +** The text is reversed here. So we are really looking at +** the first two characters of z[]. +*/ +static int doubleConsonant(const char *z){ + return isConsonant(z) && z[0]==z[1]; +} - /* State 3. The integer just read is a column number. */ - default: assert( eState==3 ); - iCol = (int)v; - if( fts3auxGrowStatArray(pCsr, iCol+2) ) return SQLITE_NOMEM; - pCsr->aStat[iCol+1].nDoc++; - eState = 2; - break; - } - } +/* +** Return TRUE if the word ends with three letters which +** are consonant-vowel-consonent and where the final consonant +** is not 'w', 'x', or 'y'. +** +** The word is reversed here. So we are really checking the +** first three letters and the first one cannot be in [wxy]. +*/ +static int star_oh(const char *z){ + return + isConsonant(z) && + z[0]!='w' && z[0]!='x' && z[0]!='y' && + isVowel(z+1) && + isConsonant(z+2); +} - pCsr->iCol = 0; - rc = SQLITE_OK; - }else{ - pCsr->isEof = 1; +/* +** If the word ends with zFrom and xCond() is true for the stem +** of the word that preceeds the zFrom ending, then change the +** ending to zTo. +** +** The input word *pz and zFrom are both in reverse order. zTo +** is in normal order. +** +** Return TRUE if zFrom matches. Return FALSE if zFrom does not +** match. Not that TRUE is returned even if xCond() fails and +** no substitution occurs. +*/ +static int stem( + char **pz, /* The word being stemmed (Reversed) */ + const char *zFrom, /* If the ending matches this... (Reversed) */ + const char *zTo, /* ... change the ending to this (not reversed) */ + int (*xCond)(const char*) /* Condition that must be true */ +){ + char *z = *pz; + while( *zFrom && *zFrom==*z ){ z++; zFrom++; } + if( *zFrom!=0 ) return 0; + if( xCond && !xCond(z) ) return 1; + while( *zTo ){ + *(--z) = *(zTo++); } - return rc; + *pz = z; + return 1; } /* -** xFilter - Initialize a cursor to point at the start of its data. +** This is the fallback stemmer used when the porter stemmer is +** inappropriate. The input word is copied into the output with +** US-ASCII case folding. If the input word is too long (more +** than 20 bytes if it contains no digits or more than 6 bytes if +** it contains digits) then word is truncated to 20 or 6 bytes +** by taking 10 or 3 bytes from the beginning and end. */ -static int fts3auxFilterMethod( - sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ - int idxNum, /* Strategy index */ - const char *idxStr, /* Unused */ - int nVal, /* Number of elements in apVal */ - sqlite3_value **apVal /* Arguments for the indexing scheme */ -){ - Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; - Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab; - int rc; - int isScan = 0; - int iLangVal = 0; /* Language id to query */ +static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ + int i, mx, j; + int hasDigit = 0; + for(i=0; i='A' && c<='Z' ){ + zOut[i] = c - 'A' + 'a'; + }else{ + if( c>='0' && c<='9' ) hasDigit = 1; + zOut[i] = c; + } + } + mx = hasDigit ? 3 : 10; + if( nIn>mx*2 ){ + for(j=mx, i=nIn-mx; i=? value in apVal */ - int iLe = -1; /* Index of term<=? value in apVal */ - int iLangid = -1; /* Index of languageid=? value in apVal */ - int iNext = 0; - UNUSED_PARAMETER(nVal); - UNUSED_PARAMETER(idxStr); +/* +** Stem the input word zIn[0..nIn-1]. Store the output in zOut. +** zOut is at least big enough to hold nIn bytes. Write the actual +** size of the output word (exclusive of the '\0' terminator) into *pnOut. +** +** Any upper-case characters in the US-ASCII character set ([A-Z]) +** are converted to lower case. Upper-case UTF characters are +** unchanged. +** +** Words that are longer than about 20 bytes are stemmed by retaining +** a few bytes from the beginning and the end of the word. If the +** word contains digits, 3 bytes are taken from the beginning and +** 3 bytes from the end. For long words without digits, 10 bytes +** are taken from each end. US-ASCII case folding still applies. +** +** If the input word contains not digits but does characters not +** in [a-zA-Z] then no stemming is attempted and this routine just +** copies the input into the input into the output with US-ASCII +** case folding. +** +** Stemming never increases the length of the word. So there is +** no chance of overflowing the zOut buffer. +*/ +static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ + int i, j; + char zReverse[28]; + char *z, *z2; + if( nIn<3 || nIn>=(int)sizeof(zReverse)-7 ){ + /* The word is too big or too small for the porter stemmer. + ** Fallback to the copy stemmer */ + copy_stemmer(zIn, nIn, zOut, pnOut); + return; + } + for(i=0, j=sizeof(zReverse)-6; i='A' && c<='Z' ){ + zReverse[j] = c + 'a' - 'A'; + }else if( c>='a' && c<='z' ){ + zReverse[j] = c; + }else{ + /* The use of a character not in [a-zA-Z] means that we fallback + ** to the copy stemmer */ + copy_stemmer(zIn, nIn, zOut, pnOut); + return; + } + } + memset(&zReverse[sizeof(zReverse)-5], 0, 5); + z = &zReverse[j+1]; - assert( idxStr==0 ); - assert( idxNum==FTS4AUX_EQ_CONSTRAINT || idxNum==0 - || idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT - || idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT) - ); - if( idxNum==FTS4AUX_EQ_CONSTRAINT ){ - iEq = iNext++; - }else{ - isScan = 1; - if( idxNum & FTS4AUX_GE_CONSTRAINT ){ - iGe = iNext++; - } - if( idxNum & FTS4AUX_LE_CONSTRAINT ){ - iLe = iNext++; + /* Step 1a */ + if( z[0]=='s' ){ + if( + !stem(&z, "sess", "ss", 0) && + !stem(&z, "sei", "i", 0) && + !stem(&z, "ss", "ss", 0) + ){ + z++; } } - if( iNextfilter.zTerm); - sqlite3Fts3SegReaderFinish(&pCsr->csr); - sqlite3_free((void *)pCsr->filter.zTerm); - sqlite3_free(pCsr->aStat); - memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr); + /* Step 1c */ + if( z[0]=='y' && hasVowel(z+1) ){ + z[0] = 'i'; + } - pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; - if( isScan ) pCsr->filter.flags |= FTS3_SEGMENT_SCAN; + /* Step 2 */ + switch( z[1] ){ + case 'a': + if( !stem(&z, "lanoita", "ate", m_gt_0) ){ + stem(&z, "lanoit", "tion", m_gt_0); + } + break; + case 'c': + if( !stem(&z, "icne", "ence", m_gt_0) ){ + stem(&z, "icna", "ance", m_gt_0); + } + break; + case 'e': + stem(&z, "rezi", "ize", m_gt_0); + break; + case 'g': + stem(&z, "igol", "log", m_gt_0); + break; + case 'l': + if( !stem(&z, "ilb", "ble", m_gt_0) + && !stem(&z, "illa", "al", m_gt_0) + && !stem(&z, "iltne", "ent", m_gt_0) + && !stem(&z, "ile", "e", m_gt_0) + ){ + stem(&z, "ilsuo", "ous", m_gt_0); + } + break; + case 'o': + if( !stem(&z, "noitazi", "ize", m_gt_0) + && !stem(&z, "noita", "ate", m_gt_0) + ){ + stem(&z, "rota", "ate", m_gt_0); + } + break; + case 's': + if( !stem(&z, "msila", "al", m_gt_0) + && !stem(&z, "ssenevi", "ive", m_gt_0) + && !stem(&z, "ssenluf", "ful", m_gt_0) + ){ + stem(&z, "ssensuo", "ous", m_gt_0); + } + break; + case 't': + if( !stem(&z, "itila", "al", m_gt_0) + && !stem(&z, "itivi", "ive", m_gt_0) + ){ + stem(&z, "itilib", "ble", m_gt_0); + } + break; + } - if( iEq>=0 || iGe>=0 ){ - const unsigned char *zStr = sqlite3_value_text(apVal[0]); - assert( (iEq==0 && iGe==-1) || (iEq==-1 && iGe==0) ); - if( zStr ){ - pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr); - pCsr->filter.nTerm = sqlite3_value_bytes(apVal[0]); - if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM; - } + /* Step 3 */ + switch( z[0] ){ + case 'e': + if( !stem(&z, "etaci", "ic", m_gt_0) + && !stem(&z, "evita", "", m_gt_0) + ){ + stem(&z, "ezila", "al", m_gt_0); + } + break; + case 'i': + stem(&z, "itici", "ic", m_gt_0); + break; + case 'l': + if( !stem(&z, "laci", "ic", m_gt_0) ){ + stem(&z, "luf", "", m_gt_0); + } + break; + case 's': + stem(&z, "ssen", "", m_gt_0); + break; } - if( iLe>=0 ){ - pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe])); - pCsr->nStop = sqlite3_value_bytes(apVal[iLe]); - if( pCsr->zStop==0 ) return SQLITE_NOMEM; + /* Step 4 */ + switch( z[1] ){ + case 'a': + if( z[0]=='l' && m_gt_1(z+2) ){ + z += 2; + } + break; + case 'c': + if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){ + z += 4; + } + break; + case 'e': + if( z[0]=='r' && m_gt_1(z+2) ){ + z += 2; + } + break; + case 'i': + if( z[0]=='c' && m_gt_1(z+2) ){ + z += 2; + } + break; + case 'l': + if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){ + z += 4; + } + break; + case 'n': + if( z[0]=='t' ){ + if( z[2]=='a' ){ + if( m_gt_1(z+3) ){ + z += 3; + } + }else if( z[2]=='e' ){ + if( !stem(&z, "tneme", "", m_gt_1) + && !stem(&z, "tnem", "", m_gt_1) + ){ + stem(&z, "tne", "", m_gt_1); + } + } + } + break; + case 'o': + if( z[0]=='u' ){ + if( m_gt_1(z+2) ){ + z += 2; + } + }else if( z[3]=='s' || z[3]=='t' ){ + stem(&z, "noi", "", m_gt_1); + } + break; + case 's': + if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){ + z += 3; + } + break; + case 't': + if( !stem(&z, "eta", "", m_gt_1) ){ + stem(&z, "iti", "", m_gt_1); + } + break; + case 'u': + if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){ + z += 3; + } + break; + case 'v': + case 'z': + if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){ + z += 3; + } + break; } - - if( iLangid>=0 ){ - iLangVal = sqlite3_value_int(apVal[iLangid]); - /* If the user specified a negative value for the languageid, use zero - ** instead. This works, as the "languageid=?" constraint will also - ** be tested by the VDBE layer. The test will always be false (since - ** this module will not return a row with a negative languageid), and - ** so the overall query will return zero rows. */ - if( iLangVal<0 ) iLangVal = 0; + /* Step 5a */ + if( z[0]=='e' ){ + if( m_gt_1(z+1) ){ + z++; + }else if( m_eq_1(z+1) && !star_oh(z+1) ){ + z++; + } } - pCsr->iLangid = iLangVal; - rc = sqlite3Fts3SegReaderCursor(pFts3, iLangVal, 0, FTS3_SEGCURSOR_ALL, - pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr - ); - if( rc==SQLITE_OK ){ - rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter); + /* Step 5b */ + if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){ + z++; } - if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor); - return rc; + /* z[] is now the stemmed word in reverse order. Flip it back + ** around into forward order and return. + */ + *pnOut = i = (int)strlen(z); + zOut[i] = 0; + while( *z ){ + zOut[--i] = *(z++); + } } /* -** xEof - Return true if the cursor is at EOF, or false otherwise. +** Characters that can be part of a token. We assume any character +** whose value is greater than 0x80 (any UTF character) can be +** part of a token. In other words, delimiters all must have +** values of 0x7f or lower. */ -static int fts3auxEofMethod(sqlite3_vtab_cursor *pCursor){ - Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; - return pCsr->isEof; -} +static const char porterIdChar[] = { +/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ +}; +#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30])) /* -** xColumn - Return a column value. +** Extract the next token from a tokenization cursor. The cursor must +** have been opened by a prior call to porterOpen(). */ -static int fts3auxColumnMethod( - sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ - sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ - int iCol /* Index of column to read value from */ +static int porterNext( + sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */ + const char **pzToken, /* OUT: *pzToken is the token text */ + int *pnBytes, /* OUT: Number of bytes in token */ + int *piStartOffset, /* OUT: Starting offset of token */ + int *piEndOffset, /* OUT: Ending offset of token */ + int *piPosition /* OUT: Position integer of token */ ){ - Fts3auxCursor *p = (Fts3auxCursor *)pCursor; - - assert( p->isEof==0 ); - switch( iCol ){ - case 0: /* term */ - sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT); - break; + porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; + const char *z = c->zInput; - case 1: /* col */ - if( p->iCol ){ - sqlite3_result_int(pCtx, p->iCol-1); - }else{ - sqlite3_result_text(pCtx, "*", -1, SQLITE_STATIC); - } - break; + while( c->iOffsetnInput ){ + int iStartOffset, ch; - case 2: /* documents */ - sqlite3_result_int64(pCtx, p->aStat[p->iCol].nDoc); - break; + /* Scan past delimiter characters */ + while( c->iOffsetnInput && isDelim(z[c->iOffset]) ){ + c->iOffset++; + } - case 3: /* occurrences */ - sqlite3_result_int64(pCtx, p->aStat[p->iCol].nOcc); - break; + /* Count non-delimiter characters. */ + iStartOffset = c->iOffset; + while( c->iOffsetnInput && !isDelim(z[c->iOffset]) ){ + c->iOffset++; + } - default: /* languageid */ - assert( iCol==4 ); - sqlite3_result_int(pCtx, p->iLangid); - break; + if( c->iOffset>iStartOffset ){ + int n = c->iOffset-iStartOffset; + if( n>c->nAllocated ){ + char *pNew; + c->nAllocated = n+20; + pNew = sqlite3_realloc(c->zToken, c->nAllocated); + if( !pNew ) return SQLITE_NOMEM; + c->zToken = pNew; + } + porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes); + *pzToken = c->zToken; + *piStartOffset = iStartOffset; + *piEndOffset = c->iOffset; + *piPosition = c->iToken++; + return SQLITE_OK; + } } - - return SQLITE_OK; + return SQLITE_DONE; } /* -** xRowid - Return the current rowid for the cursor. +** The set of routines that implement the porter-stemmer tokenizer */ -static int fts3auxRowidMethod( - sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ - sqlite_int64 *pRowid /* OUT: Rowid value */ -){ - Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; - *pRowid = pCsr->iRowid; - return SQLITE_OK; -} +static const sqlite3_tokenizer_module porterTokenizerModule = { + 0, + porterCreate, + porterDestroy, + porterOpen, + porterClose, + porterNext, + 0 +}; /* -** Register the fts3aux module with database connection db. Return SQLITE_OK -** if successful or an error code if sqlite3_create_module() fails. +** Allocate a new porter tokenizer. Return a pointer to the new +** tokenizer in *ppModule */ -SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db){ - static const sqlite3_module fts3aux_module = { - 0, /* iVersion */ - fts3auxConnectMethod, /* xCreate */ - fts3auxConnectMethod, /* xConnect */ - fts3auxBestIndexMethod, /* xBestIndex */ - fts3auxDisconnectMethod, /* xDisconnect */ - fts3auxDisconnectMethod, /* xDestroy */ - fts3auxOpenMethod, /* xOpen */ - fts3auxCloseMethod, /* xClose */ - fts3auxFilterMethod, /* xFilter */ - fts3auxNextMethod, /* xNext */ - fts3auxEofMethod, /* xEof */ - fts3auxColumnMethod, /* xColumn */ - fts3auxRowidMethod, /* xRowid */ - 0, /* xUpdate */ - 0, /* xBegin */ - 0, /* xSync */ - 0, /* xCommit */ - 0, /* xRollback */ - 0, /* xFindFunction */ - 0, /* xRename */ - 0, /* xSavepoint */ - 0, /* xRelease */ - 0 /* xRollbackTo */ - }; - int rc; /* Return code */ - - rc = sqlite3_create_module(db, "fts4aux", &fts3aux_module, 0); - return rc; +SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule( + sqlite3_tokenizer_module const**ppModule +){ + *ppModule = &porterTokenizerModule; } #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ -/************** End of fts3_aux.c ********************************************/ -/************** Begin file fts3_expr.c ***************************************/ +/************** End of fts3_porter.c *****************************************/ +/************** Begin file fts3_tokenizer.c **********************************/ /* -** 2008 Nov 28 +** 2007 June 22 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: @@ -138933,1281 +143272,1187 @@ SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db){ ** ****************************************************************************** ** -** This module contains code that implements a parser for fts3 query strings -** (the right-hand argument to the MATCH operator). Because the supported -** syntax is relatively simple, the whole tokenizer/parser system is -** hand-coded. +** This is part of an SQLite module implementing full-text search. +** This particular file implements the generic tokenizer interface. */ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) /* -** By default, this module parses the legacy syntax that has been -** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS -** is defined, then it uses the new syntax. The differences between -** the new and the old syntaxes are: -** -** a) The new syntax supports parenthesis. The old does not. -** -** b) The new syntax supports the AND and NOT operators. The old does not. -** -** c) The old syntax supports the "-" token qualifier. This is not -** supported by the new syntax (it is replaced by the NOT operator). -** -** d) When using the old syntax, the OR operator has a greater precedence -** than an implicit AND. When using the new, both implicity and explicit -** AND operators have a higher precedence than OR. -** -** If compiled with SQLITE_TEST defined, then this module exports the -** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable -** to zero causes the module to use the old syntax. If it is set to -** non-zero the new syntax is activated. This is so both syntaxes can -** be tested using a single build of testfixture. -** -** The following describes the syntax supported by the fts3 MATCH -** operator in a similar format to that used by the lemon parser -** generator. This module does not use actually lemon, it uses a -** custom parser. +** The code in this file is only compiled if: ** -** query ::= andexpr (OR andexpr)*. +** * The FTS3 module is being built as an extension +** (in which case SQLITE_CORE is not defined), or ** -** andexpr ::= notexpr (AND? notexpr)*. +** * The FTS3 module is being built into the core of +** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). +*/ +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include */ +/* #include */ + +/* +** Implementation of the SQL scalar function for accessing the underlying +** hash table. This function may be called as follows: ** -** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*. -** notexpr ::= LP query RP. +** SELECT (); +** SELECT (, ); ** -** nearexpr ::= phrase (NEAR distance_opt nearexpr)*. +** where is the name passed as the second argument +** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer'). ** -** distance_opt ::= . -** distance_opt ::= / INTEGER. +** If the argument is specified, it must be a blob value +** containing a pointer to be stored as the hash data corresponding +** to the string . If is not specified, then +** the string must already exist in the has table. Otherwise, +** an error is returned. ** -** phrase ::= TOKEN. -** phrase ::= COLUMN:TOKEN. -** phrase ::= "TOKEN TOKEN TOKEN...". +** Whether or not the argument is specified, the value returned +** is a blob containing the pointer stored as the hash data corresponding +** to string (after the hash-table is updated, if applicable). */ +static void scalarFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + Fts3Hash *pHash; + void *pPtr = 0; + const unsigned char *zName; + int nName; -#ifdef SQLITE_TEST -SQLITE_API int sqlite3_fts3_enable_parentheses = 0; -#else -# ifdef SQLITE_ENABLE_FTS3_PARENTHESIS -# define sqlite3_fts3_enable_parentheses 1 -# else -# define sqlite3_fts3_enable_parentheses 0 -# endif -#endif + assert( argc==1 || argc==2 ); -/* -** Default span for NEAR operators. -*/ -#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10 + pHash = (Fts3Hash *)sqlite3_user_data(context); -/* #include */ -/* #include */ + zName = sqlite3_value_text(argv[0]); + nName = sqlite3_value_bytes(argv[0])+1; -/* -** isNot: -** This variable is used by function getNextNode(). When getNextNode() is -** called, it sets ParseContext.isNot to true if the 'next node' is a -** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the -** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to -** zero. -*/ -typedef struct ParseContext ParseContext; -struct ParseContext { - sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ - int iLangid; /* Language id used with tokenizer */ - const char **azCol; /* Array of column names for fts3 table */ - int bFts4; /* True to allow FTS4-only syntax */ - int nCol; /* Number of entries in azCol[] */ - int iDefaultCol; /* Default column to query */ - int isNot; /* True if getNextNode() sees a unary - */ - sqlite3_context *pCtx; /* Write error message here */ - int nNest; /* Number of nested brackets */ -}; + if( argc==2 ){ + void *pOld; + int n = sqlite3_value_bytes(argv[1]); + if( zName==0 || n!=sizeof(pPtr) ){ + sqlite3_result_error(context, "argument type mismatch", -1); + return; + } + pPtr = *(void **)sqlite3_value_blob(argv[1]); + pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr); + if( pOld==pPtr ){ + sqlite3_result_error(context, "out of memory", -1); + return; + } + }else{ + if( zName ){ + pPtr = sqlite3Fts3HashFind(pHash, zName, nName); + } + if( !pPtr ){ + char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); + sqlite3_result_error(context, zErr, -1); + sqlite3_free(zErr); + return; + } + } -/* -** This function is equivalent to the standard isspace() function. -** -** The standard isspace() can be awkward to use safely, because although it -** is defined to accept an argument of type int, its behavior when passed -** an integer that falls outside of the range of the unsigned char type -** is undefined (and sometimes, "undefined" means segfault). This wrapper -** is defined to accept an argument of type char, and always returns 0 for -** any values that fall outside of the range of the unsigned char type (i.e. -** negative values). -*/ -static int fts3isspace(char c){ - return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; + sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT); } -/* -** Allocate nByte bytes of memory using sqlite3_malloc(). If successful, -** zero the memory before returning a pointer to it. If unsuccessful, -** return NULL. -*/ -static void *fts3MallocZero(int nByte){ - void *pRet = sqlite3_malloc(nByte); - if( pRet ) memset(pRet, 0, nByte); - return pRet; +SQLITE_PRIVATE int sqlite3Fts3IsIdChar(char c){ + static const char isFtsIdChar[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ + }; + return (c&0x80 || isFtsIdChar[(int)(c)]); } -SQLITE_PRIVATE int sqlite3Fts3OpenTokenizer( - sqlite3_tokenizer *pTokenizer, - int iLangid, - const char *z, - int n, - sqlite3_tokenizer_cursor **ppCsr -){ - sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; - sqlite3_tokenizer_cursor *pCsr = 0; - int rc; +SQLITE_PRIVATE const char *sqlite3Fts3NextToken(const char *zStr, int *pn){ + const char *z1; + const char *z2 = 0; - rc = pModule->xOpen(pTokenizer, z, n, &pCsr); - assert( rc==SQLITE_OK || pCsr==0 ); - if( rc==SQLITE_OK ){ - pCsr->pTokenizer = pTokenizer; - if( pModule->iVersion>=1 ){ - rc = pModule->xLanguageid(pCsr, iLangid); - if( rc!=SQLITE_OK ){ - pModule->xClose(pCsr); - pCsr = 0; + /* Find the start of the next token. */ + z1 = zStr; + while( z2==0 ){ + char c = *z1; + switch( c ){ + case '\0': return 0; /* No more tokens here */ + case '\'': + case '"': + case '`': { + z2 = z1; + while( *++z2 && (*z2!=c || *++z2==c) ); + break; } + case '[': + z2 = &z1[1]; + while( *z2 && z2[0]!=']' ) z2++; + if( *z2 ) z2++; + break; + + default: + if( sqlite3Fts3IsIdChar(*z1) ){ + z2 = &z1[1]; + while( sqlite3Fts3IsIdChar(*z2) ) z2++; + }else{ + z1++; + } } } - *ppCsr = pCsr; - return rc; -} -/* -** Function getNextNode(), which is called by fts3ExprParse(), may itself -** call fts3ExprParse(). So this forward declaration is required. -*/ -static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); + *pn = (int)(z2-z1); + return z1; +} -/* -** Extract the next token from buffer z (length n) using the tokenizer -** and other information (column names etc.) in pParse. Create an Fts3Expr -** structure of type FTSQUERY_PHRASE containing a phrase consisting of this -** single token and set *ppExpr to point to it. If the end of the buffer is -** reached before a token is found, set *ppExpr to zero. It is the -** responsibility of the caller to eventually deallocate the allocated -** Fts3Expr structure (if any) by passing it to sqlite3_free(). -** -** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation -** fails. -*/ -static int getNextToken( - ParseContext *pParse, /* fts3 query parse context */ - int iCol, /* Value for Fts3Phrase.iColumn */ - const char *z, int n, /* Input string */ - Fts3Expr **ppExpr, /* OUT: expression */ - int *pnConsumed /* OUT: Number of bytes consumed */ +SQLITE_PRIVATE int sqlite3Fts3InitTokenizer( + Fts3Hash *pHash, /* Tokenizer hash table */ + const char *zArg, /* Tokenizer name */ + sqlite3_tokenizer **ppTok, /* OUT: Tokenizer (if applicable) */ + char **pzErr /* OUT: Set to malloced error message */ ){ - sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; - sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; int rc; - sqlite3_tokenizer_cursor *pCursor; - Fts3Expr *pRet = 0; - int i = 0; - - /* Set variable i to the maximum number of bytes of input to tokenize. */ - for(i=0; iiLangid, z, i, &pCursor); - if( rc==SQLITE_OK ){ - const char *zToken; - int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; - int nByte; /* total space to allocate */ - - rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); - if( rc==SQLITE_OK ){ - nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; - pRet = (Fts3Expr *)fts3MallocZero(nByte); - if( !pRet ){ - rc = SQLITE_NOMEM; - }else{ - pRet->eType = FTSQUERY_PHRASE; - pRet->pPhrase = (Fts3Phrase *)&pRet[1]; - pRet->pPhrase->nToken = 1; - pRet->pPhrase->iColumn = iCol; - pRet->pPhrase->aToken[0].n = nToken; - pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; - memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); + char *z = (char *)zArg; + int n = 0; + char *zCopy; + char *zEnd; /* Pointer to nul-term of zCopy */ + sqlite3_tokenizer_module *m; - if( iEndpPhrase->aToken[0].isPrefix = 1; - iEnd++; - } + zCopy = sqlite3_mprintf("%s", zArg); + if( !zCopy ) return SQLITE_NOMEM; + zEnd = &zCopy[strlen(zCopy)]; - while( 1 ){ - if( !sqlite3_fts3_enable_parentheses - && iStart>0 && z[iStart-1]=='-' - ){ - pParse->isNot = 1; - iStart--; - }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){ - pRet->pPhrase->aToken[0].bFirst = 1; - iStart--; - }else{ - break; - } - } + z = (char *)sqlite3Fts3NextToken(zCopy, &n); + if( z==0 ){ + assert( n==0 ); + z = zCopy; + } + z[n] = '\0'; + sqlite3Fts3Dequote(z); + m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1); + if( !m ){ + sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", z); + rc = SQLITE_ERROR; + }else{ + char const **aArg = 0; + int iArg = 0; + z = &z[n+1]; + while( zxClose(pCursor); + rc = m->xCreate(iArg, aArg, ppTok); + assert( rc!=SQLITE_OK || *ppTok ); + if( rc!=SQLITE_OK ){ + sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer"); + }else{ + (*ppTok)->pModule = m; + } + sqlite3_free((void *)aArg); } - - *ppExpr = pRet; + + sqlite3_free(zCopy); return rc; } -/* -** Enlarge a memory allocation. If an out-of-memory allocation occurs, -** then free the old allocation. -*/ -static void *fts3ReallocOrFree(void *pOrig, int nNew){ - void *pRet = sqlite3_realloc(pOrig, nNew); - if( !pRet ){ - sqlite3_free(pOrig); - } - return pRet; -} +#ifdef SQLITE_TEST + +#include +/* #include */ /* -** Buffer zInput, length nInput, contains the contents of a quoted string -** that appeared as part of an fts3 query expression. Neither quote character -** is included in the buffer. This function attempts to tokenize the entire -** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE -** containing the results. +** Implementation of a special SQL scalar function for testing tokenizers +** designed to be used in concert with the Tcl testing framework. This +** function must be called with two or more arguments: ** -** If successful, SQLITE_OK is returned and *ppExpr set to point at the -** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory -** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set -** to 0. +** SELECT (, ..., ); +** +** where is the name passed as the second argument +** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer') +** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test'). +** +** The return value is a string that may be interpreted as a Tcl +** list. For each token in the , three elements are +** added to the returned list. The first is the token position, the +** second is the token text (folded, stemmed, etc.) and the third is the +** substring of associated with the token. For example, +** using the built-in "simple" tokenizer: +** +** SELECT fts_tokenizer_test('simple', 'I don't see how'); +** +** will return the string: +** +** "{0 i I 1 dont don't 2 see see 3 how how}" +** */ -static int getNextString( - ParseContext *pParse, /* fts3 query parse context */ - const char *zInput, int nInput, /* Input string */ - Fts3Expr **ppExpr /* OUT: expression */ +static void testFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv ){ - sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; - sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; - int rc; - Fts3Expr *p = 0; - sqlite3_tokenizer_cursor *pCursor = 0; - char *zTemp = 0; - int nTemp = 0; + Fts3Hash *pHash; + sqlite3_tokenizer_module *p; + sqlite3_tokenizer *pTokenizer = 0; + sqlite3_tokenizer_cursor *pCsr = 0; - const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase); + const char *zErr = 0; + + const char *zName; + int nName; + const char *zInput; + int nInput; + + const char *azArg[64]; + + const char *zToken; int nToken = 0; + int iStart = 0; + int iEnd = 0; + int iPos = 0; + int i; - /* The final Fts3Expr data structure, including the Fts3Phrase, - ** Fts3PhraseToken structures token buffers are all stored as a single - ** allocation so that the expression can be freed with a single call to - ** sqlite3_free(). Setting this up requires a two pass approach. - ** - ** The first pass, in the block below, uses a tokenizer cursor to iterate - ** through the tokens in the expression. This pass uses fts3ReallocOrFree() - ** to assemble data in two dynamic buffers: - ** - ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase - ** structure, followed by the array of Fts3PhraseToken - ** structures. This pass only populates the Fts3PhraseToken array. - ** - ** Buffer zTemp: Contains copies of all tokens. - ** - ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below, - ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase - ** structures. - */ - rc = sqlite3Fts3OpenTokenizer( - pTokenizer, pParse->iLangid, zInput, nInput, &pCursor); - if( rc==SQLITE_OK ){ - int ii; - for(ii=0; rc==SQLITE_OK; ii++){ - const char *zByte; - int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0; - rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos); - if( rc==SQLITE_OK ){ - Fts3PhraseToken *pToken; + Tcl_Obj *pRet; - p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken)); - if( !p ) goto no_mem; + if( argc<2 ){ + sqlite3_result_error(context, "insufficient arguments", -1); + return; + } - zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte); - if( !zTemp ) goto no_mem; + nName = sqlite3_value_bytes(argv[0]); + zName = (const char *)sqlite3_value_text(argv[0]); + nInput = sqlite3_value_bytes(argv[argc-1]); + zInput = (const char *)sqlite3_value_text(argv[argc-1]); - assert( nToken==ii ); - pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii]; - memset(pToken, 0, sizeof(Fts3PhraseToken)); + pHash = (Fts3Hash *)sqlite3_user_data(context); + p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); - memcpy(&zTemp[nTemp], zByte, nByte); - nTemp += nByte; + if( !p ){ + char *zErr2 = sqlite3_mprintf("unknown tokenizer: %s", zName); + sqlite3_result_error(context, zErr2, -1); + sqlite3_free(zErr2); + return; + } - pToken->n = nByte; - pToken->isPrefix = (iEndbFirst = (iBegin>0 && zInput[iBegin-1]=='^'); - nToken = ii+1; - } - } + pRet = Tcl_NewObj(); + Tcl_IncrRefCount(pRet); - pModule->xClose(pCursor); - pCursor = 0; + for(i=1; ixCreate(argc-2, azArg, &pTokenizer) ){ + zErr = "error in xCreate()"; + goto finish; + } + pTokenizer->pModule = p; + if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){ + zErr = "error in xOpen()"; + goto finish; + } - p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp); - if( !p ) goto no_mem; - memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p); - p->eType = FTSQUERY_PHRASE; - p->pPhrase = (Fts3Phrase *)&p[1]; - p->pPhrase->iColumn = pParse->iDefaultCol; - p->pPhrase->nToken = nToken; + while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){ + Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); + Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); + zToken = &zInput[iStart]; + nToken = iEnd-iStart; + Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); + } + + if( SQLITE_OK!=p->xClose(pCsr) ){ + zErr = "error in xClose()"; + goto finish; + } + if( SQLITE_OK!=p->xDestroy(pTokenizer) ){ + zErr = "error in xDestroy()"; + goto finish; + } + +finish: + if( zErr ){ + sqlite3_result_error(context, zErr, -1); + }else{ + sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT); + } + Tcl_DecrRefCount(pRet); +} + +static +int registerTokenizer( + sqlite3 *db, + char *zName, + const sqlite3_tokenizer_module *p +){ + int rc; + sqlite3_stmt *pStmt; + const char zSql[] = "SELECT fts3_tokenizer(?, ?)"; + + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + + sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); + sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC); + sqlite3_step(pStmt); + + return sqlite3_finalize(pStmt); +} + +static +int queryTokenizer( + sqlite3 *db, + char *zName, + const sqlite3_tokenizer_module **pp +){ + int rc; + sqlite3_stmt *pStmt; + const char zSql[] = "SELECT fts3_tokenizer(?)"; - zBuf = (char *)&p->pPhrase->aToken[nToken]; - if( zTemp ){ - memcpy(zBuf, zTemp, nTemp); - sqlite3_free(zTemp); - }else{ - assert( nTemp==0 ); - } + *pp = 0; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc!=SQLITE_OK ){ + return rc; + } - for(jj=0; jjpPhrase->nToken; jj++){ - p->pPhrase->aToken[jj].z = zBuf; - zBuf += p->pPhrase->aToken[jj].n; + sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ + memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); } - rc = SQLITE_OK; } - *ppExpr = p; - return rc; -no_mem: - - if( pCursor ){ - pModule->xClose(pCursor); - } - sqlite3_free(zTemp); - sqlite3_free(p); - *ppExpr = 0; - return SQLITE_NOMEM; + return sqlite3_finalize(pStmt); } +SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); + /* -** The output variable *ppExpr is populated with an allocated Fts3Expr -** structure, or set to 0 if the end of the input buffer is reached. +** Implementation of the scalar function fts3_tokenizer_internal_test(). +** This function is used for testing only, it is not included in the +** build unless SQLITE_TEST is defined. +** +** The purpose of this is to test that the fts3_tokenizer() function +** can be used as designed by the C-code in the queryTokenizer and +** registerTokenizer() functions above. These two functions are repeated +** in the README.tokenizer file as an example, so it is important to +** test them. +** +** To run the tests, evaluate the fts3_tokenizer_internal_test() scalar +** function with no arguments. An assert() will fail if a problem is +** detected. i.e.: +** +** SELECT fts3_tokenizer_internal_test(); ** -** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM -** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered. -** If SQLITE_ERROR is returned, pContext is populated with an error message. */ -static int getNextNode( - ParseContext *pParse, /* fts3 query parse context */ - const char *z, int n, /* Input string */ - Fts3Expr **ppExpr, /* OUT: expression */ - int *pnConsumed /* OUT: Number of bytes consumed */ +static void intTestFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv ){ - static const struct Fts3Keyword { - char *z; /* Keyword text */ - unsigned char n; /* Length of the keyword */ - unsigned char parenOnly; /* Only valid in paren mode */ - unsigned char eType; /* Keyword code */ - } aKeyword[] = { - { "OR" , 2, 0, FTSQUERY_OR }, - { "AND", 3, 1, FTSQUERY_AND }, - { "NOT", 3, 1, FTSQUERY_NOT }, - { "NEAR", 4, 0, FTSQUERY_NEAR } - }; - int ii; - int iCol; - int iColLen; int rc; - Fts3Expr *pRet = 0; - - const char *zInput = z; - int nInput = n; - - pParse->isNot = 0; + const sqlite3_tokenizer_module *p1; + const sqlite3_tokenizer_module *p2; + sqlite3 *db = (sqlite3 *)sqlite3_user_data(context); - /* Skip over any whitespace before checking for a keyword, an open or - ** close bracket, or a quoted string. - */ - while( nInput>0 && fts3isspace(*zInput) ){ - nInput--; - zInput++; - } - if( nInput==0 ){ - return SQLITE_DONE; - } + UNUSED_PARAMETER(argc); + UNUSED_PARAMETER(argv); - /* See if we are dealing with a keyword. */ - for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){ - const struct Fts3Keyword *pKey = &aKeyword[ii]; + /* Test the query function */ + sqlite3Fts3SimpleTokenizerModule(&p1); + rc = queryTokenizer(db, "simple", &p2); + assert( rc==SQLITE_OK ); + assert( p1==p2 ); + rc = queryTokenizer(db, "nosuchtokenizer", &p2); + assert( rc==SQLITE_ERROR ); + assert( p2==0 ); + assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") ); - if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){ - continue; - } + /* Test the storage function */ + rc = registerTokenizer(db, "nosuchtokenizer", p1); + assert( rc==SQLITE_OK ); + rc = queryTokenizer(db, "nosuchtokenizer", &p2); + assert( rc==SQLITE_OK ); + assert( p2==p1 ); - if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){ - int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM; - int nKey = pKey->n; - char cNext; + sqlite3_result_text(context, "ok", -1, SQLITE_STATIC); +} - /* If this is a "NEAR" keyword, check for an explicit nearness. */ - if( pKey->eType==FTSQUERY_NEAR ){ - assert( nKey==4 ); - if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){ - nNear = 0; - for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){ - nNear = nNear * 10 + (zInput[nKey] - '0'); - } - } - } +#endif - /* At this point this is probably a keyword. But for that to be true, - ** the next byte must contain either whitespace, an open or close - ** parenthesis, a quote character, or EOF. - */ - cNext = zInput[nKey]; - if( fts3isspace(cNext) - || cNext=='"' || cNext=='(' || cNext==')' || cNext==0 - ){ - pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr)); - if( !pRet ){ - return SQLITE_NOMEM; - } - pRet->eType = pKey->eType; - pRet->nNear = nNear; - *ppExpr = pRet; - *pnConsumed = (int)((zInput - z) + nKey); - return SQLITE_OK; - } +/* +** Set up SQL objects in database db used to access the contents of +** the hash table pointed to by argument pHash. The hash table must +** been initialized to use string keys, and to take a private copy +** of the key when a value is inserted. i.e. by a call similar to: +** +** sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); +** +** This function adds a scalar function (see header comment above +** scalarFunc() in this file for details) and, if ENABLE_TABLE is +** defined at compilation time, a temporary virtual table (see header +** comment above struct HashTableVtab) to the database schema. Both +** provide read/write access to the contents of *pHash. +** +** The third argument to this function, zName, is used as the name +** of both the scalar and, if created, the virtual table. +*/ +SQLITE_PRIVATE int sqlite3Fts3InitHashTable( + sqlite3 *db, + Fts3Hash *pHash, + const char *zName +){ + int rc = SQLITE_OK; + void *p = (void *)pHash; + const int any = SQLITE_ANY; - /* Turns out that wasn't a keyword after all. This happens if the - ** user has supplied a token such as "ORacle". Continue. - */ - } +#ifdef SQLITE_TEST + char *zTest = 0; + char *zTest2 = 0; + void *pdb = (void *)db; + zTest = sqlite3_mprintf("%s_test", zName); + zTest2 = sqlite3_mprintf("%s_internal_test", zName); + if( !zTest || !zTest2 ){ + rc = SQLITE_NOMEM; } +#endif - /* See if we are dealing with a quoted phrase. If this is the case, then - ** search for the closing quote and pass the whole string to getNextString() - ** for processing. This is easy to do, as fts3 has no syntax for escaping - ** a quote character embedded in a string. - */ - if( *zInput=='"' ){ - for(ii=1; iinNest++; - rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed); - if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } - *pnConsumed = (int)(zInput - z) + 1 + nConsumed; - return rc; - }else if( *zInput==')' ){ - pParse->nNest--; - *pnConsumed = (int)((zInput - z) + 1); - *ppExpr = 0; - return SQLITE_DONE; - } + if( SQLITE_OK==rc ){ + rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0); } - - /* If control flows to this point, this must be a regular token, or - ** the end of the input. Read a regular token using the sqlite3_tokenizer - ** interface. Before doing so, figure out if there is an explicit - ** column specifier for the token. - ** - ** TODO: Strangely, it is not possible to associate a column specifier - ** with a quoted phrase, only with a single token. Not sure if this was - ** an implementation artifact or an intentional decision when fts3 was - ** first implemented. Whichever it was, this module duplicates the - ** limitation. - */ - iCol = pParse->iDefaultCol; - iColLen = 0; - for(ii=0; iinCol; ii++){ - const char *zStr = pParse->azCol[ii]; - int nStr = (int)strlen(zStr); - if( nInput>nStr && zInput[nStr]==':' - && sqlite3_strnicmp(zStr, zInput, nStr)==0 - ){ - iCol = ii; - iColLen = (int)((zInput - z) + nStr + 1); - break; - } +#ifdef SQLITE_TEST + if( SQLITE_OK==rc ){ + rc = sqlite3_create_function(db, zTest, -1, any, p, testFunc, 0, 0); } - rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed); - *pnConsumed += iColLen; + if( SQLITE_OK==rc ){ + rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0); + } +#endif + +#ifdef SQLITE_TEST + sqlite3_free(zTest); + sqlite3_free(zTest2); +#endif + return rc; } +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + +/************** End of fts3_tokenizer.c **************************************/ +/************** Begin file fts3_tokenizer1.c *********************************/ /* -** The argument is an Fts3Expr structure for a binary operator (any type -** except an FTSQUERY_PHRASE). Return an integer value representing the -** precedence of the operator. Lower values have a higher precedence (i.e. -** group more tightly). For example, in the C language, the == operator -** groups more tightly than ||, and would therefore have a higher precedence. +** 2006 Oct 10 ** -** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS -** is defined), the order of the operators in precedence from highest to -** lowest is: +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: ** -** NEAR -** NOT -** AND (including implicit ANDs) -** OR +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. ** -** Note that when using the old query syntax, the OR operator has a higher -** precedence than the AND operator. +****************************************************************************** +** +** Implementation of the "simple" full-text-search tokenizer. */ -static int opPrecedence(Fts3Expr *p){ - assert( p->eType!=FTSQUERY_PHRASE ); - if( sqlite3_fts3_enable_parentheses ){ - return p->eType; - }else if( p->eType==FTSQUERY_NEAR ){ - return 1; - }else if( p->eType==FTSQUERY_OR ){ - return 2; - } - assert( p->eType==FTSQUERY_AND ); - return 3; -} /* -** Argument ppHead contains a pointer to the current head of a query -** expression tree being parsed. pPrev is the expression node most recently -** inserted into the tree. This function adds pNew, which is always a binary -** operator node, into the expression tree based on the relative precedence -** of pNew and the existing nodes of the tree. This may result in the head -** of the tree changing, in which case *ppHead is set to the new root node. +** The code in this file is only compiled if: +** +** * The FTS3 module is being built as an extension +** (in which case SQLITE_CORE is not defined), or +** +** * The FTS3 module is being built into the core of +** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). */ -static void insertBinaryOperator( - Fts3Expr **ppHead, /* Pointer to the root node of a tree */ - Fts3Expr *pPrev, /* Node most recently inserted into the tree */ - Fts3Expr *pNew /* New binary node to insert into expression tree */ -){ - Fts3Expr *pSplit = pPrev; - while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){ - pSplit = pSplit->pParent; - } +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) - if( pSplit->pParent ){ - assert( pSplit->pParent->pRight==pSplit ); - pSplit->pParent->pRight = pNew; - pNew->pParent = pSplit->pParent; - }else{ - *ppHead = pNew; - } - pNew->pLeft = pSplit; - pSplit->pParent = pNew; -} +/* #include */ +/* #include */ +/* #include */ +/* #include */ -/* -** Parse the fts3 query expression found in buffer z, length n. This function -** returns either when the end of the buffer is reached or an unmatched -** closing bracket - ')' - is encountered. -** -** If successful, SQLITE_OK is returned, *ppExpr is set to point to the -** parsed form of the expression and *pnConsumed is set to the number of -** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM -** (out of memory error) or SQLITE_ERROR (parse error) is returned. -*/ -static int fts3ExprParse( - ParseContext *pParse, /* fts3 query parse context */ - const char *z, int n, /* Text of MATCH query */ - Fts3Expr **ppExpr, /* OUT: Parsed query structure */ - int *pnConsumed /* OUT: Number of bytes consumed */ -){ - Fts3Expr *pRet = 0; - Fts3Expr *pPrev = 0; - Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */ - int nIn = n; - const char *zIn = z; - int rc = SQLITE_OK; - int isRequirePhrase = 1; +/* #include "fts3_tokenizer.h" */ - while( rc==SQLITE_OK ){ - Fts3Expr *p = 0; - int nByte = 0; +typedef struct simple_tokenizer { + sqlite3_tokenizer base; + char delim[128]; /* flag ASCII delimiters */ +} simple_tokenizer; - rc = getNextNode(pParse, zIn, nIn, &p, &nByte); - assert( nByte>0 || (rc!=SQLITE_OK && p==0) ); - if( rc==SQLITE_OK ){ - if( p ){ - int isPhrase; +typedef struct simple_tokenizer_cursor { + sqlite3_tokenizer_cursor base; + const char *pInput; /* input we are tokenizing */ + int nBytes; /* size of the input */ + int iOffset; /* current position in pInput */ + int iToken; /* index of next token to be returned */ + char *pToken; /* storage for current token */ + int nTokenAllocated; /* space allocated to zToken buffer */ +} simple_tokenizer_cursor; - if( !sqlite3_fts3_enable_parentheses - && p->eType==FTSQUERY_PHRASE && pParse->isNot - ){ - /* Create an implicit NOT operator. */ - Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); - if( !pNot ){ - sqlite3Fts3ExprFree(p); - rc = SQLITE_NOMEM; - goto exprparse_out; - } - pNot->eType = FTSQUERY_NOT; - pNot->pRight = p; - p->pParent = pNot; - if( pNotBranch ){ - pNot->pLeft = pNotBranch; - pNotBranch->pParent = pNot; - } - pNotBranch = pNot; - p = pPrev; - }else{ - int eType = p->eType; - isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); - /* The isRequirePhrase variable is set to true if a phrase or - ** an expression contained in parenthesis is required. If a - ** binary operator (AND, OR, NOT or NEAR) is encounted when - ** isRequirePhrase is set, this is a syntax error. - */ - if( !isPhrase && isRequirePhrase ){ - sqlite3Fts3ExprFree(p); - rc = SQLITE_ERROR; - goto exprparse_out; - } +static int simpleDelim(simple_tokenizer *t, unsigned char c){ + return c<0x80 && t->delim[c]; +} +static int fts3_isalnum(int x){ + return (x>='0' && x<='9') || (x>='A' && x<='Z') || (x>='a' && x<='z'); +} - if( isPhrase && !isRequirePhrase ){ - /* Insert an implicit AND operator. */ - Fts3Expr *pAnd; - assert( pRet && pPrev ); - pAnd = fts3MallocZero(sizeof(Fts3Expr)); - if( !pAnd ){ - sqlite3Fts3ExprFree(p); - rc = SQLITE_NOMEM; - goto exprparse_out; - } - pAnd->eType = FTSQUERY_AND; - insertBinaryOperator(&pRet, pPrev, pAnd); - pPrev = pAnd; - } +/* +** Create a new tokenizer instance. +*/ +static int simpleCreate( + int argc, const char * const *argv, + sqlite3_tokenizer **ppTokenizer +){ + simple_tokenizer *t; - /* This test catches attempts to make either operand of a NEAR - ** operator something other than a phrase. For example, either of - ** the following: - ** - ** (bracketed expression) NEAR phrase - ** phrase NEAR (bracketed expression) - ** - ** Return an error in either case. - */ - if( pPrev && ( - (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) - || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) - )){ - sqlite3Fts3ExprFree(p); - rc = SQLITE_ERROR; - goto exprparse_out; - } + t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t)); + if( t==NULL ) return SQLITE_NOMEM; + memset(t, 0, sizeof(*t)); - if( isPhrase ){ - if( pRet ){ - assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); - pPrev->pRight = p; - p->pParent = pPrev; - }else{ - pRet = p; - } - }else{ - insertBinaryOperator(&pRet, pPrev, p); - } - isRequirePhrase = !isPhrase; - } - pPrev = p; + /* TODO(shess) Delimiters need to remain the same from run to run, + ** else we need to reindex. One solution would be a meta-table to + ** track such information in the database, then we'd only want this + ** information on the initial create. + */ + if( argc>1 ){ + int i, n = (int)strlen(argv[1]); + for(i=0; i=0x80 ){ + sqlite3_free(t); + return SQLITE_ERROR; } - assert( nByte>0 ); + t->delim[ch] = 1; + } + } else { + /* Mark non-alphanumeric ASCII characters as delimiters */ + int i; + for(i=1; i<0x80; i++){ + t->delim[i] = !fts3_isalnum(i) ? -1 : 0; } - assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); - nIn -= nByte; - zIn += nByte; } - if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ - rc = SQLITE_ERROR; - } + *ppTokenizer = &t->base; + return SQLITE_OK; +} - if( rc==SQLITE_DONE ){ - rc = SQLITE_OK; - if( !sqlite3_fts3_enable_parentheses && pNotBranch ){ - if( !pRet ){ - rc = SQLITE_ERROR; - }else{ - Fts3Expr *pIter = pNotBranch; - while( pIter->pLeft ){ - pIter = pIter->pLeft; - } - pIter->pLeft = pRet; - pRet->pParent = pIter; - pRet = pNotBranch; - } - } - } - *pnConsumed = n - nIn; +/* +** Destroy a tokenizer +*/ +static int simpleDestroy(sqlite3_tokenizer *pTokenizer){ + sqlite3_free(pTokenizer); + return SQLITE_OK; +} -exprparse_out: - if( rc!=SQLITE_OK ){ - sqlite3Fts3ExprFree(pRet); - sqlite3Fts3ExprFree(pNotBranch); - pRet = 0; +/* +** Prepare to begin tokenizing a particular string. The input +** string to be tokenized is pInput[0..nBytes-1]. A cursor +** used to incrementally tokenize this string is returned in +** *ppCursor. +*/ +static int simpleOpen( + sqlite3_tokenizer *pTokenizer, /* The tokenizer */ + const char *pInput, int nBytes, /* String to be tokenized */ + sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ +){ + simple_tokenizer_cursor *c; + + UNUSED_PARAMETER(pTokenizer); + + c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); + if( c==NULL ) return SQLITE_NOMEM; + + c->pInput = pInput; + if( pInput==0 ){ + c->nBytes = 0; + }else if( nBytes<0 ){ + c->nBytes = (int)strlen(pInput); + }else{ + c->nBytes = nBytes; } - *ppExpr = pRet; - return rc; + c->iOffset = 0; /* start tokenizing at the beginning */ + c->iToken = 0; + c->pToken = NULL; /* no space allocated, yet. */ + c->nTokenAllocated = 0; + + *ppCursor = &c->base; + return SQLITE_OK; } /* -** Return SQLITE_ERROR if the maximum depth of the expression tree passed -** as the only argument is more than nMaxDepth. +** Close a tokenization cursor previously opened by a call to +** simpleOpen() above. */ -static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){ - int rc = SQLITE_OK; - if( p ){ - if( nMaxDepth<0 ){ - rc = SQLITE_TOOBIG; - }else{ - rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1); - if( rc==SQLITE_OK ){ - rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1); - } - } - } - return rc; +static int simpleClose(sqlite3_tokenizer_cursor *pCursor){ + simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; + sqlite3_free(c->pToken); + sqlite3_free(c); + return SQLITE_OK; } /* -** This function attempts to transform the expression tree at (*pp) to -** an equivalent but more balanced form. The tree is modified in place. -** If successful, SQLITE_OK is returned and (*pp) set to point to the -** new root expression node. -** -** nMaxDepth is the maximum allowable depth of the balanced sub-tree. -** -** Otherwise, if an error occurs, an SQLite error code is returned and -** expression (*pp) freed. +** Extract the next token from a tokenization cursor. The cursor must +** have been opened by a prior call to simpleOpen(). */ -static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ - int rc = SQLITE_OK; /* Return code */ - Fts3Expr *pRoot = *pp; /* Initial root node */ - Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */ - int eType = pRoot->eType; /* Type of node in this tree */ +static int simpleNext( + sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ + const char **ppToken, /* OUT: *ppToken is the token text */ + int *pnBytes, /* OUT: Number of bytes in token */ + int *piStartOffset, /* OUT: Starting offset of token */ + int *piEndOffset, /* OUT: Ending offset of token */ + int *piPosition /* OUT: Position integer of token */ +){ + simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; + simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer; + unsigned char *p = (unsigned char *)c->pInput; - if( nMaxDepth==0 ){ - rc = SQLITE_ERROR; - } + while( c->iOffsetnBytes ){ + int iStartOffset; - if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ - Fts3Expr **apLeaf; - apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); - if( 0==apLeaf ){ - rc = SQLITE_NOMEM; - }else{ - memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); + /* Scan past delimiter characters */ + while( c->iOffsetnBytes && simpleDelim(t, p[c->iOffset]) ){ + c->iOffset++; } - if( rc==SQLITE_OK ){ - int i; - Fts3Expr *p; + /* Count non-delimiter characters. */ + iStartOffset = c->iOffset; + while( c->iOffsetnBytes && !simpleDelim(t, p[c->iOffset]) ){ + c->iOffset++; + } - /* Set $p to point to the left-most leaf in the tree of eType nodes. */ - for(p=pRoot; p->eType==eType; p=p->pLeft){ - assert( p->pParent==0 || p->pParent->pLeft==p ); - assert( p->pLeft && p->pRight ); + if( c->iOffset>iStartOffset ){ + int i, n = c->iOffset-iStartOffset; + if( n>c->nTokenAllocated ){ + char *pNew; + c->nTokenAllocated = n+20; + pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated); + if( !pNew ) return SQLITE_NOMEM; + c->pToken = pNew; + } + for(i=0; ipToken[i] = (char)((ch>='A' && ch<='Z') ? ch-'A'+'a' : ch); } + *ppToken = c->pToken; + *pnBytes = n; + *piStartOffset = iStartOffset; + *piEndOffset = c->iOffset; + *piPosition = c->iToken++; - /* This loop runs once for each leaf in the tree of eType nodes. */ - while( 1 ){ - int iLvl; - Fts3Expr *pParent = p->pParent; /* Current parent of p */ + return SQLITE_OK; + } + } + return SQLITE_DONE; +} - assert( pParent==0 || pParent->pLeft==p ); - p->pParent = 0; - if( pParent ){ - pParent->pLeft = 0; - }else{ - pRoot = 0; - } - rc = fts3ExprBalance(&p, nMaxDepth-1); - if( rc!=SQLITE_OK ) break; +/* +** The set of routines that implement the simple tokenizer +*/ +static const sqlite3_tokenizer_module simpleTokenizerModule = { + 0, + simpleCreate, + simpleDestroy, + simpleOpen, + simpleClose, + simpleNext, + 0, +}; - for(iLvl=0; p && iLvlpLeft = apLeaf[iLvl]; - pFree->pRight = p; - pFree->pLeft->pParent = pFree; - pFree->pRight->pParent = pFree; +/* +** Allocate a new simple tokenizer. Return a pointer to the new +** tokenizer in *ppModule +*/ +SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule( + sqlite3_tokenizer_module const**ppModule +){ + *ppModule = &simpleTokenizerModule; +} - p = pFree; - pFree = pFree->pParent; - p->pParent = 0; - apLeaf[iLvl] = 0; - } - } - if( p ){ - sqlite3Fts3ExprFree(p); - rc = SQLITE_TOOBIG; - break; - } +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ - /* If that was the last leaf node, break out of the loop */ - if( pParent==0 ) break; +/************** End of fts3_tokenizer1.c *************************************/ +/************** Begin file fts3_tokenize_vtab.c ******************************/ +/* +** 2013 Apr 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code for the "fts3tokenize" virtual table module. +** An fts3tokenize virtual table is created as follows: +** +** CREATE VIRTUAL TABLE USING fts3tokenize( +** , , ... +** ); +** +** The table created has the following schema: +** +** CREATE TABLE (input, token, start, end, position) +** +** When queried, the query must include a WHERE clause of type: +** +** input = +** +** The virtual table module tokenizes this , using the FTS3 +** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE +** statement and returns one row for each token in the result. With +** fields set as follows: +** +** input: Always set to a copy of +** token: A token from the input. +** start: Byte offset of the token within the input . +** end: Byte offset of the byte immediately following the end of the +** token within the input string. +** pos: Token offset of token within input. +** +*/ +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) - /* Set $p to point to the next leaf in the tree of eType nodes */ - for(p=pParent->pRight; p->eType==eType; p=p->pLeft); +/* #include */ +/* #include */ - /* Remove pParent from the original tree. */ - assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); - pParent->pRight->pParent = pParent->pParent; - if( pParent->pParent ){ - pParent->pParent->pLeft = pParent->pRight; - }else{ - assert( pParent==pRoot ); - pRoot = pParent->pRight; - } +typedef struct Fts3tokTable Fts3tokTable; +typedef struct Fts3tokCursor Fts3tokCursor; - /* Link pParent into the free node list. It will be used as an - ** internal node of the new tree. */ - pParent->pParent = pFree; - pFree = pParent; - } +/* +** Virtual table structure. +*/ +struct Fts3tokTable { + sqlite3_vtab base; /* Base class used by SQLite core */ + const sqlite3_tokenizer_module *pMod; + sqlite3_tokenizer *pTok; +}; - if( rc==SQLITE_OK ){ - p = 0; - for(i=0; ipParent = 0; - }else{ - assert( pFree!=0 ); - pFree->pRight = p; - pFree->pLeft = apLeaf[i]; - pFree->pLeft->pParent = pFree; - pFree->pRight->pParent = pFree; +/* +** Virtual table cursor structure. +*/ +struct Fts3tokCursor { + sqlite3_vtab_cursor base; /* Base class used by SQLite core */ + char *zInput; /* Input string */ + sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */ + int iRowid; /* Current 'rowid' value */ + const char *zToken; /* Current 'token' value */ + int nToken; /* Size of zToken in bytes */ + int iStart; /* Current 'start' value */ + int iEnd; /* Current 'end' value */ + int iPos; /* Current 'pos' value */ +}; - p = pFree; - pFree = pFree->pParent; - p->pParent = 0; - } - } - } - pRoot = p; - }else{ - /* An error occurred. Delete the contents of the apLeaf[] array - ** and pFree list. Everything else is cleaned up by the call to - ** sqlite3Fts3ExprFree(pRoot) below. */ - Fts3Expr *pDel; - for(i=0; ipParent; - sqlite3_free(pDel); - } - } +/* +** Query FTS for the tokenizer implementation named zName. +*/ +static int fts3tokQueryTokenizer( + Fts3Hash *pHash, + const char *zName, + const sqlite3_tokenizer_module **pp, + char **pzErr +){ + sqlite3_tokenizer_module *p; + int nName = (int)strlen(zName); - assert( pFree==0 ); - sqlite3_free( apLeaf ); - } + p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); + if( !p ){ + sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", zName); + return SQLITE_ERROR; } - if( rc!=SQLITE_OK ){ - sqlite3Fts3ExprFree(pRoot); - pRoot = 0; - } - *pp = pRoot; - return rc; + *pp = p; + return SQLITE_OK; } /* -** This function is similar to sqlite3Fts3ExprParse(), with the following -** differences: +** The second argument, argv[], is an array of pointers to nul-terminated +** strings. This function makes a copy of the array and strings into a +** single block of memory. It then dequotes any of the strings that appear +** to be quoted. ** -** 1. It does not do expression rebalancing. -** 2. It does not check that the expression does not exceed the -** maximum allowable depth. -** 3. Even if it fails, *ppExpr may still be set to point to an -** expression tree. It should be deleted using sqlite3Fts3ExprFree() -** in this case. +** If successful, output parameter *pazDequote is set to point at the +** array of dequoted strings and SQLITE_OK is returned. The caller is +** responsible for eventually calling sqlite3_free() to free the array +** in this case. Or, if an error occurs, an SQLite error code is returned. +** The final value of *pazDequote is undefined in this case. */ -static int fts3ExprParseUnbalanced( - sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ - int iLangid, /* Language id for tokenizer */ - char **azCol, /* Array of column names for fts3 table */ - int bFts4, /* True to allow FTS4-only syntax */ - int nCol, /* Number of entries in azCol[] */ - int iDefaultCol, /* Default column to query */ - const char *z, int n, /* Text of MATCH query */ - Fts3Expr **ppExpr /* OUT: Parsed query structure */ +static int fts3tokDequoteArray( + int argc, /* Number of elements in argv[] */ + const char * const *argv, /* Input array */ + char ***pazDequote /* Output array */ ){ - int nParsed; - int rc; - ParseContext sParse; + int rc = SQLITE_OK; /* Return code */ + if( argc==0 ){ + *pazDequote = 0; + }else{ + int i; + int nByte = 0; + char **azDequote; - memset(&sParse, 0, sizeof(ParseContext)); - sParse.pTokenizer = pTokenizer; - sParse.iLangid = iLangid; - sParse.azCol = (const char **)azCol; - sParse.nCol = nCol; - sParse.iDefaultCol = iDefaultCol; - sParse.bFts4 = bFts4; - if( z==0 ){ - *ppExpr = 0; - return SQLITE_OK; - } - if( n<0 ){ - n = (int)strlen(z); - } - rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); - assert( rc==SQLITE_OK || *ppExpr==0 ); + for(i=0; ixCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok); + } + + if( rc==SQLITE_OK ){ + pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable)); + if( pTab==0 ){ + rc = SQLITE_NOMEM; + } + } + + if( rc==SQLITE_OK ){ + memset(pTab, 0, sizeof(Fts3tokTable)); + pTab->pMod = pMod; + pTab->pTok = pTok; + *ppVtab = &pTab->base; + }else{ + if( pTok ){ + pMod->xDestroy(pTok); } } + sqlite3_free(azDequote); return rc; } /* -** Free a single node of an expression tree. +** This function does the work for both the xDisconnect and xDestroy methods. +** These tables have no persistent representation of their own, so xDisconnect +** and xDestroy are identical operations. */ -static void fts3FreeExprNode(Fts3Expr *p){ - assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); - sqlite3Fts3EvalPhraseCleanup(p->pPhrase); - sqlite3_free(p->aMI); - sqlite3_free(p); +static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){ + Fts3tokTable *pTab = (Fts3tokTable *)pVtab; + + pTab->pMod->xDestroy(pTab->pTok); + sqlite3_free(pTab); + return SQLITE_OK; } /* -** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). -** -** This function would be simpler if it recursively called itself. But -** that would mean passing a sufficiently large expression to ExprParse() -** could cause a stack overflow. +** xBestIndex - Analyze a WHERE and ORDER BY clause. */ -SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *pDel){ - Fts3Expr *p; - assert( pDel==0 || pDel->pParent==0 ); - for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ - assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); - } - while( p ){ - Fts3Expr *pParent = p->pParent; - fts3FreeExprNode(p); - if( pParent && p==pParent->pLeft && pParent->pRight ){ - p = pParent->pRight; - while( p && (p->pLeft || p->pRight) ){ - assert( p==p->pParent->pRight || p==p->pParent->pLeft ); - p = (p->pLeft ? p->pLeft : p->pRight); - } - }else{ - p = pParent; +static int fts3tokBestIndexMethod( + sqlite3_vtab *pVTab, + sqlite3_index_info *pInfo +){ + int i; + UNUSED_PARAMETER(pVTab); + + for(i=0; inConstraint; i++){ + if( pInfo->aConstraint[i].usable + && pInfo->aConstraint[i].iColumn==0 + && pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + pInfo->idxNum = 1; + pInfo->aConstraintUsage[i].argvIndex = 1; + pInfo->aConstraintUsage[i].omit = 1; + pInfo->estimatedCost = 1; + return SQLITE_OK; } } + + pInfo->idxNum = 0; + assert( pInfo->estimatedCost>1000000.0 ); + + return SQLITE_OK; } -/**************************************************************************** -***************************************************************************** -** Everything after this point is just test code. +/* +** xOpen - Open a cursor. */ +static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ + Fts3tokCursor *pCsr; + UNUSED_PARAMETER(pVTab); -#ifdef SQLITE_TEST + pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor)); + if( pCsr==0 ){ + return SQLITE_NOMEM; + } + memset(pCsr, 0, sizeof(Fts3tokCursor)); -/* #include */ + *ppCsr = (sqlite3_vtab_cursor *)pCsr; + return SQLITE_OK; +} /* -** Function to query the hash-table of tokenizers (see README.tokenizers). +** Reset the tokenizer cursor passed as the only argument. As if it had +** just been returned by fts3tokOpenMethod(). */ -static int queryTestTokenizer( - sqlite3 *db, - const char *zName, - const sqlite3_tokenizer_module **pp -){ - int rc; - sqlite3_stmt *pStmt; - const char zSql[] = "SELECT fts3_tokenizer(?)"; - - *pp = 0; - rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); - if( rc!=SQLITE_OK ){ - return rc; +static void fts3tokResetCursor(Fts3tokCursor *pCsr){ + if( pCsr->pCsr ){ + Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab); + pTab->pMod->xClose(pCsr->pCsr); + pCsr->pCsr = 0; } + sqlite3_free(pCsr->zInput); + pCsr->zInput = 0; + pCsr->zToken = 0; + pCsr->nToken = 0; + pCsr->iStart = 0; + pCsr->iEnd = 0; + pCsr->iPos = 0; + pCsr->iRowid = 0; +} - sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ - memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); - } - } +/* +** xClose - Close a cursor. +*/ +static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; - return sqlite3_finalize(pStmt); + fts3tokResetCursor(pCsr); + sqlite3_free(pCsr); + return SQLITE_OK; } /* -** Return a pointer to a buffer containing a text representation of the -** expression passed as the first argument. The buffer is obtained from -** sqlite3_malloc(). It is the responsibility of the caller to use -** sqlite3_free() to release the memory. If an OOM condition is encountered, -** NULL is returned. -** -** If the second argument is not NULL, then its contents are prepended to -** the returned expression text and then freed using sqlite3_free(). +** xNext - Advance the cursor to the next row, if any. */ -static char *exprToString(Fts3Expr *pExpr, char *zBuf){ - if( pExpr==0 ){ - return sqlite3_mprintf(""); - } - switch( pExpr->eType ){ - case FTSQUERY_PHRASE: { - Fts3Phrase *pPhrase = pExpr->pPhrase; - int i; - zBuf = sqlite3_mprintf( - "%zPHRASE %d 0", zBuf, pPhrase->iColumn); - for(i=0; zBuf && inToken; i++){ - zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, - pPhrase->aToken[i].n, pPhrase->aToken[i].z, - (pPhrase->aToken[i].isPrefix?"+":"") - ); - } - return zBuf; - } - - case FTSQUERY_NEAR: - zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear); - break; - case FTSQUERY_NOT: - zBuf = sqlite3_mprintf("%zNOT ", zBuf); - break; - case FTSQUERY_AND: - zBuf = sqlite3_mprintf("%zAND ", zBuf); - break; - case FTSQUERY_OR: - zBuf = sqlite3_mprintf("%zOR ", zBuf); - break; - } +static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); + int rc; /* Return code */ - if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf); - if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf); - if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf); + pCsr->iRowid++; + rc = pTab->pMod->xNext(pCsr->pCsr, + &pCsr->zToken, &pCsr->nToken, + &pCsr->iStart, &pCsr->iEnd, &pCsr->iPos + ); - if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf); - if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf); + if( rc!=SQLITE_OK ){ + fts3tokResetCursor(pCsr); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } - return zBuf; + return rc; } /* -** This is the implementation of a scalar SQL function used to test the -** expression parser. It should be called as follows: -** -** fts3_exprtest(, , , ...); -** -** The first argument, , is the name of the fts3 tokenizer used -** to parse the query expression (see README.tokenizers). The second argument -** is the query expression to parse. Each subsequent argument is the name -** of a column of the fts3 table that the query expression may refer to. -** For example: -** -** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2'); +** xFilter - Initialize a cursor to point at the start of its data. */ -static void fts3ExprTest( - sqlite3_context *context, - int argc, - sqlite3_value **argv +static int fts3tokFilterMethod( + sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ + int idxNum, /* Strategy index */ + const char *idxStr, /* Unused */ + int nVal, /* Number of elements in apVal */ + sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ - sqlite3_tokenizer_module const *pModule = 0; - sqlite3_tokenizer *pTokenizer = 0; - int rc; - char **azCol = 0; - const char *zExpr; - int nExpr; - int nCol; - int ii; - Fts3Expr *pExpr; - char *zBuf = 0; - sqlite3 *db = sqlite3_context_db_handle(context); - - if( argc<3 ){ - sqlite3_result_error(context, - "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 - ); - return; - } + int rc = SQLITE_ERROR; + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); + UNUSED_PARAMETER(idxStr); + UNUSED_PARAMETER(nVal); - rc = queryTestTokenizer(db, - (const char *)sqlite3_value_text(argv[0]), &pModule); - if( rc==SQLITE_NOMEM ){ - sqlite3_result_error_nomem(context); - goto exprtest_out; - }else if( !pModule ){ - sqlite3_result_error(context, "No such tokenizer module", -1); - goto exprtest_out; + fts3tokResetCursor(pCsr); + if( idxNum==1 ){ + const char *zByte = (const char *)sqlite3_value_text(apVal[0]); + int nByte = sqlite3_value_bytes(apVal[0]); + pCsr->zInput = sqlite3_malloc(nByte+1); + if( pCsr->zInput==0 ){ + rc = SQLITE_NOMEM; + }else{ + memcpy(pCsr->zInput, zByte, nByte); + pCsr->zInput[nByte] = 0; + rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr); + if( rc==SQLITE_OK ){ + pCsr->pCsr->pTokenizer = pTab->pTok; + } + } } - rc = pModule->xCreate(0, 0, &pTokenizer); - assert( rc==SQLITE_NOMEM || rc==SQLITE_OK ); - if( rc==SQLITE_NOMEM ){ - sqlite3_result_error_nomem(context); - goto exprtest_out; - } - pTokenizer->pModule = pModule; + if( rc!=SQLITE_OK ) return rc; + return fts3tokNextMethod(pCursor); +} - zExpr = (const char *)sqlite3_value_text(argv[1]); - nExpr = sqlite3_value_bytes(argv[1]); - nCol = argc-2; - azCol = (char **)sqlite3_malloc(nCol*sizeof(char *)); - if( !azCol ){ - sqlite3_result_error_nomem(context); - goto exprtest_out; - } - for(ii=0; iizToken==0); +} - if( sqlite3_user_data(context) ){ - char *zDummy = 0; - rc = sqlite3Fts3ExprParse( - pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy - ); - assert( rc==SQLITE_OK || pExpr==0 ); - sqlite3_free(zDummy); - }else{ - rc = fts3ExprParseUnbalanced( - pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr - ); - } +/* +** xColumn - Return a column value. +*/ +static int fts3tokColumnMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ + int iCol /* Index of column to read value from */ +){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; - if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ - sqlite3Fts3ExprFree(pExpr); - sqlite3_result_error(context, "Error parsing expression", -1); - }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ - sqlite3_result_error_nomem(context); - }else{ - sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); - sqlite3_free(zBuf); + /* CREATE TABLE x(input, token, start, end, position) */ + switch( iCol ){ + case 0: + sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT); + break; + case 1: + sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT); + break; + case 2: + sqlite3_result_int(pCtx, pCsr->iStart); + break; + case 3: + sqlite3_result_int(pCtx, pCsr->iEnd); + break; + default: + assert( iCol==4 ); + sqlite3_result_int(pCtx, pCsr->iPos); + break; } + return SQLITE_OK; +} - sqlite3Fts3ExprFree(pExpr); - -exprtest_out: - if( pModule && pTokenizer ){ - rc = pModule->xDestroy(pTokenizer); - } - sqlite3_free(azCol); +/* +** xRowid - Return the current rowid for the cursor. +*/ +static int fts3tokRowidMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite_int64 *pRowid /* OUT: Rowid value */ +){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + *pRowid = (sqlite3_int64)pCsr->iRowid; + return SQLITE_OK; } /* -** Register the query expression parser test function fts3_exprtest() -** with database connection db. +** Register the fts3tok module with database connection db. Return SQLITE_OK +** if successful or an error code if sqlite3_create_module() fails. */ -SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ - int rc = sqlite3_create_function( - db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 - ); - if( rc==SQLITE_OK ){ - rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", - -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 - ); - } +SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){ + static const sqlite3_module fts3tok_module = { + 0, /* iVersion */ + fts3tokConnectMethod, /* xCreate */ + fts3tokConnectMethod, /* xConnect */ + fts3tokBestIndexMethod, /* xBestIndex */ + fts3tokDisconnectMethod, /* xDisconnect */ + fts3tokDisconnectMethod, /* xDestroy */ + fts3tokOpenMethod, /* xOpen */ + fts3tokCloseMethod, /* xClose */ + fts3tokFilterMethod, /* xFilter */ + fts3tokNextMethod, /* xNext */ + fts3tokEofMethod, /* xEof */ + fts3tokColumnMethod, /* xColumn */ + fts3tokRowidMethod, /* xRowid */ + 0, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindFunction */ + 0, /* xRename */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0 /* xRollbackTo */ + }; + int rc; /* Return code */ + + rc = sqlite3_create_module(db, "fts3tokenize", &fts3tok_module, (void*)pHash); return rc; } -#endif #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ -/************** End of fts3_expr.c *******************************************/ -/************** Begin file fts3_hash.c ***************************************/ +/************** End of fts3_tokenize_vtab.c **********************************/ +/************** Begin file fts3_write.c **************************************/ /* -** 2001 September 22 +** 2009 Oct 23 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: @@ -140216,1909 +144461,2148 @@ SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** -************************************************************************* -** This is the implementation of generic hash-tables used in SQLite. -** We've modified it slightly to serve as a standalone hash table -** implementation for the full-text indexing module. -*/ - -/* -** The code in this file is only compiled if: -** -** * The FTS3 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or +****************************************************************************** ** -** * The FTS3 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). +** This file is part of the SQLite FTS3 extension module. Specifically, +** this file contains code to insert, update and delete rows from FTS3 +** tables. It also contains code to merge FTS3 b-tree segments. Some +** of the sub-routines used to merge segments are also used by the query +** code in fts3.c. */ + +/* #include "fts3Int.h" */ #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) +/* #include */ /* #include */ /* #include */ -/* #include */ +#define FTS_MAX_APPENDABLE_HEIGHT 16 + /* -** Malloc and Free functions +** When full-text index nodes are loaded from disk, the buffer that they +** are loaded into has the following number of bytes of padding at the end +** of it. i.e. if a full-text index node is 900 bytes in size, then a buffer +** of 920 bytes is allocated for it. +** +** This means that if we have a pointer into a buffer containing node data, +** it is always safe to read up to two varints from it without risking an +** overread, even if the node data is corrupted. */ -static void *fts3HashMalloc(int n){ - void *p = sqlite3_malloc(n); - if( p ){ - memset(p, 0, n); - } - return p; -} -static void fts3HashFree(void *p){ - sqlite3_free(p); -} +#define FTS3_NODE_PADDING (FTS3_VARINT_MAX*2) -/* Turn bulk memory into a hash table object by initializing the -** fields of the Hash structure. +/* +** Under certain circumstances, b-tree nodes (doclists) can be loaded into +** memory incrementally instead of all at once. This can be a big performance +** win (reduced IO and CPU) if SQLite stops calling the virtual table xNext() +** method before retrieving all query results (as may happen, for example, +** if a query has a LIMIT clause). ** -** "pNew" is a pointer to the hash table that is to be initialized. -** keyClass is one of the constants -** FTS3_HASH_BINARY or FTS3_HASH_STRING. The value of keyClass -** determines what kind of key the hash table will use. "copyKey" is -** true if the hash table should make its own private copy of keys and -** false if it should just use the supplied pointer. +** Incremental loading is used for b-tree nodes FTS3_NODE_CHUNK_THRESHOLD +** bytes and larger. Nodes are loaded in chunks of FTS3_NODE_CHUNKSIZE bytes. +** The code is written so that the hard lower-limit for each of these values +** is 1. Clearly such small values would be inefficient, but can be useful +** for testing purposes. +** +** If this module is built with SQLITE_TEST defined, these constants may +** be overridden at runtime for testing purposes. File fts3_test.c contains +** a Tcl interface to read and write the values. */ -SQLITE_PRIVATE void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey){ - assert( pNew!=0 ); - assert( keyClass>=FTS3_HASH_STRING && keyClass<=FTS3_HASH_BINARY ); - pNew->keyClass = keyClass; - pNew->copyKey = copyKey; - pNew->first = 0; - pNew->count = 0; - pNew->htsize = 0; - pNew->ht = 0; -} +#ifdef SQLITE_TEST +int test_fts3_node_chunksize = (4*1024); +int test_fts3_node_chunk_threshold = (4*1024)*4; +# define FTS3_NODE_CHUNKSIZE test_fts3_node_chunksize +# define FTS3_NODE_CHUNK_THRESHOLD test_fts3_node_chunk_threshold +#else +# define FTS3_NODE_CHUNKSIZE (4*1024) +# define FTS3_NODE_CHUNK_THRESHOLD (FTS3_NODE_CHUNKSIZE*4) +#endif -/* Remove all entries from a hash table. Reclaim all memory. -** Call this routine to delete a hash table or to reset a hash table -** to the empty state. +/* +** The two values that may be meaningfully bound to the :1 parameter in +** statements SQL_REPLACE_STAT and SQL_SELECT_STAT. */ -SQLITE_PRIVATE void sqlite3Fts3HashClear(Fts3Hash *pH){ - Fts3HashElem *elem; /* For looping over all elements of the table */ +#define FTS_STAT_DOCTOTAL 0 +#define FTS_STAT_INCRMERGEHINT 1 +#define FTS_STAT_AUTOINCRMERGE 2 - assert( pH!=0 ); - elem = pH->first; - pH->first = 0; - fts3HashFree(pH->ht); - pH->ht = 0; - pH->htsize = 0; - while( elem ){ - Fts3HashElem *next_elem = elem->next; - if( pH->copyKey && elem->pKey ){ - fts3HashFree(elem->pKey); - } - fts3HashFree(elem); - elem = next_elem; - } - pH->count = 0; +/* +** If FTS_LOG_MERGES is defined, call sqlite3_log() to report each automatic +** and incremental merge operation that takes place. This is used for +** debugging FTS only, it should not usually be turned on in production +** systems. +*/ +#ifdef FTS3_LOG_MERGES +static void fts3LogMerge(int nMerge, sqlite3_int64 iAbsLevel){ + sqlite3_log(SQLITE_OK, "%d-way merge from level %d", nMerge, (int)iAbsLevel); } +#else +#define fts3LogMerge(x, y) +#endif + + +typedef struct PendingList PendingList; +typedef struct SegmentNode SegmentNode; +typedef struct SegmentWriter SegmentWriter; /* -** Hash and comparison functions when the mode is FTS3_HASH_STRING +** An instance of the following data structure is used to build doclists +** incrementally. See function fts3PendingListAppend() for details. */ -static int fts3StrHash(const void *pKey, int nKey){ - const char *z = (const char *)pKey; - unsigned h = 0; - if( nKey<=0 ) nKey = (int) strlen(z); - while( nKey > 0 ){ - h = (h<<3) ^ h ^ *z++; - nKey--; - } - return (int)(h & 0x7fffffff); -} -static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){ - if( n1!=n2 ) return 1; - return strncmp((const char*)pKey1,(const char*)pKey2,n1); -} +struct PendingList { + int nData; + char *aData; + int nSpace; + sqlite3_int64 iLastDocid; + sqlite3_int64 iLastCol; + sqlite3_int64 iLastPos; +}; + /* -** Hash and comparison functions when the mode is FTS3_HASH_BINARY +** Each cursor has a (possibly empty) linked list of the following objects. */ -static int fts3BinHash(const void *pKey, int nKey){ - int h = 0; - const char *z = (const char *)pKey; - while( nKey-- > 0 ){ - h = (h<<3) ^ h ^ *(z++); - } - return h & 0x7fffffff; -} -static int fts3BinCompare(const void *pKey1, int n1, const void *pKey2, int n2){ - if( n1!=n2 ) return 1; - return memcmp(pKey1,pKey2,n1); -} +struct Fts3DeferredToken { + Fts3PhraseToken *pToken; /* Pointer to corresponding expr token */ + int iCol; /* Column token must occur in */ + Fts3DeferredToken *pNext; /* Next in list of deferred tokens */ + PendingList *pList; /* Doclist is assembled here */ +}; /* -** Return a pointer to the appropriate hash function given the key class. +** An instance of this structure is used to iterate through the terms on +** a contiguous set of segment b-tree leaf nodes. Although the details of +** this structure are only manipulated by code in this file, opaque handles +** of type Fts3SegReader* are also used by code in fts3.c to iterate through +** terms when querying the full-text index. See functions: ** -** The C syntax in this function definition may be unfamilar to some -** programmers, so we provide the following additional explanation: +** sqlite3Fts3SegReaderNew() +** sqlite3Fts3SegReaderFree() +** sqlite3Fts3SegReaderIterate() ** -** The name of the function is "ftsHashFunction". The function takes a -** single parameter "keyClass". The return value of ftsHashFunction() -** is a pointer to another function. Specifically, the return value -** of ftsHashFunction() is a pointer to a function that takes two parameters -** with types "const void*" and "int" and returns an "int". +** Methods used to manipulate Fts3SegReader structures: +** +** fts3SegReaderNext() +** fts3SegReaderFirstDocid() +** fts3SegReaderNextDocid() */ -static int (*ftsHashFunction(int keyClass))(const void*,int){ - if( keyClass==FTS3_HASH_STRING ){ - return &fts3StrHash; - }else{ - assert( keyClass==FTS3_HASH_BINARY ); - return &fts3BinHash; - } -} +struct Fts3SegReader { + int iIdx; /* Index within level, or 0x7FFFFFFF for PT */ + u8 bLookup; /* True for a lookup only */ + u8 rootOnly; /* True for a root-only reader */ + + sqlite3_int64 iStartBlock; /* Rowid of first leaf block to traverse */ + sqlite3_int64 iLeafEndBlock; /* Rowid of final leaf block to traverse */ + sqlite3_int64 iEndBlock; /* Rowid of final block in segment (or 0) */ + sqlite3_int64 iCurrentBlock; /* Current leaf block (or 0) */ + + char *aNode; /* Pointer to node data (or NULL) */ + int nNode; /* Size of buffer at aNode (or 0) */ + int nPopulate; /* If >0, bytes of buffer aNode[] loaded */ + sqlite3_blob *pBlob; /* If not NULL, blob handle to read node */ + + Fts3HashElem **ppNextElem; + + /* Variables set by fts3SegReaderNext(). These may be read directly + ** by the caller. They are valid from the time SegmentReaderNew() returns + ** until SegmentReaderNext() returns something other than SQLITE_OK + ** (i.e. SQLITE_DONE). + */ + int nTerm; /* Number of bytes in current term */ + char *zTerm; /* Pointer to current term */ + int nTermAlloc; /* Allocated size of zTerm buffer */ + char *aDoclist; /* Pointer to doclist of current entry */ + int nDoclist; /* Size of doclist in current entry */ + + /* The following variables are used by fts3SegReaderNextDocid() to iterate + ** through the current doclist (aDoclist/nDoclist). + */ + char *pOffsetList; + int nOffsetList; /* For descending pending seg-readers only */ + sqlite3_int64 iDocid; +}; + +#define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0) +#define fts3SegReaderIsRootOnly(p) ((p)->rootOnly!=0) /* -** Return a pointer to the appropriate hash function given the key class. +** An instance of this structure is used to create a segment b-tree in the +** database. The internal details of this type are only accessed by the +** following functions: ** -** For help in interpreted the obscure C code in the function definition, -** see the header comment on the previous function. +** fts3SegWriterAdd() +** fts3SegWriterFlush() +** fts3SegWriterFree() */ -static int (*ftsCompareFunction(int keyClass))(const void*,int,const void*,int){ - if( keyClass==FTS3_HASH_STRING ){ - return &fts3StrCompare; - }else{ - assert( keyClass==FTS3_HASH_BINARY ); - return &fts3BinCompare; - } -} +struct SegmentWriter { + SegmentNode *pTree; /* Pointer to interior tree structure */ + sqlite3_int64 iFirst; /* First slot in %_segments written */ + sqlite3_int64 iFree; /* Next free slot in %_segments */ + char *zTerm; /* Pointer to previous term buffer */ + int nTerm; /* Number of bytes in zTerm */ + int nMalloc; /* Size of malloc'd buffer at zMalloc */ + char *zMalloc; /* Malloc'd space (possibly) used for zTerm */ + int nSize; /* Size of allocation at aData */ + int nData; /* Bytes of data in aData */ + char *aData; /* Pointer to block from malloc() */ + i64 nLeafData; /* Number of bytes of leaf data written */ +}; -/* Link an element into the hash table +/* +** Type SegmentNode is used by the following three functions to create +** the interior part of the segment b+-tree structures (everything except +** the leaf nodes). These functions and type are only ever used by code +** within the fts3SegWriterXXX() family of functions described above. +** +** fts3NodeAddTerm() +** fts3NodeWrite() +** fts3NodeFree() +** +** When a b+tree is written to the database (either as a result of a merge +** or the pending-terms table being flushed), leaves are written into the +** database file as soon as they are completely populated. The interior of +** the tree is assembled in memory and written out only once all leaves have +** been populated and stored. This is Ok, as the b+-tree fanout is usually +** very large, meaning that the interior of the tree consumes relatively +** little memory. */ -static void fts3HashInsertElement( - Fts3Hash *pH, /* The complete hash table */ - struct _fts3ht *pEntry, /* The entry into which pNew is inserted */ - Fts3HashElem *pNew /* The element to be inserted */ -){ - Fts3HashElem *pHead; /* First element already in pEntry */ - pHead = pEntry->chain; - if( pHead ){ - pNew->next = pHead; - pNew->prev = pHead->prev; - if( pHead->prev ){ pHead->prev->next = pNew; } - else { pH->first = pNew; } - pHead->prev = pNew; - }else{ - pNew->next = pH->first; - if( pH->first ){ pH->first->prev = pNew; } - pNew->prev = 0; - pH->first = pNew; - } - pEntry->count++; - pEntry->chain = pNew; -} - +struct SegmentNode { + SegmentNode *pParent; /* Parent node (or NULL for root node) */ + SegmentNode *pRight; /* Pointer to right-sibling */ + SegmentNode *pLeftmost; /* Pointer to left-most node of this depth */ + int nEntry; /* Number of terms written to node so far */ + char *zTerm; /* Pointer to previous term buffer */ + int nTerm; /* Number of bytes in zTerm */ + int nMalloc; /* Size of malloc'd buffer at zMalloc */ + char *zMalloc; /* Malloc'd space (possibly) used for zTerm */ + int nData; /* Bytes of valid data so far */ + char *aData; /* Node data */ +}; -/* Resize the hash table so that it cantains "new_size" buckets. -** "new_size" must be a power of 2. The hash table might fail -** to resize if sqliteMalloc() fails. -** -** Return non-zero if a memory allocation error occurs. +/* +** Valid values for the second argument to fts3SqlStmt(). */ -static int fts3Rehash(Fts3Hash *pH, int new_size){ - struct _fts3ht *new_ht; /* The new hash table */ - Fts3HashElem *elem, *next_elem; /* For looping over existing elements */ - int (*xHash)(const void*,int); /* The hash function */ +#define SQL_DELETE_CONTENT 0 +#define SQL_IS_EMPTY 1 +#define SQL_DELETE_ALL_CONTENT 2 +#define SQL_DELETE_ALL_SEGMENTS 3 +#define SQL_DELETE_ALL_SEGDIR 4 +#define SQL_DELETE_ALL_DOCSIZE 5 +#define SQL_DELETE_ALL_STAT 6 +#define SQL_SELECT_CONTENT_BY_ROWID 7 +#define SQL_NEXT_SEGMENT_INDEX 8 +#define SQL_INSERT_SEGMENTS 9 +#define SQL_NEXT_SEGMENTS_ID 10 +#define SQL_INSERT_SEGDIR 11 +#define SQL_SELECT_LEVEL 12 +#define SQL_SELECT_LEVEL_RANGE 13 +#define SQL_SELECT_LEVEL_COUNT 14 +#define SQL_SELECT_SEGDIR_MAX_LEVEL 15 +#define SQL_DELETE_SEGDIR_LEVEL 16 +#define SQL_DELETE_SEGMENTS_RANGE 17 +#define SQL_CONTENT_INSERT 18 +#define SQL_DELETE_DOCSIZE 19 +#define SQL_REPLACE_DOCSIZE 20 +#define SQL_SELECT_DOCSIZE 21 +#define SQL_SELECT_STAT 22 +#define SQL_REPLACE_STAT 23 - assert( (new_size & (new_size-1))==0 ); - new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) ); - if( new_ht==0 ) return 1; - fts3HashFree(pH->ht); - pH->ht = new_ht; - pH->htsize = new_size; - xHash = ftsHashFunction(pH->keyClass); - for(elem=pH->first, pH->first=0; elem; elem = next_elem){ - int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1); - next_elem = elem->next; - fts3HashInsertElement(pH, &new_ht[h], elem); - } - return 0; -} +#define SQL_SELECT_ALL_PREFIX_LEVEL 24 +#define SQL_DELETE_ALL_TERMS_SEGDIR 25 +#define SQL_DELETE_SEGDIR_RANGE 26 +#define SQL_SELECT_ALL_LANGID 27 +#define SQL_FIND_MERGE_LEVEL 28 +#define SQL_MAX_LEAF_NODE_ESTIMATE 29 +#define SQL_DELETE_SEGDIR_ENTRY 30 +#define SQL_SHIFT_SEGDIR_ENTRY 31 +#define SQL_SELECT_SEGDIR 32 +#define SQL_CHOMP_SEGDIR 33 +#define SQL_SEGMENT_IS_APPENDABLE 34 +#define SQL_SELECT_INDEXES 35 +#define SQL_SELECT_MXLEVEL 36 -/* This function (for internal use only) locates an element in an -** hash table that matches the given key. The hash for this key has -** already been computed and is passed as the 4th parameter. +#define SQL_SELECT_LEVEL_RANGE2 37 +#define SQL_UPDATE_LEVEL_IDX 38 +#define SQL_UPDATE_LEVEL 39 + +/* +** This function is used to obtain an SQLite prepared statement handle +** for the statement identified by the second argument. If successful, +** *pp is set to the requested statement handle and SQLITE_OK returned. +** Otherwise, an SQLite error code is returned and *pp is set to 0. +** +** If argument apVal is not NULL, then it must point to an array with +** at least as many entries as the requested statement has bound +** parameters. The values are bound to the statements parameters before +** returning. */ -static Fts3HashElem *fts3FindElementByHash( - const Fts3Hash *pH, /* The pH to be searched */ - const void *pKey, /* The key we are searching for */ - int nKey, - int h /* The hash for this key. */ +static int fts3SqlStmt( + Fts3Table *p, /* Virtual table handle */ + int eStmt, /* One of the SQL_XXX constants above */ + sqlite3_stmt **pp, /* OUT: Statement handle */ + sqlite3_value **apVal /* Values to bind to statement */ ){ - Fts3HashElem *elem; /* Used to loop thru the element list */ - int count; /* Number of elements left to test */ - int (*xCompare)(const void*,int,const void*,int); /* comparison function */ + const char *azSql[] = { +/* 0 */ "DELETE FROM %Q.'%q_content' WHERE rowid = ?", +/* 1 */ "SELECT NOT EXISTS(SELECT docid FROM %Q.'%q_content' WHERE rowid!=?)", +/* 2 */ "DELETE FROM %Q.'%q_content'", +/* 3 */ "DELETE FROM %Q.'%q_segments'", +/* 4 */ "DELETE FROM %Q.'%q_segdir'", +/* 5 */ "DELETE FROM %Q.'%q_docsize'", +/* 6 */ "DELETE FROM %Q.'%q_stat'", +/* 7 */ "SELECT %s WHERE rowid=?", +/* 8 */ "SELECT (SELECT max(idx) FROM %Q.'%q_segdir' WHERE level = ?) + 1", +/* 9 */ "REPLACE INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)", +/* 10 */ "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)", +/* 11 */ "REPLACE INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)", - if( pH->ht ){ - struct _fts3ht *pEntry = &pH->ht[h]; - elem = pEntry->chain; - count = pEntry->count; - xCompare = ftsCompareFunction(pH->keyClass); - while( count-- && elem ){ - if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ - return elem; - } - elem = elem->next; - } - } - return 0; -} + /* Return segments in order from oldest to newest.*/ +/* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root " + "FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC", +/* 13 */ "SELECT idx, start_block, leaves_end_block, end_block, root " + "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?" + "ORDER BY level DESC, idx ASC", -/* Remove a single entry from the hash table given a pointer to that -** element and a hash on the element's key. -*/ -static void fts3RemoveElementByHash( - Fts3Hash *pH, /* The pH containing "elem" */ - Fts3HashElem* elem, /* The element to be removed from the pH */ - int h /* Hash value for the element */ -){ - struct _fts3ht *pEntry; - if( elem->prev ){ - elem->prev->next = elem->next; - }else{ - pH->first = elem->next; - } - if( elem->next ){ - elem->next->prev = elem->prev; - } - pEntry = &pH->ht[h]; - if( pEntry->chain==elem ){ - pEntry->chain = elem->next; - } - pEntry->count--; - if( pEntry->count<=0 ){ - pEntry->chain = 0; - } - if( pH->copyKey && elem->pKey ){ - fts3HashFree(elem->pKey); +/* 14 */ "SELECT count(*) FROM %Q.'%q_segdir' WHERE level = ?", +/* 15 */ "SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", + +/* 16 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ?", +/* 17 */ "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ?", +/* 18 */ "INSERT INTO %Q.'%q_content' VALUES(%s)", +/* 19 */ "DELETE FROM %Q.'%q_docsize' WHERE docid = ?", +/* 20 */ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", +/* 21 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?", +/* 22 */ "SELECT value FROM %Q.'%q_stat' WHERE id=?", +/* 23 */ "REPLACE INTO %Q.'%q_stat' VALUES(?,?)", +/* 24 */ "", +/* 25 */ "", + +/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", +/* 27 */ "SELECT ? UNION SELECT level / (1024 * ?) FROM %Q.'%q_segdir'", + +/* This statement is used to determine which level to read the input from +** when performing an incremental merge. It returns the absolute level number +** of the oldest level in the db that contains at least ? segments. Or, +** if no level in the FTS index contains more than ? segments, the statement +** returns zero rows. */ +/* 28 */ "SELECT level FROM %Q.'%q_segdir' GROUP BY level HAVING count(*)>=?" + " ORDER BY (level %% 1024) ASC LIMIT 1", + +/* Estimate the upper limit on the number of leaf nodes in a new segment +** created by merging the oldest :2 segments from absolute level :1. See +** function sqlite3Fts3Incrmerge() for details. */ +/* 29 */ "SELECT 2 * total(1 + leaves_end_block - start_block) " + " FROM %Q.'%q_segdir' WHERE level = ? AND idx < ?", + +/* SQL_DELETE_SEGDIR_ENTRY +** Delete the %_segdir entry on absolute level :1 with index :2. */ +/* 30 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?", + +/* SQL_SHIFT_SEGDIR_ENTRY +** Modify the idx value for the segment with idx=:3 on absolute level :2 +** to :1. */ +/* 31 */ "UPDATE %Q.'%q_segdir' SET idx = ? WHERE level=? AND idx=?", + +/* SQL_SELECT_SEGDIR +** Read a single entry from the %_segdir table. The entry from absolute +** level :1 with index value :2. */ +/* 32 */ "SELECT idx, start_block, leaves_end_block, end_block, root " + "FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?", + +/* SQL_CHOMP_SEGDIR +** Update the start_block (:1) and root (:2) fields of the %_segdir +** entry located on absolute level :3 with index :4. */ +/* 33 */ "UPDATE %Q.'%q_segdir' SET start_block = ?, root = ?" + "WHERE level = ? AND idx = ?", + +/* SQL_SEGMENT_IS_APPENDABLE +** Return a single row if the segment with end_block=? is appendable. Or +** no rows otherwise. */ +/* 34 */ "SELECT 1 FROM %Q.'%q_segments' WHERE blockid=? AND block IS NULL", + +/* SQL_SELECT_INDEXES +** Return the list of valid segment indexes for absolute level ? */ +/* 35 */ "SELECT idx FROM %Q.'%q_segdir' WHERE level=? ORDER BY 1 ASC", + +/* SQL_SELECT_MXLEVEL +** Return the largest relative level in the FTS index or indexes. */ +/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'", + + /* Return segments in order from oldest to newest.*/ +/* 37 */ "SELECT level, idx, end_block " + "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? " + "ORDER BY level DESC, idx ASC", + + /* Update statements used while promoting segments */ +/* 38 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=-1,idx=? " + "WHERE level=? AND idx=?", +/* 39 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=? WHERE level=-1" + + }; + int rc = SQLITE_OK; + sqlite3_stmt *pStmt; + + assert( SizeofArray(azSql)==SizeofArray(p->aStmt) ); + assert( eStmt=0 ); + + pStmt = p->aStmt[eStmt]; + if( !pStmt ){ + char *zSql; + if( eStmt==SQL_CONTENT_INSERT ){ + zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, p->zWriteExprlist); + }else if( eStmt==SQL_SELECT_CONTENT_BY_ROWID ){ + zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist); + }else{ + zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName); + } + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, NULL); + sqlite3_free(zSql); + assert( rc==SQLITE_OK || pStmt==0 ); + p->aStmt[eStmt] = pStmt; + } } - fts3HashFree( elem ); - pH->count--; - if( pH->count<=0 ){ - assert( pH->first==0 ); - assert( pH->count==0 ); - fts3HashClear(pH); + if( apVal ){ + int i; + int nParam = sqlite3_bind_parameter_count(pStmt); + for(i=0; rc==SQLITE_OK && iht==0 ) return 0; - xHash = ftsHashFunction(pH->keyClass); - assert( xHash!=0 ); - h = (*xHash)(pKey,nKey); - assert( (pH->htsize & (pH->htsize-1))==0 ); - return fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1)); -} - -/* -** Attempt to locate an element of the hash table pH with a key -** that matches pKey,nKey. Return the data for this element if it is -** found, or NULL if there is no match. -*/ -SQLITE_PRIVATE void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, int nKey){ - Fts3HashElem *pElem; /* The element that matches key (if any) */ - - pElem = sqlite3Fts3HashFindElem(pH, pKey, nKey); - return pElem ? pElem->data : 0; -} -/* Insert an element into the hash table pH. The key is pKey,nKey -** and the data is "data". -** -** If no element exists with a matching key, then a new -** element is created. A copy of the key is made if the copyKey -** flag is set. NULL is returned. -** -** If another element already exists with the same key, then the -** new data replaces the old data and the old data is returned. -** The key is not copied in this instance. If a malloc fails, then -** the new data is returned and the hash table is unchanged. -** -** If the "data" parameter to this function is NULL, then the -** element corresponding to "key" is removed from the hash table. -*/ -SQLITE_PRIVATE void *sqlite3Fts3HashInsert( - Fts3Hash *pH, /* The hash table to insert into */ - const void *pKey, /* The key */ - int nKey, /* Number of bytes in the key */ - void *data /* The data */ +static int fts3SelectDocsize( + Fts3Table *pTab, /* FTS3 table handle */ + sqlite3_int64 iDocid, /* Docid to bind for SQL_SELECT_DOCSIZE */ + sqlite3_stmt **ppStmt /* OUT: Statement handle */ ){ - int hraw; /* Raw hash value of the key */ - int h; /* the hash of the key modulo hash table size */ - Fts3HashElem *elem; /* Used to loop thru the element list */ - Fts3HashElem *new_elem; /* New element added to the pH */ - int (*xHash)(const void*,int); /* The hash function */ + sqlite3_stmt *pStmt = 0; /* Statement requested from fts3SqlStmt() */ + int rc; /* Return code */ - assert( pH!=0 ); - xHash = ftsHashFunction(pH->keyClass); - assert( xHash!=0 ); - hraw = (*xHash)(pKey, nKey); - assert( (pH->htsize & (pH->htsize-1))==0 ); - h = hraw & (pH->htsize-1); - elem = fts3FindElementByHash(pH,pKey,nKey,h); - if( elem ){ - void *old_data = elem->data; - if( data==0 ){ - fts3RemoveElementByHash(pH,elem,h); + rc = fts3SqlStmt(pTab, SQL_SELECT_DOCSIZE, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pStmt, 1, iDocid); + rc = sqlite3_step(pStmt); + if( rc!=SQLITE_ROW || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB ){ + rc = sqlite3_reset(pStmt); + if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB; + pStmt = 0; }else{ - elem->data = data; + rc = SQLITE_OK; } - return old_data; - } - if( data==0 ) return 0; - if( (pH->htsize==0 && fts3Rehash(pH,8)) - || (pH->count>=pH->htsize && fts3Rehash(pH, pH->htsize*2)) - ){ - pH->count = 0; - return data; } - assert( pH->htsize>0 ); - new_elem = (Fts3HashElem*)fts3HashMalloc( sizeof(Fts3HashElem) ); - if( new_elem==0 ) return data; - if( pH->copyKey && pKey!=0 ){ - new_elem->pKey = fts3HashMalloc( nKey ); - if( new_elem->pKey==0 ){ - fts3HashFree(new_elem); - return data; + + *ppStmt = pStmt; + return rc; +} + +SQLITE_PRIVATE int sqlite3Fts3SelectDoctotal( + Fts3Table *pTab, /* Fts3 table handle */ + sqlite3_stmt **ppStmt /* OUT: Statement handle */ +){ + sqlite3_stmt *pStmt = 0; + int rc; + rc = fts3SqlStmt(pTab, SQL_SELECT_STAT, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); + if( sqlite3_step(pStmt)!=SQLITE_ROW + || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB + ){ + rc = sqlite3_reset(pStmt); + if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB; + pStmt = 0; } - memcpy((void*)new_elem->pKey, pKey, nKey); - }else{ - new_elem->pKey = (void*)pKey; } - new_elem->nKey = nKey; - pH->count++; - assert( pH->htsize>0 ); - assert( (pH->htsize & (pH->htsize-1))==0 ); - h = hraw & (pH->htsize-1); - fts3HashInsertElement(pH, &pH->ht[h], new_elem); - new_elem->data = data; - return 0; + *ppStmt = pStmt; + return rc; } -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ - -/************** End of fts3_hash.c *******************************************/ -/************** Begin file fts3_porter.c *************************************/ -/* -** 2006 September 30 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** Implementation of the full-text-search tokenizer that implements -** a Porter stemmer. -*/ +SQLITE_PRIVATE int sqlite3Fts3SelectDocsize( + Fts3Table *pTab, /* Fts3 table handle */ + sqlite3_int64 iDocid, /* Docid to read size data for */ + sqlite3_stmt **ppStmt /* OUT: Statement handle */ +){ + return fts3SelectDocsize(pTab, iDocid, ppStmt); +} /* -** The code in this file is only compiled if: -** -** * The FTS3 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or +** Similar to fts3SqlStmt(). Except, after binding the parameters in +** array apVal[] to the SQL statement identified by eStmt, the statement +** is executed. ** -** * The FTS3 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). +** Returns SQLITE_OK if the statement is successfully executed, or an +** SQLite error code otherwise. */ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) - -/* #include */ -/* #include */ -/* #include */ -/* #include */ - +static void fts3SqlExec( + int *pRC, /* Result code */ + Fts3Table *p, /* The FTS3 table */ + int eStmt, /* Index of statement to evaluate */ + sqlite3_value **apVal /* Parameters to bind */ +){ + sqlite3_stmt *pStmt; + int rc; + if( *pRC ) return; + rc = fts3SqlStmt(p, eStmt, &pStmt, apVal); + if( rc==SQLITE_OK ){ + sqlite3_step(pStmt); + rc = sqlite3_reset(pStmt); + } + *pRC = rc; +} -/* -** Class derived from sqlite3_tokenizer -*/ -typedef struct porter_tokenizer { - sqlite3_tokenizer base; /* Base class */ -} porter_tokenizer; /* -** Class derived from sqlite3_tokenizer_cursor +** This function ensures that the caller has obtained an exclusive +** shared-cache table-lock on the %_segdir table. This is required before +** writing data to the fts3 table. If this lock is not acquired first, then +** the caller may end up attempting to take this lock as part of committing +** a transaction, causing SQLite to return SQLITE_LOCKED or +** LOCKED_SHAREDCACHEto a COMMIT command. +** +** It is best to avoid this because if FTS3 returns any error when +** committing a transaction, the whole transaction will be rolled back. +** And this is not what users expect when they get SQLITE_LOCKED_SHAREDCACHE. +** It can still happen if the user locks the underlying tables directly +** instead of accessing them via FTS. */ -typedef struct porter_tokenizer_cursor { - sqlite3_tokenizer_cursor base; - const char *zInput; /* input we are tokenizing */ - int nInput; /* size of the input */ - int iOffset; /* current position in zInput */ - int iToken; /* index of next token to be returned */ - char *zToken; /* storage for current token */ - int nAllocated; /* space allocated to zToken buffer */ -} porter_tokenizer_cursor; +static int fts3Writelock(Fts3Table *p){ + int rc = SQLITE_OK; + + if( p->nPendingData==0 ){ + sqlite3_stmt *pStmt; + rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_null(pStmt, 1); + sqlite3_step(pStmt); + rc = sqlite3_reset(pStmt); + } + } + return rc; +} /* -** Create a new tokenizer instance. +** FTS maintains a separate indexes for each language-id (a 32-bit integer). +** Within each language id, a separate index is maintained to store the +** document terms, and each configured prefix size (configured the FTS +** "prefix=" option). And each index consists of multiple levels ("relative +** levels"). +** +** All three of these values (the language id, the specific index and the +** level within the index) are encoded in 64-bit integer values stored +** in the %_segdir table on disk. This function is used to convert three +** separate component values into the single 64-bit integer value that +** can be used to query the %_segdir table. +** +** Specifically, each language-id/index combination is allocated 1024 +** 64-bit integer level values ("absolute levels"). The main terms index +** for language-id 0 is allocate values 0-1023. The first prefix index +** (if any) for language-id 0 is allocated values 1024-2047. And so on. +** Language 1 indexes are allocated immediately following language 0. +** +** So, for a system with nPrefix prefix indexes configured, the block of +** absolute levels that corresponds to language-id iLangid and index +** iIndex starts at absolute level ((iLangid * (nPrefix+1) + iIndex) * 1024). */ -static int porterCreate( - int argc, const char * const *argv, - sqlite3_tokenizer **ppTokenizer +static sqlite3_int64 getAbsoluteLevel( + Fts3Table *p, /* FTS3 table handle */ + int iLangid, /* Language id */ + int iIndex, /* Index in p->aIndex[] */ + int iLevel /* Level of segments */ ){ - porter_tokenizer *t; - - UNUSED_PARAMETER(argc); - UNUSED_PARAMETER(argv); - - t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t)); - if( t==NULL ) return SQLITE_NOMEM; - memset(t, 0, sizeof(*t)); - *ppTokenizer = &t->base; - return SQLITE_OK; -} + sqlite3_int64 iBase; /* First absolute level for iLangid/iIndex */ + assert( iLangid>=0 ); + assert( p->nIndex>0 ); + assert( iIndex>=0 && iIndexnIndex ); -/* -** Destroy a tokenizer -*/ -static int porterDestroy(sqlite3_tokenizer *pTokenizer){ - sqlite3_free(pTokenizer); - return SQLITE_OK; + iBase = ((sqlite3_int64)iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL; + return iBase + iLevel; } /* -** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is zInput[0..nInput-1]. A cursor -** used to incrementally tokenize this string is returned in -** *ppCursor. +** Set *ppStmt to a statement handle that may be used to iterate through +** all rows in the %_segdir table, from oldest to newest. If successful, +** return SQLITE_OK. If an error occurs while preparing the statement, +** return an SQLite error code. +** +** There is only ever one instance of this SQL statement compiled for +** each FTS3 table. +** +** The statement returns the following columns from the %_segdir table: +** +** 0: idx +** 1: start_block +** 2: leaves_end_block +** 3: end_block +** 4: root */ -static int porterOpen( - sqlite3_tokenizer *pTokenizer, /* The tokenizer */ - const char *zInput, int nInput, /* String to be tokenized */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ +SQLITE_PRIVATE int sqlite3Fts3AllSegdirs( + Fts3Table *p, /* FTS3 table */ + int iLangid, /* Language being queried */ + int iIndex, /* Index for p->aIndex[] */ + int iLevel, /* Level to select (relative level) */ + sqlite3_stmt **ppStmt /* OUT: Compiled statement */ ){ - porter_tokenizer_cursor *c; - - UNUSED_PARAMETER(pTokenizer); + int rc; + sqlite3_stmt *pStmt = 0; - c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); - if( c==NULL ) return SQLITE_NOMEM; + assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel>=0 ); + assert( iLevel=0 && iIndexnIndex ); - c->zInput = zInput; - if( zInput==0 ){ - c->nInput = 0; - }else if( nInput<0 ){ - c->nInput = (int)strlen(zInput); + if( iLevel<0 ){ + /* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */ + rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); + sqlite3_bind_int64(pStmt, 2, + getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) + ); + } }else{ - c->nInput = nInput; + /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */ + rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex,iLevel)); + } } - c->iOffset = 0; /* start tokenizing at the beginning */ - c->iToken = 0; - c->zToken = NULL; /* no space allocated, yet. */ - c->nAllocated = 0; - - *ppCursor = &c->base; - return SQLITE_OK; + *ppStmt = pStmt; + return rc; } -/* -** Close a tokenization cursor previously opened by a call to -** porterOpen() above. -*/ -static int porterClose(sqlite3_tokenizer_cursor *pCursor){ - porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; - sqlite3_free(c->zToken); - sqlite3_free(c); - return SQLITE_OK; -} -/* -** Vowel or consonant -*/ -static const char cType[] = { - 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, - 1, 1, 1, 2, 1 -}; /* -** isConsonant() and isVowel() determine if their first character in -** the string they point to is a consonant or a vowel, according -** to Porter ruls. +** Append a single varint to a PendingList buffer. SQLITE_OK is returned +** if successful, or an SQLite error code otherwise. ** -** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'. -** 'Y' is a consonant unless it follows another consonant, -** in which case it is a vowel. +** This function also serves to allocate the PendingList structure itself. +** For example, to create a new PendingList structure containing two +** varints: ** -** In these routine, the letters are in reverse order. So the 'y' rule -** is that 'y' is a consonant unless it is followed by another -** consonent. +** PendingList *p = 0; +** fts3PendingListAppendVarint(&p, 1); +** fts3PendingListAppendVarint(&p, 2); */ -static int isVowel(const char*); -static int isConsonant(const char *z){ - int j; - char x = *z; - if( x==0 ) return 0; - assert( x>='a' && x<='z' ); - j = cType[x-'a']; - if( j<2 ) return j; - return z[1]==0 || isVowel(z + 1); -} -static int isVowel(const char *z){ - int j; - char x = *z; - if( x==0 ) return 0; - assert( x>='a' && x<='z' ); - j = cType[x-'a']; - if( j<2 ) return 1-j; - return isConsonant(z + 1); +static int fts3PendingListAppendVarint( + PendingList **pp, /* IN/OUT: Pointer to PendingList struct */ + sqlite3_int64 i /* Value to append to data */ +){ + PendingList *p = *pp; + + /* Allocate or grow the PendingList as required. */ + if( !p ){ + p = sqlite3_malloc(sizeof(*p) + 100); + if( !p ){ + return SQLITE_NOMEM; + } + p->nSpace = 100; + p->aData = (char *)&p[1]; + p->nData = 0; + } + else if( p->nData+FTS3_VARINT_MAX+1>p->nSpace ){ + int nNew = p->nSpace * 2; + p = sqlite3_realloc(p, sizeof(*p) + nNew); + if( !p ){ + sqlite3_free(*pp); + *pp = 0; + return SQLITE_NOMEM; + } + p->nSpace = nNew; + p->aData = (char *)&p[1]; + } + + /* Append the new serialized varint to the end of the list. */ + p->nData += sqlite3Fts3PutVarint(&p->aData[p->nData], i); + p->aData[p->nData] = '\0'; + *pp = p; + return SQLITE_OK; } /* -** Let any sequence of one or more vowels be represented by V and let -** C be sequence of one or more consonants. Then every word can be -** represented as: -** -** [C] (VC){m} [V] -** -** In prose: A word is an optional consonant followed by zero or -** vowel-consonant pairs followed by an optional vowel. "m" is the -** number of vowel consonant pairs. This routine computes the value -** of m for the first i bytes of a word. -** -** Return true if the m-value for z is 1 or more. In other words, -** return true if z contains at least one vowel that is followed -** by a consonant. +** Add a docid/column/position entry to a PendingList structure. Non-zero +** is returned if the structure is sqlite3_realloced as part of adding +** the entry. Otherwise, zero. ** -** In this routine z[] is in reverse order. So we are really looking -** for an instance of a consonant followed by a vowel. -*/ -static int m_gt_0(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - return *z!=0; -} - -/* Like mgt0 above except we are looking for a value of m which is -** exactly 1 +** If an OOM error occurs, *pRc is set to SQLITE_NOMEM before returning. +** Zero is always returned in this case. Otherwise, if no OOM error occurs, +** it is set to SQLITE_OK. */ -static int m_eq_1(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - if( *z==0 ) return 0; - while( isVowel(z) ){ z++; } - if( *z==0 ) return 1; - while( isConsonant(z) ){ z++; } - return *z==0; -} +static int fts3PendingListAppend( + PendingList **pp, /* IN/OUT: PendingList structure */ + sqlite3_int64 iDocid, /* Docid for entry to add */ + sqlite3_int64 iCol, /* Column for entry to add */ + sqlite3_int64 iPos, /* Position of term for entry to add */ + int *pRc /* OUT: Return code */ +){ + PendingList *p = *pp; + int rc = SQLITE_OK; -/* Like mgt0 above except we are looking for a value of m>1 instead -** or m>0 -*/ -static int m_gt_1(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - if( *z==0 ) return 0; - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - return *z!=0; -} + assert( !p || p->iLastDocid<=iDocid ); -/* -** Return TRUE if there is a vowel anywhere within z[0..n-1] -*/ -static int hasVowel(const char *z){ - while( isConsonant(z) ){ z++; } - return *z!=0; -} + if( !p || p->iLastDocid!=iDocid ){ + sqlite3_int64 iDelta = iDocid - (p ? p->iLastDocid : 0); + if( p ){ + assert( p->nDatanSpace ); + assert( p->aData[p->nData]==0 ); + p->nData++; + } + if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iDelta)) ){ + goto pendinglistappend_out; + } + p->iLastCol = -1; + p->iLastPos = 0; + p->iLastDocid = iDocid; + } + if( iCol>0 && p->iLastCol!=iCol ){ + if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, 1)) + || SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iCol)) + ){ + goto pendinglistappend_out; + } + p->iLastCol = iCol; + p->iLastPos = 0; + } + if( iCol>=0 ){ + assert( iPos>p->iLastPos || (iPos==0 && p->iLastPos==0) ); + rc = fts3PendingListAppendVarint(&p, 2+iPos-p->iLastPos); + if( rc==SQLITE_OK ){ + p->iLastPos = iPos; + } + } -/* -** Return TRUE if the word ends in a double consonant. -** -** The text is reversed here. So we are really looking at -** the first two characters of z[]. -*/ -static int doubleConsonant(const char *z){ - return isConsonant(z) && z[0]==z[1]; + pendinglistappend_out: + *pRc = rc; + if( p!=*pp ){ + *pp = p; + return 1; + } + return 0; } /* -** Return TRUE if the word ends with three letters which -** are consonant-vowel-consonent and where the final consonant -** is not 'w', 'x', or 'y'. -** -** The word is reversed here. So we are really checking the -** first three letters and the first one cannot be in [wxy]. +** Free a PendingList object allocated by fts3PendingListAppend(). */ -static int star_oh(const char *z){ - return - isConsonant(z) && - z[0]!='w' && z[0]!='x' && z[0]!='y' && - isVowel(z+1) && - isConsonant(z+2); +static void fts3PendingListDelete(PendingList *pList){ + sqlite3_free(pList); } /* -** If the word ends with zFrom and xCond() is true for the stem -** of the word that preceeds the zFrom ending, then change the -** ending to zTo. -** -** The input word *pz and zFrom are both in reverse order. zTo -** is in normal order. -** -** Return TRUE if zFrom matches. Return FALSE if zFrom does not -** match. Not that TRUE is returned even if xCond() fails and -** no substitution occurs. +** Add an entry to one of the pending-terms hash tables. */ -static int stem( - char **pz, /* The word being stemmed (Reversed) */ - const char *zFrom, /* If the ending matches this... (Reversed) */ - const char *zTo, /* ... change the ending to this (not reversed) */ - int (*xCond)(const char*) /* Condition that must be true */ +static int fts3PendingTermsAddOne( + Fts3Table *p, + int iCol, + int iPos, + Fts3Hash *pHash, /* Pending terms hash table to add entry to */ + const char *zToken, + int nToken ){ - char *z = *pz; - while( *zFrom && *zFrom==*z ){ z++; zFrom++; } - if( *zFrom!=0 ) return 0; - if( xCond && !xCond(z) ) return 1; - while( *zTo ){ - *(--z) = *(zTo++); - } - *pz = z; - return 1; -} + PendingList *pList; + int rc = SQLITE_OK; -/* -** This is the fallback stemmer used when the porter stemmer is -** inappropriate. The input word is copied into the output with -** US-ASCII case folding. If the input word is too long (more -** than 20 bytes if it contains no digits or more than 6 bytes if -** it contains digits) then word is truncated to 20 or 6 bytes -** by taking 10 or 3 bytes from the beginning and end. -*/ -static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ - int i, mx, j; - int hasDigit = 0; - for(i=0; i='A' && c<='Z' ){ - zOut[i] = c - 'A' + 'a'; - }else{ - if( c>='0' && c<='9' ) hasDigit = 1; - zOut[i] = c; - } + pList = (PendingList *)fts3HashFind(pHash, zToken, nToken); + if( pList ){ + p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem)); } - mx = hasDigit ? 3 : 10; - if( nIn>mx*2 ){ - for(j=mx, i=nIn-mx; iiPrevDocid, iCol, iPos, &rc) ){ + if( pList==fts3HashInsert(pHash, zToken, nToken, pList) ){ + /* Malloc failed while inserting the new entry. This can only + ** happen if there was no previous entry for this token. + */ + assert( 0==fts3HashFind(pHash, zToken, nToken) ); + sqlite3_free(pList); + rc = SQLITE_NOMEM; } - i = j; } - zOut[i] = 0; - *pnOut = i; + if( rc==SQLITE_OK ){ + p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem)); + } + return rc; } - /* -** Stem the input word zIn[0..nIn-1]. Store the output in zOut. -** zOut is at least big enough to hold nIn bytes. Write the actual -** size of the output word (exclusive of the '\0' terminator) into *pnOut. -** -** Any upper-case characters in the US-ASCII character set ([A-Z]) -** are converted to lower case. Upper-case UTF characters are -** unchanged. -** -** Words that are longer than about 20 bytes are stemmed by retaining -** a few bytes from the beginning and the end of the word. If the -** word contains digits, 3 bytes are taken from the beginning and -** 3 bytes from the end. For long words without digits, 10 bytes -** are taken from each end. US-ASCII case folding still applies. -** -** If the input word contains not digits but does characters not -** in [a-zA-Z] then no stemming is attempted and this routine just -** copies the input into the input into the output with US-ASCII -** case folding. +** Tokenize the nul-terminated string zText and add all tokens to the +** pending-terms hash-table. The docid used is that currently stored in +** p->iPrevDocid, and the column is specified by argument iCol. ** -** Stemming never increases the length of the word. So there is -** no chance of overflowing the zOut buffer. +** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. */ -static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ - int i, j; - char zReverse[28]; - char *z, *z2; - if( nIn<3 || nIn>=(int)sizeof(zReverse)-7 ){ - /* The word is too big or too small for the porter stemmer. - ** Fallback to the copy stemmer */ - copy_stemmer(zIn, nIn, zOut, pnOut); - return; +static int fts3PendingTermsAdd( + Fts3Table *p, /* Table into which text will be inserted */ + int iLangid, /* Language id to use */ + const char *zText, /* Text of document to be inserted */ + int iCol, /* Column into which text is being inserted */ + u32 *pnWord /* IN/OUT: Incr. by number tokens inserted */ +){ + int rc; + int iStart = 0; + int iEnd = 0; + int iPos = 0; + int nWord = 0; + + char const *zToken; + int nToken = 0; + + sqlite3_tokenizer *pTokenizer = p->pTokenizer; + sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; + sqlite3_tokenizer_cursor *pCsr; + int (*xNext)(sqlite3_tokenizer_cursor *pCursor, + const char**,int*,int*,int*,int*); + + assert( pTokenizer && pModule ); + + /* If the user has inserted a NULL value, this function may be called with + ** zText==0. In this case, add zero token entries to the hash table and + ** return early. */ + if( zText==0 ){ + *pnWord = 0; + return SQLITE_OK; } - for(i=0, j=sizeof(zReverse)-6; i='A' && c<='Z' ){ - zReverse[j] = c + 'a' - 'A'; - }else if( c>='a' && c<='z' ){ - zReverse[j] = c; - }else{ - /* The use of a character not in [a-zA-Z] means that we fallback - ** to the copy stemmer */ - copy_stemmer(zIn, nIn, zOut, pnOut); - return; - } + + rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr); + if( rc!=SQLITE_OK ){ + return rc; } - memset(&zReverse[sizeof(zReverse)-5], 0, 5); - z = &zReverse[j+1]; + xNext = pModule->xNext; + while( SQLITE_OK==rc + && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos)) + ){ + int i; + if( iPos>=nWord ) nWord = iPos+1; - /* Step 1a */ - if( z[0]=='s' ){ - if( - !stem(&z, "sess", "ss", 0) && - !stem(&z, "sei", "i", 0) && - !stem(&z, "ss", "ss", 0) - ){ - z++; + /* Positions cannot be negative; we use -1 as a terminator internally. + ** Tokens must have a non-zero length. + */ + if( iPos<0 || !zToken || nToken<=0 ){ + rc = SQLITE_ERROR; + break; } - } - /* Step 1b */ - z2 = z; - if( stem(&z, "dee", "ee", m_gt_0) ){ - /* Do nothing. The work was all in the test */ - }else if( - (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel)) - && z!=z2 - ){ - if( stem(&z, "ta", "ate", 0) || - stem(&z, "lb", "ble", 0) || - stem(&z, "zi", "ize", 0) ){ - /* Do nothing. The work was all in the test */ - }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){ - z++; - }else if( m_eq_1(z) && star_oh(z) ){ - *(--z) = 'e'; - } + /* Add the term to the terms index */ + rc = fts3PendingTermsAddOne( + p, iCol, iPos, &p->aIndex[0].hPending, zToken, nToken + ); + + /* Add the term to each of the prefix indexes that it is not too + ** short for. */ + for(i=1; rc==SQLITE_OK && inIndex; i++){ + struct Fts3Index *pIndex = &p->aIndex[i]; + if( nTokennPrefix ) continue; + rc = fts3PendingTermsAddOne( + p, iCol, iPos, &pIndex->hPending, zToken, pIndex->nPrefix + ); + } } - /* Step 1c */ - if( z[0]=='y' && hasVowel(z+1) ){ - z[0] = 'i'; - } + pModule->xClose(pCsr); + *pnWord += nWord; + return (rc==SQLITE_DONE ? SQLITE_OK : rc); +} - /* Step 2 */ - switch( z[1] ){ - case 'a': - if( !stem(&z, "lanoita", "ate", m_gt_0) ){ - stem(&z, "lanoit", "tion", m_gt_0); - } - break; - case 'c': - if( !stem(&z, "icne", "ence", m_gt_0) ){ - stem(&z, "icna", "ance", m_gt_0); - } - break; - case 'e': - stem(&z, "rezi", "ize", m_gt_0); - break; - case 'g': - stem(&z, "igol", "log", m_gt_0); - break; - case 'l': - if( !stem(&z, "ilb", "ble", m_gt_0) - && !stem(&z, "illa", "al", m_gt_0) - && !stem(&z, "iltne", "ent", m_gt_0) - && !stem(&z, "ile", "e", m_gt_0) - ){ - stem(&z, "ilsuo", "ous", m_gt_0); - } - break; - case 'o': - if( !stem(&z, "noitazi", "ize", m_gt_0) - && !stem(&z, "noita", "ate", m_gt_0) - ){ - stem(&z, "rota", "ate", m_gt_0); - } - break; - case 's': - if( !stem(&z, "msila", "al", m_gt_0) - && !stem(&z, "ssenevi", "ive", m_gt_0) - && !stem(&z, "ssenluf", "ful", m_gt_0) - ){ - stem(&z, "ssensuo", "ous", m_gt_0); - } - break; - case 't': - if( !stem(&z, "itila", "al", m_gt_0) - && !stem(&z, "itivi", "ive", m_gt_0) - ){ - stem(&z, "itilib", "ble", m_gt_0); - } - break; +/* +** Calling this function indicates that subsequent calls to +** fts3PendingTermsAdd() are to add term/position-list pairs for the +** contents of the document with docid iDocid. +*/ +static int fts3PendingTermsDocid( + Fts3Table *p, /* Full-text table handle */ + int iLangid, /* Language id of row being written */ + sqlite_int64 iDocid /* Docid of row being written */ +){ + assert( iLangid>=0 ); + + /* TODO(shess) Explore whether partially flushing the buffer on + ** forced-flush would provide better performance. I suspect that if + ** we ordered the doclists by size and flushed the largest until the + ** buffer was half empty, that would let the less frequent terms + ** generate longer doclists. + */ + if( iDocid<=p->iPrevDocid + || p->iPrevLangid!=iLangid + || p->nPendingData>p->nMaxPendingData + ){ + int rc = sqlite3Fts3PendingTermsFlush(p); + if( rc!=SQLITE_OK ) return rc; } + p->iPrevDocid = iDocid; + p->iPrevLangid = iLangid; + return SQLITE_OK; +} - /* Step 3 */ - switch( z[0] ){ - case 'e': - if( !stem(&z, "etaci", "ic", m_gt_0) - && !stem(&z, "evita", "", m_gt_0) - ){ - stem(&z, "ezila", "al", m_gt_0); - } - break; - case 'i': - stem(&z, "itici", "ic", m_gt_0); - break; - case 'l': - if( !stem(&z, "laci", "ic", m_gt_0) ){ - stem(&z, "luf", "", m_gt_0); - } - break; - case 's': - stem(&z, "ssen", "", m_gt_0); - break; +/* +** Discard the contents of the pending-terms hash tables. +*/ +SQLITE_PRIVATE void sqlite3Fts3PendingTermsClear(Fts3Table *p){ + int i; + for(i=0; inIndex; i++){ + Fts3HashElem *pElem; + Fts3Hash *pHash = &p->aIndex[i].hPending; + for(pElem=fts3HashFirst(pHash); pElem; pElem=fts3HashNext(pElem)){ + PendingList *pList = (PendingList *)fts3HashData(pElem); + fts3PendingListDelete(pList); + } + fts3HashClear(pHash); } + p->nPendingData = 0; +} - /* Step 4 */ - switch( z[1] ){ - case 'a': - if( z[0]=='l' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'c': - if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){ - z += 4; - } - break; - case 'e': - if( z[0]=='r' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'i': - if( z[0]=='c' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'l': - if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){ - z += 4; - } - break; - case 'n': - if( z[0]=='t' ){ - if( z[2]=='a' ){ - if( m_gt_1(z+3) ){ - z += 3; - } - }else if( z[2]=='e' ){ - if( !stem(&z, "tneme", "", m_gt_1) - && !stem(&z, "tnem", "", m_gt_1) - ){ - stem(&z, "tne", "", m_gt_1); - } - } - } - break; - case 'o': - if( z[0]=='u' ){ - if( m_gt_1(z+2) ){ - z += 2; - } - }else if( z[3]=='s' || z[3]=='t' ){ - stem(&z, "noi", "", m_gt_1); - } - break; - case 's': - if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){ - z += 3; - } - break; - case 't': - if( !stem(&z, "eta", "", m_gt_1) ){ - stem(&z, "iti", "", m_gt_1); - } - break; - case 'u': - if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){ - z += 3; - } - break; - case 'v': - case 'z': - if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){ - z += 3; - } - break; +/* +** This function is called by the xUpdate() method as part of an INSERT +** operation. It adds entries for each term in the new record to the +** pendingTerms hash table. +** +** Argument apVal is the same as the similarly named argument passed to +** fts3InsertData(). Parameter iDocid is the docid of the new row. +*/ +static int fts3InsertTerms( + Fts3Table *p, + int iLangid, + sqlite3_value **apVal, + u32 *aSz +){ + int i; /* Iterator variable */ + for(i=2; inColumn+2; i++){ + int iCol = i-2; + if( p->abNotindexed[iCol]==0 ){ + const char *zText = (const char *)sqlite3_value_text(apVal[i]); + int rc = fts3PendingTermsAdd(p, iLangid, zText, iCol, &aSz[iCol]); + if( rc!=SQLITE_OK ){ + return rc; + } + aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]); + } } + return SQLITE_OK; +} - /* Step 5a */ - if( z[0]=='e' ){ - if( m_gt_1(z+1) ){ - z++; - }else if( m_eq_1(z+1) && !star_oh(z+1) ){ - z++; +/* +** This function is called by the xUpdate() method for an INSERT operation. +** The apVal parameter is passed a copy of the apVal argument passed by +** SQLite to the xUpdate() method. i.e: +** +** apVal[0] Not used for INSERT. +** apVal[1] rowid +** apVal[2] Left-most user-defined column +** ... +** apVal[p->nColumn+1] Right-most user-defined column +** apVal[p->nColumn+2] Hidden column with same name as table +** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid) +** apVal[p->nColumn+4] Hidden languageid column +*/ +static int fts3InsertData( + Fts3Table *p, /* Full-text table */ + sqlite3_value **apVal, /* Array of values to insert */ + sqlite3_int64 *piDocid /* OUT: Docid for row just inserted */ +){ + int rc; /* Return code */ + sqlite3_stmt *pContentInsert; /* INSERT INTO %_content VALUES(...) */ + + if( p->zContentTbl ){ + sqlite3_value *pRowid = apVal[p->nColumn+3]; + if( sqlite3_value_type(pRowid)==SQLITE_NULL ){ + pRowid = apVal[1]; + } + if( sqlite3_value_type(pRowid)!=SQLITE_INTEGER ){ + return SQLITE_CONSTRAINT; } + *piDocid = sqlite3_value_int64(pRowid); + return SQLITE_OK; } - /* Step 5b */ - if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){ - z++; + /* Locate the statement handle used to insert data into the %_content + ** table. The SQL for this statement is: + ** + ** INSERT INTO %_content VALUES(?, ?, ?, ...) + ** + ** The statement features N '?' variables, where N is the number of user + ** defined columns in the FTS3 table, plus one for the docid field. + */ + rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]); + if( rc==SQLITE_OK && p->zLanguageid ){ + rc = sqlite3_bind_int( + pContentInsert, p->nColumn+2, + sqlite3_value_int(apVal[p->nColumn+4]) + ); } + if( rc!=SQLITE_OK ) return rc; - /* z[] is now the stemmed word in reverse order. Flip it back - ** around into forward order and return. + /* There is a quirk here. The users INSERT statement may have specified + ** a value for the "rowid" field, for the "docid" field, or for both. + ** Which is a problem, since "rowid" and "docid" are aliases for the + ** same value. For example: + ** + ** INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2); + ** + ** In FTS3, this is an error. It is an error to specify non-NULL values + ** for both docid and some other rowid alias. */ - *pnOut = i = (int)strlen(z); - zOut[i] = 0; - while( *z ){ - zOut[--i] = *(z++); + if( SQLITE_NULL!=sqlite3_value_type(apVal[3+p->nColumn]) ){ + if( SQLITE_NULL==sqlite3_value_type(apVal[0]) + && SQLITE_NULL!=sqlite3_value_type(apVal[1]) + ){ + /* A rowid/docid conflict. */ + return SQLITE_ERROR; + } + rc = sqlite3_bind_value(pContentInsert, 1, apVal[3+p->nColumn]); + if( rc!=SQLITE_OK ) return rc; } + + /* Execute the statement to insert the record. Set *piDocid to the + ** new docid value. + */ + sqlite3_step(pContentInsert); + rc = sqlite3_reset(pContentInsert); + + *piDocid = sqlite3_last_insert_rowid(p->db); + return rc; } -/* -** Characters that can be part of a token. We assume any character -** whose value is greater than 0x80 (any UTF character) can be -** part of a token. In other words, delimiters all must have -** values of 0x7f or lower. -*/ -static const char porterIdChar[] = { -/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ -}; -#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30])) + /* -** Extract the next token from a tokenization cursor. The cursor must -** have been opened by a prior call to porterOpen(). +** Remove all data from the FTS3 table. Clear the hash table containing +** pending terms. */ -static int porterNext( - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */ - const char **pzToken, /* OUT: *pzToken is the token text */ - int *pnBytes, /* OUT: Number of bytes in token */ - int *piStartOffset, /* OUT: Starting offset of token */ - int *piEndOffset, /* OUT: Ending offset of token */ - int *piPosition /* OUT: Position integer of token */ -){ - porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; - const char *z = c->zInput; - - while( c->iOffsetnInput ){ - int iStartOffset, ch; - - /* Scan past delimiter characters */ - while( c->iOffsetnInput && isDelim(z[c->iOffset]) ){ - c->iOffset++; - } +static int fts3DeleteAll(Fts3Table *p, int bContent){ + int rc = SQLITE_OK; /* Return code */ - /* Count non-delimiter characters. */ - iStartOffset = c->iOffset; - while( c->iOffsetnInput && !isDelim(z[c->iOffset]) ){ - c->iOffset++; - } + /* Discard the contents of the pending-terms hash table. */ + sqlite3Fts3PendingTermsClear(p); - if( c->iOffset>iStartOffset ){ - int n = c->iOffset-iStartOffset; - if( n>c->nAllocated ){ - char *pNew; - c->nAllocated = n+20; - pNew = sqlite3_realloc(c->zToken, c->nAllocated); - if( !pNew ) return SQLITE_NOMEM; - c->zToken = pNew; - } - porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes); - *pzToken = c->zToken; - *piStartOffset = iStartOffset; - *piEndOffset = c->iOffset; - *piPosition = c->iToken++; - return SQLITE_OK; - } + /* Delete everything from the shadow tables. Except, leave %_content as + ** is if bContent is false. */ + assert( p->zContentTbl==0 || bContent==0 ); + if( bContent ) fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0); + fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0); + fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0); + if( p->bHasDocsize ){ + fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0); } - return SQLITE_DONE; + if( p->bHasStat ){ + fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0); + } + return rc; } /* -** The set of routines that implement the porter-stemmer tokenizer +** */ -static const sqlite3_tokenizer_module porterTokenizerModule = { - 0, - porterCreate, - porterDestroy, - porterOpen, - porterClose, - porterNext, - 0 -}; +static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){ + int iLangid = 0; + if( p->zLanguageid ) iLangid = sqlite3_column_int(pSelect, p->nColumn+1); + return iLangid; +} /* -** Allocate a new porter tokenizer. Return a pointer to the new -** tokenizer in *ppModule +** The first element in the apVal[] array is assumed to contain the docid +** (an integer) of a row about to be deleted. Remove all terms from the +** full-text index. */ -SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule( - sqlite3_tokenizer_module const**ppModule +static void fts3DeleteTerms( + int *pRC, /* Result code */ + Fts3Table *p, /* The FTS table to delete from */ + sqlite3_value *pRowid, /* The docid to be deleted */ + u32 *aSz, /* Sizes of deleted document written here */ + int *pbFound /* OUT: Set to true if row really does exist */ ){ - *ppModule = &porterTokenizerModule; -} + int rc; + sqlite3_stmt *pSelect; -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + assert( *pbFound==0 ); + if( *pRC ) return; + rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pSelect) ){ + int i; + int iLangid = langidFromSelect(p, pSelect); + rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0)); + for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){ + int iCol = i-1; + if( p->abNotindexed[iCol]==0 ){ + const char *zText = (const char *)sqlite3_column_text(pSelect, i); + rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[iCol]); + aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i); + } + } + if( rc!=SQLITE_OK ){ + sqlite3_reset(pSelect); + *pRC = rc; + return; + } + *pbFound = 1; + } + rc = sqlite3_reset(pSelect); + }else{ + sqlite3_reset(pSelect); + } + *pRC = rc; +} -/************** End of fts3_porter.c *****************************************/ -/************** Begin file fts3_tokenizer.c **********************************/ /* -** 2007 June 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This is part of an SQLite module implementing full-text search. -** This particular file implements the generic tokenizer interface. +** Forward declaration to account for the circular dependency between +** functions fts3SegmentMerge() and fts3AllocateSegdirIdx(). */ +static int fts3SegmentMerge(Fts3Table *, int, int, int); -/* -** The code in this file is only compiled if: +/* +** This function allocates a new level iLevel index in the segdir table. +** Usually, indexes are allocated within a level sequentially starting +** with 0, so the allocated index is one greater than the value returned +** by: ** -** * The FTS3 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or +** SELECT max(idx) FROM %_segdir WHERE level = :iLevel ** -** * The FTS3 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). +** However, if there are already FTS3_MERGE_COUNT indexes at the requested +** level, they are merged into a single level (iLevel+1) segment and the +** allocated index is 0. +** +** If successful, *piIdx is set to the allocated index slot and SQLITE_OK +** returned. Otherwise, an SQLite error code is returned. */ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) +static int fts3AllocateSegdirIdx( + Fts3Table *p, + int iLangid, /* Language id */ + int iIndex, /* Index for p->aIndex */ + int iLevel, + int *piIdx +){ + int rc; /* Return Code */ + sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */ + int iNext = 0; /* Result of query pNextIdx */ -/* #include */ -/* #include */ + assert( iLangid>=0 ); + assert( p->nIndex>=1 ); + + /* Set variable iNext to the next available segdir index at level iLevel. */ + rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64( + pNextIdx, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel) + ); + if( SQLITE_ROW==sqlite3_step(pNextIdx) ){ + iNext = sqlite3_column_int(pNextIdx, 0); + } + rc = sqlite3_reset(pNextIdx); + } + + if( rc==SQLITE_OK ){ + /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already + ** full, merge all segments in level iLevel into a single iLevel+1 + ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise, + ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext. + */ + if( iNext>=FTS3_MERGE_COUNT ){ + fts3LogMerge(16, getAbsoluteLevel(p, iLangid, iIndex, iLevel)); + rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel); + *piIdx = 0; + }else{ + *piIdx = iNext; + } + } + + return rc; +} /* -** Implementation of the SQL scalar function for accessing the underlying -** hash table. This function may be called as follows: +** The %_segments table is declared as follows: ** -** SELECT (); -** SELECT (, ); +** CREATE TABLE %_segments(blockid INTEGER PRIMARY KEY, block BLOB) ** -** where is the name passed as the second argument -** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer'). +** This function reads data from a single row of the %_segments table. The +** specific row is identified by the iBlockid parameter. If paBlob is not +** NULL, then a buffer is allocated using sqlite3_malloc() and populated +** with the contents of the blob stored in the "block" column of the +** identified table row is. Whether or not paBlob is NULL, *pnBlob is set +** to the size of the blob in bytes before returning. ** -** If the argument is specified, it must be a blob value -** containing a pointer to be stored as the hash data corresponding -** to the string . If is not specified, then -** the string must already exist in the has table. Otherwise, -** an error is returned. +** If an error occurs, or the table does not contain the specified row, +** an SQLite error code is returned. Otherwise, SQLITE_OK is returned. If +** paBlob is non-NULL, then it is the responsibility of the caller to +** eventually free the returned buffer. ** -** Whether or not the argument is specified, the value returned -** is a blob containing the pointer stored as the hash data corresponding -** to string (after the hash-table is updated, if applicable). +** This function may leave an open sqlite3_blob* handle in the +** Fts3Table.pSegments variable. This handle is reused by subsequent calls +** to this function. The handle may be closed by calling the +** sqlite3Fts3SegmentsClose() function. Reusing a blob handle is a handy +** performance improvement, but the blob handle should always be closed +** before control is returned to the user (to prevent a lock being held +** on the database file for longer than necessary). Thus, any virtual table +** method (xFilter etc.) that may directly or indirectly call this function +** must call sqlite3Fts3SegmentsClose() before returning. */ -static void scalarFunc( - sqlite3_context *context, - int argc, - sqlite3_value **argv +SQLITE_PRIVATE int sqlite3Fts3ReadBlock( + Fts3Table *p, /* FTS3 table handle */ + sqlite3_int64 iBlockid, /* Access the row with blockid=$iBlockid */ + char **paBlob, /* OUT: Blob data in malloc'd buffer */ + int *pnBlob, /* OUT: Size of blob data */ + int *pnLoad /* OUT: Bytes actually loaded */ ){ - Fts3Hash *pHash; - void *pPtr = 0; - const unsigned char *zName; - int nName; - - assert( argc==1 || argc==2 ); - - pHash = (Fts3Hash *)sqlite3_user_data(context); + int rc; /* Return code */ - zName = sqlite3_value_text(argv[0]); - nName = sqlite3_value_bytes(argv[0])+1; + /* pnBlob must be non-NULL. paBlob may be NULL or non-NULL. */ + assert( pnBlob ); - if( argc==2 ){ - void *pOld; - int n = sqlite3_value_bytes(argv[1]); - if( zName==0 || n!=sizeof(pPtr) ){ - sqlite3_result_error(context, "argument type mismatch", -1); - return; - } - pPtr = *(void **)sqlite3_value_blob(argv[1]); - pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr); - if( pOld==pPtr ){ - sqlite3_result_error(context, "out of memory", -1); - return; - } + if( p->pSegments ){ + rc = sqlite3_blob_reopen(p->pSegments, iBlockid); }else{ - if( zName ){ - pPtr = sqlite3Fts3HashFind(pHash, zName, nName); + if( 0==p->zSegmentsTbl ){ + p->zSegmentsTbl = sqlite3_mprintf("%s_segments", p->zName); + if( 0==p->zSegmentsTbl ) return SQLITE_NOMEM; } - if( !pPtr ){ - char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); - sqlite3_result_error(context, zErr, -1); - sqlite3_free(zErr); - return; + rc = sqlite3_blob_open( + p->db, p->zDb, p->zSegmentsTbl, "block", iBlockid, 0, &p->pSegments + ); + } + + if( rc==SQLITE_OK ){ + int nByte = sqlite3_blob_bytes(p->pSegments); + *pnBlob = nByte; + if( paBlob ){ + char *aByte = sqlite3_malloc(nByte + FTS3_NODE_PADDING); + if( !aByte ){ + rc = SQLITE_NOMEM; + }else{ + if( pnLoad && nByte>(FTS3_NODE_CHUNK_THRESHOLD) ){ + nByte = FTS3_NODE_CHUNKSIZE; + *pnLoad = nByte; + } + rc = sqlite3_blob_read(p->pSegments, aByte, nByte, 0); + memset(&aByte[nByte], 0, FTS3_NODE_PADDING); + if( rc!=SQLITE_OK ){ + sqlite3_free(aByte); + aByte = 0; + } + } + *paBlob = aByte; } } - sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT); + return rc; } -SQLITE_PRIVATE int sqlite3Fts3IsIdChar(char c){ - static const char isFtsIdChar[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ - }; - return (c&0x80 || isFtsIdChar[(int)(c)]); +/* +** Close the blob handle at p->pSegments, if it is open. See comments above +** the sqlite3Fts3ReadBlock() function for details. +*/ +SQLITE_PRIVATE void sqlite3Fts3SegmentsClose(Fts3Table *p){ + sqlite3_blob_close(p->pSegments); + p->pSegments = 0; } + +static int fts3SegReaderIncrRead(Fts3SegReader *pReader){ + int nRead; /* Number of bytes to read */ + int rc; /* Return code */ -SQLITE_PRIVATE const char *sqlite3Fts3NextToken(const char *zStr, int *pn){ - const char *z1; - const char *z2 = 0; - - /* Find the start of the next token. */ - z1 = zStr; - while( z2==0 ){ - char c = *z1; - switch( c ){ - case '\0': return 0; /* No more tokens here */ - case '\'': - case '"': - case '`': { - z2 = z1; - while( *++z2 && (*z2!=c || *++z2==c) ); - break; - } - case '[': - z2 = &z1[1]; - while( *z2 && z2[0]!=']' ) z2++; - if( *z2 ) z2++; - break; + nRead = MIN(pReader->nNode - pReader->nPopulate, FTS3_NODE_CHUNKSIZE); + rc = sqlite3_blob_read( + pReader->pBlob, + &pReader->aNode[pReader->nPopulate], + nRead, + pReader->nPopulate + ); - default: - if( sqlite3Fts3IsIdChar(*z1) ){ - z2 = &z1[1]; - while( sqlite3Fts3IsIdChar(*z2) ) z2++; - }else{ - z1++; - } + if( rc==SQLITE_OK ){ + pReader->nPopulate += nRead; + memset(&pReader->aNode[pReader->nPopulate], 0, FTS3_NODE_PADDING); + if( pReader->nPopulate==pReader->nNode ){ + sqlite3_blob_close(pReader->pBlob); + pReader->pBlob = 0; + pReader->nPopulate = 0; } } + return rc; +} - *pn = (int)(z2-z1); - return z1; +static int fts3SegReaderRequire(Fts3SegReader *pReader, char *pFrom, int nByte){ + int rc = SQLITE_OK; + assert( !pReader->pBlob + || (pFrom>=pReader->aNode && pFrom<&pReader->aNode[pReader->nNode]) + ); + while( pReader->pBlob && rc==SQLITE_OK + && (pFrom - pReader->aNode + nByte)>pReader->nPopulate + ){ + rc = fts3SegReaderIncrRead(pReader); + } + return rc; } -SQLITE_PRIVATE int sqlite3Fts3InitTokenizer( - Fts3Hash *pHash, /* Tokenizer hash table */ - const char *zArg, /* Tokenizer name */ - sqlite3_tokenizer **ppTok, /* OUT: Tokenizer (if applicable) */ - char **pzErr /* OUT: Set to malloced error message */ -){ - int rc; - char *z = (char *)zArg; - int n = 0; - char *zCopy; - char *zEnd; /* Pointer to nul-term of zCopy */ - sqlite3_tokenizer_module *m; +/* +** Set an Fts3SegReader cursor to point at EOF. +*/ +static void fts3SegReaderSetEof(Fts3SegReader *pSeg){ + if( !fts3SegReaderIsRootOnly(pSeg) ){ + sqlite3_free(pSeg->aNode); + sqlite3_blob_close(pSeg->pBlob); + pSeg->pBlob = 0; + } + pSeg->aNode = 0; +} - zCopy = sqlite3_mprintf("%s", zArg); - if( !zCopy ) return SQLITE_NOMEM; - zEnd = &zCopy[strlen(zCopy)]; +/* +** Move the iterator passed as the first argument to the next term in the +** segment. If successful, SQLITE_OK is returned. If there is no next term, +** SQLITE_DONE. Otherwise, an SQLite error code. +*/ +static int fts3SegReaderNext( + Fts3Table *p, + Fts3SegReader *pReader, + int bIncr +){ + int rc; /* Return code of various sub-routines */ + char *pNext; /* Cursor variable */ + int nPrefix; /* Number of bytes in term prefix */ + int nSuffix; /* Number of bytes in term suffix */ - z = (char *)sqlite3Fts3NextToken(zCopy, &n); - if( z==0 ){ - assert( n==0 ); - z = zCopy; + if( !pReader->aDoclist ){ + pNext = pReader->aNode; + }else{ + pNext = &pReader->aDoclist[pReader->nDoclist]; } - z[n] = '\0'; - sqlite3Fts3Dequote(z); - m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1); - if( !m ){ - *pzErr = sqlite3_mprintf("unknown tokenizer: %s", z); - rc = SQLITE_ERROR; - }else{ - char const **aArg = 0; - int iArg = 0; - z = &z[n+1]; - while( z=&pReader->aNode[pReader->nNode] ){ + + if( fts3SegReaderIsPending(pReader) ){ + Fts3HashElem *pElem = *(pReader->ppNextElem); + if( pElem==0 ){ + pReader->aNode = 0; + }else{ + PendingList *pList = (PendingList *)fts3HashData(pElem); + pReader->zTerm = (char *)fts3HashKey(pElem); + pReader->nTerm = fts3HashKeysize(pElem); + pReader->nNode = pReader->nDoclist = pList->nData + 1; + pReader->aNode = pReader->aDoclist = pList->aData; + pReader->ppNextElem++; + assert( pReader->aNode ); } - aArg = aNew; - aArg[iArg++] = z; - z[n] = '\0'; - sqlite3Fts3Dequote(z); - z = &z[n+1]; + return SQLITE_OK; } - rc = m->xCreate(iArg, aArg, ppTok); - assert( rc!=SQLITE_OK || *ppTok ); - if( rc!=SQLITE_OK ){ - *pzErr = sqlite3_mprintf("unknown tokenizer"); - }else{ - (*ppTok)->pModule = m; + + fts3SegReaderSetEof(pReader); + + /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf + ** blocks have already been traversed. */ + assert( pReader->iCurrentBlock<=pReader->iLeafEndBlock ); + if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){ + return SQLITE_OK; } - sqlite3_free((void *)aArg); + + rc = sqlite3Fts3ReadBlock( + p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode, + (bIncr ? &pReader->nPopulate : 0) + ); + if( rc!=SQLITE_OK ) return rc; + assert( pReader->pBlob==0 ); + if( bIncr && pReader->nPopulatenNode ){ + pReader->pBlob = p->pSegments; + p->pSegments = 0; + } + pNext = pReader->aNode; } - sqlite3_free(zCopy); - return rc; -} + assert( !fts3SegReaderIsPending(pReader) ); + rc = fts3SegReaderRequire(pReader, pNext, FTS3_VARINT_MAX*2); + if( rc!=SQLITE_OK ) return rc; + + /* Because of the FTS3_NODE_PADDING bytes of padding, the following is + ** safe (no risk of overread) even if the node data is corrupted. */ + pNext += fts3GetVarint32(pNext, &nPrefix); + pNext += fts3GetVarint32(pNext, &nSuffix); + if( nPrefix<0 || nSuffix<=0 + || &pNext[nSuffix]>&pReader->aNode[pReader->nNode] + ){ + return FTS_CORRUPT_VTAB; + } -#ifdef SQLITE_TEST + if( nPrefix+nSuffix>pReader->nTermAlloc ){ + int nNew = (nPrefix+nSuffix)*2; + char *zNew = sqlite3_realloc(pReader->zTerm, nNew); + if( !zNew ){ + return SQLITE_NOMEM; + } + pReader->zTerm = zNew; + pReader->nTermAlloc = nNew; + } -#include -/* #include */ + rc = fts3SegReaderRequire(pReader, pNext, nSuffix+FTS3_VARINT_MAX); + if( rc!=SQLITE_OK ) return rc; + + memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix); + pReader->nTerm = nPrefix+nSuffix; + pNext += nSuffix; + pNext += fts3GetVarint32(pNext, &pReader->nDoclist); + pReader->aDoclist = pNext; + pReader->pOffsetList = 0; + + /* Check that the doclist does not appear to extend past the end of the + ** b-tree node. And that the final byte of the doclist is 0x00. If either + ** of these statements is untrue, then the data structure is corrupt. + */ + if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode] + || (pReader->nPopulate==0 && pReader->aDoclist[pReader->nDoclist-1]) + ){ + return FTS_CORRUPT_VTAB; + } + return SQLITE_OK; +} /* -** Implementation of a special SQL scalar function for testing tokenizers -** designed to be used in concert with the Tcl testing framework. This -** function must be called with two or more arguments: -** -** SELECT (, ..., ); -** -** where is the name passed as the second argument -** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer') -** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test'). -** -** The return value is a string that may be interpreted as a Tcl -** list. For each token in the , three elements are -** added to the returned list. The first is the token position, the -** second is the token text (folded, stemmed, etc.) and the third is the -** substring of associated with the token. For example, -** using the built-in "simple" tokenizer: -** -** SELECT fts_tokenizer_test('simple', 'I don't see how'); -** -** will return the string: -** -** "{0 i I 1 dont don't 2 see see 3 how how}" -** +** Set the SegReader to point to the first docid in the doclist associated +** with the current term. */ -static void testFunc( - sqlite3_context *context, - int argc, - sqlite3_value **argv -){ - Fts3Hash *pHash; - sqlite3_tokenizer_module *p; - sqlite3_tokenizer *pTokenizer = 0; - sqlite3_tokenizer_cursor *pCsr = 0; - - const char *zErr = 0; +static int fts3SegReaderFirstDocid(Fts3Table *pTab, Fts3SegReader *pReader){ + int rc = SQLITE_OK; + assert( pReader->aDoclist ); + assert( !pReader->pOffsetList ); + if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){ + u8 bEof = 0; + pReader->iDocid = 0; + pReader->nOffsetList = 0; + sqlite3Fts3DoclistPrev(0, + pReader->aDoclist, pReader->nDoclist, &pReader->pOffsetList, + &pReader->iDocid, &pReader->nOffsetList, &bEof + ); + }else{ + rc = fts3SegReaderRequire(pReader, pReader->aDoclist, FTS3_VARINT_MAX); + if( rc==SQLITE_OK ){ + int n = sqlite3Fts3GetVarint(pReader->aDoclist, &pReader->iDocid); + pReader->pOffsetList = &pReader->aDoclist[n]; + } + } + return rc; +} - const char *zName; - int nName; - const char *zInput; - int nInput; +/* +** Advance the SegReader to point to the next docid in the doclist +** associated with the current term. +** +** If arguments ppOffsetList and pnOffsetList are not NULL, then +** *ppOffsetList is set to point to the first column-offset list +** in the doclist entry (i.e. immediately past the docid varint). +** *pnOffsetList is set to the length of the set of column-offset +** lists, not including the nul-terminator byte. For example: +*/ +static int fts3SegReaderNextDocid( + Fts3Table *pTab, + Fts3SegReader *pReader, /* Reader to advance to next docid */ + char **ppOffsetList, /* OUT: Pointer to current position-list */ + int *pnOffsetList /* OUT: Length of *ppOffsetList in bytes */ +){ + int rc = SQLITE_OK; + char *p = pReader->pOffsetList; + char c = 0; - const char *azArg[64]; + assert( p ); - const char *zToken; - int nToken = 0; - int iStart = 0; - int iEnd = 0; - int iPos = 0; - int i; + if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){ + /* A pending-terms seg-reader for an FTS4 table that uses order=desc. + ** Pending-terms doclists are always built up in ascending order, so + ** we have to iterate through them backwards here. */ + u8 bEof = 0; + if( ppOffsetList ){ + *ppOffsetList = pReader->pOffsetList; + *pnOffsetList = pReader->nOffsetList - 1; + } + sqlite3Fts3DoclistPrev(0, + pReader->aDoclist, pReader->nDoclist, &p, &pReader->iDocid, + &pReader->nOffsetList, &bEof + ); + if( bEof ){ + pReader->pOffsetList = 0; + }else{ + pReader->pOffsetList = p; + } + }else{ + char *pEnd = &pReader->aDoclist[pReader->nDoclist]; - Tcl_Obj *pRet; + /* Pointer p currently points at the first byte of an offset list. The + ** following block advances it to point one byte past the end of + ** the same offset list. */ + while( 1 ){ + + /* The following line of code (and the "p++" below the while() loop) is + ** normally all that is required to move pointer p to the desired + ** position. The exception is if this node is being loaded from disk + ** incrementally and pointer "p" now points to the first byte past + ** the populated part of pReader->aNode[]. + */ + while( *p | c ) c = *p++ & 0x80; + assert( *p==0 ); + + if( pReader->pBlob==0 || p<&pReader->aNode[pReader->nPopulate] ) break; + rc = fts3SegReaderIncrRead(pReader); + if( rc!=SQLITE_OK ) return rc; + } + p++; + + /* If required, populate the output variables with a pointer to and the + ** size of the previous offset-list. + */ + if( ppOffsetList ){ + *ppOffsetList = pReader->pOffsetList; + *pnOffsetList = (int)(p - pReader->pOffsetList - 1); + } - if( argc<2 ){ - sqlite3_result_error(context, "insufficient arguments", -1); - return; + /* List may have been edited in place by fts3EvalNearTrim() */ + while( p=pEnd ){ + pReader->pOffsetList = 0; + }else{ + rc = fts3SegReaderRequire(pReader, p, FTS3_VARINT_MAX); + if( rc==SQLITE_OK ){ + sqlite3_int64 iDelta; + pReader->pOffsetList = p + sqlite3Fts3GetVarint(p, &iDelta); + if( pTab->bDescIdx ){ + pReader->iDocid -= iDelta; + }else{ + pReader->iDocid += iDelta; + } + } + } } - nName = sqlite3_value_bytes(argv[0]); - zName = (const char *)sqlite3_value_text(argv[0]); - nInput = sqlite3_value_bytes(argv[argc-1]); - zInput = (const char *)sqlite3_value_text(argv[argc-1]); + return SQLITE_OK; +} - pHash = (Fts3Hash *)sqlite3_user_data(context); - p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); - if( !p ){ - char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); - sqlite3_result_error(context, zErr, -1); - sqlite3_free(zErr); - return; - } +SQLITE_PRIVATE int sqlite3Fts3MsrOvfl( + Fts3Cursor *pCsr, + Fts3MultiSegReader *pMsr, + int *pnOvfl +){ + Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; + int nOvfl = 0; + int ii; + int rc = SQLITE_OK; + int pgsz = p->nPgsz; - pRet = Tcl_NewObj(); - Tcl_IncrRefCount(pRet); + assert( p->bFts4 ); + assert( pgsz>0 ); - for(i=1; inSegment; ii++){ + Fts3SegReader *pReader = pMsr->apSegment[ii]; + if( !fts3SegReaderIsPending(pReader) + && !fts3SegReaderIsRootOnly(pReader) + ){ + sqlite3_int64 jj; + for(jj=pReader->iStartBlock; jj<=pReader->iLeafEndBlock; jj++){ + int nBlob; + rc = sqlite3Fts3ReadBlock(p, jj, 0, &nBlob, 0); + if( rc!=SQLITE_OK ) break; + if( (nBlob+35)>pgsz ){ + nOvfl += (nBlob + 34)/pgsz; + } + } + } } + *pnOvfl = nOvfl; + return rc; +} - if( SQLITE_OK!=p->xCreate(argc-2, azArg, &pTokenizer) ){ - zErr = "error in xCreate()"; - goto finish; - } - pTokenizer->pModule = p; - if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){ - zErr = "error in xOpen()"; - goto finish; +/* +** Free all allocations associated with the iterator passed as the +** second argument. +*/ +SQLITE_PRIVATE void sqlite3Fts3SegReaderFree(Fts3SegReader *pReader){ + if( pReader && !fts3SegReaderIsPending(pReader) ){ + sqlite3_free(pReader->zTerm); + if( !fts3SegReaderIsRootOnly(pReader) ){ + sqlite3_free(pReader->aNode); + sqlite3_blob_close(pReader->pBlob); + } } + sqlite3_free(pReader); +} - while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){ - Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); - Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); - zToken = &zInput[iStart]; - nToken = iEnd-iStart; - Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); - } +/* +** Allocate a new SegReader object. +*/ +SQLITE_PRIVATE int sqlite3Fts3SegReaderNew( + int iAge, /* Segment "age". */ + int bLookup, /* True for a lookup only */ + sqlite3_int64 iStartLeaf, /* First leaf to traverse */ + sqlite3_int64 iEndLeaf, /* Final leaf to traverse */ + sqlite3_int64 iEndBlock, /* Final block of segment */ + const char *zRoot, /* Buffer containing root node */ + int nRoot, /* Size of buffer containing root node */ + Fts3SegReader **ppReader /* OUT: Allocated Fts3SegReader */ +){ + Fts3SegReader *pReader; /* Newly allocated SegReader object */ + int nExtra = 0; /* Bytes to allocate segment root node */ - if( SQLITE_OK!=p->xClose(pCsr) ){ - zErr = "error in xClose()"; - goto finish; + assert( iStartLeaf<=iEndLeaf ); + if( iStartLeaf==0 ){ + nExtra = nRoot + FTS3_NODE_PADDING; } - if( SQLITE_OK!=p->xDestroy(pTokenizer) ){ - zErr = "error in xDestroy()"; - goto finish; + + pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra); + if( !pReader ){ + return SQLITE_NOMEM; } + memset(pReader, 0, sizeof(Fts3SegReader)); + pReader->iIdx = iAge; + pReader->bLookup = bLookup!=0; + pReader->iStartBlock = iStartLeaf; + pReader->iLeafEndBlock = iEndLeaf; + pReader->iEndBlock = iEndBlock; -finish: - if( zErr ){ - sqlite3_result_error(context, zErr, -1); + if( nExtra ){ + /* The entire segment is stored in the root node. */ + pReader->aNode = (char *)&pReader[1]; + pReader->rootOnly = 1; + pReader->nNode = nRoot; + memcpy(pReader->aNode, zRoot, nRoot); + memset(&pReader->aNode[nRoot], 0, FTS3_NODE_PADDING); }else{ - sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT); + pReader->iCurrentBlock = iStartLeaf-1; } - Tcl_DecrRefCount(pRet); + *ppReader = pReader; + return SQLITE_OK; } -static -int registerTokenizer( - sqlite3 *db, - char *zName, - const sqlite3_tokenizer_module *p +/* +** This is a comparison function used as a qsort() callback when sorting +** an array of pending terms by term. This occurs as part of flushing +** the contents of the pending-terms hash table to the database. +*/ +static int SQLITE_CDECL fts3CompareElemByTerm( + const void *lhs, + const void *rhs ){ - int rc; - sqlite3_stmt *pStmt; - const char zSql[] = "SELECT fts3_tokenizer(?, ?)"; + char *z1 = fts3HashKey(*(Fts3HashElem **)lhs); + char *z2 = fts3HashKey(*(Fts3HashElem **)rhs); + int n1 = fts3HashKeysize(*(Fts3HashElem **)lhs); + int n2 = fts3HashKeysize(*(Fts3HashElem **)rhs); - rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); - if( rc!=SQLITE_OK ){ - return rc; + int n = (n1aIndex */ + const char *zTerm, /* Term to search for */ + int nTerm, /* Size of buffer zTerm */ + int bPrefix, /* True for a prefix iterator */ + Fts3SegReader **ppReader /* OUT: SegReader for pending-terms */ ){ - int rc; - sqlite3_stmt *pStmt; - const char zSql[] = "SELECT fts3_tokenizer(?)"; + Fts3SegReader *pReader = 0; /* Fts3SegReader object to return */ + Fts3HashElem *pE; /* Iterator variable */ + Fts3HashElem **aElem = 0; /* Array of term hash entries to scan */ + int nElem = 0; /* Size of array at aElem */ + int rc = SQLITE_OK; /* Return Code */ + Fts3Hash *pHash; + + pHash = &p->aIndex[iIndex].hPending; + if( bPrefix ){ + int nAlloc = 0; /* Size of allocated array at aElem */ + + for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){ + char *zKey = (char *)fts3HashKey(pE); + int nKey = fts3HashKeysize(pE); + if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){ + if( nElem==nAlloc ){ + Fts3HashElem **aElem2; + nAlloc += 16; + aElem2 = (Fts3HashElem **)sqlite3_realloc( + aElem, nAlloc*sizeof(Fts3HashElem *) + ); + if( !aElem2 ){ + rc = SQLITE_NOMEM; + nElem = 0; + break; + } + aElem = aElem2; + } + + aElem[nElem++] = pE; + } + } + + /* If more than one term matches the prefix, sort the Fts3HashElem + ** objects in term order using qsort(). This uses the same comparison + ** callback as is used when flushing terms to disk. + */ + if( nElem>1 ){ + qsort(aElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm); + } - *pp = 0; - rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); - if( rc!=SQLITE_OK ){ - return rc; + }else{ + /* The query is a simple term lookup that matches at most one term in + ** the index. All that is required is a straight hash-lookup. + ** + ** Because the stack address of pE may be accessed via the aElem pointer + ** below, the "Fts3HashElem *pE" must be declared so that it is valid + ** within this entire function, not just this "else{...}" block. + */ + pE = fts3HashFindElem(pHash, zTerm, nTerm); + if( pE ){ + aElem = &pE; + nElem = 1; + } } - sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ - memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); + if( nElem>0 ){ + int nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *); + pReader = (Fts3SegReader *)sqlite3_malloc(nByte); + if( !pReader ){ + rc = SQLITE_NOMEM; + }else{ + memset(pReader, 0, nByte); + pReader->iIdx = 0x7FFFFFFF; + pReader->ppNextElem = (Fts3HashElem **)&pReader[1]; + memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *)); } } - return sqlite3_finalize(pStmt); + if( bPrefix ){ + sqlite3_free(aElem); + } + *ppReader = pReader; + return rc; } -SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); - /* -** Implementation of the scalar function fts3_tokenizer_internal_test(). -** This function is used for testing only, it is not included in the -** build unless SQLITE_TEST is defined. -** -** The purpose of this is to test that the fts3_tokenizer() function -** can be used as designed by the C-code in the queryTokenizer and -** registerTokenizer() functions above. These two functions are repeated -** in the README.tokenizer file as an example, so it is important to -** test them. +** Compare the entries pointed to by two Fts3SegReader structures. +** Comparison is as follows: ** -** To run the tests, evaluate the fts3_tokenizer_internal_test() scalar -** function with no arguments. An assert() will fail if a problem is -** detected. i.e.: +** 1) EOF is greater than not EOF. ** -** SELECT fts3_tokenizer_internal_test(); +** 2) The current terms (if any) are compared using memcmp(). If one +** term is a prefix of another, the longer term is considered the +** larger. ** +** 3) By segment age. An older segment is considered larger. */ -static void intTestFunc( - sqlite3_context *context, - int argc, - sqlite3_value **argv -){ +static int fts3SegReaderCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ int rc; - const sqlite3_tokenizer_module *p1; - const sqlite3_tokenizer_module *p2; - sqlite3 *db = (sqlite3 *)sqlite3_user_data(context); - - UNUSED_PARAMETER(argc); - UNUSED_PARAMETER(argv); - - /* Test the query function */ - sqlite3Fts3SimpleTokenizerModule(&p1); - rc = queryTokenizer(db, "simple", &p2); - assert( rc==SQLITE_OK ); - assert( p1==p2 ); - rc = queryTokenizer(db, "nosuchtokenizer", &p2); - assert( rc==SQLITE_ERROR ); - assert( p2==0 ); - assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") ); - - /* Test the storage function */ - rc = registerTokenizer(db, "nosuchtokenizer", p1); - assert( rc==SQLITE_OK ); - rc = queryTokenizer(db, "nosuchtokenizer", &p2); - assert( rc==SQLITE_OK ); - assert( p2==p1 ); - - sqlite3_result_text(context, "ok", -1, SQLITE_STATIC); + if( pLhs->aNode && pRhs->aNode ){ + int rc2 = pLhs->nTerm - pRhs->nTerm; + if( rc2<0 ){ + rc = memcmp(pLhs->zTerm, pRhs->zTerm, pLhs->nTerm); + }else{ + rc = memcmp(pLhs->zTerm, pRhs->zTerm, pRhs->nTerm); + } + if( rc==0 ){ + rc = rc2; + } + }else{ + rc = (pLhs->aNode==0) - (pRhs->aNode==0); + } + if( rc==0 ){ + rc = pRhs->iIdx - pLhs->iIdx; + } + assert( rc!=0 ); + return rc; } -#endif - /* -** Set up SQL objects in database db used to access the contents of -** the hash table pointed to by argument pHash. The hash table must -** been initialized to use string keys, and to take a private copy -** of the key when a value is inserted. i.e. by a call similar to: +** A different comparison function for SegReader structures. In this +** version, it is assumed that each SegReader points to an entry in +** a doclist for identical terms. Comparison is made as follows: ** -** sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); +** 1) EOF (end of doclist in this case) is greater than not EOF. ** -** This function adds a scalar function (see header comment above -** scalarFunc() in this file for details) and, if ENABLE_TABLE is -** defined at compilation time, a temporary virtual table (see header -** comment above struct HashTableVtab) to the database schema. Both -** provide read/write access to the contents of *pHash. +** 2) By current docid. ** -** The third argument to this function, zName, is used as the name -** of both the scalar and, if created, the virtual table. +** 3) By segment age. An older segment is considered larger. */ -SQLITE_PRIVATE int sqlite3Fts3InitHashTable( - sqlite3 *db, - Fts3Hash *pHash, - const char *zName -){ - int rc = SQLITE_OK; - void *p = (void *)pHash; - const int any = SQLITE_ANY; - -#ifdef SQLITE_TEST - char *zTest = 0; - char *zTest2 = 0; - void *pdb = (void *)db; - zTest = sqlite3_mprintf("%s_test", zName); - zTest2 = sqlite3_mprintf("%s_internal_test", zName); - if( !zTest || !zTest2 ){ - rc = SQLITE_NOMEM; - } -#endif - - if( SQLITE_OK==rc ){ - rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0); - } - if( SQLITE_OK==rc ){ - rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0); - } -#ifdef SQLITE_TEST - if( SQLITE_OK==rc ){ - rc = sqlite3_create_function(db, zTest, -1, any, p, testFunc, 0, 0); +static int fts3SegReaderDoclistCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ + int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0); + if( rc==0 ){ + if( pLhs->iDocid==pRhs->iDocid ){ + rc = pRhs->iIdx - pLhs->iIdx; + }else{ + rc = (pLhs->iDocid > pRhs->iDocid) ? 1 : -1; + } } - if( SQLITE_OK==rc ){ - rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0); + assert( pLhs->aNode && pRhs->aNode ); + return rc; +} +static int fts3SegReaderDoclistCmpRev(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ + int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0); + if( rc==0 ){ + if( pLhs->iDocid==pRhs->iDocid ){ + rc = pRhs->iIdx - pLhs->iIdx; + }else{ + rc = (pLhs->iDocid < pRhs->iDocid) ? 1 : -1; + } } -#endif - -#ifdef SQLITE_TEST - sqlite3_free(zTest); - sqlite3_free(zTest2); -#endif - + assert( pLhs->aNode && pRhs->aNode ); return rc; } -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ - -/************** End of fts3_tokenizer.c **************************************/ -/************** Begin file fts3_tokenizer1.c *********************************/ -/* -** 2006 Oct 10 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** Implementation of the "simple" full-text-search tokenizer. -*/ - /* -** The code in this file is only compiled if: -** -** * The FTS3 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or +** Compare the term that the Fts3SegReader object passed as the first argument +** points to with the term specified by arguments zTerm and nTerm. ** -** * The FTS3 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). +** If the pSeg iterator is already at EOF, return 0. Otherwise, return +** -ve if the pSeg term is less than zTerm/nTerm, 0 if the two terms are +** equal, or +ve if the pSeg term is greater than zTerm/nTerm. */ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) - -/* #include */ -/* #include */ -/* #include */ -/* #include */ - - -typedef struct simple_tokenizer { - sqlite3_tokenizer base; - char delim[128]; /* flag ASCII delimiters */ -} simple_tokenizer; - -typedef struct simple_tokenizer_cursor { - sqlite3_tokenizer_cursor base; - const char *pInput; /* input we are tokenizing */ - int nBytes; /* size of the input */ - int iOffset; /* current position in pInput */ - int iToken; /* index of next token to be returned */ - char *pToken; /* storage for current token */ - int nTokenAllocated; /* space allocated to zToken buffer */ -} simple_tokenizer_cursor; - - -static int simpleDelim(simple_tokenizer *t, unsigned char c){ - return c<0x80 && t->delim[c]; -} -static int fts3_isalnum(int x){ - return (x>='0' && x<='9') || (x>='A' && x<='Z') || (x>='a' && x<='z'); +static int fts3SegReaderTermCmp( + Fts3SegReader *pSeg, /* Segment reader object */ + const char *zTerm, /* Term to compare to */ + int nTerm /* Size of term zTerm in bytes */ +){ + int res = 0; + if( pSeg->aNode ){ + if( pSeg->nTerm>nTerm ){ + res = memcmp(pSeg->zTerm, zTerm, nTerm); + }else{ + res = memcmp(pSeg->zTerm, zTerm, pSeg->nTerm); + } + if( res==0 ){ + res = pSeg->nTerm-nTerm; + } + } + return res; } /* -** Create a new tokenizer instance. +** Argument apSegment is an array of nSegment elements. It is known that +** the final (nSegment-nSuspect) members are already in sorted order +** (according to the comparison function provided). This function shuffles +** the array around until all entries are in sorted order. */ -static int simpleCreate( - int argc, const char * const *argv, - sqlite3_tokenizer **ppTokenizer +static void fts3SegReaderSort( + Fts3SegReader **apSegment, /* Array to sort entries of */ + int nSegment, /* Size of apSegment array */ + int nSuspect, /* Unsorted entry count */ + int (*xCmp)(Fts3SegReader *, Fts3SegReader *) /* Comparison function */ ){ - simple_tokenizer *t; + int i; /* Iterator variable */ - t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t)); - if( t==NULL ) return SQLITE_NOMEM; - memset(t, 0, sizeof(*t)); + assert( nSuspect<=nSegment ); - /* TODO(shess) Delimiters need to remain the same from run to run, - ** else we need to reindex. One solution would be a meta-table to - ** track such information in the database, then we'd only want this - ** information on the initial create. - */ - if( argc>1 ){ - int i, n = (int)strlen(argv[1]); - for(i=0; i=0x80 ){ - sqlite3_free(t); - return SQLITE_ERROR; - } - t->delim[ch] = 1; - } - } else { - /* Mark non-alphanumeric ASCII characters as delimiters */ - int i; - for(i=1; i<0x80; i++){ - t->delim[i] = !fts3_isalnum(i) ? -1 : 0; + if( nSuspect==nSegment ) nSuspect--; + for(i=nSuspect-1; i>=0; i--){ + int j; + for(j=i; j<(nSegment-1); j++){ + Fts3SegReader *pTmp; + if( xCmp(apSegment[j], apSegment[j+1])<0 ) break; + pTmp = apSegment[j+1]; + apSegment[j+1] = apSegment[j]; + apSegment[j] = pTmp; } } - *ppTokenizer = &t->base; - return SQLITE_OK; +#ifndef NDEBUG + /* Check that the list really is sorted now. */ + for(i=0; i<(nSuspect-1); i++){ + assert( xCmp(apSegment[i], apSegment[i+1])<0 ); + } +#endif } -/* -** Destroy a tokenizer +/* +** Insert a record into the %_segments table. */ -static int simpleDestroy(sqlite3_tokenizer *pTokenizer){ - sqlite3_free(pTokenizer); - return SQLITE_OK; +static int fts3WriteSegment( + Fts3Table *p, /* Virtual table handle */ + sqlite3_int64 iBlock, /* Block id for new block */ + char *z, /* Pointer to buffer containing block data */ + int n /* Size of buffer z in bytes */ +){ + sqlite3_stmt *pStmt; + int rc = fts3SqlStmt(p, SQL_INSERT_SEGMENTS, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pStmt, 1, iBlock); + sqlite3_bind_blob(pStmt, 2, z, n, SQLITE_STATIC); + sqlite3_step(pStmt); + rc = sqlite3_reset(pStmt); + } + return rc; } /* -** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is pInput[0..nBytes-1]. A cursor -** used to incrementally tokenize this string is returned in -** *ppCursor. +** Find the largest relative level number in the table. If successful, set +** *pnMax to this value and return SQLITE_OK. Otherwise, if an error occurs, +** set *pnMax to zero and return an SQLite error code. */ -static int simpleOpen( - sqlite3_tokenizer *pTokenizer, /* The tokenizer */ - const char *pInput, int nBytes, /* String to be tokenized */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ -){ - simple_tokenizer_cursor *c; - - UNUSED_PARAMETER(pTokenizer); - - c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); - if( c==NULL ) return SQLITE_NOMEM; +SQLITE_PRIVATE int sqlite3Fts3MaxLevel(Fts3Table *p, int *pnMax){ + int rc; + int mxLevel = 0; + sqlite3_stmt *pStmt = 0; - c->pInput = pInput; - if( pInput==0 ){ - c->nBytes = 0; - }else if( nBytes<0 ){ - c->nBytes = (int)strlen(pInput); - }else{ - c->nBytes = nBytes; + rc = fts3SqlStmt(p, SQL_SELECT_MXLEVEL, &pStmt, 0); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + mxLevel = sqlite3_column_int(pStmt, 0); + } + rc = sqlite3_reset(pStmt); } - c->iOffset = 0; /* start tokenizing at the beginning */ - c->iToken = 0; - c->pToken = NULL; /* no space allocated, yet. */ - c->nTokenAllocated = 0; + *pnMax = mxLevel; + return rc; +} - *ppCursor = &c->base; - return SQLITE_OK; +/* +** Insert a record into the %_segdir table. +*/ +static int fts3WriteSegdir( + Fts3Table *p, /* Virtual table handle */ + sqlite3_int64 iLevel, /* Value for "level" field (absolute level) */ + int iIdx, /* Value for "idx" field */ + sqlite3_int64 iStartBlock, /* Value for "start_block" field */ + sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */ + sqlite3_int64 iEndBlock, /* Value for "end_block" field */ + sqlite3_int64 nLeafData, /* Bytes of leaf data in segment */ + char *zRoot, /* Blob value for "root" field */ + int nRoot /* Number of bytes in buffer zRoot */ +){ + sqlite3_stmt *pStmt; + int rc = fts3SqlStmt(p, SQL_INSERT_SEGDIR, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pStmt, 1, iLevel); + sqlite3_bind_int(pStmt, 2, iIdx); + sqlite3_bind_int64(pStmt, 3, iStartBlock); + sqlite3_bind_int64(pStmt, 4, iLeafEndBlock); + if( nLeafData==0 ){ + sqlite3_bind_int64(pStmt, 5, iEndBlock); + }else{ + char *zEnd = sqlite3_mprintf("%lld %lld", iEndBlock, nLeafData); + if( !zEnd ) return SQLITE_NOMEM; + sqlite3_bind_text(pStmt, 5, zEnd, -1, sqlite3_free); + } + sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC); + sqlite3_step(pStmt); + rc = sqlite3_reset(pStmt); + } + return rc; } /* -** Close a tokenization cursor previously opened by a call to -** simpleOpen() above. +** Return the size of the common prefix (if any) shared by zPrev and +** zNext, in bytes. For example, +** +** fts3PrefixCompress("abc", 3, "abcdef", 6) // returns 3 +** fts3PrefixCompress("abX", 3, "abcdef", 6) // returns 2 +** fts3PrefixCompress("abX", 3, "Xbcdef", 6) // returns 0 */ -static int simpleClose(sqlite3_tokenizer_cursor *pCursor){ - simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; - sqlite3_free(c->pToken); - sqlite3_free(c); - return SQLITE_OK; +static int fts3PrefixCompress( + const char *zPrev, /* Buffer containing previous term */ + int nPrev, /* Size of buffer zPrev in bytes */ + const char *zNext, /* Buffer containing next term */ + int nNext /* Size of buffer zNext in bytes */ +){ + int n; + UNUSED_PARAMETER(nNext); + for(n=0; npTokenizer; - unsigned char *p = (unsigned char *)c->pInput; + SegmentNode *pTree = *ppTree; + int rc; + SegmentNode *pNew; - while( c->iOffsetnBytes ){ - int iStartOffset; + /* First try to append the term to the current node. Return early if + ** this is possible. + */ + if( pTree ){ + int nData = pTree->nData; /* Current size of node in bytes */ + int nReq = nData; /* Required space after adding zTerm */ + int nPrefix; /* Number of bytes of prefix compression */ + int nSuffix; /* Suffix length */ - /* Scan past delimiter characters */ - while( c->iOffsetnBytes && simpleDelim(t, p[c->iOffset]) ){ - c->iOffset++; - } + nPrefix = fts3PrefixCompress(pTree->zTerm, pTree->nTerm, zTerm, nTerm); + nSuffix = nTerm-nPrefix; - /* Count non-delimiter characters. */ - iStartOffset = c->iOffset; - while( c->iOffsetnBytes && !simpleDelim(t, p[c->iOffset]) ){ - c->iOffset++; - } + nReq += sqlite3Fts3VarintLen(nPrefix)+sqlite3Fts3VarintLen(nSuffix)+nSuffix; + if( nReq<=p->nNodeSize || !pTree->zTerm ){ - if( c->iOffset>iStartOffset ){ - int i, n = c->iOffset-iStartOffset; - if( n>c->nTokenAllocated ){ - char *pNew; - c->nTokenAllocated = n+20; - pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated); - if( !pNew ) return SQLITE_NOMEM; - c->pToken = pNew; - } - for(i=0; ip->nNodeSize ){ + /* An unusual case: this is the first term to be added to the node + ** and the static node buffer (p->nNodeSize bytes) is not large + ** enough. Use a separately malloced buffer instead This wastes + ** p->nNodeSize bytes, but since this scenario only comes about when + ** the database contain two terms that share a prefix of almost 2KB, + ** this is not expected to be a serious problem. */ - unsigned char ch = p[iStartOffset+i]; - c->pToken[i] = (char)((ch>='A' && ch<='Z') ? ch-'A'+'a' : ch); + assert( pTree->aData==(char *)&pTree[1] ); + pTree->aData = (char *)sqlite3_malloc(nReq); + if( !pTree->aData ){ + return SQLITE_NOMEM; + } } - *ppToken = c->pToken; - *pnBytes = n; - *piStartOffset = iStartOffset; - *piEndOffset = c->iOffset; - *piPosition = c->iToken++; + if( pTree->zTerm ){ + /* There is no prefix-length field for first term in a node */ + nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nPrefix); + } + + nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nSuffix); + memcpy(&pTree->aData[nData], &zTerm[nPrefix], nSuffix); + pTree->nData = nData + nSuffix; + pTree->nEntry++; + + if( isCopyTerm ){ + if( pTree->nMalloczMalloc, nTerm*2); + if( !zNew ){ + return SQLITE_NOMEM; + } + pTree->nMalloc = nTerm*2; + pTree->zMalloc = zNew; + } + pTree->zTerm = pTree->zMalloc; + memcpy(pTree->zTerm, zTerm, nTerm); + pTree->nTerm = nTerm; + }else{ + pTree->zTerm = (char *)zTerm; + pTree->nTerm = nTerm; + } return SQLITE_OK; } } - return SQLITE_DONE; -} - -/* -** The set of routines that implement the simple tokenizer -*/ -static const sqlite3_tokenizer_module simpleTokenizerModule = { - 0, - simpleCreate, - simpleDestroy, - simpleOpen, - simpleClose, - simpleNext, - 0, -}; - -/* -** Allocate a new simple tokenizer. Return a pointer to the new -** tokenizer in *ppModule -*/ -SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule( - sqlite3_tokenizer_module const**ppModule -){ - *ppModule = &simpleTokenizerModule; -} - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ - -/************** End of fts3_tokenizer1.c *************************************/ -/************** Begin file fts3_tokenize_vtab.c ******************************/ -/* -** 2013 Apr 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This file contains code for the "fts3tokenize" virtual table module. -** An fts3tokenize virtual table is created as follows: -** -** CREATE VIRTUAL TABLE USING fts3tokenize( -** , , ... -** ); -** -** The table created has the following schema: -** -** CREATE TABLE (input, token, start, end, position) -** -** When queried, the query must include a WHERE clause of type: -** -** input = -** -** The virtual table module tokenizes this , using the FTS3 -** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE -** statement and returns one row for each token in the result. With -** fields set as follows: -** -** input: Always set to a copy of -** token: A token from the input. -** start: Byte offset of the token within the input . -** end: Byte offset of the byte immediately following the end of the -** token within the input string. -** pos: Token offset of token within input. -** -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) - -/* #include */ -/* #include */ -typedef struct Fts3tokTable Fts3tokTable; -typedef struct Fts3tokCursor Fts3tokCursor; + /* If control flows to here, it was not possible to append zTerm to the + ** current node. Create a new node (a right-sibling of the current node). + ** If this is the first node in the tree, the term is added to it. + ** + ** Otherwise, the term is not added to the new node, it is left empty for + ** now. Instead, the term is inserted into the parent of pTree. If pTree + ** has no parent, one is created here. + */ + pNew = (SegmentNode *)sqlite3_malloc(sizeof(SegmentNode) + p->nNodeSize); + if( !pNew ){ + return SQLITE_NOMEM; + } + memset(pNew, 0, sizeof(SegmentNode)); + pNew->nData = 1 + FTS3_VARINT_MAX; + pNew->aData = (char *)&pNew[1]; -/* -** Virtual table structure. -*/ -struct Fts3tokTable { - sqlite3_vtab base; /* Base class used by SQLite core */ - const sqlite3_tokenizer_module *pMod; - sqlite3_tokenizer *pTok; -}; + if( pTree ){ + SegmentNode *pParent = pTree->pParent; + rc = fts3NodeAddTerm(p, &pParent, isCopyTerm, zTerm, nTerm); + if( pTree->pParent==0 ){ + pTree->pParent = pParent; + } + pTree->pRight = pNew; + pNew->pLeftmost = pTree->pLeftmost; + pNew->pParent = pParent; + pNew->zMalloc = pTree->zMalloc; + pNew->nMalloc = pTree->nMalloc; + pTree->zMalloc = 0; + }else{ + pNew->pLeftmost = pNew; + rc = fts3NodeAddTerm(p, &pNew, isCopyTerm, zTerm, nTerm); + } -/* -** Virtual table cursor structure. -*/ -struct Fts3tokCursor { - sqlite3_vtab_cursor base; /* Base class used by SQLite core */ - char *zInput; /* Input string */ - sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */ - int iRowid; /* Current 'rowid' value */ - const char *zToken; /* Current 'token' value */ - int nToken; /* Size of zToken in bytes */ - int iStart; /* Current 'start' value */ - int iEnd; /* Current 'end' value */ - int iPos; /* Current 'pos' value */ -}; + *ppTree = pNew; + return rc; +} /* -** Query FTS for the tokenizer implementation named zName. +** Helper function for fts3NodeWrite(). */ -static int fts3tokQueryTokenizer( - Fts3Hash *pHash, - const char *zName, - const sqlite3_tokenizer_module **pp, - char **pzErr +static int fts3TreeFinishNode( + SegmentNode *pTree, + int iHeight, + sqlite3_int64 iLeftChild ){ - sqlite3_tokenizer_module *p; - int nName = (int)strlen(zName); - - p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); - if( !p ){ - *pzErr = sqlite3_mprintf("unknown tokenizer: %s", zName); - return SQLITE_ERROR; - } - - *pp = p; - return SQLITE_OK; + int nStart; + assert( iHeight>=1 && iHeight<128 ); + nStart = FTS3_VARINT_MAX - sqlite3Fts3VarintLen(iLeftChild); + pTree->aData[nStart] = (char)iHeight; + sqlite3Fts3PutVarint(&pTree->aData[nStart+1], iLeftChild); + return nStart; } /* -** The second argument, argv[], is an array of pointers to nul-terminated -** strings. This function makes a copy of the array and strings into a -** single block of memory. It then dequotes any of the strings that appear -** to be quoted. +** Write the buffer for the segment node pTree and all of its peers to the +** database. Then call this function recursively to write the parent of +** pTree and its peers to the database. ** -** If successful, output parameter *pazDequote is set to point at the -** array of dequoted strings and SQLITE_OK is returned. The caller is -** responsible for eventually calling sqlite3_free() to free the array -** in this case. Or, if an error occurs, an SQLite error code is returned. -** The final value of *pazDequote is undefined in this case. +** Except, if pTree is a root node, do not write it to the database. Instead, +** set output variables *paRoot and *pnRoot to contain the root node. +** +** If successful, SQLITE_OK is returned and output variable *piLast is +** set to the largest blockid written to the database (or zero if no +** blocks were written to the db). Otherwise, an SQLite error code is +** returned. */ -static int fts3tokDequoteArray( - int argc, /* Number of elements in argv[] */ - const char * const *argv, /* Input array */ - char ***pazDequote /* Output array */ +static int fts3NodeWrite( + Fts3Table *p, /* Virtual table handle */ + SegmentNode *pTree, /* SegmentNode handle */ + int iHeight, /* Height of this node in tree */ + sqlite3_int64 iLeaf, /* Block id of first leaf node */ + sqlite3_int64 iFree, /* Block id of next free slot in %_segments */ + sqlite3_int64 *piLast, /* OUT: Block id of last entry written */ + char **paRoot, /* OUT: Data for root node */ + int *pnRoot /* OUT: Size of root node in bytes */ ){ - int rc = SQLITE_OK; /* Return code */ - if( argc==0 ){ - *pazDequote = 0; - }else{ - int i; - int nByte = 0; - char **azDequote; + int rc = SQLITE_OK; - for(i=0; ipParent ){ + /* Root node of the tree. */ + int nStart = fts3TreeFinishNode(pTree, iHeight, iLeaf); + *piLast = iFree-1; + *pnRoot = pTree->nData - nStart; + *paRoot = &pTree->aData[nStart]; + }else{ + SegmentNode *pIter; + sqlite3_int64 iNextFree = iFree; + sqlite3_int64 iNextLeaf = iLeaf; + for(pIter=pTree->pLeftmost; pIter && rc==SQLITE_OK; pIter=pIter->pRight){ + int nStart = fts3TreeFinishNode(pIter, iHeight, iNextLeaf); + int nWrite = pIter->nData - nStart; + + rc = fts3WriteSegment(p, iNextFree, &pIter->aData[nStart], nWrite); + iNextFree++; + iNextLeaf += (pIter->nEntry+1); } - - *pazDequote = azDequote = sqlite3_malloc(sizeof(char *)*argc + nByte); - if( azDequote==0 ){ - rc = SQLITE_NOMEM; - }else{ - char *pSpace = (char *)&azDequote[argc]; - for(i=0; ipParent, iHeight+1, iFree, iNextFree, piLast, paRoot, pnRoot + ); } } @@ -142126,5987 +146610,5989 @@ static int fts3tokDequoteArray( } /* -** Schema of the tokenizer table. +** Free all memory allocations associated with the tree pTree. */ -#define FTS3_TOK_SCHEMA "CREATE TABLE x(input, token, start, end, position)" +static void fts3NodeFree(SegmentNode *pTree){ + if( pTree ){ + SegmentNode *p = pTree->pLeftmost; + fts3NodeFree(p->pParent); + while( p ){ + SegmentNode *pRight = p->pRight; + if( p->aData!=(char *)&p[1] ){ + sqlite3_free(p->aData); + } + assert( pRight==0 || p->zMalloc==0 ); + sqlite3_free(p->zMalloc); + sqlite3_free(p); + p = pRight; + } + } +} /* -** This function does all the work for both the xConnect and xCreate methods. -** These tables have no persistent representation of their own, so xConnect -** and xCreate are identical operations. +** Add a term to the segment being constructed by the SegmentWriter object +** *ppWriter. When adding the first term to a segment, *ppWriter should +** be passed NULL. This function will allocate a new SegmentWriter object +** and return it via the input/output variable *ppWriter in this case. ** -** argv[0]: module name -** argv[1]: database name -** argv[2]: table name -** argv[3]: first argument (tokenizer name) +** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. */ -static int fts3tokConnectMethod( - sqlite3 *db, /* Database connection */ - void *pHash, /* Hash table of tokenizers */ - int argc, /* Number of elements in argv array */ - const char * const *argv, /* xCreate/xConnect argument array */ - sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ - char **pzErr /* OUT: sqlite3_malloc'd error message */ +static int fts3SegWriterAdd( + Fts3Table *p, /* Virtual table handle */ + SegmentWriter **ppWriter, /* IN/OUT: SegmentWriter handle */ + int isCopyTerm, /* True if buffer zTerm must be copied */ + const char *zTerm, /* Pointer to buffer containing term */ + int nTerm, /* Size of term in bytes */ + const char *aDoclist, /* Pointer to buffer containing doclist */ + int nDoclist /* Size of doclist in bytes */ ){ - Fts3tokTable *pTab = 0; - const sqlite3_tokenizer_module *pMod = 0; - sqlite3_tokenizer *pTok = 0; - int rc; - char **azDequote = 0; - int nDequote; + int nPrefix; /* Size of term prefix in bytes */ + int nSuffix; /* Size of term suffix in bytes */ + int nReq; /* Number of bytes required on leaf page */ + int nData; + SegmentWriter *pWriter = *ppWriter; - rc = sqlite3_declare_vtab(db, FTS3_TOK_SCHEMA); - if( rc!=SQLITE_OK ) return rc; + if( !pWriter ){ + int rc; + sqlite3_stmt *pStmt; - nDequote = argc-3; - rc = fts3tokDequoteArray(nDequote, &argv[3], &azDequote); + /* Allocate the SegmentWriter structure */ + pWriter = (SegmentWriter *)sqlite3_malloc(sizeof(SegmentWriter)); + if( !pWriter ) return SQLITE_NOMEM; + memset(pWriter, 0, sizeof(SegmentWriter)); + *ppWriter = pWriter; - if( rc==SQLITE_OK ){ - const char *zModule; - if( nDequote<1 ){ - zModule = "simple"; - }else{ - zModule = azDequote[0]; + /* Allocate a buffer in which to accumulate data */ + pWriter->aData = (char *)sqlite3_malloc(p->nNodeSize); + if( !pWriter->aData ) return SQLITE_NOMEM; + pWriter->nSize = p->nNodeSize; + + /* Find the next free blockid in the %_segments table */ + rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pStmt, 0); + if( rc!=SQLITE_OK ) return rc; + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + pWriter->iFree = sqlite3_column_int64(pStmt, 0); + pWriter->iFirst = pWriter->iFree; } - rc = fts3tokQueryTokenizer((Fts3Hash*)pHash, zModule, &pMod, pzErr); + rc = sqlite3_reset(pStmt); + if( rc!=SQLITE_OK ) return rc; } + nData = pWriter->nData; - assert( (rc==SQLITE_OK)==(pMod!=0) ); - if( rc==SQLITE_OK ){ - const char * const *azArg = (const char * const *)&azDequote[1]; - rc = pMod->xCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok); - } + nPrefix = fts3PrefixCompress(pWriter->zTerm, pWriter->nTerm, zTerm, nTerm); + nSuffix = nTerm-nPrefix; - if( rc==SQLITE_OK ){ - pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable)); - if( pTab==0 ){ - rc = SQLITE_NOMEM; - } + /* Figure out how many bytes are required by this new entry */ + nReq = sqlite3Fts3VarintLen(nPrefix) + /* varint containing prefix size */ + sqlite3Fts3VarintLen(nSuffix) + /* varint containing suffix size */ + nSuffix + /* Term suffix */ + sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */ + nDoclist; /* Doclist data */ + + if( nData>0 && nData+nReq>p->nNodeSize ){ + int rc; + + /* The current leaf node is full. Write it out to the database. */ + rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, nData); + if( rc!=SQLITE_OK ) return rc; + p->nLeafAdd++; + + /* Add the current term to the interior node tree. The term added to + ** the interior tree must: + ** + ** a) be greater than the largest term on the leaf node just written + ** to the database (still available in pWriter->zTerm), and + ** + ** b) be less than or equal to the term about to be added to the new + ** leaf node (zTerm/nTerm). + ** + ** In other words, it must be the prefix of zTerm 1 byte longer than + ** the common prefix (if any) of zTerm and pWriter->zTerm. + */ + assert( nPrefixpTree, isCopyTerm, zTerm, nPrefix+1); + if( rc!=SQLITE_OK ) return rc; + + nData = 0; + pWriter->nTerm = 0; + + nPrefix = 0; + nSuffix = nTerm; + nReq = 1 + /* varint containing prefix size */ + sqlite3Fts3VarintLen(nTerm) + /* varint containing suffix size */ + nTerm + /* Term suffix */ + sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */ + nDoclist; /* Doclist data */ } - if( rc==SQLITE_OK ){ - memset(pTab, 0, sizeof(Fts3tokTable)); - pTab->pMod = pMod; - pTab->pTok = pTok; - *ppVtab = &pTab->base; - }else{ - if( pTok ){ - pMod->xDestroy(pTok); - } + /* Increase the total number of bytes written to account for the new entry. */ + pWriter->nLeafData += nReq; + + /* If the buffer currently allocated is too small for this entry, realloc + ** the buffer to make it large enough. + */ + if( nReq>pWriter->nSize ){ + char *aNew = sqlite3_realloc(pWriter->aData, nReq); + if( !aNew ) return SQLITE_NOMEM; + pWriter->aData = aNew; + pWriter->nSize = nReq; } + assert( nData+nReq<=pWriter->nSize ); - sqlite3_free(azDequote); - return rc; -} + /* Append the prefix-compressed term and doclist to the buffer. */ + nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nPrefix); + nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nSuffix); + memcpy(&pWriter->aData[nData], &zTerm[nPrefix], nSuffix); + nData += nSuffix; + nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nDoclist); + memcpy(&pWriter->aData[nData], aDoclist, nDoclist); + pWriter->nData = nData + nDoclist; -/* -** This function does the work for both the xDisconnect and xDestroy methods. -** These tables have no persistent representation of their own, so xDisconnect -** and xDestroy are identical operations. -*/ -static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){ - Fts3tokTable *pTab = (Fts3tokTable *)pVtab; + /* Save the current term so that it can be used to prefix-compress the next. + ** If the isCopyTerm parameter is true, then the buffer pointed to by + ** zTerm is transient, so take a copy of the term data. Otherwise, just + ** store a copy of the pointer. + */ + if( isCopyTerm ){ + if( nTerm>pWriter->nMalloc ){ + char *zNew = sqlite3_realloc(pWriter->zMalloc, nTerm*2); + if( !zNew ){ + return SQLITE_NOMEM; + } + pWriter->nMalloc = nTerm*2; + pWriter->zMalloc = zNew; + pWriter->zTerm = zNew; + } + assert( pWriter->zTerm==pWriter->zMalloc ); + memcpy(pWriter->zTerm, zTerm, nTerm); + }else{ + pWriter->zTerm = (char *)zTerm; + } + pWriter->nTerm = nTerm; - pTab->pMod->xDestroy(pTab->pTok); - sqlite3_free(pTab); return SQLITE_OK; } /* -** xBestIndex - Analyze a WHERE and ORDER BY clause. +** Flush all data associated with the SegmentWriter object pWriter to the +** database. This function must be called after all terms have been added +** to the segment using fts3SegWriterAdd(). If successful, SQLITE_OK is +** returned. Otherwise, an SQLite error code. */ -static int fts3tokBestIndexMethod( - sqlite3_vtab *pVTab, - sqlite3_index_info *pInfo +static int fts3SegWriterFlush( + Fts3Table *p, /* Virtual table handle */ + SegmentWriter *pWriter, /* SegmentWriter to flush to the db */ + sqlite3_int64 iLevel, /* Value for 'level' column of %_segdir */ + int iIdx /* Value for 'idx' column of %_segdir */ ){ - int i; - UNUSED_PARAMETER(pVTab); + int rc; /* Return code */ + if( pWriter->pTree ){ + sqlite3_int64 iLast = 0; /* Largest block id written to database */ + sqlite3_int64 iLastLeaf; /* Largest leaf block id written to db */ + char *zRoot = NULL; /* Pointer to buffer containing root node */ + int nRoot = 0; /* Size of buffer zRoot */ - for(i=0; inConstraint; i++){ - if( pInfo->aConstraint[i].usable - && pInfo->aConstraint[i].iColumn==0 - && pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ - ){ - pInfo->idxNum = 1; - pInfo->aConstraintUsage[i].argvIndex = 1; - pInfo->aConstraintUsage[i].omit = 1; - pInfo->estimatedCost = 1; - return SQLITE_OK; + iLastLeaf = pWriter->iFree; + rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, pWriter->nData); + if( rc==SQLITE_OK ){ + rc = fts3NodeWrite(p, pWriter->pTree, 1, + pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot); + } + if( rc==SQLITE_OK ){ + rc = fts3WriteSegdir(p, iLevel, iIdx, + pWriter->iFirst, iLastLeaf, iLast, pWriter->nLeafData, zRoot, nRoot); } + }else{ + /* The entire tree fits on the root node. Write it to the segdir table. */ + rc = fts3WriteSegdir(p, iLevel, iIdx, + 0, 0, 0, pWriter->nLeafData, pWriter->aData, pWriter->nData); } - - pInfo->idxNum = 0; - assert( pInfo->estimatedCost>1000000.0 ); - - return SQLITE_OK; + p->nLeafAdd++; + return rc; } /* -** xOpen - Open a cursor. +** Release all memory held by the SegmentWriter object passed as the +** first argument. */ -static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ - Fts3tokCursor *pCsr; - UNUSED_PARAMETER(pVTab); - - pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor)); - if( pCsr==0 ){ - return SQLITE_NOMEM; +static void fts3SegWriterFree(SegmentWriter *pWriter){ + if( pWriter ){ + sqlite3_free(pWriter->aData); + sqlite3_free(pWriter->zMalloc); + fts3NodeFree(pWriter->pTree); + sqlite3_free(pWriter); } - memset(pCsr, 0, sizeof(Fts3tokCursor)); - - *ppCsr = (sqlite3_vtab_cursor *)pCsr; - return SQLITE_OK; } /* -** Reset the tokenizer cursor passed as the only argument. As if it had -** just been returned by fts3tokOpenMethod(). +** The first value in the apVal[] array is assumed to contain an integer. +** This function tests if there exist any documents with docid values that +** are different from that integer. i.e. if deleting the document with docid +** pRowid would mean the FTS3 table were empty. +** +** If successful, *pisEmpty is set to true if the table is empty except for +** document pRowid, or false otherwise, and SQLITE_OK is returned. If an +** error occurs, an SQLite error code is returned. */ -static void fts3tokResetCursor(Fts3tokCursor *pCsr){ - if( pCsr->pCsr ){ - Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab); - pTab->pMod->xClose(pCsr->pCsr); - pCsr->pCsr = 0; +static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){ + sqlite3_stmt *pStmt; + int rc; + if( p->zContentTbl ){ + /* If using the content=xxx option, assume the table is never empty */ + *pisEmpty = 0; + rc = SQLITE_OK; + }else{ + rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, &pRowid); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + *pisEmpty = sqlite3_column_int(pStmt, 0); + } + rc = sqlite3_reset(pStmt); + } } - sqlite3_free(pCsr->zInput); - pCsr->zInput = 0; - pCsr->zToken = 0; - pCsr->nToken = 0; - pCsr->iStart = 0; - pCsr->iEnd = 0; - pCsr->iPos = 0; - pCsr->iRowid = 0; + return rc; } /* -** xClose - Close a cursor. +** Set *pnMax to the largest segment level in the database for the index +** iIndex. +** +** Segment levels are stored in the 'level' column of the %_segdir table. +** +** Return SQLITE_OK if successful, or an SQLite error code if not. */ -static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){ - Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; +static int fts3SegmentMaxLevel( + Fts3Table *p, + int iLangid, + int iIndex, + sqlite3_int64 *pnMax +){ + sqlite3_stmt *pStmt; + int rc; + assert( iIndex>=0 && iIndexnIndex ); - fts3tokResetCursor(pCsr); - sqlite3_free(pCsr); - return SQLITE_OK; + /* Set pStmt to the compiled version of: + ** + ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? + ** + ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). + */ + rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); + if( rc!=SQLITE_OK ) return rc; + sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); + sqlite3_bind_int64(pStmt, 2, + getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) + ); + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + *pnMax = sqlite3_column_int64(pStmt, 0); + } + return sqlite3_reset(pStmt); } /* -** xNext - Advance the cursor to the next row, if any. +** iAbsLevel is an absolute level that may be assumed to exist within +** the database. This function checks if it is the largest level number +** within its index. Assuming no error occurs, *pbMax is set to 1 if +** iAbsLevel is indeed the largest level, or 0 otherwise, and SQLITE_OK +** is returned. If an error occurs, an error code is returned and the +** final value of *pbMax is undefined. */ -static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){ - Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; - Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); - int rc; /* Return code */ +static int fts3SegmentIsMaxLevel(Fts3Table *p, i64 iAbsLevel, int *pbMax){ - pCsr->iRowid++; - rc = pTab->pMod->xNext(pCsr->pCsr, - &pCsr->zToken, &pCsr->nToken, - &pCsr->iStart, &pCsr->iEnd, &pCsr->iPos + /* Set pStmt to the compiled version of: + ** + ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? + ** + ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). + */ + sqlite3_stmt *pStmt; + int rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); + if( rc!=SQLITE_OK ) return rc; + sqlite3_bind_int64(pStmt, 1, iAbsLevel+1); + sqlite3_bind_int64(pStmt, 2, + ((iAbsLevel/FTS3_SEGDIR_MAXLEVEL)+1) * FTS3_SEGDIR_MAXLEVEL ); - if( rc!=SQLITE_OK ){ - fts3tokResetCursor(pCsr); - if( rc==SQLITE_DONE ) rc = SQLITE_OK; + *pbMax = 0; + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + *pbMax = sqlite3_column_type(pStmt, 0)==SQLITE_NULL; } - - return rc; + return sqlite3_reset(pStmt); } /* -** xFilter - Initialize a cursor to point at the start of its data. +** Delete all entries in the %_segments table associated with the segment +** opened with seg-reader pSeg. This function does not affect the contents +** of the %_segdir table. */ -static int fts3tokFilterMethod( - sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ - int idxNum, /* Strategy index */ - const char *idxStr, /* Unused */ - int nVal, /* Number of elements in apVal */ - sqlite3_value **apVal /* Arguments for the indexing scheme */ +static int fts3DeleteSegment( + Fts3Table *p, /* FTS table handle */ + Fts3SegReader *pSeg /* Segment to delete */ ){ - int rc = SQLITE_ERROR; - Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; - Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); - UNUSED_PARAMETER(idxStr); - UNUSED_PARAMETER(nVal); - - fts3tokResetCursor(pCsr); - if( idxNum==1 ){ - const char *zByte = (const char *)sqlite3_value_text(apVal[0]); - int nByte = sqlite3_value_bytes(apVal[0]); - pCsr->zInput = sqlite3_malloc(nByte+1); - if( pCsr->zInput==0 ){ - rc = SQLITE_NOMEM; - }else{ - memcpy(pCsr->zInput, zByte, nByte); - pCsr->zInput[nByte] = 0; - rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr); - if( rc==SQLITE_OK ){ - pCsr->pCsr->pTokenizer = pTab->pTok; - } + int rc = SQLITE_OK; /* Return code */ + if( pSeg->iStartBlock ){ + sqlite3_stmt *pDelete; /* SQL statement to delete rows */ + rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDelete, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pDelete, 1, pSeg->iStartBlock); + sqlite3_bind_int64(pDelete, 2, pSeg->iEndBlock); + sqlite3_step(pDelete); + rc = sqlite3_reset(pDelete); } } - - if( rc!=SQLITE_OK ) return rc; - return fts3tokNextMethod(pCursor); -} - -/* -** xEof - Return true if the cursor is at EOF, or false otherwise. -*/ -static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){ - Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; - return (pCsr->zToken==0); + return rc; } /* -** xColumn - Return a column value. +** This function is used after merging multiple segments into a single large +** segment to delete the old, now redundant, segment b-trees. Specifically, +** it: +** +** 1) Deletes all %_segments entries for the segments associated with +** each of the SegReader objects in the array passed as the third +** argument, and +** +** 2) deletes all %_segdir entries with level iLevel, or all %_segdir +** entries regardless of level if (iLevel<0). +** +** SQLITE_OK is returned if successful, otherwise an SQLite error code. */ -static int fts3tokColumnMethod( - sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ - sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ - int iCol /* Index of column to read value from */ +static int fts3DeleteSegdir( + Fts3Table *p, /* Virtual table handle */ + int iLangid, /* Language id */ + int iIndex, /* Index for p->aIndex */ + int iLevel, /* Level of %_segdir entries to delete */ + Fts3SegReader **apSegment, /* Array of SegReader objects */ + int nReader /* Size of array apSegment */ ){ - Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + int rc = SQLITE_OK; /* Return Code */ + int i; /* Iterator variable */ + sqlite3_stmt *pDelete = 0; /* SQL statement to delete rows */ - /* CREATE TABLE x(input, token, start, end, position) */ - switch( iCol ){ - case 0: - sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT); - break; - case 1: - sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT); - break; - case 2: - sqlite3_result_int(pCtx, pCsr->iStart); - break; - case 3: - sqlite3_result_int(pCtx, pCsr->iEnd); - break; - default: - assert( iCol==4 ); - sqlite3_result_int(pCtx, pCsr->iPos); - break; + for(i=0; rc==SQLITE_OK && iiRowid; - return SQLITE_OK; -} + assert( iLevel>=0 || iLevel==FTS3_SEGCURSOR_ALL ); + if( iLevel==FTS3_SEGCURSOR_ALL ){ + rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); + sqlite3_bind_int64(pDelete, 2, + getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) + ); + } + }else{ + rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64( + pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel) + ); + } + } -/* -** Register the fts3tok module with database connection db. Return SQLITE_OK -** if successful or an error code if sqlite3_create_module() fails. -*/ -SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){ - static const sqlite3_module fts3tok_module = { - 0, /* iVersion */ - fts3tokConnectMethod, /* xCreate */ - fts3tokConnectMethod, /* xConnect */ - fts3tokBestIndexMethod, /* xBestIndex */ - fts3tokDisconnectMethod, /* xDisconnect */ - fts3tokDisconnectMethod, /* xDestroy */ - fts3tokOpenMethod, /* xOpen */ - fts3tokCloseMethod, /* xClose */ - fts3tokFilterMethod, /* xFilter */ - fts3tokNextMethod, /* xNext */ - fts3tokEofMethod, /* xEof */ - fts3tokColumnMethod, /* xColumn */ - fts3tokRowidMethod, /* xRowid */ - 0, /* xUpdate */ - 0, /* xBegin */ - 0, /* xSync */ - 0, /* xCommit */ - 0, /* xRollback */ - 0, /* xFindFunction */ - 0, /* xRename */ - 0, /* xSavepoint */ - 0, /* xRelease */ - 0 /* xRollbackTo */ - }; - int rc; /* Return code */ + if( rc==SQLITE_OK ){ + sqlite3_step(pDelete); + rc = sqlite3_reset(pDelete); + } - rc = sqlite3_create_module(db, "fts3tokenize", &fts3tok_module, (void*)pHash); return rc; } -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ - -/************** End of fts3_tokenize_vtab.c **********************************/ -/************** Begin file fts3_write.c **************************************/ /* -** 2009 Oct 23 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. +** When this function is called, buffer *ppList (size *pnList bytes) contains +** a position list that may (or may not) feature multiple columns. This +** function adjusts the pointer *ppList and the length *pnList so that they +** identify the subset of the position list that corresponds to column iCol. ** -****************************************************************************** +** If there are no entries in the input position list for column iCol, then +** *pnList is set to zero before returning. ** -** This file is part of the SQLite FTS3 extension module. Specifically, -** this file contains code to insert, update and delete rows from FTS3 -** tables. It also contains code to merge FTS3 b-tree segments. Some -** of the sub-routines used to merge segments are also used by the query -** code in fts3.c. +** If parameter bZero is non-zero, then any part of the input list following +** the end of the output list is zeroed before returning. */ +static void fts3ColumnFilter( + int iCol, /* Column to filter on */ + int bZero, /* Zero out anything following *ppList */ + char **ppList, /* IN/OUT: Pointer to position list */ + int *pnList /* IN/OUT: Size of buffer *ppList in bytes */ +){ + char *pList = *ppList; + int nList = *pnList; + char *pEnd = &pList[nList]; + int iCurrent = 0; + char *p = pList; -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) - -/* #include */ -/* #include */ -/* #include */ - + assert( iCol>=0 ); + while( 1 ){ + char c = 0; + while( ppMsr->nBuffer ){ + char *pNew; + pMsr->nBuffer = nList*2; + pNew = (char *)sqlite3_realloc(pMsr->aBuffer, pMsr->nBuffer); + if( !pNew ) return SQLITE_NOMEM; + pMsr->aBuffer = pNew; + } -/* -** If FTS_LOG_MERGES is defined, call sqlite3_log() to report each automatic -** and incremental merge operation that takes place. This is used for -** debugging FTS only, it should not usually be turned on in production -** systems. -*/ -#ifdef FTS3_LOG_MERGES -static void fts3LogMerge(int nMerge, sqlite3_int64 iAbsLevel){ - sqlite3_log(SQLITE_OK, "%d-way merge from level %d", nMerge, (int)iAbsLevel); + memcpy(pMsr->aBuffer, pList, nList); + return SQLITE_OK; } -#else -#define fts3LogMerge(x, y) -#endif +SQLITE_PRIVATE int sqlite3Fts3MsrIncrNext( + Fts3Table *p, /* Virtual table handle */ + Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */ + sqlite3_int64 *piDocid, /* OUT: Docid value */ + char **paPoslist, /* OUT: Pointer to position list */ + int *pnPoslist /* OUT: Size of position list in bytes */ +){ + int nMerge = pMsr->nAdvance; + Fts3SegReader **apSegment = pMsr->apSegment; + int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( + p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp + ); -typedef struct PendingList PendingList; -typedef struct SegmentNode SegmentNode; -typedef struct SegmentWriter SegmentWriter; + if( nMerge==0 ){ + *paPoslist = 0; + return SQLITE_OK; + } -/* -** An instance of the following data structure is used to build doclists -** incrementally. See function fts3PendingListAppend() for details. -*/ -struct PendingList { - int nData; - char *aData; - int nSpace; - sqlite3_int64 iLastDocid; - sqlite3_int64 iLastCol; - sqlite3_int64 iLastPos; -}; + while( 1 ){ + Fts3SegReader *pSeg; + pSeg = pMsr->apSegment[0]; + if( pSeg->pOffsetList==0 ){ + *paPoslist = 0; + break; + }else{ + int rc; + char *pList; + int nList; + int j; + sqlite3_int64 iDocid = apSegment[0]->iDocid; -/* -** Each cursor has a (possibly empty) linked list of the following objects. -*/ -struct Fts3DeferredToken { - Fts3PhraseToken *pToken; /* Pointer to corresponding expr token */ - int iCol; /* Column token must occur in */ - Fts3DeferredToken *pNext; /* Next in list of deferred tokens */ - PendingList *pList; /* Doclist is assembled here */ -}; + rc = fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); + j = 1; + while( rc==SQLITE_OK + && jpOffsetList + && apSegment[j]->iDocid==iDocid + ){ + rc = fts3SegReaderNextDocid(p, apSegment[j], 0, 0); + j++; + } + if( rc!=SQLITE_OK ) return rc; + fts3SegReaderSort(pMsr->apSegment, nMerge, j, xCmp); -/* -** An instance of this structure is used to iterate through the terms on -** a contiguous set of segment b-tree leaf nodes. Although the details of -** this structure are only manipulated by code in this file, opaque handles -** of type Fts3SegReader* are also used by code in fts3.c to iterate through -** terms when querying the full-text index. See functions: -** -** sqlite3Fts3SegReaderNew() -** sqlite3Fts3SegReaderFree() -** sqlite3Fts3SegReaderIterate() -** -** Methods used to manipulate Fts3SegReader structures: -** -** fts3SegReaderNext() -** fts3SegReaderFirstDocid() -** fts3SegReaderNextDocid() -*/ -struct Fts3SegReader { - int iIdx; /* Index within level, or 0x7FFFFFFF for PT */ - u8 bLookup; /* True for a lookup only */ - u8 rootOnly; /* True for a root-only reader */ + if( nList>0 && fts3SegReaderIsPending(apSegment[0]) ){ + rc = fts3MsrBufferData(pMsr, pList, nList+1); + if( rc!=SQLITE_OK ) return rc; + assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 ); + pList = pMsr->aBuffer; + } - sqlite3_int64 iStartBlock; /* Rowid of first leaf block to traverse */ - sqlite3_int64 iLeafEndBlock; /* Rowid of final leaf block to traverse */ - sqlite3_int64 iEndBlock; /* Rowid of final block in segment (or 0) */ - sqlite3_int64 iCurrentBlock; /* Current leaf block (or 0) */ + if( pMsr->iColFilter>=0 ){ + fts3ColumnFilter(pMsr->iColFilter, 1, &pList, &nList); + } - char *aNode; /* Pointer to node data (or NULL) */ - int nNode; /* Size of buffer at aNode (or 0) */ - int nPopulate; /* If >0, bytes of buffer aNode[] loaded */ - sqlite3_blob *pBlob; /* If not NULL, blob handle to read node */ + if( nList>0 ){ + *paPoslist = pList; + *piDocid = iDocid; + *pnPoslist = nList; + break; + } + } + } - Fts3HashElem **ppNextElem; + return SQLITE_OK; +} - /* Variables set by fts3SegReaderNext(). These may be read directly - ** by the caller. They are valid from the time SegmentReaderNew() returns - ** until SegmentReaderNext() returns something other than SQLITE_OK - ** (i.e. SQLITE_DONE). - */ - int nTerm; /* Number of bytes in current term */ - char *zTerm; /* Pointer to current term */ - int nTermAlloc; /* Allocated size of zTerm buffer */ - char *aDoclist; /* Pointer to doclist of current entry */ - int nDoclist; /* Size of doclist in current entry */ +static int fts3SegReaderStart( + Fts3Table *p, /* Virtual table handle */ + Fts3MultiSegReader *pCsr, /* Cursor object */ + const char *zTerm, /* Term searched for (or NULL) */ + int nTerm /* Length of zTerm in bytes */ +){ + int i; + int nSeg = pCsr->nSegment; - /* The following variables are used by fts3SegReaderNextDocid() to iterate - ** through the current doclist (aDoclist/nDoclist). + /* If the Fts3SegFilter defines a specific term (or term prefix) to search + ** for, then advance each segment iterator until it points to a term of + ** equal or greater value than the specified term. This prevents many + ** unnecessary merge/sort operations for the case where single segment + ** b-tree leaf nodes contain more than one term. */ - char *pOffsetList; - int nOffsetList; /* For descending pending seg-readers only */ - sqlite3_int64 iDocid; -}; - -#define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0) -#define fts3SegReaderIsRootOnly(p) ((p)->rootOnly!=0) - -/* -** An instance of this structure is used to create a segment b-tree in the -** database. The internal details of this type are only accessed by the -** following functions: -** -** fts3SegWriterAdd() -** fts3SegWriterFlush() -** fts3SegWriterFree() -*/ -struct SegmentWriter { - SegmentNode *pTree; /* Pointer to interior tree structure */ - sqlite3_int64 iFirst; /* First slot in %_segments written */ - sqlite3_int64 iFree; /* Next free slot in %_segments */ - char *zTerm; /* Pointer to previous term buffer */ - int nTerm; /* Number of bytes in zTerm */ - int nMalloc; /* Size of malloc'd buffer at zMalloc */ - char *zMalloc; /* Malloc'd space (possibly) used for zTerm */ - int nSize; /* Size of allocation at aData */ - int nData; /* Bytes of data in aData */ - char *aData; /* Pointer to block from malloc() */ - i64 nLeafData; /* Number of bytes of leaf data written */ -}; - -/* -** Type SegmentNode is used by the following three functions to create -** the interior part of the segment b+-tree structures (everything except -** the leaf nodes). These functions and type are only ever used by code -** within the fts3SegWriterXXX() family of functions described above. -** -** fts3NodeAddTerm() -** fts3NodeWrite() -** fts3NodeFree() -** -** When a b+tree is written to the database (either as a result of a merge -** or the pending-terms table being flushed), leaves are written into the -** database file as soon as they are completely populated. The interior of -** the tree is assembled in memory and written out only once all leaves have -** been populated and stored. This is Ok, as the b+-tree fanout is usually -** very large, meaning that the interior of the tree consumes relatively -** little memory. -*/ -struct SegmentNode { - SegmentNode *pParent; /* Parent node (or NULL for root node) */ - SegmentNode *pRight; /* Pointer to right-sibling */ - SegmentNode *pLeftmost; /* Pointer to left-most node of this depth */ - int nEntry; /* Number of terms written to node so far */ - char *zTerm; /* Pointer to previous term buffer */ - int nTerm; /* Number of bytes in zTerm */ - int nMalloc; /* Size of malloc'd buffer at zMalloc */ - char *zMalloc; /* Malloc'd space (possibly) used for zTerm */ - int nData; /* Bytes of valid data so far */ - char *aData; /* Node data */ -}; - -/* -** Valid values for the second argument to fts3SqlStmt(). -*/ -#define SQL_DELETE_CONTENT 0 -#define SQL_IS_EMPTY 1 -#define SQL_DELETE_ALL_CONTENT 2 -#define SQL_DELETE_ALL_SEGMENTS 3 -#define SQL_DELETE_ALL_SEGDIR 4 -#define SQL_DELETE_ALL_DOCSIZE 5 -#define SQL_DELETE_ALL_STAT 6 -#define SQL_SELECT_CONTENT_BY_ROWID 7 -#define SQL_NEXT_SEGMENT_INDEX 8 -#define SQL_INSERT_SEGMENTS 9 -#define SQL_NEXT_SEGMENTS_ID 10 -#define SQL_INSERT_SEGDIR 11 -#define SQL_SELECT_LEVEL 12 -#define SQL_SELECT_LEVEL_RANGE 13 -#define SQL_SELECT_LEVEL_COUNT 14 -#define SQL_SELECT_SEGDIR_MAX_LEVEL 15 -#define SQL_DELETE_SEGDIR_LEVEL 16 -#define SQL_DELETE_SEGMENTS_RANGE 17 -#define SQL_CONTENT_INSERT 18 -#define SQL_DELETE_DOCSIZE 19 -#define SQL_REPLACE_DOCSIZE 20 -#define SQL_SELECT_DOCSIZE 21 -#define SQL_SELECT_STAT 22 -#define SQL_REPLACE_STAT 23 + for(i=0; pCsr->bRestart==0 && inSegment; i++){ + int res = 0; + Fts3SegReader *pSeg = pCsr->apSegment[i]; + do { + int rc = fts3SegReaderNext(p, pSeg, 0); + if( rc!=SQLITE_OK ) return rc; + }while( zTerm && (res = fts3SegReaderTermCmp(pSeg, zTerm, nTerm))<0 ); -#define SQL_SELECT_ALL_PREFIX_LEVEL 24 -#define SQL_DELETE_ALL_TERMS_SEGDIR 25 -#define SQL_DELETE_SEGDIR_RANGE 26 -#define SQL_SELECT_ALL_LANGID 27 -#define SQL_FIND_MERGE_LEVEL 28 -#define SQL_MAX_LEAF_NODE_ESTIMATE 29 -#define SQL_DELETE_SEGDIR_ENTRY 30 -#define SQL_SHIFT_SEGDIR_ENTRY 31 -#define SQL_SELECT_SEGDIR 32 -#define SQL_CHOMP_SEGDIR 33 -#define SQL_SEGMENT_IS_APPENDABLE 34 -#define SQL_SELECT_INDEXES 35 -#define SQL_SELECT_MXLEVEL 36 + if( pSeg->bLookup && res!=0 ){ + fts3SegReaderSetEof(pSeg); + } + } + fts3SegReaderSort(pCsr->apSegment, nSeg, nSeg, fts3SegReaderCmp); -#define SQL_SELECT_LEVEL_RANGE2 37 -#define SQL_UPDATE_LEVEL_IDX 38 -#define SQL_UPDATE_LEVEL 39 + return SQLITE_OK; +} -/* -** This function is used to obtain an SQLite prepared statement handle -** for the statement identified by the second argument. If successful, -** *pp is set to the requested statement handle and SQLITE_OK returned. -** Otherwise, an SQLite error code is returned and *pp is set to 0. -** -** If argument apVal is not NULL, then it must point to an array with -** at least as many entries as the requested statement has bound -** parameters. The values are bound to the statements parameters before -** returning. -*/ -static int fts3SqlStmt( +SQLITE_PRIVATE int sqlite3Fts3SegReaderStart( Fts3Table *p, /* Virtual table handle */ - int eStmt, /* One of the SQL_XXX constants above */ - sqlite3_stmt **pp, /* OUT: Statement handle */ - sqlite3_value **apVal /* Values to bind to statement */ + Fts3MultiSegReader *pCsr, /* Cursor object */ + Fts3SegFilter *pFilter /* Restrictions on range of iteration */ ){ - const char *azSql[] = { -/* 0 */ "DELETE FROM %Q.'%q_content' WHERE rowid = ?", -/* 1 */ "SELECT NOT EXISTS(SELECT docid FROM %Q.'%q_content' WHERE rowid!=?)", -/* 2 */ "DELETE FROM %Q.'%q_content'", -/* 3 */ "DELETE FROM %Q.'%q_segments'", -/* 4 */ "DELETE FROM %Q.'%q_segdir'", -/* 5 */ "DELETE FROM %Q.'%q_docsize'", -/* 6 */ "DELETE FROM %Q.'%q_stat'", -/* 7 */ "SELECT %s WHERE rowid=?", -/* 8 */ "SELECT (SELECT max(idx) FROM %Q.'%q_segdir' WHERE level = ?) + 1", -/* 9 */ "REPLACE INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)", -/* 10 */ "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)", -/* 11 */ "REPLACE INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)", + pCsr->pFilter = pFilter; + return fts3SegReaderStart(p, pCsr, pFilter->zTerm, pFilter->nTerm); +} - /* Return segments in order from oldest to newest.*/ -/* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root " - "FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC", -/* 13 */ "SELECT idx, start_block, leaves_end_block, end_block, root " - "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?" - "ORDER BY level DESC, idx ASC", +SQLITE_PRIVATE int sqlite3Fts3MsrIncrStart( + Fts3Table *p, /* Virtual table handle */ + Fts3MultiSegReader *pCsr, /* Cursor object */ + int iCol, /* Column to match on. */ + const char *zTerm, /* Term to iterate through a doclist for */ + int nTerm /* Number of bytes in zTerm */ +){ + int i; + int rc; + int nSegment = pCsr->nSegment; + int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( + p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp + ); -/* 14 */ "SELECT count(*) FROM %Q.'%q_segdir' WHERE level = ?", -/* 15 */ "SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", + assert( pCsr->pFilter==0 ); + assert( zTerm && nTerm>0 ); -/* 16 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ?", -/* 17 */ "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ?", -/* 18 */ "INSERT INTO %Q.'%q_content' VALUES(%s)", -/* 19 */ "DELETE FROM %Q.'%q_docsize' WHERE docid = ?", -/* 20 */ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", -/* 21 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?", -/* 22 */ "SELECT value FROM %Q.'%q_stat' WHERE id=?", -/* 23 */ "REPLACE INTO %Q.'%q_stat' VALUES(?,?)", -/* 24 */ "", -/* 25 */ "", + /* Advance each segment iterator until it points to the term zTerm/nTerm. */ + rc = fts3SegReaderStart(p, pCsr, zTerm, nTerm); + if( rc!=SQLITE_OK ) return rc; -/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", -/* 27 */ "SELECT DISTINCT level / (1024 * ?) FROM %Q.'%q_segdir'", + /* Determine how many of the segments actually point to zTerm/nTerm. */ + for(i=0; iapSegment[i]; + if( !pSeg->aNode || fts3SegReaderTermCmp(pSeg, zTerm, nTerm) ){ + break; + } + } + pCsr->nAdvance = i; -/* This statement is used to determine which level to read the input from -** when performing an incremental merge. It returns the absolute level number -** of the oldest level in the db that contains at least ? segments. Or, -** if no level in the FTS index contains more than ? segments, the statement -** returns zero rows. */ -/* 28 */ "SELECT level FROM %Q.'%q_segdir' GROUP BY level HAVING count(*)>=?" - " ORDER BY (level %% 1024) ASC LIMIT 1", + /* Advance each of the segments to point to the first docid. */ + for(i=0; inAdvance; i++){ + rc = fts3SegReaderFirstDocid(p, pCsr->apSegment[i]); + if( rc!=SQLITE_OK ) return rc; + } + fts3SegReaderSort(pCsr->apSegment, i, i, xCmp); -/* Estimate the upper limit on the number of leaf nodes in a new segment -** created by merging the oldest :2 segments from absolute level :1. See -** function sqlite3Fts3Incrmerge() for details. */ -/* 29 */ "SELECT 2 * total(1 + leaves_end_block - start_block) " - " FROM %Q.'%q_segdir' WHERE level = ? AND idx < ?", + assert( iCol<0 || iColnColumn ); + pCsr->iColFilter = iCol; -/* SQL_DELETE_SEGDIR_ENTRY -** Delete the %_segdir entry on absolute level :1 with index :2. */ -/* 30 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?", + return SQLITE_OK; +} -/* SQL_SHIFT_SEGDIR_ENTRY -** Modify the idx value for the segment with idx=:3 on absolute level :2 -** to :1. */ -/* 31 */ "UPDATE %Q.'%q_segdir' SET idx = ? WHERE level=? AND idx=?", +/* +** This function is called on a MultiSegReader that has been started using +** sqlite3Fts3MsrIncrStart(). One or more calls to MsrIncrNext() may also +** have been made. Calling this function puts the MultiSegReader in such +** a state that if the next two calls are: +** +** sqlite3Fts3SegReaderStart() +** sqlite3Fts3SegReaderStep() +** +** then the entire doclist for the term is available in +** MultiSegReader.aDoclist/nDoclist. +*/ +SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr){ + int i; /* Used to iterate through segment-readers */ -/* SQL_SELECT_SEGDIR -** Read a single entry from the %_segdir table. The entry from absolute -** level :1 with index value :2. */ -/* 32 */ "SELECT idx, start_block, leaves_end_block, end_block, root " - "FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?", + assert( pCsr->zTerm==0 ); + assert( pCsr->nTerm==0 ); + assert( pCsr->aDoclist==0 ); + assert( pCsr->nDoclist==0 ); -/* SQL_CHOMP_SEGDIR -** Update the start_block (:1) and root (:2) fields of the %_segdir -** entry located on absolute level :3 with index :4. */ -/* 33 */ "UPDATE %Q.'%q_segdir' SET start_block = ?, root = ?" - "WHERE level = ? AND idx = ?", + pCsr->nAdvance = 0; + pCsr->bRestart = 1; + for(i=0; inSegment; i++){ + pCsr->apSegment[i]->pOffsetList = 0; + pCsr->apSegment[i]->nOffsetList = 0; + pCsr->apSegment[i]->iDocid = 0; + } -/* SQL_SEGMENT_IS_APPENDABLE -** Return a single row if the segment with end_block=? is appendable. Or -** no rows otherwise. */ -/* 34 */ "SELECT 1 FROM %Q.'%q_segments' WHERE blockid=? AND block IS NULL", + return SQLITE_OK; +} -/* SQL_SELECT_INDEXES -** Return the list of valid segment indexes for absolute level ? */ -/* 35 */ "SELECT idx FROM %Q.'%q_segdir' WHERE level=? ORDER BY 1 ASC", -/* SQL_SELECT_MXLEVEL -** Return the largest relative level in the FTS index or indexes. */ -/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'", +SQLITE_PRIVATE int sqlite3Fts3SegReaderStep( + Fts3Table *p, /* Virtual table handle */ + Fts3MultiSegReader *pCsr /* Cursor object */ +){ + int rc = SQLITE_OK; - /* Return segments in order from oldest to newest.*/ -/* 37 */ "SELECT level, idx, end_block " - "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? " - "ORDER BY level DESC, idx ASC", + int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY); + int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS); + int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER); + int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX); + int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN); + int isFirst = (pCsr->pFilter->flags & FTS3_SEGMENT_FIRST); - /* Update statements used while promoting segments */ -/* 38 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=-1,idx=? " - "WHERE level=? AND idx=?", -/* 39 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=? WHERE level=-1" + Fts3SegReader **apSegment = pCsr->apSegment; + int nSegment = pCsr->nSegment; + Fts3SegFilter *pFilter = pCsr->pFilter; + int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( + p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp + ); - }; - int rc = SQLITE_OK; - sqlite3_stmt *pStmt; + if( pCsr->nSegment==0 ) return SQLITE_OK; - assert( SizeofArray(azSql)==SizeofArray(p->aStmt) ); - assert( eStmt=0 ); - - pStmt = p->aStmt[eStmt]; - if( !pStmt ){ - char *zSql; - if( eStmt==SQL_CONTENT_INSERT ){ - zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, p->zWriteExprlist); - }else if( eStmt==SQL_SELECT_CONTENT_BY_ROWID ){ - zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist); - }else{ - zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName); - } - if( !zSql ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, NULL); - sqlite3_free(zSql); - assert( rc==SQLITE_OK || pStmt==0 ); - p->aStmt[eStmt] = pStmt; - } - } - if( apVal ){ + do { + int nMerge; int i; - int nParam = sqlite3_bind_parameter_count(pStmt); - for(i=0; rc==SQLITE_OK && inAdvance entries in the apSegment[] array + ** forward. Then sort the list in order of current term again. + */ + for(i=0; inAdvance; i++){ + Fts3SegReader *pSeg = apSegment[i]; + if( pSeg->bLookup ){ + fts3SegReaderSetEof(pSeg); + }else{ + rc = fts3SegReaderNext(p, pSeg, 0); + } + if( rc!=SQLITE_OK ) return rc; } - } - *pp = pStmt; - return rc; -} + fts3SegReaderSort(apSegment, nSegment, pCsr->nAdvance, fts3SegReaderCmp); + pCsr->nAdvance = 0; + /* If all the seg-readers are at EOF, we're finished. return SQLITE_OK. */ + assert( rc==SQLITE_OK ); + if( apSegment[0]->aNode==0 ) break; -static int fts3SelectDocsize( - Fts3Table *pTab, /* FTS3 table handle */ - sqlite3_int64 iDocid, /* Docid to bind for SQL_SELECT_DOCSIZE */ - sqlite3_stmt **ppStmt /* OUT: Statement handle */ -){ - sqlite3_stmt *pStmt = 0; /* Statement requested from fts3SqlStmt() */ - int rc; /* Return code */ + pCsr->nTerm = apSegment[0]->nTerm; + pCsr->zTerm = apSegment[0]->zTerm; - rc = fts3SqlStmt(pTab, SQL_SELECT_DOCSIZE, &pStmt, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pStmt, 1, iDocid); - rc = sqlite3_step(pStmt); - if( rc!=SQLITE_ROW || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB ){ - rc = sqlite3_reset(pStmt); - if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB; - pStmt = 0; - }else{ - rc = SQLITE_OK; + /* If this is a prefix-search, and if the term that apSegment[0] points + ** to does not share a suffix with pFilter->zTerm/nTerm, then all + ** required callbacks have been made. In this case exit early. + ** + ** Similarly, if this is a search for an exact match, and the first term + ** of segment apSegment[0] is not a match, exit early. + */ + if( pFilter->zTerm && !isScan ){ + if( pCsr->nTermnTerm + || (!isPrefix && pCsr->nTerm>pFilter->nTerm) + || memcmp(pCsr->zTerm, pFilter->zTerm, pFilter->nTerm) + ){ + break; + } } - } - *ppStmt = pStmt; - return rc; -} + nMerge = 1; + while( nMergeaNode + && apSegment[nMerge]->nTerm==pCsr->nTerm + && 0==memcmp(pCsr->zTerm, apSegment[nMerge]->zTerm, pCsr->nTerm) + ){ + nMerge++; + } + + assert( isIgnoreEmpty || (isRequirePos && !isColFilter) ); + if( nMerge==1 + && !isIgnoreEmpty + && !isFirst + && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0) + ){ + pCsr->nDoclist = apSegment[0]->nDoclist; + if( fts3SegReaderIsPending(apSegment[0]) ){ + rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist); + pCsr->aDoclist = pCsr->aBuffer; + }else{ + pCsr->aDoclist = apSegment[0]->aDoclist; + } + if( rc==SQLITE_OK ) rc = SQLITE_ROW; + }else{ + int nDoclist = 0; /* Size of doclist */ + sqlite3_int64 iPrev = 0; /* Previous docid stored in doclist */ + + /* The current term of the first nMerge entries in the array + ** of Fts3SegReader objects is the same. The doclists must be merged + ** and a single term returned with the merged doclist. + */ + for(i=0; ipOffsetList ){ + int j; /* Number of segments that share a docid */ + char *pList = 0; + int nList = 0; + int nByte; + sqlite3_int64 iDocid = apSegment[0]->iDocid; + fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); + j = 1; + while( jpOffsetList + && apSegment[j]->iDocid==iDocid + ){ + fts3SegReaderNextDocid(p, apSegment[j], 0, 0); + j++; + } + + if( isColFilter ){ + fts3ColumnFilter(pFilter->iCol, 0, &pList, &nList); + } -SQLITE_PRIVATE int sqlite3Fts3SelectDoctotal( - Fts3Table *pTab, /* Fts3 table handle */ - sqlite3_stmt **ppStmt /* OUT: Statement handle */ -){ - sqlite3_stmt *pStmt = 0; - int rc; - rc = fts3SqlStmt(pTab, SQL_SELECT_STAT, &pStmt, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); - if( sqlite3_step(pStmt)!=SQLITE_ROW - || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB - ){ - rc = sqlite3_reset(pStmt); - if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB; - pStmt = 0; - } - } - *ppStmt = pStmt; - return rc; -} + if( !isIgnoreEmpty || nList>0 ){ -SQLITE_PRIVATE int sqlite3Fts3SelectDocsize( - Fts3Table *pTab, /* Fts3 table handle */ - sqlite3_int64 iDocid, /* Docid to read size data for */ - sqlite3_stmt **ppStmt /* OUT: Statement handle */ -){ - return fts3SelectDocsize(pTab, iDocid, ppStmt); -} + /* Calculate the 'docid' delta value to write into the merged + ** doclist. */ + sqlite3_int64 iDelta; + if( p->bDescIdx && nDoclist>0 ){ + iDelta = iPrev - iDocid; + }else{ + iDelta = iDocid - iPrev; + } + assert( iDelta>0 || (nDoclist==0 && iDelta==iDocid) ); + assert( nDoclist>0 || iDelta==iDocid ); -/* -** Similar to fts3SqlStmt(). Except, after binding the parameters in -** array apVal[] to the SQL statement identified by eStmt, the statement -** is executed. -** -** Returns SQLITE_OK if the statement is successfully executed, or an -** SQLite error code otherwise. -*/ -static void fts3SqlExec( - int *pRC, /* Result code */ - Fts3Table *p, /* The FTS3 table */ - int eStmt, /* Index of statement to evaluate */ - sqlite3_value **apVal /* Parameters to bind */ -){ - sqlite3_stmt *pStmt; - int rc; - if( *pRC ) return; - rc = fts3SqlStmt(p, eStmt, &pStmt, apVal); - if( rc==SQLITE_OK ){ - sqlite3_step(pStmt); - rc = sqlite3_reset(pStmt); - } - *pRC = rc; -} + nByte = sqlite3Fts3VarintLen(iDelta) + (isRequirePos?nList+1:0); + if( nDoclist+nByte>pCsr->nBuffer ){ + char *aNew; + pCsr->nBuffer = (nDoclist+nByte)*2; + aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer); + if( !aNew ){ + return SQLITE_NOMEM; + } + pCsr->aBuffer = aNew; + } + if( isFirst ){ + char *a = &pCsr->aBuffer[nDoclist]; + int nWrite; + + nWrite = sqlite3Fts3FirstFilter(iDelta, pList, nList, a); + if( nWrite ){ + iPrev = iDocid; + nDoclist += nWrite; + } + }else{ + nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta); + iPrev = iDocid; + if( isRequirePos ){ + memcpy(&pCsr->aBuffer[nDoclist], pList, nList); + nDoclist += nList; + pCsr->aBuffer[nDoclist++] = '\0'; + } + } + } -/* -** This function ensures that the caller has obtained an exclusive -** shared-cache table-lock on the %_segdir table. This is required before -** writing data to the fts3 table. If this lock is not acquired first, then -** the caller may end up attempting to take this lock as part of committing -** a transaction, causing SQLite to return SQLITE_LOCKED or -** LOCKED_SHAREDCACHEto a COMMIT command. -** -** It is best to avoid this because if FTS3 returns any error when -** committing a transaction, the whole transaction will be rolled back. -** And this is not what users expect when they get SQLITE_LOCKED_SHAREDCACHE. -** It can still happen if the user locks the underlying tables directly -** instead of accessing them via FTS. -*/ -static int fts3Writelock(Fts3Table *p){ - int rc = SQLITE_OK; - - if( p->nPendingData==0 ){ - sqlite3_stmt *pStmt; - rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pStmt, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_null(pStmt, 1); - sqlite3_step(pStmt); - rc = sqlite3_reset(pStmt); + fts3SegReaderSort(apSegment, nMerge, j, xCmp); + } + if( nDoclist>0 ){ + pCsr->aDoclist = pCsr->aBuffer; + pCsr->nDoclist = nDoclist; + rc = SQLITE_ROW; + } } - } + pCsr->nAdvance = nMerge; + }while( rc==SQLITE_OK ); return rc; } -/* -** FTS maintains a separate indexes for each language-id (a 32-bit integer). -** Within each language id, a separate index is maintained to store the -** document terms, and each configured prefix size (configured the FTS -** "prefix=" option). And each index consists of multiple levels ("relative -** levels"). -** -** All three of these values (the language id, the specific index and the -** level within the index) are encoded in 64-bit integer values stored -** in the %_segdir table on disk. This function is used to convert three -** separate component values into the single 64-bit integer value that -** can be used to query the %_segdir table. -** -** Specifically, each language-id/index combination is allocated 1024 -** 64-bit integer level values ("absolute levels"). The main terms index -** for language-id 0 is allocate values 0-1023. The first prefix index -** (if any) for language-id 0 is allocated values 1024-2047. And so on. -** Language 1 indexes are allocated immediately following language 0. -** -** So, for a system with nPrefix prefix indexes configured, the block of -** absolute levels that corresponds to language-id iLangid and index -** iIndex starts at absolute level ((iLangid * (nPrefix+1) + iIndex) * 1024). -*/ -static sqlite3_int64 getAbsoluteLevel( - Fts3Table *p, /* FTS3 table handle */ - int iLangid, /* Language id */ - int iIndex, /* Index in p->aIndex[] */ - int iLevel /* Level of segments */ -){ - sqlite3_int64 iBase; /* First absolute level for iLangid/iIndex */ - assert( iLangid>=0 ); - assert( p->nIndex>0 ); - assert( iIndex>=0 && iIndexnIndex ); - - iBase = ((sqlite3_int64)iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL; - return iBase + iLevel; -} -/* -** Set *ppStmt to a statement handle that may be used to iterate through -** all rows in the %_segdir table, from oldest to newest. If successful, -** return SQLITE_OK. If an error occurs while preparing the statement, -** return an SQLite error code. -** -** There is only ever one instance of this SQL statement compiled for -** each FTS3 table. -** -** The statement returns the following columns from the %_segdir table: -** -** 0: idx -** 1: start_block -** 2: leaves_end_block -** 3: end_block -** 4: root -*/ -SQLITE_PRIVATE int sqlite3Fts3AllSegdirs( - Fts3Table *p, /* FTS3 table */ - int iLangid, /* Language being queried */ - int iIndex, /* Index for p->aIndex[] */ - int iLevel, /* Level to select (relative level) */ - sqlite3_stmt **ppStmt /* OUT: Compiled statement */ +SQLITE_PRIVATE void sqlite3Fts3SegReaderFinish( + Fts3MultiSegReader *pCsr /* Cursor object */ ){ - int rc; - sqlite3_stmt *pStmt = 0; - - assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel>=0 ); - assert( iLevel=0 && iIndexnIndex ); - - if( iLevel<0 ){ - /* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */ - rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); - sqlite3_bind_int64(pStmt, 2, - getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) - ); - } - }else{ - /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */ - rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex,iLevel)); + if( pCsr ){ + int i; + for(i=0; inSegment; i++){ + sqlite3Fts3SegReaderFree(pCsr->apSegment[i]); } + sqlite3_free(pCsr->apSegment); + sqlite3_free(pCsr->aBuffer); + + pCsr->nSegment = 0; + pCsr->apSegment = 0; + pCsr->aBuffer = 0; } - *ppStmt = pStmt; - return rc; } - /* -** Append a single varint to a PendingList buffer. SQLITE_OK is returned -** if successful, or an SQLite error code otherwise. -** -** This function also serves to allocate the PendingList structure itself. -** For example, to create a new PendingList structure containing two -** varints: +** Decode the "end_block" field, selected by column iCol of the SELECT +** statement passed as the first argument. ** -** PendingList *p = 0; -** fts3PendingListAppendVarint(&p, 1); -** fts3PendingListAppendVarint(&p, 2); +** The "end_block" field may contain either an integer, or a text field +** containing the text representation of two non-negative integers separated +** by one or more space (0x20) characters. In the first case, set *piEndBlock +** to the integer value and *pnByte to zero before returning. In the second, +** set *piEndBlock to the first value and *pnByte to the second. */ -static int fts3PendingListAppendVarint( - PendingList **pp, /* IN/OUT: Pointer to PendingList struct */ - sqlite3_int64 i /* Value to append to data */ +static void fts3ReadEndBlockField( + sqlite3_stmt *pStmt, + int iCol, + i64 *piEndBlock, + i64 *pnByte ){ - PendingList *p = *pp; - - /* Allocate or grow the PendingList as required. */ - if( !p ){ - p = sqlite3_malloc(sizeof(*p) + 100); - if( !p ){ - return SQLITE_NOMEM; + const unsigned char *zText = sqlite3_column_text(pStmt, iCol); + if( zText ){ + int i; + int iMul = 1; + i64 iVal = 0; + for(i=0; zText[i]>='0' && zText[i]<='9'; i++){ + iVal = iVal*10 + (zText[i] - '0'); } - p->nSpace = 100; - p->aData = (char *)&p[1]; - p->nData = 0; - } - else if( p->nData+FTS3_VARINT_MAX+1>p->nSpace ){ - int nNew = p->nSpace * 2; - p = sqlite3_realloc(p, sizeof(*p) + nNew); - if( !p ){ - sqlite3_free(*pp); - *pp = 0; - return SQLITE_NOMEM; + *piEndBlock = iVal; + while( zText[i]==' ' ) i++; + iVal = 0; + if( zText[i]=='-' ){ + i++; + iMul = -1; } - p->nSpace = nNew; - p->aData = (char *)&p[1]; + for(/* no-op */; zText[i]>='0' && zText[i]<='9'; i++){ + iVal = iVal*10 + (zText[i] - '0'); + } + *pnByte = (iVal * (i64)iMul); } - - /* Append the new serialized varint to the end of the list. */ - p->nData += sqlite3Fts3PutVarint(&p->aData[p->nData], i); - p->aData[p->nData] = '\0'; - *pp = p; - return SQLITE_OK; } + /* -** Add a docid/column/position entry to a PendingList structure. Non-zero -** is returned if the structure is sqlite3_realloced as part of adding -** the entry. Otherwise, zero. -** -** If an OOM error occurs, *pRc is set to SQLITE_NOMEM before returning. -** Zero is always returned in this case. Otherwise, if no OOM error occurs, -** it is set to SQLITE_OK. +** A segment of size nByte bytes has just been written to absolute level +** iAbsLevel. Promote any segments that should be promoted as a result. */ -static int fts3PendingListAppend( - PendingList **pp, /* IN/OUT: PendingList structure */ - sqlite3_int64 iDocid, /* Docid for entry to add */ - sqlite3_int64 iCol, /* Column for entry to add */ - sqlite3_int64 iPos, /* Position of term for entry to add */ - int *pRc /* OUT: Return code */ +static int fts3PromoteSegments( + Fts3Table *p, /* FTS table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level just updated */ + sqlite3_int64 nByte /* Size of new segment at iAbsLevel */ ){ - PendingList *p = *pp; int rc = SQLITE_OK; + sqlite3_stmt *pRange; - assert( !p || p->iLastDocid<=iDocid ); + rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE2, &pRange, 0); - if( !p || p->iLastDocid!=iDocid ){ - sqlite3_int64 iDelta = iDocid - (p ? p->iLastDocid : 0); - if( p ){ - assert( p->nDatanSpace ); - assert( p->aData[p->nData]==0 ); - p->nData++; - } - if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iDelta)) ){ - goto pendinglistappend_out; - } - p->iLastCol = -1; - p->iLastPos = 0; - p->iLastDocid = iDocid; - } - if( iCol>0 && p->iLastCol!=iCol ){ - if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, 1)) - || SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iCol)) - ){ - goto pendinglistappend_out; - } - p->iLastCol = iCol; - p->iLastPos = 0; - } - if( iCol>=0 ){ - assert( iPos>p->iLastPos || (iPos==0 && p->iLastPos==0) ); - rc = fts3PendingListAppendVarint(&p, 2+iPos-p->iLastPos); - if( rc==SQLITE_OK ){ - p->iLastPos = iPos; + if( rc==SQLITE_OK ){ + int bOk = 0; + i64 iLast = (iAbsLevel/FTS3_SEGDIR_MAXLEVEL + 1) * FTS3_SEGDIR_MAXLEVEL - 1; + i64 nLimit = (nByte*3)/2; + + /* Loop through all entries in the %_segdir table corresponding to + ** segments in this index on levels greater than iAbsLevel. If there is + ** at least one such segment, and it is possible to determine that all + ** such segments are smaller than nLimit bytes in size, they will be + ** promoted to level iAbsLevel. */ + sqlite3_bind_int64(pRange, 1, iAbsLevel+1); + sqlite3_bind_int64(pRange, 2, iLast); + while( SQLITE_ROW==sqlite3_step(pRange) ){ + i64 nSize = 0, dummy; + fts3ReadEndBlockField(pRange, 2, &dummy, &nSize); + if( nSize<=0 || nSize>nLimit ){ + /* If nSize==0, then the %_segdir.end_block field does not not + ** contain a size value. This happens if it was written by an + ** old version of FTS. In this case it is not possible to determine + ** the size of the segment, and so segment promotion does not + ** take place. */ + bOk = 0; + break; + } + bOk = 1; } - } + rc = sqlite3_reset(pRange); - pendinglistappend_out: - *pRc = rc; - if( p!=*pp ){ - *pp = p; - return 1; - } - return 0; -} + if( bOk ){ + int iIdx = 0; + sqlite3_stmt *pUpdate1 = 0; + sqlite3_stmt *pUpdate2 = 0; -/* -** Free a PendingList object allocated by fts3PendingListAppend(). -*/ -static void fts3PendingListDelete(PendingList *pList){ - sqlite3_free(pList); -} + if( rc==SQLITE_OK ){ + rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL_IDX, &pUpdate1, 0); + } + if( rc==SQLITE_OK ){ + rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL, &pUpdate2, 0); + } -/* -** Add an entry to one of the pending-terms hash tables. -*/ -static int fts3PendingTermsAddOne( - Fts3Table *p, - int iCol, - int iPos, - Fts3Hash *pHash, /* Pending terms hash table to add entry to */ - const char *zToken, - int nToken -){ - PendingList *pList; - int rc = SQLITE_OK; + if( rc==SQLITE_OK ){ - pList = (PendingList *)fts3HashFind(pHash, zToken, nToken); - if( pList ){ - p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem)); - } - if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){ - if( pList==fts3HashInsert(pHash, zToken, nToken, pList) ){ - /* Malloc failed while inserting the new entry. This can only - ** happen if there was no previous entry for this token. - */ - assert( 0==fts3HashFind(pHash, zToken, nToken) ); - sqlite3_free(pList); - rc = SQLITE_NOMEM; + /* Loop through all %_segdir entries for segments in this index with + ** levels equal to or greater than iAbsLevel. As each entry is visited, + ** updated it to set (level = -1) and (idx = N), where N is 0 for the + ** oldest segment in the range, 1 for the next oldest, and so on. + ** + ** In other words, move all segments being promoted to level -1, + ** setting the "idx" fields as appropriate to keep them in the same + ** order. The contents of level -1 (which is never used, except + ** transiently here), will be moved back to level iAbsLevel below. */ + sqlite3_bind_int64(pRange, 1, iAbsLevel); + while( SQLITE_ROW==sqlite3_step(pRange) ){ + sqlite3_bind_int(pUpdate1, 1, iIdx++); + sqlite3_bind_int(pUpdate1, 2, sqlite3_column_int(pRange, 0)); + sqlite3_bind_int(pUpdate1, 3, sqlite3_column_int(pRange, 1)); + sqlite3_step(pUpdate1); + rc = sqlite3_reset(pUpdate1); + if( rc!=SQLITE_OK ){ + sqlite3_reset(pRange); + break; + } + } + } + if( rc==SQLITE_OK ){ + rc = sqlite3_reset(pRange); + } + + /* Move level -1 to level iAbsLevel */ + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pUpdate2, 1, iAbsLevel); + sqlite3_step(pUpdate2); + rc = sqlite3_reset(pUpdate2); + } } } - if( rc==SQLITE_OK ){ - p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem)); - } + + return rc; } /* -** Tokenize the nul-terminated string zText and add all tokens to the -** pending-terms hash-table. The docid used is that currently stored in -** p->iPrevDocid, and the column is specified by argument iCol. +** Merge all level iLevel segments in the database into a single +** iLevel+1 segment. Or, if iLevel<0, merge all segments into a +** single segment with a level equal to the numerically largest level +** currently present in the database. ** -** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. +** If this function is called with iLevel<0, but there is only one +** segment in the database, SQLITE_DONE is returned immediately. +** Otherwise, if successful, SQLITE_OK is returned. If an error occurs, +** an SQLite error code is returned. */ -static int fts3PendingTermsAdd( - Fts3Table *p, /* Table into which text will be inserted */ - int iLangid, /* Language id to use */ - const char *zText, /* Text of document to be inserted */ - int iCol, /* Column into which text is being inserted */ - u32 *pnWord /* IN/OUT: Incr. by number tokens inserted */ +static int fts3SegmentMerge( + Fts3Table *p, + int iLangid, /* Language id to merge */ + int iIndex, /* Index in p->aIndex[] to merge */ + int iLevel /* Level to merge */ ){ - int rc; - int iStart = 0; - int iEnd = 0; - int iPos = 0; - int nWord = 0; - - char const *zToken; - int nToken = 0; + int rc; /* Return code */ + int iIdx = 0; /* Index of new segment */ + sqlite3_int64 iNewLevel = 0; /* Level/index to create new segment at */ + SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */ + Fts3SegFilter filter; /* Segment term filter condition */ + Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */ + int bIgnoreEmpty = 0; /* True to ignore empty segments */ + i64 iMaxLevel = 0; /* Max level number for this index/langid */ - sqlite3_tokenizer *pTokenizer = p->pTokenizer; - sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; - sqlite3_tokenizer_cursor *pCsr; - int (*xNext)(sqlite3_tokenizer_cursor *pCursor, - const char**,int*,int*,int*,int*); + assert( iLevel==FTS3_SEGCURSOR_ALL + || iLevel==FTS3_SEGCURSOR_PENDING + || iLevel>=0 + ); + assert( iLevel=0 && iIndexnIndex ); - assert( pTokenizer && pModule ); + rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr); + if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; - /* If the user has inserted a NULL value, this function may be called with - ** zText==0. In this case, add zero token entries to the hash table and - ** return early. */ - if( zText==0 ){ - *pnWord = 0; - return SQLITE_OK; + if( iLevel!=FTS3_SEGCURSOR_PENDING ){ + rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel); + if( rc!=SQLITE_OK ) goto finished; } - rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr); - if( rc!=SQLITE_OK ){ - return rc; + if( iLevel==FTS3_SEGCURSOR_ALL ){ + /* This call is to merge all segments in the database to a single + ** segment. The level of the new segment is equal to the numerically + ** greatest segment level currently present in the database for this + ** index. The idx of the new segment is always 0. */ + if( csr.nSegment==1 ){ + rc = SQLITE_DONE; + goto finished; + } + iNewLevel = iMaxLevel; + bIgnoreEmpty = 1; + + }else{ + /* This call is to merge all segments at level iLevel. find the next + ** available segment index at level iLevel+1. The call to + ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to + ** a single iLevel+2 segment if necessary. */ + assert( FTS3_SEGCURSOR_PENDING==-1 ); + iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1); + rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx); + bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel); } + if( rc!=SQLITE_OK ) goto finished; - xNext = pModule->xNext; - while( SQLITE_OK==rc - && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos)) - ){ - int i; - if( iPos>=nWord ) nWord = iPos+1; + assert( csr.nSegment>0 ); + assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) ); + assert( iNewLevelaIndex[0].hPending, zToken, nToken + rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); + while( SQLITE_OK==rc ){ + rc = sqlite3Fts3SegReaderStep(p, &csr); + if( rc!=SQLITE_ROW ) break; + rc = fts3SegWriterAdd(p, &pWriter, 1, + csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist); + } + if( rc!=SQLITE_OK ) goto finished; + assert( pWriter || bIgnoreEmpty ); + + if( iLevel!=FTS3_SEGCURSOR_PENDING ){ + rc = fts3DeleteSegdir( + p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment ); - - /* Add the term to each of the prefix indexes that it is not too - ** short for. */ - for(i=1; rc==SQLITE_OK && inIndex; i++){ - struct Fts3Index *pIndex = &p->aIndex[i]; - if( nTokennPrefix ) continue; - rc = fts3PendingTermsAddOne( - p, iCol, iPos, &pIndex->hPending, zToken, pIndex->nPrefix - ); + if( rc!=SQLITE_OK ) goto finished; + } + if( pWriter ){ + rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx); + if( rc==SQLITE_OK ){ + if( iLevel==FTS3_SEGCURSOR_PENDING || iNewLevelnLeafData); + } } } - pModule->xClose(pCsr); - *pnWord += nWord; - return (rc==SQLITE_DONE ? SQLITE_OK : rc); + finished: + fts3SegWriterFree(pWriter); + sqlite3Fts3SegReaderFinish(&csr); + return rc; } + /* -** Calling this function indicates that subsequent calls to -** fts3PendingTermsAdd() are to add term/position-list pairs for the -** contents of the document with docid iDocid. +** Flush the contents of pendingTerms to level 0 segments. */ -static int fts3PendingTermsDocid( - Fts3Table *p, /* Full-text table handle */ - int iLangid, /* Language id of row being written */ - sqlite_int64 iDocid /* Docid of row being written */ -){ - assert( iLangid>=0 ); +SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ + int rc = SQLITE_OK; + int i; + + for(i=0; rc==SQLITE_OK && inIndex; i++){ + rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } + sqlite3Fts3PendingTermsClear(p); - /* TODO(shess) Explore whether partially flushing the buffer on - ** forced-flush would provide better performance. I suspect that if - ** we ordered the doclists by size and flushed the largest until the - ** buffer was half empty, that would let the less frequent terms - ** generate longer doclists. + /* Determine the auto-incr-merge setting if unknown. If enabled, + ** estimate the number of leaf blocks of content to be written */ - if( iDocid<=p->iPrevDocid - || p->iPrevLangid!=iLangid - || p->nPendingData>p->nMaxPendingData + if( rc==SQLITE_OK && p->bHasStat + && p->nAutoincrmerge==0xff && p->nLeafAdd>0 ){ - int rc = sqlite3Fts3PendingTermsFlush(p); - if( rc!=SQLITE_OK ) return rc; + sqlite3_stmt *pStmt = 0; + rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); + rc = sqlite3_step(pStmt); + if( rc==SQLITE_ROW ){ + p->nAutoincrmerge = sqlite3_column_int(pStmt, 0); + if( p->nAutoincrmerge==1 ) p->nAutoincrmerge = 8; + }else if( rc==SQLITE_DONE ){ + p->nAutoincrmerge = 0; + } + rc = sqlite3_reset(pStmt); + } } - p->iPrevDocid = iDocid; - p->iPrevLangid = iLangid; - return SQLITE_OK; + return rc; } /* -** Discard the contents of the pending-terms hash tables. +** Encode N integers as varints into a blob. */ -SQLITE_PRIVATE void sqlite3Fts3PendingTermsClear(Fts3Table *p){ - int i; - for(i=0; inIndex; i++){ - Fts3HashElem *pElem; - Fts3Hash *pHash = &p->aIndex[i].hPending; - for(pElem=fts3HashFirst(pHash); pElem; pElem=fts3HashNext(pElem)){ - PendingList *pList = (PendingList *)fts3HashData(pElem); - fts3PendingListDelete(pList); - } - fts3HashClear(pHash); +static void fts3EncodeIntArray( + int N, /* The number of integers to encode */ + u32 *a, /* The integer values */ + char *zBuf, /* Write the BLOB here */ + int *pNBuf /* Write number of bytes if zBuf[] used here */ +){ + int i, j; + for(i=j=0; inPendingData = 0; + *pNBuf = j; } /* -** This function is called by the xUpdate() method as part of an INSERT -** operation. It adds entries for each term in the new record to the -** pendingTerms hash table. -** -** Argument apVal is the same as the similarly named argument passed to -** fts3InsertData(). Parameter iDocid is the docid of the new row. +** Decode a blob of varints into N integers */ -static int fts3InsertTerms( - Fts3Table *p, - int iLangid, - sqlite3_value **apVal, - u32 *aSz +static void fts3DecodeIntArray( + int N, /* The number of integers to decode */ + u32 *a, /* Write the integer values */ + const char *zBuf, /* The BLOB containing the varints */ + int nBuf /* size of the BLOB */ ){ - int i; /* Iterator variable */ - for(i=2; inColumn+2; i++){ - int iCol = i-2; - if( p->abNotindexed[iCol]==0 ){ - const char *zText = (const char *)sqlite3_value_text(apVal[i]); - int rc = fts3PendingTermsAdd(p, iLangid, zText, iCol, &aSz[iCol]); - if( rc!=SQLITE_OK ){ - return rc; - } - aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]); - } + int i, j; + UNUSED_PARAMETER(nBuf); + for(i=j=0; inColumn+1] Right-most user-defined column -** apVal[p->nColumn+2] Hidden column with same name as table -** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid) -** apVal[p->nColumn+4] Hidden languageid column +** Insert the sizes (in tokens) for each column of the document +** with docid equal to p->iPrevDocid. The sizes are encoded as +** a blob of varints. */ -static int fts3InsertData( - Fts3Table *p, /* Full-text table */ - sqlite3_value **apVal, /* Array of values to insert */ - sqlite3_int64 *piDocid /* OUT: Docid for row just inserted */ +static void fts3InsertDocsize( + int *pRC, /* Result code */ + Fts3Table *p, /* Table into which to insert */ + u32 *aSz /* Sizes of each column, in tokens */ ){ - int rc; /* Return code */ - sqlite3_stmt *pContentInsert; /* INSERT INTO %_content VALUES(...) */ - - if( p->zContentTbl ){ - sqlite3_value *pRowid = apVal[p->nColumn+3]; - if( sqlite3_value_type(pRowid)==SQLITE_NULL ){ - pRowid = apVal[1]; - } - if( sqlite3_value_type(pRowid)!=SQLITE_INTEGER ){ - return SQLITE_CONSTRAINT; - } - *piDocid = sqlite3_value_int64(pRowid); - return SQLITE_OK; - } + char *pBlob; /* The BLOB encoding of the document size */ + int nBlob; /* Number of bytes in the BLOB */ + sqlite3_stmt *pStmt; /* Statement used to insert the encoding */ + int rc; /* Result code from subfunctions */ - /* Locate the statement handle used to insert data into the %_content - ** table. The SQL for this statement is: - ** - ** INSERT INTO %_content VALUES(?, ?, ?, ...) - ** - ** The statement features N '?' variables, where N is the number of user - ** defined columns in the FTS3 table, plus one for the docid field. - */ - rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]); - if( rc==SQLITE_OK && p->zLanguageid ){ - rc = sqlite3_bind_int( - pContentInsert, p->nColumn+2, - sqlite3_value_int(apVal[p->nColumn+4]) - ); + if( *pRC ) return; + pBlob = sqlite3_malloc( 10*p->nColumn ); + if( pBlob==0 ){ + *pRC = SQLITE_NOMEM; + return; } - if( rc!=SQLITE_OK ) return rc; - - /* There is a quirk here. The users INSERT statement may have specified - ** a value for the "rowid" field, for the "docid" field, or for both. - ** Which is a problem, since "rowid" and "docid" are aliases for the - ** same value. For example: - ** - ** INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2); - ** - ** In FTS3, this is an error. It is an error to specify non-NULL values - ** for both docid and some other rowid alias. - */ - if( SQLITE_NULL!=sqlite3_value_type(apVal[3+p->nColumn]) ){ - if( SQLITE_NULL==sqlite3_value_type(apVal[0]) - && SQLITE_NULL!=sqlite3_value_type(apVal[1]) - ){ - /* A rowid/docid conflict. */ - return SQLITE_ERROR; - } - rc = sqlite3_bind_value(pContentInsert, 1, apVal[3+p->nColumn]); - if( rc!=SQLITE_OK ) return rc; + fts3EncodeIntArray(p->nColumn, aSz, pBlob, &nBlob); + rc = fts3SqlStmt(p, SQL_REPLACE_DOCSIZE, &pStmt, 0); + if( rc ){ + sqlite3_free(pBlob); + *pRC = rc; + return; } - - /* Execute the statement to insert the record. Set *piDocid to the - ** new docid value. - */ - sqlite3_step(pContentInsert); - rc = sqlite3_reset(pContentInsert); - - *piDocid = sqlite3_last_insert_rowid(p->db); - return rc; + sqlite3_bind_int64(pStmt, 1, p->iPrevDocid); + sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free); + sqlite3_step(pStmt); + *pRC = sqlite3_reset(pStmt); } - - /* -** Remove all data from the FTS3 table. Clear the hash table containing -** pending terms. +** Record 0 of the %_stat table contains a blob consisting of N varints, +** where N is the number of user defined columns in the fts3 table plus +** two. If nCol is the number of user defined columns, then values of the +** varints are set as follows: +** +** Varint 0: Total number of rows in the table. +** +** Varint 1..nCol: For each column, the total number of tokens stored in +** the column for all rows of the table. +** +** Varint 1+nCol: The total size, in bytes, of all text values in all +** columns of all rows of the table. +** */ -static int fts3DeleteAll(Fts3Table *p, int bContent){ - int rc = SQLITE_OK; /* Return code */ +static void fts3UpdateDocTotals( + int *pRC, /* The result code */ + Fts3Table *p, /* Table being updated */ + u32 *aSzIns, /* Size increases */ + u32 *aSzDel, /* Size decreases */ + int nChng /* Change in the number of documents */ +){ + char *pBlob; /* Storage for BLOB written into %_stat */ + int nBlob; /* Size of BLOB written into %_stat */ + u32 *a; /* Array of integers that becomes the BLOB */ + sqlite3_stmt *pStmt; /* Statement for reading and writing */ + int i; /* Loop counter */ + int rc; /* Result code from subfunctions */ - /* Discard the contents of the pending-terms hash table. */ - sqlite3Fts3PendingTermsClear(p); + const int nStat = p->nColumn+2; - /* Delete everything from the shadow tables. Except, leave %_content as - ** is if bContent is false. */ - assert( p->zContentTbl==0 || bContent==0 ); - if( bContent ) fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0); - fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0); - fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0); - if( p->bHasDocsize ){ - fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0); + if( *pRC ) return; + a = sqlite3_malloc( (sizeof(u32)+10)*nStat ); + if( a==0 ){ + *pRC = SQLITE_NOMEM; + return; } - if( p->bHasStat ){ - fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0); + pBlob = (char*)&a[nStat]; + rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0); + if( rc ){ + sqlite3_free(a); + *pRC = rc; + return; } - return rc; -} - -/* -** -*/ -static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){ - int iLangid = 0; - if( p->zLanguageid ) iLangid = sqlite3_column_int(pSelect, p->nColumn+1); - return iLangid; + sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); + if( sqlite3_step(pStmt)==SQLITE_ROW ){ + fts3DecodeIntArray(nStat, a, + sqlite3_column_blob(pStmt, 0), + sqlite3_column_bytes(pStmt, 0)); + }else{ + memset(a, 0, sizeof(u32)*(nStat) ); + } + rc = sqlite3_reset(pStmt); + if( rc!=SQLITE_OK ){ + sqlite3_free(a); + *pRC = rc; + return; + } + if( nChng<0 && a[0]<(u32)(-nChng) ){ + a[0] = 0; + }else{ + a[0] += nChng; + } + for(i=0; inColumn+1; i++){ + u32 x = a[i+1]; + if( x+aSzIns[i] < aSzDel[i] ){ + x = 0; + }else{ + x = x + aSzIns[i] - aSzDel[i]; + } + a[i+1] = x; + } + fts3EncodeIntArray(nStat, a, pBlob, &nBlob); + rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); + if( rc ){ + sqlite3_free(a); + *pRC = rc; + return; + } + sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); + sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, SQLITE_STATIC); + sqlite3_step(pStmt); + *pRC = sqlite3_reset(pStmt); + sqlite3_free(a); } /* -** The first element in the apVal[] array is assumed to contain the docid -** (an integer) of a row about to be deleted. Remove all terms from the -** full-text index. +** Merge the entire database so that there is one segment for each +** iIndex/iLangid combination. */ -static void fts3DeleteTerms( - int *pRC, /* Result code */ - Fts3Table *p, /* The FTS table to delete from */ - sqlite3_value *pRowid, /* The docid to be deleted */ - u32 *aSz, /* Sizes of deleted document written here */ - int *pbFound /* OUT: Set to true if row really does exist */ -){ +static int fts3DoOptimize(Fts3Table *p, int bReturnDone){ + int bSeenDone = 0; int rc; - sqlite3_stmt *pSelect; + sqlite3_stmt *pAllLangid = 0; - assert( *pbFound==0 ); - if( *pRC ) return; - rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid); + rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); if( rc==SQLITE_OK ){ - if( SQLITE_ROW==sqlite3_step(pSelect) ){ + int rc2; + sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid); + sqlite3_bind_int(pAllLangid, 2, p->nIndex); + while( sqlite3_step(pAllLangid)==SQLITE_ROW ){ int i; - int iLangid = langidFromSelect(p, pSelect); - rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0)); - for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){ - int iCol = i-1; - if( p->abNotindexed[iCol]==0 ){ - const char *zText = (const char *)sqlite3_column_text(pSelect, i); - rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[iCol]); - aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i); + int iLangid = sqlite3_column_int(pAllLangid, 0); + for(i=0; rc==SQLITE_OK && inIndex; i++){ + rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL); + if( rc==SQLITE_DONE ){ + bSeenDone = 1; + rc = SQLITE_OK; } } - if( rc!=SQLITE_OK ){ - sqlite3_reset(pSelect); - *pRC = rc; - return; - } - *pbFound = 1; - } - rc = sqlite3_reset(pSelect); - }else{ - sqlite3_reset(pSelect); - } - *pRC = rc; -} - -/* -** Forward declaration to account for the circular dependency between -** functions fts3SegmentMerge() and fts3AllocateSegdirIdx(). -*/ -static int fts3SegmentMerge(Fts3Table *, int, int, int); - -/* -** This function allocates a new level iLevel index in the segdir table. -** Usually, indexes are allocated within a level sequentially starting -** with 0, so the allocated index is one greater than the value returned -** by: -** -** SELECT max(idx) FROM %_segdir WHERE level = :iLevel -** -** However, if there are already FTS3_MERGE_COUNT indexes at the requested -** level, they are merged into a single level (iLevel+1) segment and the -** allocated index is 0. -** -** If successful, *piIdx is set to the allocated index slot and SQLITE_OK -** returned. Otherwise, an SQLite error code is returned. -*/ -static int fts3AllocateSegdirIdx( - Fts3Table *p, - int iLangid, /* Language id */ - int iIndex, /* Index for p->aIndex */ - int iLevel, - int *piIdx -){ - int rc; /* Return Code */ - sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */ - int iNext = 0; /* Result of query pNextIdx */ - - assert( iLangid>=0 ); - assert( p->nIndex>=1 ); - - /* Set variable iNext to the next available segdir index at level iLevel. */ - rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64( - pNextIdx, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel) - ); - if( SQLITE_ROW==sqlite3_step(pNextIdx) ){ - iNext = sqlite3_column_int(pNextIdx, 0); } - rc = sqlite3_reset(pNextIdx); + rc2 = sqlite3_reset(pAllLangid); + if( rc==SQLITE_OK ) rc = rc2; } - if( rc==SQLITE_OK ){ - /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already - ** full, merge all segments in level iLevel into a single iLevel+1 - ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise, - ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext. - */ - if( iNext>=FTS3_MERGE_COUNT ){ - fts3LogMerge(16, getAbsoluteLevel(p, iLangid, iIndex, iLevel)); - rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel); - *piIdx = 0; - }else{ - *piIdx = iNext; - } - } + sqlite3Fts3SegmentsClose(p); + sqlite3Fts3PendingTermsClear(p); - return rc; + return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc; } /* -** The %_segments table is declared as follows: -** -** CREATE TABLE %_segments(blockid INTEGER PRIMARY KEY, block BLOB) -** -** This function reads data from a single row of the %_segments table. The -** specific row is identified by the iBlockid parameter. If paBlob is not -** NULL, then a buffer is allocated using sqlite3_malloc() and populated -** with the contents of the blob stored in the "block" column of the -** identified table row is. Whether or not paBlob is NULL, *pnBlob is set -** to the size of the blob in bytes before returning. +** This function is called when the user executes the following statement: ** -** If an error occurs, or the table does not contain the specified row, -** an SQLite error code is returned. Otherwise, SQLITE_OK is returned. If -** paBlob is non-NULL, then it is the responsibility of the caller to -** eventually free the returned buffer. +** INSERT INTO () VALUES('rebuild'); ** -** This function may leave an open sqlite3_blob* handle in the -** Fts3Table.pSegments variable. This handle is reused by subsequent calls -** to this function. The handle may be closed by calling the -** sqlite3Fts3SegmentsClose() function. Reusing a blob handle is a handy -** performance improvement, but the blob handle should always be closed -** before control is returned to the user (to prevent a lock being held -** on the database file for longer than necessary). Thus, any virtual table -** method (xFilter etc.) that may directly or indirectly call this function -** must call sqlite3Fts3SegmentsClose() before returning. +** The entire FTS index is discarded and rebuilt. If the table is one +** created using the content=xxx option, then the new index is based on +** the current contents of the xxx table. Otherwise, it is rebuilt based +** on the contents of the %_content table. */ -SQLITE_PRIVATE int sqlite3Fts3ReadBlock( - Fts3Table *p, /* FTS3 table handle */ - sqlite3_int64 iBlockid, /* Access the row with blockid=$iBlockid */ - char **paBlob, /* OUT: Blob data in malloc'd buffer */ - int *pnBlob, /* OUT: Size of blob data */ - int *pnLoad /* OUT: Bytes actually loaded */ -){ - int rc; /* Return code */ +static int fts3DoRebuild(Fts3Table *p){ + int rc; /* Return Code */ - /* pnBlob must be non-NULL. paBlob may be NULL or non-NULL. */ - assert( pnBlob ); + rc = fts3DeleteAll(p, 0); + if( rc==SQLITE_OK ){ + u32 *aSz = 0; + u32 *aSzIns = 0; + u32 *aSzDel = 0; + sqlite3_stmt *pStmt = 0; + int nEntry = 0; - if( p->pSegments ){ - rc = sqlite3_blob_reopen(p->pSegments, iBlockid); - }else{ - if( 0==p->zSegmentsTbl ){ - p->zSegmentsTbl = sqlite3_mprintf("%s_segments", p->zName); - if( 0==p->zSegmentsTbl ) return SQLITE_NOMEM; + /* Compose and prepare an SQL statement to loop through the content table */ + char *zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); + sqlite3_free(zSql); } - rc = sqlite3_blob_open( - p->db, p->zDb, p->zSegmentsTbl, "block", iBlockid, 0, &p->pSegments - ); - } - if( rc==SQLITE_OK ){ - int nByte = sqlite3_blob_bytes(p->pSegments); - *pnBlob = nByte; - if( paBlob ){ - char *aByte = sqlite3_malloc(nByte + FTS3_NODE_PADDING); - if( !aByte ){ + if( rc==SQLITE_OK ){ + int nByte = sizeof(u32) * (p->nColumn+1)*3; + aSz = (u32 *)sqlite3_malloc(nByte); + if( aSz==0 ){ rc = SQLITE_NOMEM; }else{ - if( pnLoad && nByte>(FTS3_NODE_CHUNK_THRESHOLD) ){ - nByte = FTS3_NODE_CHUNKSIZE; - *pnLoad = nByte; + memset(aSz, 0, nByte); + aSzIns = &aSz[p->nColumn+1]; + aSzDel = &aSzIns[p->nColumn+1]; + } + } + + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + int iCol; + int iLangid = langidFromSelect(p, pStmt); + rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0)); + memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1)); + for(iCol=0; rc==SQLITE_OK && iColnColumn; iCol++){ + if( p->abNotindexed[iCol]==0 ){ + const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1); + rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]); + aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1); } - rc = sqlite3_blob_read(p->pSegments, aByte, nByte, 0); - memset(&aByte[nByte], 0, FTS3_NODE_PADDING); - if( rc!=SQLITE_OK ){ - sqlite3_free(aByte); - aByte = 0; + } + if( p->bHasDocsize ){ + fts3InsertDocsize(&rc, p, aSz); + } + if( rc!=SQLITE_OK ){ + sqlite3_finalize(pStmt); + pStmt = 0; + }else{ + nEntry++; + for(iCol=0; iCol<=p->nColumn; iCol++){ + aSzIns[iCol] += aSz[iCol]; } } - *paBlob = aByte; + } + if( p->bFts4 ){ + fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nEntry); + } + sqlite3_free(aSz); + + if( pStmt ){ + int rc2 = sqlite3_finalize(pStmt); + if( rc==SQLITE_OK ){ + rc = rc2; + } } } return rc; } + /* -** Close the blob handle at p->pSegments, if it is open. See comments above -** the sqlite3Fts3ReadBlock() function for details. +** This function opens a cursor used to read the input data for an +** incremental merge operation. Specifically, it opens a cursor to scan +** the oldest nSeg segments (idx=0 through idx=(nSeg-1)) in absolute +** level iAbsLevel. */ -SQLITE_PRIVATE void sqlite3Fts3SegmentsClose(Fts3Table *p){ - sqlite3_blob_close(p->pSegments); - p->pSegments = 0; -} - -static int fts3SegReaderIncrRead(Fts3SegReader *pReader){ - int nRead; /* Number of bytes to read */ - int rc; /* Return code */ +static int fts3IncrmergeCsr( + Fts3Table *p, /* FTS3 table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level to open */ + int nSeg, /* Number of segments to merge */ + Fts3MultiSegReader *pCsr /* Cursor object to populate */ +){ + int rc; /* Return Code */ + sqlite3_stmt *pStmt = 0; /* Statement used to read %_segdir entry */ + int nByte; /* Bytes allocated at pCsr->apSegment[] */ - nRead = MIN(pReader->nNode - pReader->nPopulate, FTS3_NODE_CHUNKSIZE); - rc = sqlite3_blob_read( - pReader->pBlob, - &pReader->aNode[pReader->nPopulate], - nRead, - pReader->nPopulate - ); + /* Allocate space for the Fts3MultiSegReader.aCsr[] array */ + memset(pCsr, 0, sizeof(*pCsr)); + nByte = sizeof(Fts3SegReader *) * nSeg; + pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte); + if( pCsr->apSegment==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pCsr->apSegment, 0, nByte); + rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); + } if( rc==SQLITE_OK ){ - pReader->nPopulate += nRead; - memset(&pReader->aNode[pReader->nPopulate], 0, FTS3_NODE_PADDING); - if( pReader->nPopulate==pReader->nNode ){ - sqlite3_blob_close(pReader->pBlob); - pReader->pBlob = 0; - pReader->nPopulate = 0; + int i; + int rc2; + sqlite3_bind_int64(pStmt, 1, iAbsLevel); + assert( pCsr->nSegment==0 ); + for(i=0; rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW && iapSegment[i] + ); + pCsr->nSegment++; } + rc2 = sqlite3_reset(pStmt); + if( rc==SQLITE_OK ) rc = rc2; } - return rc; -} -static int fts3SegReaderRequire(Fts3SegReader *pReader, char *pFrom, int nByte){ - int rc = SQLITE_OK; - assert( !pReader->pBlob - || (pFrom>=pReader->aNode && pFrom<&pReader->aNode[pReader->nNode]) - ); - while( pReader->pBlob && rc==SQLITE_OK - && (pFrom - pReader->aNode + nByte)>pReader->nPopulate - ){ - rc = fts3SegReaderIncrRead(pReader); - } return rc; } +typedef struct IncrmergeWriter IncrmergeWriter; +typedef struct NodeWriter NodeWriter; +typedef struct Blob Blob; +typedef struct NodeReader NodeReader; + /* -** Set an Fts3SegReader cursor to point at EOF. +** An instance of the following structure is used as a dynamic buffer +** to build up nodes or other blobs of data in. +** +** The function blobGrowBuffer() is used to extend the allocation. */ -static void fts3SegReaderSetEof(Fts3SegReader *pSeg){ - if( !fts3SegReaderIsRootOnly(pSeg) ){ - sqlite3_free(pSeg->aNode); - sqlite3_blob_close(pSeg->pBlob); - pSeg->pBlob = 0; - } - pSeg->aNode = 0; -} +struct Blob { + char *a; /* Pointer to allocation */ + int n; /* Number of valid bytes of data in a[] */ + int nAlloc; /* Allocated size of a[] (nAlloc>=n) */ +}; /* -** Move the iterator passed as the first argument to the next term in the -** segment. If successful, SQLITE_OK is returned. If there is no next term, -** SQLITE_DONE. Otherwise, an SQLite error code. +** This structure is used to build up buffers containing segment b-tree +** nodes (blocks). */ -static int fts3SegReaderNext( - Fts3Table *p, - Fts3SegReader *pReader, - int bIncr -){ - int rc; /* Return code of various sub-routines */ - char *pNext; /* Cursor variable */ - int nPrefix; /* Number of bytes in term prefix */ - int nSuffix; /* Number of bytes in term suffix */ - - if( !pReader->aDoclist ){ - pNext = pReader->aNode; - }else{ - pNext = &pReader->aDoclist[pReader->nDoclist]; - } - - if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){ +struct NodeWriter { + sqlite3_int64 iBlock; /* Current block id */ + Blob key; /* Last key written to the current block */ + Blob block; /* Current block image */ +}; - if( fts3SegReaderIsPending(pReader) ){ - Fts3HashElem *pElem = *(pReader->ppNextElem); - if( pElem==0 ){ - pReader->aNode = 0; - }else{ - PendingList *pList = (PendingList *)fts3HashData(pElem); - pReader->zTerm = (char *)fts3HashKey(pElem); - pReader->nTerm = fts3HashKeysize(pElem); - pReader->nNode = pReader->nDoclist = pList->nData + 1; - pReader->aNode = pReader->aDoclist = pList->aData; - pReader->ppNextElem++; - assert( pReader->aNode ); - } - return SQLITE_OK; - } +/* +** An object of this type contains the state required to create or append +** to an appendable b-tree segment. +*/ +struct IncrmergeWriter { + int nLeafEst; /* Space allocated for leaf blocks */ + int nWork; /* Number of leaf pages flushed */ + sqlite3_int64 iAbsLevel; /* Absolute level of input segments */ + int iIdx; /* Index of *output* segment in iAbsLevel+1 */ + sqlite3_int64 iStart; /* Block number of first allocated block */ + sqlite3_int64 iEnd; /* Block number of last allocated block */ + sqlite3_int64 nLeafData; /* Bytes of leaf page data so far */ + u8 bNoLeafData; /* If true, store 0 for segment size */ + NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT]; +}; - fts3SegReaderSetEof(pReader); +/* +** An object of the following type is used to read data from a single +** FTS segment node. See the following functions: +** +** nodeReaderInit() +** nodeReaderNext() +** nodeReaderRelease() +*/ +struct NodeReader { + const char *aNode; + int nNode; + int iOff; /* Current offset within aNode[] */ - /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf - ** blocks have already been traversed. */ - assert( pReader->iCurrentBlock<=pReader->iLeafEndBlock ); - if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){ - return SQLITE_OK; - } + /* Output variables. Containing the current node entry. */ + sqlite3_int64 iChild; /* Pointer to child node */ + Blob term; /* Current term */ + const char *aDoclist; /* Pointer to doclist */ + int nDoclist; /* Size of doclist in bytes */ +}; - rc = sqlite3Fts3ReadBlock( - p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode, - (bIncr ? &pReader->nPopulate : 0) - ); - if( rc!=SQLITE_OK ) return rc; - assert( pReader->pBlob==0 ); - if( bIncr && pReader->nPopulatenNode ){ - pReader->pBlob = p->pSegments; - p->pSegments = 0; +/* +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** Otherwise, if the allocation at pBlob->a is not already at least nMin +** bytes in size, extend (realloc) it to be so. +** +** If an OOM error occurs, set *pRc to SQLITE_NOMEM and leave pBlob->a +** unmodified. Otherwise, if the allocation succeeds, update pBlob->nAlloc +** to reflect the new size of the pBlob->a[] buffer. +*/ +static void blobGrowBuffer(Blob *pBlob, int nMin, int *pRc){ + if( *pRc==SQLITE_OK && nMin>pBlob->nAlloc ){ + int nAlloc = nMin; + char *a = (char *)sqlite3_realloc(pBlob->a, nAlloc); + if( a ){ + pBlob->nAlloc = nAlloc; + pBlob->a = a; + }else{ + *pRc = SQLITE_NOMEM; } - pNext = pReader->aNode; } +} - assert( !fts3SegReaderIsPending(pReader) ); +/* +** Attempt to advance the node-reader object passed as the first argument to +** the next entry on the node. +** +** Return an error code if an error occurs (SQLITE_NOMEM is possible). +** Otherwise return SQLITE_OK. If there is no next entry on the node +** (e.g. because the current entry is the last) set NodeReader->aNode to +** NULL to indicate EOF. Otherwise, populate the NodeReader structure output +** variables for the new entry. +*/ +static int nodeReaderNext(NodeReader *p){ + int bFirst = (p->term.n==0); /* True for first term on the node */ + int nPrefix = 0; /* Bytes to copy from previous term */ + int nSuffix = 0; /* Bytes to append to the prefix */ + int rc = SQLITE_OK; /* Return code */ - rc = fts3SegReaderRequire(pReader, pNext, FTS3_VARINT_MAX*2); - if( rc!=SQLITE_OK ) return rc; - - /* Because of the FTS3_NODE_PADDING bytes of padding, the following is - ** safe (no risk of overread) even if the node data is corrupted. */ - pNext += fts3GetVarint32(pNext, &nPrefix); - pNext += fts3GetVarint32(pNext, &nSuffix); - if( nPrefix<0 || nSuffix<=0 - || &pNext[nSuffix]>&pReader->aNode[pReader->nNode] - ){ - return FTS_CORRUPT_VTAB; - } + assert( p->aNode ); + if( p->iChild && bFirst==0 ) p->iChild++; + if( p->iOff>=p->nNode ){ + /* EOF */ + p->aNode = 0; + }else{ + if( bFirst==0 ){ + p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nPrefix); + } + p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nSuffix); - if( nPrefix+nSuffix>pReader->nTermAlloc ){ - int nNew = (nPrefix+nSuffix)*2; - char *zNew = sqlite3_realloc(pReader->zTerm, nNew); - if( !zNew ){ - return SQLITE_NOMEM; + blobGrowBuffer(&p->term, nPrefix+nSuffix, &rc); + if( rc==SQLITE_OK ){ + memcpy(&p->term.a[nPrefix], &p->aNode[p->iOff], nSuffix); + p->term.n = nPrefix+nSuffix; + p->iOff += nSuffix; + if( p->iChild==0 ){ + p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist); + p->aDoclist = &p->aNode[p->iOff]; + p->iOff += p->nDoclist; + } } - pReader->zTerm = zNew; - pReader->nTermAlloc = nNew; } - rc = fts3SegReaderRequire(pReader, pNext, nSuffix+FTS3_VARINT_MAX); - if( rc!=SQLITE_OK ) return rc; + assert( p->iOff<=p->nNode ); - memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix); - pReader->nTerm = nPrefix+nSuffix; - pNext += nSuffix; - pNext += fts3GetVarint32(pNext, &pReader->nDoclist); - pReader->aDoclist = pNext; - pReader->pOffsetList = 0; + return rc; +} - /* Check that the doclist does not appear to extend past the end of the - ** b-tree node. And that the final byte of the doclist is 0x00. If either - ** of these statements is untrue, then the data structure is corrupt. - */ - if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode] - || (pReader->nPopulate==0 && pReader->aDoclist[pReader->nDoclist-1]) - ){ - return FTS_CORRUPT_VTAB; - } - return SQLITE_OK; +/* +** Release all dynamic resources held by node-reader object *p. +*/ +static void nodeReaderRelease(NodeReader *p){ + sqlite3_free(p->term.a); } /* -** Set the SegReader to point to the first docid in the doclist associated -** with the current term. +** Initialize a node-reader object to read the node in buffer aNode/nNode. +** +** If successful, SQLITE_OK is returned and the NodeReader object set to +** point to the first entry on the node (if any). Otherwise, an SQLite +** error code is returned. */ -static int fts3SegReaderFirstDocid(Fts3Table *pTab, Fts3SegReader *pReader){ - int rc = SQLITE_OK; - assert( pReader->aDoclist ); - assert( !pReader->pOffsetList ); - if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){ - u8 bEof = 0; - pReader->iDocid = 0; - pReader->nOffsetList = 0; - sqlite3Fts3DoclistPrev(0, - pReader->aDoclist, pReader->nDoclist, &pReader->pOffsetList, - &pReader->iDocid, &pReader->nOffsetList, &bEof - ); +static int nodeReaderInit(NodeReader *p, const char *aNode, int nNode){ + memset(p, 0, sizeof(NodeReader)); + p->aNode = aNode; + p->nNode = nNode; + + /* Figure out if this is a leaf or an internal node. */ + if( p->aNode[0] ){ + /* An internal node. */ + p->iOff = 1 + sqlite3Fts3GetVarint(&p->aNode[1], &p->iChild); }else{ - rc = fts3SegReaderRequire(pReader, pReader->aDoclist, FTS3_VARINT_MAX); - if( rc==SQLITE_OK ){ - int n = sqlite3Fts3GetVarint(pReader->aDoclist, &pReader->iDocid); - pReader->pOffsetList = &pReader->aDoclist[n]; - } + p->iOff = 1; } - return rc; + + return nodeReaderNext(p); } /* -** Advance the SegReader to point to the next docid in the doclist -** associated with the current term. -** -** If arguments ppOffsetList and pnOffsetList are not NULL, then -** *ppOffsetList is set to point to the first column-offset list -** in the doclist entry (i.e. immediately past the docid varint). -** *pnOffsetList is set to the length of the set of column-offset -** lists, not including the nul-terminator byte. For example: +** This function is called while writing an FTS segment each time a leaf o +** node is finished and written to disk. The key (zTerm/nTerm) is guaranteed +** to be greater than the largest key on the node just written, but smaller +** than or equal to the first key that will be written to the next leaf +** node. +** +** The block id of the leaf node just written to disk may be found in +** (pWriter->aNodeWriter[0].iBlock) when this function is called. */ -static int fts3SegReaderNextDocid( - Fts3Table *pTab, - Fts3SegReader *pReader, /* Reader to advance to next docid */ - char **ppOffsetList, /* OUT: Pointer to current position-list */ - int *pnOffsetList /* OUT: Length of *ppOffsetList in bytes */ +static int fts3IncrmergePush( + Fts3Table *p, /* Fts3 table handle */ + IncrmergeWriter *pWriter, /* Writer object */ + const char *zTerm, /* Term to write to internal node */ + int nTerm /* Bytes at zTerm */ ){ - int rc = SQLITE_OK; - char *p = pReader->pOffsetList; - char c = 0; + sqlite3_int64 iPtr = pWriter->aNodeWriter[0].iBlock; + int iLayer; - assert( p ); + assert( nTerm>0 ); + for(iLayer=1; ALWAYS(iLayeraNodeWriter[iLayer]; + int rc = SQLITE_OK; + int nPrefix; + int nSuffix; + int nSpace; - if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){ - /* A pending-terms seg-reader for an FTS4 table that uses order=desc. - ** Pending-terms doclists are always built up in ascending order, so - ** we have to iterate through them backwards here. */ - u8 bEof = 0; - if( ppOffsetList ){ - *ppOffsetList = pReader->pOffsetList; - *pnOffsetList = pReader->nOffsetList - 1; - } - sqlite3Fts3DoclistPrev(0, - pReader->aDoclist, pReader->nDoclist, &p, &pReader->iDocid, - &pReader->nOffsetList, &bEof - ); - if( bEof ){ - pReader->pOffsetList = 0; - }else{ - pReader->pOffsetList = p; - } - }else{ - char *pEnd = &pReader->aDoclist[pReader->nDoclist]; + /* Figure out how much space the key will consume if it is written to + ** the current node of layer iLayer. Due to the prefix compression, + ** the space required changes depending on which node the key is to + ** be added to. */ + nPrefix = fts3PrefixCompress(pNode->key.a, pNode->key.n, zTerm, nTerm); + nSuffix = nTerm - nPrefix; + nSpace = sqlite3Fts3VarintLen(nPrefix); + nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; - /* Pointer p currently points at the first byte of an offset list. The - ** following block advances it to point one byte past the end of - ** the same offset list. */ - while( 1 ){ - - /* The following line of code (and the "p++" below the while() loop) is - ** normally all that is required to move pointer p to the desired - ** position. The exception is if this node is being loaded from disk - ** incrementally and pointer "p" now points to the first byte past - ** the populated part of pReader->aNode[]. - */ - while( *p | c ) c = *p++ & 0x80; - assert( *p==0 ); - - if( pReader->pBlob==0 || p<&pReader->aNode[pReader->nPopulate] ) break; - rc = fts3SegReaderIncrRead(pReader); - if( rc!=SQLITE_OK ) return rc; - } - p++; - - /* If required, populate the output variables with a pointer to and the - ** size of the previous offset-list. - */ - if( ppOffsetList ){ - *ppOffsetList = pReader->pOffsetList; - *pnOffsetList = (int)(p - pReader->pOffsetList - 1); - } + if( pNode->key.n==0 || (pNode->block.n + nSpace)<=p->nNodeSize ){ + /* If the current node of layer iLayer contains zero keys, or if adding + ** the key to it will not cause it to grow to larger than nNodeSize + ** bytes in size, write the key here. */ - /* List may have been edited in place by fts3EvalNearTrim() */ - while( p=pEnd ){ - pReader->pOffsetList = 0; - }else{ - rc = fts3SegReaderRequire(pReader, p, FTS3_VARINT_MAX); - if( rc==SQLITE_OK ){ - sqlite3_int64 iDelta; - pReader->pOffsetList = p + sqlite3Fts3GetVarint(p, &iDelta); - if( pTab->bDescIdx ){ - pReader->iDocid -= iDelta; - }else{ - pReader->iDocid += iDelta; + Blob *pBlk = &pNode->block; + if( pBlk->n==0 ){ + blobGrowBuffer(pBlk, p->nNodeSize, &rc); + if( rc==SQLITE_OK ){ + pBlk->a[0] = (char)iLayer; + pBlk->n = 1 + sqlite3Fts3PutVarint(&pBlk->a[1], iPtr); } } - } - } - - return SQLITE_OK; -} + blobGrowBuffer(pBlk, pBlk->n + nSpace, &rc); + blobGrowBuffer(&pNode->key, nTerm, &rc); + if( rc==SQLITE_OK ){ + if( pNode->key.n ){ + pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nPrefix); + } + pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nSuffix); + memcpy(&pBlk->a[pBlk->n], &zTerm[nPrefix], nSuffix); + pBlk->n += nSuffix; -SQLITE_PRIVATE int sqlite3Fts3MsrOvfl( - Fts3Cursor *pCsr, - Fts3MultiSegReader *pMsr, - int *pnOvfl -){ - Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; - int nOvfl = 0; - int ii; - int rc = SQLITE_OK; - int pgsz = p->nPgsz; + memcpy(pNode->key.a, zTerm, nTerm); + pNode->key.n = nTerm; + } + }else{ + /* Otherwise, flush the current node of layer iLayer to disk. + ** Then allocate a new, empty sibling node. The key will be written + ** into the parent of this node. */ + rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n); - assert( p->bFts4 ); - assert( pgsz>0 ); + assert( pNode->block.nAlloc>=p->nNodeSize ); + pNode->block.a[0] = (char)iLayer; + pNode->block.n = 1 + sqlite3Fts3PutVarint(&pNode->block.a[1], iPtr+1); - for(ii=0; rc==SQLITE_OK && iinSegment; ii++){ - Fts3SegReader *pReader = pMsr->apSegment[ii]; - if( !fts3SegReaderIsPending(pReader) - && !fts3SegReaderIsRootOnly(pReader) - ){ - sqlite3_int64 jj; - for(jj=pReader->iStartBlock; jj<=pReader->iLeafEndBlock; jj++){ - int nBlob; - rc = sqlite3Fts3ReadBlock(p, jj, 0, &nBlob, 0); - if( rc!=SQLITE_OK ) break; - if( (nBlob+35)>pgsz ){ - nOvfl += (nBlob + 34)/pgsz; - } - } + iNextPtr = pNode->iBlock; + pNode->iBlock++; + pNode->key.n = 0; } - } - *pnOvfl = nOvfl; - return rc; -} -/* -** Free all allocations associated with the iterator passed as the -** second argument. -*/ -SQLITE_PRIVATE void sqlite3Fts3SegReaderFree(Fts3SegReader *pReader){ - if( pReader && !fts3SegReaderIsPending(pReader) ){ - sqlite3_free(pReader->zTerm); - if( !fts3SegReaderIsRootOnly(pReader) ){ - sqlite3_free(pReader->aNode); - sqlite3_blob_close(pReader->pBlob); - } + if( rc!=SQLITE_OK || iNextPtr==0 ) return rc; + iPtr = iNextPtr; } - sqlite3_free(pReader); + + assert( 0 ); + return 0; } /* -** Allocate a new SegReader object. +** Append a term and (optionally) doclist to the FTS segment node currently +** stored in blob *pNode. The node need not contain any terms, but the +** header must be written before this function is called. +** +** A node header is a single 0x00 byte for a leaf node, or a height varint +** followed by the left-hand-child varint for an internal node. +** +** The term to be appended is passed via arguments zTerm/nTerm. For a +** leaf node, the doclist is passed as aDoclist/nDoclist. For an internal +** node, both aDoclist and nDoclist must be passed 0. +** +** If the size of the value in blob pPrev is zero, then this is the first +** term written to the node. Otherwise, pPrev contains a copy of the +** previous term. Before this function returns, it is updated to contain a +** copy of zTerm/nTerm. +** +** It is assumed that the buffer associated with pNode is already large +** enough to accommodate the new entry. The buffer associated with pPrev +** is extended by this function if requrired. +** +** If an error (i.e. OOM condition) occurs, an SQLite error code is +** returned. Otherwise, SQLITE_OK. */ -SQLITE_PRIVATE int sqlite3Fts3SegReaderNew( - int iAge, /* Segment "age". */ - int bLookup, /* True for a lookup only */ - sqlite3_int64 iStartLeaf, /* First leaf to traverse */ - sqlite3_int64 iEndLeaf, /* Final leaf to traverse */ - sqlite3_int64 iEndBlock, /* Final block of segment */ - const char *zRoot, /* Buffer containing root node */ - int nRoot, /* Size of buffer containing root node */ - Fts3SegReader **ppReader /* OUT: Allocated Fts3SegReader */ +static int fts3AppendToNode( + Blob *pNode, /* Current node image to append to */ + Blob *pPrev, /* Buffer containing previous term written */ + const char *zTerm, /* New term to write */ + int nTerm, /* Size of zTerm in bytes */ + const char *aDoclist, /* Doclist (or NULL) to write */ + int nDoclist /* Size of aDoclist in bytes */ ){ - Fts3SegReader *pReader; /* Newly allocated SegReader object */ - int nExtra = 0; /* Bytes to allocate segment root node */ + int rc = SQLITE_OK; /* Return code */ + int bFirst = (pPrev->n==0); /* True if this is the first term written */ + int nPrefix; /* Size of term prefix in bytes */ + int nSuffix; /* Size of term suffix in bytes */ - assert( iStartLeaf<=iEndLeaf ); - if( iStartLeaf==0 ){ - nExtra = nRoot + FTS3_NODE_PADDING; - } + /* Node must have already been started. There must be a doclist for a + ** leaf node, and there must not be a doclist for an internal node. */ + assert( pNode->n>0 ); + assert( (pNode->a[0]=='\0')==(aDoclist!=0) ); - pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra); - if( !pReader ){ - return SQLITE_NOMEM; + blobGrowBuffer(pPrev, nTerm, &rc); + if( rc!=SQLITE_OK ) return rc; + + nPrefix = fts3PrefixCompress(pPrev->a, pPrev->n, zTerm, nTerm); + nSuffix = nTerm - nPrefix; + memcpy(pPrev->a, zTerm, nTerm); + pPrev->n = nTerm; + + if( bFirst==0 ){ + pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nPrefix); } - memset(pReader, 0, sizeof(Fts3SegReader)); - pReader->iIdx = iAge; - pReader->bLookup = bLookup!=0; - pReader->iStartBlock = iStartLeaf; - pReader->iLeafEndBlock = iEndLeaf; - pReader->iEndBlock = iEndBlock; + pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nSuffix); + memcpy(&pNode->a[pNode->n], &zTerm[nPrefix], nSuffix); + pNode->n += nSuffix; - if( nExtra ){ - /* The entire segment is stored in the root node. */ - pReader->aNode = (char *)&pReader[1]; - pReader->rootOnly = 1; - pReader->nNode = nRoot; - memcpy(pReader->aNode, zRoot, nRoot); - memset(&pReader->aNode[nRoot], 0, FTS3_NODE_PADDING); - }else{ - pReader->iCurrentBlock = iStartLeaf-1; + if( aDoclist ){ + pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nDoclist); + memcpy(&pNode->a[pNode->n], aDoclist, nDoclist); + pNode->n += nDoclist; } - *ppReader = pReader; - return SQLITE_OK; -} -/* -** This is a comparison function used as a qsort() callback when sorting -** an array of pending terms by term. This occurs as part of flushing -** the contents of the pending-terms hash table to the database. -*/ -static int SQLITE_CDECL fts3CompareElemByTerm( - const void *lhs, - const void *rhs -){ - char *z1 = fts3HashKey(*(Fts3HashElem **)lhs); - char *z2 = fts3HashKey(*(Fts3HashElem **)rhs); - int n1 = fts3HashKeysize(*(Fts3HashElem **)lhs); - int n2 = fts3HashKeysize(*(Fts3HashElem **)rhs); + assert( pNode->n<=pNode->nAlloc ); - int n = (n1aIndex */ - const char *zTerm, /* Term to search for */ - int nTerm, /* Size of buffer zTerm */ - int bPrefix, /* True for a prefix iterator */ - Fts3SegReader **ppReader /* OUT: SegReader for pending-terms */ +static int fts3IncrmergeAppend( + Fts3Table *p, /* Fts3 table handle */ + IncrmergeWriter *pWriter, /* Writer object */ + Fts3MultiSegReader *pCsr /* Cursor containing term and doclist */ ){ - Fts3SegReader *pReader = 0; /* Fts3SegReader object to return */ - Fts3HashElem *pE; /* Iterator variable */ - Fts3HashElem **aElem = 0; /* Array of term hash entries to scan */ - int nElem = 0; /* Size of array at aElem */ - int rc = SQLITE_OK; /* Return Code */ - Fts3Hash *pHash; - - pHash = &p->aIndex[iIndex].hPending; - if( bPrefix ){ - int nAlloc = 0; /* Size of allocated array at aElem */ + const char *zTerm = pCsr->zTerm; + int nTerm = pCsr->nTerm; + const char *aDoclist = pCsr->aDoclist; + int nDoclist = pCsr->nDoclist; + int rc = SQLITE_OK; /* Return code */ + int nSpace; /* Total space in bytes required on leaf */ + int nPrefix; /* Size of prefix shared with previous term */ + int nSuffix; /* Size of suffix (nTerm - nPrefix) */ + NodeWriter *pLeaf; /* Object used to write leaf nodes */ - for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){ - char *zKey = (char *)fts3HashKey(pE); - int nKey = fts3HashKeysize(pE); - if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){ - if( nElem==nAlloc ){ - Fts3HashElem **aElem2; - nAlloc += 16; - aElem2 = (Fts3HashElem **)sqlite3_realloc( - aElem, nAlloc*sizeof(Fts3HashElem *) - ); - if( !aElem2 ){ - rc = SQLITE_NOMEM; - nElem = 0; - break; - } - aElem = aElem2; - } + pLeaf = &pWriter->aNodeWriter[0]; + nPrefix = fts3PrefixCompress(pLeaf->key.a, pLeaf->key.n, zTerm, nTerm); + nSuffix = nTerm - nPrefix; - aElem[nElem++] = pE; - } - } + nSpace = sqlite3Fts3VarintLen(nPrefix); + nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; + nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; - /* If more than one term matches the prefix, sort the Fts3HashElem - ** objects in term order using qsort(). This uses the same comparison - ** callback as is used when flushing terms to disk. - */ - if( nElem>1 ){ - qsort(aElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm); - } + /* If the current block is not empty, and if adding this term/doclist + ** to the current block would make it larger than Fts3Table.nNodeSize + ** bytes, write this block out to the database. */ + if( pLeaf->block.n>0 && (pLeaf->block.n + nSpace)>p->nNodeSize ){ + rc = fts3WriteSegment(p, pLeaf->iBlock, pLeaf->block.a, pLeaf->block.n); + pWriter->nWork++; - }else{ - /* The query is a simple term lookup that matches at most one term in - ** the index. All that is required is a straight hash-lookup. + /* Add the current term to the parent node. The term added to the + ** parent must: ** - ** Because the stack address of pE may be accessed via the aElem pointer - ** below, the "Fts3HashElem *pE" must be declared so that it is valid - ** within this entire function, not just this "else{...}" block. + ** a) be greater than the largest term on the leaf node just written + ** to the database (still available in pLeaf->key), and + ** + ** b) be less than or equal to the term about to be added to the new + ** leaf node (zTerm/nTerm). + ** + ** In other words, it must be the prefix of zTerm 1 byte longer than + ** the common prefix (if any) of zTerm and pWriter->zTerm. */ - pE = fts3HashFindElem(pHash, zTerm, nTerm); - if( pE ){ - aElem = &pE; - nElem = 1; + if( rc==SQLITE_OK ){ + rc = fts3IncrmergePush(p, pWriter, zTerm, nPrefix+1); } + + /* Advance to the next output block */ + pLeaf->iBlock++; + pLeaf->key.n = 0; + pLeaf->block.n = 0; + + nSuffix = nTerm; + nSpace = 1; + nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; + nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; } - if( nElem>0 ){ - int nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *); - pReader = (Fts3SegReader *)sqlite3_malloc(nByte); - if( !pReader ){ - rc = SQLITE_NOMEM; - }else{ - memset(pReader, 0, nByte); - pReader->iIdx = 0x7FFFFFFF; - pReader->ppNextElem = (Fts3HashElem **)&pReader[1]; - memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *)); + pWriter->nLeafData += nSpace; + blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc); + if( rc==SQLITE_OK ){ + if( pLeaf->block.n==0 ){ + pLeaf->block.n = 1; + pLeaf->block.a[0] = '\0'; } + rc = fts3AppendToNode( + &pLeaf->block, &pLeaf->key, zTerm, nTerm, aDoclist, nDoclist + ); } - if( bPrefix ){ - sqlite3_free(aElem); - } - *ppReader = pReader; return rc; } /* -** Compare the entries pointed to by two Fts3SegReader structures. -** Comparison is as follows: -** -** 1) EOF is greater than not EOF. +** This function is called to release all dynamic resources held by the +** merge-writer object pWriter, and if no error has occurred, to flush +** all outstanding node buffers held by pWriter to disk. ** -** 2) The current terms (if any) are compared using memcmp(). If one -** term is a prefix of another, the longer term is considered the -** larger. +** If *pRc is not SQLITE_OK when this function is called, then no attempt +** is made to write any data to disk. Instead, this function serves only +** to release outstanding resources. ** -** 3) By segment age. An older segment is considered larger. +** Otherwise, if *pRc is initially SQLITE_OK and an error occurs while +** flushing buffers to disk, *pRc is set to an SQLite error code before +** returning. */ -static int fts3SegReaderCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ - int rc; - if( pLhs->aNode && pRhs->aNode ){ - int rc2 = pLhs->nTerm - pRhs->nTerm; - if( rc2<0 ){ - rc = memcmp(pLhs->zTerm, pRhs->zTerm, pLhs->nTerm); - }else{ - rc = memcmp(pLhs->zTerm, pRhs->zTerm, pRhs->nTerm); - } - if( rc==0 ){ - rc = rc2; - } - }else{ - rc = (pLhs->aNode==0) - (pRhs->aNode==0); - } - if( rc==0 ){ - rc = pRhs->iIdx - pLhs->iIdx; +static void fts3IncrmergeRelease( + Fts3Table *p, /* FTS3 table handle */ + IncrmergeWriter *pWriter, /* Merge-writer object */ + int *pRc /* IN/OUT: Error code */ +){ + int i; /* Used to iterate through non-root layers */ + int iRoot; /* Index of root in pWriter->aNodeWriter */ + NodeWriter *pRoot; /* NodeWriter for root node */ + int rc = *pRc; /* Error code */ + + /* Set iRoot to the index in pWriter->aNodeWriter[] of the output segment + ** root node. If the segment fits entirely on a single leaf node, iRoot + ** will be set to 0. If the root node is the parent of the leaves, iRoot + ** will be 1. And so on. */ + for(iRoot=FTS_MAX_APPENDABLE_HEIGHT-1; iRoot>=0; iRoot--){ + NodeWriter *pNode = &pWriter->aNodeWriter[iRoot]; + if( pNode->block.n>0 ) break; + assert( *pRc || pNode->block.nAlloc==0 ); + assert( *pRc || pNode->key.nAlloc==0 ); + sqlite3_free(pNode->block.a); + sqlite3_free(pNode->key.a); } - assert( rc!=0 ); - return rc; -} -/* -** A different comparison function for SegReader structures. In this -** version, it is assumed that each SegReader points to an entry in -** a doclist for identical terms. Comparison is made as follows: -** -** 1) EOF (end of doclist in this case) is greater than not EOF. -** -** 2) By current docid. -** -** 3) By segment age. An older segment is considered larger. -*/ -static int fts3SegReaderDoclistCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ - int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0); - if( rc==0 ){ - if( pLhs->iDocid==pRhs->iDocid ){ - rc = pRhs->iIdx - pLhs->iIdx; - }else{ - rc = (pLhs->iDocid > pRhs->iDocid) ? 1 : -1; + /* Empty output segment. This is a no-op. */ + if( iRoot<0 ) return; + + /* The entire output segment fits on a single node. Normally, this means + ** the node would be stored as a blob in the "root" column of the %_segdir + ** table. However, this is not permitted in this case. The problem is that + ** space has already been reserved in the %_segments table, and so the + ** start_block and end_block fields of the %_segdir table must be populated. + ** And, by design or by accident, released versions of FTS cannot handle + ** segments that fit entirely on the root node with start_block!=0. + ** + ** Instead, create a synthetic root node that contains nothing but a + ** pointer to the single content node. So that the segment consists of a + ** single leaf and a single interior (root) node. + ** + ** Todo: Better might be to defer allocating space in the %_segments + ** table until we are sure it is needed. + */ + if( iRoot==0 ){ + Blob *pBlock = &pWriter->aNodeWriter[1].block; + blobGrowBuffer(pBlock, 1 + FTS3_VARINT_MAX, &rc); + if( rc==SQLITE_OK ){ + pBlock->a[0] = 0x01; + pBlock->n = 1 + sqlite3Fts3PutVarint( + &pBlock->a[1], pWriter->aNodeWriter[0].iBlock + ); } + iRoot = 1; } - assert( pLhs->aNode && pRhs->aNode ); - return rc; -} -static int fts3SegReaderDoclistCmpRev(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ - int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0); - if( rc==0 ){ - if( pLhs->iDocid==pRhs->iDocid ){ - rc = pRhs->iIdx - pLhs->iIdx; - }else{ - rc = (pLhs->iDocid < pRhs->iDocid) ? 1 : -1; + pRoot = &pWriter->aNodeWriter[iRoot]; + + /* Flush all currently outstanding nodes to disk. */ + for(i=0; iaNodeWriter[i]; + if( pNode->block.n>0 && rc==SQLITE_OK ){ + rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n); } + sqlite3_free(pNode->block.a); + sqlite3_free(pNode->key.a); } - assert( pLhs->aNode && pRhs->aNode ); - return rc; -} -/* -** Compare the term that the Fts3SegReader object passed as the first argument -** points to with the term specified by arguments zTerm and nTerm. -** -** If the pSeg iterator is already at EOF, return 0. Otherwise, return -** -ve if the pSeg term is less than zTerm/nTerm, 0 if the two terms are -** equal, or +ve if the pSeg term is greater than zTerm/nTerm. -*/ -static int fts3SegReaderTermCmp( - Fts3SegReader *pSeg, /* Segment reader object */ - const char *zTerm, /* Term to compare to */ - int nTerm /* Size of term zTerm in bytes */ -){ - int res = 0; - if( pSeg->aNode ){ - if( pSeg->nTerm>nTerm ){ - res = memcmp(pSeg->zTerm, zTerm, nTerm); - }else{ - res = memcmp(pSeg->zTerm, zTerm, pSeg->nTerm); - } - if( res==0 ){ - res = pSeg->nTerm-nTerm; - } + /* Write the %_segdir record. */ + if( rc==SQLITE_OK ){ + rc = fts3WriteSegdir(p, + pWriter->iAbsLevel+1, /* level */ + pWriter->iIdx, /* idx */ + pWriter->iStart, /* start_block */ + pWriter->aNodeWriter[0].iBlock, /* leaves_end_block */ + pWriter->iEnd, /* end_block */ + (pWriter->bNoLeafData==0 ? pWriter->nLeafData : 0), /* end_block */ + pRoot->block.a, pRoot->block.n /* root */ + ); } - return res; + sqlite3_free(pRoot->block.a); + sqlite3_free(pRoot->key.a); + + *pRc = rc; } /* -** Argument apSegment is an array of nSegment elements. It is known that -** the final (nSegment-nSuspect) members are already in sorted order -** (according to the comparison function provided). This function shuffles -** the array around until all entries are in sorted order. +** Compare the term in buffer zLhs (size in bytes nLhs) with that in +** zRhs (size in bytes nRhs) using memcmp. If one term is a prefix of +** the other, it is considered to be smaller than the other. +** +** Return -ve if zLhs is smaller than zRhs, 0 if it is equal, or +ve +** if it is greater. */ -static void fts3SegReaderSort( - Fts3SegReader **apSegment, /* Array to sort entries of */ - int nSegment, /* Size of apSegment array */ - int nSuspect, /* Unsorted entry count */ - int (*xCmp)(Fts3SegReader *, Fts3SegReader *) /* Comparison function */ +static int fts3TermCmp( + const char *zLhs, int nLhs, /* LHS of comparison */ + const char *zRhs, int nRhs /* RHS of comparison */ ){ - int i; /* Iterator variable */ - - assert( nSuspect<=nSegment ); + int nCmp = MIN(nLhs, nRhs); + int res; - if( nSuspect==nSegment ) nSuspect--; - for(i=nSuspect-1; i>=0; i--){ - int j; - for(j=i; j<(nSegment-1); j++){ - Fts3SegReader *pTmp; - if( xCmp(apSegment[j], apSegment[j+1])<0 ) break; - pTmp = apSegment[j+1]; - apSegment[j+1] = apSegment[j]; - apSegment[j] = pTmp; - } - } + res = memcmp(zLhs, zRhs, nCmp); + if( res==0 ) res = nLhs - nRhs; -#ifndef NDEBUG - /* Check that the list really is sorted now. */ - for(i=0; i<(nSuspect-1); i++){ - assert( xCmp(apSegment[i], apSegment[i+1])<0 ); - } -#endif + return res; } -/* -** Insert a record into the %_segments table. -*/ -static int fts3WriteSegment( - Fts3Table *p, /* Virtual table handle */ - sqlite3_int64 iBlock, /* Block id for new block */ - char *z, /* Pointer to buffer containing block data */ - int n /* Size of buffer z in bytes */ -){ - sqlite3_stmt *pStmt; - int rc = fts3SqlStmt(p, SQL_INSERT_SEGMENTS, &pStmt, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pStmt, 1, iBlock); - sqlite3_bind_blob(pStmt, 2, z, n, SQLITE_STATIC); - sqlite3_step(pStmt); - rc = sqlite3_reset(pStmt); - } - return rc; -} /* -** Find the largest relative level number in the table. If successful, set -** *pnMax to this value and return SQLITE_OK. Otherwise, if an error occurs, -** set *pnMax to zero and return an SQLite error code. +** Query to see if the entry in the %_segments table with blockid iEnd is +** NULL. If no error occurs and the entry is NULL, set *pbRes 1 before +** returning. Otherwise, set *pbRes to 0. +** +** Or, if an error occurs while querying the database, return an SQLite +** error code. The final value of *pbRes is undefined in this case. +** +** This is used to test if a segment is an "appendable" segment. If it +** is, then a NULL entry has been inserted into the %_segments table +** with blockid %_segdir.end_block. */ -SQLITE_PRIVATE int sqlite3Fts3MaxLevel(Fts3Table *p, int *pnMax){ - int rc; - int mxLevel = 0; - sqlite3_stmt *pStmt = 0; +static int fts3IsAppendable(Fts3Table *p, sqlite3_int64 iEnd, int *pbRes){ + int bRes = 0; /* Result to set *pbRes to */ + sqlite3_stmt *pCheck = 0; /* Statement to query database with */ + int rc; /* Return code */ - rc = fts3SqlStmt(p, SQL_SELECT_MXLEVEL, &pStmt, 0); + rc = fts3SqlStmt(p, SQL_SEGMENT_IS_APPENDABLE, &pCheck, 0); if( rc==SQLITE_OK ){ - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - mxLevel = sqlite3_column_int(pStmt, 0); - } - rc = sqlite3_reset(pStmt); + sqlite3_bind_int64(pCheck, 1, iEnd); + if( SQLITE_ROW==sqlite3_step(pCheck) ) bRes = 1; + rc = sqlite3_reset(pCheck); } - *pnMax = mxLevel; + + *pbRes = bRes; return rc; } -/* -** Insert a record into the %_segdir table. +/* +** This function is called when initializing an incremental-merge operation. +** It checks if the existing segment with index value iIdx at absolute level +** (iAbsLevel+1) can be appended to by the incremental merge. If it can, the +** merge-writer object *pWriter is initialized to write to it. +** +** An existing segment can be appended to by an incremental merge if: +** +** * It was initially created as an appendable segment (with all required +** space pre-allocated), and +** +** * The first key read from the input (arguments zKey and nKey) is +** greater than the largest key currently stored in the potential +** output segment. */ -static int fts3WriteSegdir( - Fts3Table *p, /* Virtual table handle */ - sqlite3_int64 iLevel, /* Value for "level" field (absolute level) */ - int iIdx, /* Value for "idx" field */ - sqlite3_int64 iStartBlock, /* Value for "start_block" field */ - sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */ - sqlite3_int64 iEndBlock, /* Value for "end_block" field */ - sqlite3_int64 nLeafData, /* Bytes of leaf data in segment */ - char *zRoot, /* Blob value for "root" field */ - int nRoot /* Number of bytes in buffer zRoot */ +static int fts3IncrmergeLoad( + Fts3Table *p, /* Fts3 table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level of input segments */ + int iIdx, /* Index of candidate output segment */ + const char *zKey, /* First key to write */ + int nKey, /* Number of bytes in nKey */ + IncrmergeWriter *pWriter /* Populate this object */ ){ - sqlite3_stmt *pStmt; - int rc = fts3SqlStmt(p, SQL_INSERT_SEGDIR, &pStmt, 0); + int rc; /* Return code */ + sqlite3_stmt *pSelect = 0; /* SELECT to read %_segdir entry */ + + rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pSelect, 0); if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pStmt, 1, iLevel); - sqlite3_bind_int(pStmt, 2, iIdx); - sqlite3_bind_int64(pStmt, 3, iStartBlock); - sqlite3_bind_int64(pStmt, 4, iLeafEndBlock); - if( nLeafData==0 ){ - sqlite3_bind_int64(pStmt, 5, iEndBlock); + sqlite3_int64 iStart = 0; /* Value of %_segdir.start_block */ + sqlite3_int64 iLeafEnd = 0; /* Value of %_segdir.leaves_end_block */ + sqlite3_int64 iEnd = 0; /* Value of %_segdir.end_block */ + const char *aRoot = 0; /* Pointer to %_segdir.root buffer */ + int nRoot = 0; /* Size of aRoot[] in bytes */ + int rc2; /* Return code from sqlite3_reset() */ + int bAppendable = 0; /* Set to true if segment is appendable */ + + /* Read the %_segdir entry for index iIdx absolute level (iAbsLevel+1) */ + sqlite3_bind_int64(pSelect, 1, iAbsLevel+1); + sqlite3_bind_int(pSelect, 2, iIdx); + if( sqlite3_step(pSelect)==SQLITE_ROW ){ + iStart = sqlite3_column_int64(pSelect, 1); + iLeafEnd = sqlite3_column_int64(pSelect, 2); + fts3ReadEndBlockField(pSelect, 3, &iEnd, &pWriter->nLeafData); + if( pWriter->nLeafData<0 ){ + pWriter->nLeafData = pWriter->nLeafData * -1; + } + pWriter->bNoLeafData = (pWriter->nLeafData==0); + nRoot = sqlite3_column_bytes(pSelect, 4); + aRoot = sqlite3_column_blob(pSelect, 4); }else{ - char *zEnd = sqlite3_mprintf("%lld %lld", iEndBlock, nLeafData); - if( !zEnd ) return SQLITE_NOMEM; - sqlite3_bind_text(pStmt, 5, zEnd, -1, sqlite3_free); + return sqlite3_reset(pSelect); } - sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC); - sqlite3_step(pStmt); - rc = sqlite3_reset(pStmt); - } - return rc; -} -/* -** Return the size of the common prefix (if any) shared by zPrev and -** zNext, in bytes. For example, -** -** fts3PrefixCompress("abc", 3, "abcdef", 6) // returns 3 -** fts3PrefixCompress("abX", 3, "abcdef", 6) // returns 2 -** fts3PrefixCompress("abX", 3, "Xbcdef", 6) // returns 0 -*/ -static int fts3PrefixCompress( - const char *zPrev, /* Buffer containing previous term */ - int nPrev, /* Size of buffer zPrev in bytes */ - const char *zNext, /* Buffer containing next term */ - int nNext /* Size of buffer zNext in bytes */ -){ - int n; - UNUSED_PARAMETER(nNext); - for(n=0; nnData; /* Current size of node in bytes */ - int nReq = nData; /* Required space after adding zTerm */ - int nPrefix; /* Number of bytes of prefix compression */ - int nSuffix; /* Suffix length */ + rc = sqlite3Fts3ReadBlock(p, iLeafEnd, &aLeaf, &nLeaf, 0); + if( rc==SQLITE_OK ){ + NodeReader reader; + for(rc = nodeReaderInit(&reader, aLeaf, nLeaf); + rc==SQLITE_OK && reader.aNode; + rc = nodeReaderNext(&reader) + ){ + assert( reader.aNode ); + } + if( fts3TermCmp(zKey, nKey, reader.term.a, reader.term.n)<=0 ){ + bAppendable = 0; + } + nodeReaderRelease(&reader); + } + sqlite3_free(aLeaf); + } - nPrefix = fts3PrefixCompress(pTree->zTerm, pTree->nTerm, zTerm, nTerm); - nSuffix = nTerm-nPrefix; + if( rc==SQLITE_OK && bAppendable ){ + /* It is possible to append to this segment. Set up the IncrmergeWriter + ** object to do so. */ + int i; + int nHeight = (int)aRoot[0]; + NodeWriter *pNode; - nReq += sqlite3Fts3VarintLen(nPrefix)+sqlite3Fts3VarintLen(nSuffix)+nSuffix; - if( nReq<=p->nNodeSize || !pTree->zTerm ){ + pWriter->nLeafEst = (int)((iEnd - iStart) + 1)/FTS_MAX_APPENDABLE_HEIGHT; + pWriter->iStart = iStart; + pWriter->iEnd = iEnd; + pWriter->iAbsLevel = iAbsLevel; + pWriter->iIdx = iIdx; - if( nReq>p->nNodeSize ){ - /* An unusual case: this is the first term to be added to the node - ** and the static node buffer (p->nNodeSize bytes) is not large - ** enough. Use a separately malloced buffer instead This wastes - ** p->nNodeSize bytes, but since this scenario only comes about when - ** the database contain two terms that share a prefix of almost 2KB, - ** this is not expected to be a serious problem. - */ - assert( pTree->aData==(char *)&pTree[1] ); - pTree->aData = (char *)sqlite3_malloc(nReq); - if( !pTree->aData ){ - return SQLITE_NOMEM; - } + for(i=nHeight+1; iaNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst; } - if( pTree->zTerm ){ - /* There is no prefix-length field for first term in a node */ - nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nPrefix); + pNode = &pWriter->aNodeWriter[nHeight]; + pNode->iBlock = pWriter->iStart + pWriter->nLeafEst*nHeight; + blobGrowBuffer(&pNode->block, MAX(nRoot, p->nNodeSize), &rc); + if( rc==SQLITE_OK ){ + memcpy(pNode->block.a, aRoot, nRoot); + pNode->block.n = nRoot; } - nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nSuffix); - memcpy(&pTree->aData[nData], &zTerm[nPrefix], nSuffix); - pTree->nData = nData + nSuffix; - pTree->nEntry++; + for(i=nHeight; i>=0 && rc==SQLITE_OK; i--){ + NodeReader reader; + pNode = &pWriter->aNodeWriter[i]; - if( isCopyTerm ){ - if( pTree->nMalloczMalloc, nTerm*2); - if( !zNew ){ - return SQLITE_NOMEM; + rc = nodeReaderInit(&reader, pNode->block.a, pNode->block.n); + while( reader.aNode && rc==SQLITE_OK ) rc = nodeReaderNext(&reader); + blobGrowBuffer(&pNode->key, reader.term.n, &rc); + if( rc==SQLITE_OK ){ + memcpy(pNode->key.a, reader.term.a, reader.term.n); + pNode->key.n = reader.term.n; + if( i>0 ){ + char *aBlock = 0; + int nBlock = 0; + pNode = &pWriter->aNodeWriter[i-1]; + pNode->iBlock = reader.iChild; + rc = sqlite3Fts3ReadBlock(p, reader.iChild, &aBlock, &nBlock, 0); + blobGrowBuffer(&pNode->block, MAX(nBlock, p->nNodeSize), &rc); + if( rc==SQLITE_OK ){ + memcpy(pNode->block.a, aBlock, nBlock); + pNode->block.n = nBlock; + } + sqlite3_free(aBlock); } - pTree->nMalloc = nTerm*2; - pTree->zMalloc = zNew; } - pTree->zTerm = pTree->zMalloc; - memcpy(pTree->zTerm, zTerm, nTerm); - pTree->nTerm = nTerm; - }else{ - pTree->zTerm = (char *)zTerm; - pTree->nTerm = nTerm; + nodeReaderRelease(&reader); } - return SQLITE_OK; } - } - - /* If control flows to here, it was not possible to append zTerm to the - ** current node. Create a new node (a right-sibling of the current node). - ** If this is the first node in the tree, the term is added to it. - ** - ** Otherwise, the term is not added to the new node, it is left empty for - ** now. Instead, the term is inserted into the parent of pTree. If pTree - ** has no parent, one is created here. - */ - pNew = (SegmentNode *)sqlite3_malloc(sizeof(SegmentNode) + p->nNodeSize); - if( !pNew ){ - return SQLITE_NOMEM; - } - memset(pNew, 0, sizeof(SegmentNode)); - pNew->nData = 1 + FTS3_VARINT_MAX; - pNew->aData = (char *)&pNew[1]; - if( pTree ){ - SegmentNode *pParent = pTree->pParent; - rc = fts3NodeAddTerm(p, &pParent, isCopyTerm, zTerm, nTerm); - if( pTree->pParent==0 ){ - pTree->pParent = pParent; - } - pTree->pRight = pNew; - pNew->pLeftmost = pTree->pLeftmost; - pNew->pParent = pParent; - pNew->zMalloc = pTree->zMalloc; - pNew->nMalloc = pTree->nMalloc; - pTree->zMalloc = 0; - }else{ - pNew->pLeftmost = pNew; - rc = fts3NodeAddTerm(p, &pNew, isCopyTerm, zTerm, nTerm); + rc2 = sqlite3_reset(pSelect); + if( rc==SQLITE_OK ) rc = rc2; } - *ppTree = pNew; return rc; } /* -** Helper function for fts3NodeWrite(). -*/ -static int fts3TreeFinishNode( - SegmentNode *pTree, - int iHeight, - sqlite3_int64 iLeftChild -){ - int nStart; - assert( iHeight>=1 && iHeight<128 ); - nStart = FTS3_VARINT_MAX - sqlite3Fts3VarintLen(iLeftChild); - pTree->aData[nStart] = (char)iHeight; - sqlite3Fts3PutVarint(&pTree->aData[nStart+1], iLeftChild); - return nStart; -} - -/* -** Write the buffer for the segment node pTree and all of its peers to the -** database. Then call this function recursively to write the parent of -** pTree and its peers to the database. -** -** Except, if pTree is a root node, do not write it to the database. Instead, -** set output variables *paRoot and *pnRoot to contain the root node. +** Determine the largest segment index value that exists within absolute +** level iAbsLevel+1. If no error occurs, set *piIdx to this value plus +** one before returning SQLITE_OK. Or, if there are no segments at all +** within level iAbsLevel, set *piIdx to zero. ** -** If successful, SQLITE_OK is returned and output variable *piLast is -** set to the largest blockid written to the database (or zero if no -** blocks were written to the db). Otherwise, an SQLite error code is -** returned. +** If an error occurs, return an SQLite error code. The final value of +** *piIdx is undefined in this case. */ -static int fts3NodeWrite( - Fts3Table *p, /* Virtual table handle */ - SegmentNode *pTree, /* SegmentNode handle */ - int iHeight, /* Height of this node in tree */ - sqlite3_int64 iLeaf, /* Block id of first leaf node */ - sqlite3_int64 iFree, /* Block id of next free slot in %_segments */ - sqlite3_int64 *piLast, /* OUT: Block id of last entry written */ - char **paRoot, /* OUT: Data for root node */ - int *pnRoot /* OUT: Size of root node in bytes */ +static int fts3IncrmergeOutputIdx( + Fts3Table *p, /* FTS Table handle */ + sqlite3_int64 iAbsLevel, /* Absolute index of input segments */ + int *piIdx /* OUT: Next free index at iAbsLevel+1 */ ){ - int rc = SQLITE_OK; + int rc; + sqlite3_stmt *pOutputIdx = 0; /* SQL used to find output index */ - if( !pTree->pParent ){ - /* Root node of the tree. */ - int nStart = fts3TreeFinishNode(pTree, iHeight, iLeaf); - *piLast = iFree-1; - *pnRoot = pTree->nData - nStart; - *paRoot = &pTree->aData[nStart]; - }else{ - SegmentNode *pIter; - sqlite3_int64 iNextFree = iFree; - sqlite3_int64 iNextLeaf = iLeaf; - for(pIter=pTree->pLeftmost; pIter && rc==SQLITE_OK; pIter=pIter->pRight){ - int nStart = fts3TreeFinishNode(pIter, iHeight, iNextLeaf); - int nWrite = pIter->nData - nStart; - - rc = fts3WriteSegment(p, iNextFree, &pIter->aData[nStart], nWrite); - iNextFree++; - iNextLeaf += (pIter->nEntry+1); - } - if( rc==SQLITE_OK ){ - assert( iNextLeaf==iFree ); - rc = fts3NodeWrite( - p, pTree->pParent, iHeight+1, iFree, iNextFree, piLast, paRoot, pnRoot - ); - } + rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pOutputIdx, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pOutputIdx, 1, iAbsLevel+1); + sqlite3_step(pOutputIdx); + *piIdx = sqlite3_column_int(pOutputIdx, 0); + rc = sqlite3_reset(pOutputIdx); } return rc; } -/* -** Free all memory allocations associated with the tree pTree. -*/ -static void fts3NodeFree(SegmentNode *pTree){ - if( pTree ){ - SegmentNode *p = pTree->pLeftmost; - fts3NodeFree(p->pParent); - while( p ){ - SegmentNode *pRight = p->pRight; - if( p->aData!=(char *)&p[1] ){ - sqlite3_free(p->aData); - } - assert( pRight==0 || p->zMalloc==0 ); - sqlite3_free(p->zMalloc); - sqlite3_free(p); - p = pRight; - } - } -} - -/* -** Add a term to the segment being constructed by the SegmentWriter object -** *ppWriter. When adding the first term to a segment, *ppWriter should -** be passed NULL. This function will allocate a new SegmentWriter object -** and return it via the input/output variable *ppWriter in this case. +/* +** Allocate an appendable output segment on absolute level iAbsLevel+1 +** with idx value iIdx. ** -** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. +** In the %_segdir table, a segment is defined by the values in three +** columns: +** +** start_block +** leaves_end_block +** end_block +** +** When an appendable segment is allocated, it is estimated that the +** maximum number of leaf blocks that may be required is the sum of the +** number of leaf blocks consumed by the input segments, plus the number +** of input segments, multiplied by two. This value is stored in stack +** variable nLeafEst. +** +** A total of 16*nLeafEst blocks are allocated when an appendable segment +** is created ((1 + end_block - start_block)==16*nLeafEst). The contiguous +** array of leaf nodes starts at the first block allocated. The array +** of interior nodes that are parents of the leaf nodes start at block +** (start_block + (1 + end_block - start_block) / 16). And so on. +** +** In the actual code below, the value "16" is replaced with the +** pre-processor macro FTS_MAX_APPENDABLE_HEIGHT. */ -static int fts3SegWriterAdd( - Fts3Table *p, /* Virtual table handle */ - SegmentWriter **ppWriter, /* IN/OUT: SegmentWriter handle */ - int isCopyTerm, /* True if buffer zTerm must be copied */ - const char *zTerm, /* Pointer to buffer containing term */ - int nTerm, /* Size of term in bytes */ - const char *aDoclist, /* Pointer to buffer containing doclist */ - int nDoclist /* Size of doclist in bytes */ +static int fts3IncrmergeWriter( + Fts3Table *p, /* Fts3 table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level of input segments */ + int iIdx, /* Index of new output segment */ + Fts3MultiSegReader *pCsr, /* Cursor that data will be read from */ + IncrmergeWriter *pWriter /* Populate this object */ ){ - int nPrefix; /* Size of term prefix in bytes */ - int nSuffix; /* Size of term suffix in bytes */ - int nReq; /* Number of bytes required on leaf page */ - int nData; - SegmentWriter *pWriter = *ppWriter; - - if( !pWriter ){ - int rc; - sqlite3_stmt *pStmt; - - /* Allocate the SegmentWriter structure */ - pWriter = (SegmentWriter *)sqlite3_malloc(sizeof(SegmentWriter)); - if( !pWriter ) return SQLITE_NOMEM; - memset(pWriter, 0, sizeof(SegmentWriter)); - *ppWriter = pWriter; - - /* Allocate a buffer in which to accumulate data */ - pWriter->aData = (char *)sqlite3_malloc(p->nNodeSize); - if( !pWriter->aData ) return SQLITE_NOMEM; - pWriter->nSize = p->nNodeSize; + int rc; /* Return Code */ + int i; /* Iterator variable */ + int nLeafEst = 0; /* Blocks allocated for leaf nodes */ + sqlite3_stmt *pLeafEst = 0; /* SQL used to determine nLeafEst */ + sqlite3_stmt *pFirstBlock = 0; /* SQL used to determine first block */ - /* Find the next free blockid in the %_segments table */ - rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pStmt, 0); - if( rc!=SQLITE_OK ) return rc; - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - pWriter->iFree = sqlite3_column_int64(pStmt, 0); - pWriter->iFirst = pWriter->iFree; + /* Calculate nLeafEst. */ + rc = fts3SqlStmt(p, SQL_MAX_LEAF_NODE_ESTIMATE, &pLeafEst, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pLeafEst, 1, iAbsLevel); + sqlite3_bind_int64(pLeafEst, 2, pCsr->nSegment); + if( SQLITE_ROW==sqlite3_step(pLeafEst) ){ + nLeafEst = sqlite3_column_int(pLeafEst, 0); } - rc = sqlite3_reset(pStmt); - if( rc!=SQLITE_OK ) return rc; + rc = sqlite3_reset(pLeafEst); } - nData = pWriter->nData; - - nPrefix = fts3PrefixCompress(pWriter->zTerm, pWriter->nTerm, zTerm, nTerm); - nSuffix = nTerm-nPrefix; - - /* Figure out how many bytes are required by this new entry */ - nReq = sqlite3Fts3VarintLen(nPrefix) + /* varint containing prefix size */ - sqlite3Fts3VarintLen(nSuffix) + /* varint containing suffix size */ - nSuffix + /* Term suffix */ - sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */ - nDoclist; /* Doclist data */ - - if( nData>0 && nData+nReq>p->nNodeSize ){ - int rc; - - /* The current leaf node is full. Write it out to the database. */ - rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, nData); - if( rc!=SQLITE_OK ) return rc; - p->nLeafAdd++; - - /* Add the current term to the interior node tree. The term added to - ** the interior tree must: - ** - ** a) be greater than the largest term on the leaf node just written - ** to the database (still available in pWriter->zTerm), and - ** - ** b) be less than or equal to the term about to be added to the new - ** leaf node (zTerm/nTerm). - ** - ** In other words, it must be the prefix of zTerm 1 byte longer than - ** the common prefix (if any) of zTerm and pWriter->zTerm. - */ - assert( nPrefixpTree, isCopyTerm, zTerm, nPrefix+1); - if( rc!=SQLITE_OK ) return rc; - - nData = 0; - pWriter->nTerm = 0; + if( rc!=SQLITE_OK ) return rc; - nPrefix = 0; - nSuffix = nTerm; - nReq = 1 + /* varint containing prefix size */ - sqlite3Fts3VarintLen(nTerm) + /* varint containing suffix size */ - nTerm + /* Term suffix */ - sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */ - nDoclist; /* Doclist data */ + /* Calculate the first block to use in the output segment */ + rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pFirstBlock, 0); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pFirstBlock) ){ + pWriter->iStart = sqlite3_column_int64(pFirstBlock, 0); + pWriter->iEnd = pWriter->iStart - 1; + pWriter->iEnd += nLeafEst * FTS_MAX_APPENDABLE_HEIGHT; + } + rc = sqlite3_reset(pFirstBlock); } + if( rc!=SQLITE_OK ) return rc; - /* Increase the total number of bytes written to account for the new entry. */ - pWriter->nLeafData += nReq; + /* Insert the marker in the %_segments table to make sure nobody tries + ** to steal the space just allocated. This is also used to identify + ** appendable segments. */ + rc = fts3WriteSegment(p, pWriter->iEnd, 0, 0); + if( rc!=SQLITE_OK ) return rc; - /* If the buffer currently allocated is too small for this entry, realloc - ** the buffer to make it large enough. - */ - if( nReq>pWriter->nSize ){ - char *aNew = sqlite3_realloc(pWriter->aData, nReq); - if( !aNew ) return SQLITE_NOMEM; - pWriter->aData = aNew; - pWriter->nSize = nReq; + pWriter->iAbsLevel = iAbsLevel; + pWriter->nLeafEst = nLeafEst; + pWriter->iIdx = iIdx; + + /* Set up the array of NodeWriter objects */ + for(i=0; iaNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst; } - assert( nData+nReq<=pWriter->nSize ); + return SQLITE_OK; +} - /* Append the prefix-compressed term and doclist to the buffer. */ - nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nPrefix); - nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nSuffix); - memcpy(&pWriter->aData[nData], &zTerm[nPrefix], nSuffix); - nData += nSuffix; - nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nDoclist); - memcpy(&pWriter->aData[nData], aDoclist, nDoclist); - pWriter->nData = nData + nDoclist; +/* +** Remove an entry from the %_segdir table. This involves running the +** following two statements: +** +** DELETE FROM %_segdir WHERE level = :iAbsLevel AND idx = :iIdx +** UPDATE %_segdir SET idx = idx - 1 WHERE level = :iAbsLevel AND idx > :iIdx +** +** The DELETE statement removes the specific %_segdir level. The UPDATE +** statement ensures that the remaining segments have contiguously allocated +** idx values. +*/ +static int fts3RemoveSegdirEntry( + Fts3Table *p, /* FTS3 table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level to delete from */ + int iIdx /* Index of %_segdir entry to delete */ +){ + int rc; /* Return code */ + sqlite3_stmt *pDelete = 0; /* DELETE statement */ - /* Save the current term so that it can be used to prefix-compress the next. - ** If the isCopyTerm parameter is true, then the buffer pointed to by - ** zTerm is transient, so take a copy of the term data. Otherwise, just - ** store a copy of the pointer. - */ - if( isCopyTerm ){ - if( nTerm>pWriter->nMalloc ){ - char *zNew = sqlite3_realloc(pWriter->zMalloc, nTerm*2); - if( !zNew ){ - return SQLITE_NOMEM; - } - pWriter->nMalloc = nTerm*2; - pWriter->zMalloc = zNew; - pWriter->zTerm = zNew; - } - assert( pWriter->zTerm==pWriter->zMalloc ); - memcpy(pWriter->zTerm, zTerm, nTerm); - }else{ - pWriter->zTerm = (char *)zTerm; + rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_ENTRY, &pDelete, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pDelete, 1, iAbsLevel); + sqlite3_bind_int(pDelete, 2, iIdx); + sqlite3_step(pDelete); + rc = sqlite3_reset(pDelete); } - pWriter->nTerm = nTerm; - return SQLITE_OK; + return rc; } /* -** Flush all data associated with the SegmentWriter object pWriter to the -** database. This function must be called after all terms have been added -** to the segment using fts3SegWriterAdd(). If successful, SQLITE_OK is -** returned. Otherwise, an SQLite error code. +** One or more segments have just been removed from absolute level iAbsLevel. +** Update the 'idx' values of the remaining segments in the level so that +** the idx values are a contiguous sequence starting from 0. */ -static int fts3SegWriterFlush( - Fts3Table *p, /* Virtual table handle */ - SegmentWriter *pWriter, /* SegmentWriter to flush to the db */ - sqlite3_int64 iLevel, /* Value for 'level' column of %_segdir */ - int iIdx /* Value for 'idx' column of %_segdir */ +static int fts3RepackSegdirLevel( + Fts3Table *p, /* FTS3 table handle */ + sqlite3_int64 iAbsLevel /* Absolute level to repack */ ){ int rc; /* Return code */ - if( pWriter->pTree ){ - sqlite3_int64 iLast = 0; /* Largest block id written to database */ - sqlite3_int64 iLastLeaf; /* Largest leaf block id written to db */ - char *zRoot = NULL; /* Pointer to buffer containing root node */ - int nRoot = 0; /* Size of buffer zRoot */ + int *aIdx = 0; /* Array of remaining idx values */ + int nIdx = 0; /* Valid entries in aIdx[] */ + int nAlloc = 0; /* Allocated size of aIdx[] */ + int i; /* Iterator variable */ + sqlite3_stmt *pSelect = 0; /* Select statement to read idx values */ + sqlite3_stmt *pUpdate = 0; /* Update statement to modify idx values */ - iLastLeaf = pWriter->iFree; - rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, pWriter->nData); - if( rc==SQLITE_OK ){ - rc = fts3NodeWrite(p, pWriter->pTree, 1, - pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot); + rc = fts3SqlStmt(p, SQL_SELECT_INDEXES, &pSelect, 0); + if( rc==SQLITE_OK ){ + int rc2; + sqlite3_bind_int64(pSelect, 1, iAbsLevel); + while( SQLITE_ROW==sqlite3_step(pSelect) ){ + if( nIdx>=nAlloc ){ + int *aNew; + nAlloc += 16; + aNew = sqlite3_realloc(aIdx, nAlloc*sizeof(int)); + if( !aNew ){ + rc = SQLITE_NOMEM; + break; + } + aIdx = aNew; + } + aIdx[nIdx++] = sqlite3_column_int(pSelect, 0); } - if( rc==SQLITE_OK ){ - rc = fts3WriteSegdir(p, iLevel, iIdx, - pWriter->iFirst, iLastLeaf, iLast, pWriter->nLeafData, zRoot, nRoot); + rc2 = sqlite3_reset(pSelect); + if( rc==SQLITE_OK ) rc = rc2; + } + + if( rc==SQLITE_OK ){ + rc = fts3SqlStmt(p, SQL_SHIFT_SEGDIR_ENTRY, &pUpdate, 0); + } + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pUpdate, 2, iAbsLevel); + } + + assert( p->bIgnoreSavepoint==0 ); + p->bIgnoreSavepoint = 1; + for(i=0; rc==SQLITE_OK && inLeafData, pWriter->aData, pWriter->nData); } - p->nLeafAdd++; + p->bIgnoreSavepoint = 0; + + sqlite3_free(aIdx); return rc; } -/* -** Release all memory held by the SegmentWriter object passed as the -** first argument. -*/ -static void fts3SegWriterFree(SegmentWriter *pWriter){ - if( pWriter ){ - sqlite3_free(pWriter->aData); - sqlite3_free(pWriter->zMalloc); - fts3NodeFree(pWriter->pTree); - sqlite3_free(pWriter); +static void fts3StartNode(Blob *pNode, int iHeight, sqlite3_int64 iChild){ + pNode->a[0] = (char)iHeight; + if( iChild ){ + assert( pNode->nAlloc>=1+sqlite3Fts3VarintLen(iChild) ); + pNode->n = 1 + sqlite3Fts3PutVarint(&pNode->a[1], iChild); + }else{ + assert( pNode->nAlloc>=1 ); + pNode->n = 1; } } /* -** The first value in the apVal[] array is assumed to contain an integer. -** This function tests if there exist any documents with docid values that -** are different from that integer. i.e. if deleting the document with docid -** pRowid would mean the FTS3 table were empty. +** The first two arguments are a pointer to and the size of a segment b-tree +** node. The node may be a leaf or an internal node. ** -** If successful, *pisEmpty is set to true if the table is empty except for -** document pRowid, or false otherwise, and SQLITE_OK is returned. If an -** error occurs, an SQLite error code is returned. +** This function creates a new node image in blob object *pNew by copying +** all terms that are greater than or equal to zTerm/nTerm (for leaf nodes) +** or greater than zTerm/nTerm (for internal nodes) from aNode/nNode. */ -static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){ - sqlite3_stmt *pStmt; - int rc; - if( p->zContentTbl ){ - /* If using the content=xxx option, assume the table is never empty */ - *pisEmpty = 0; - rc = SQLITE_OK; - }else{ - rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, &pRowid); - if( rc==SQLITE_OK ){ - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - *pisEmpty = sqlite3_column_int(pStmt, 0); - } - rc = sqlite3_reset(pStmt); +static int fts3TruncateNode( + const char *aNode, /* Current node image */ + int nNode, /* Size of aNode in bytes */ + Blob *pNew, /* OUT: Write new node image here */ + const char *zTerm, /* Omit all terms smaller than this */ + int nTerm, /* Size of zTerm in bytes */ + sqlite3_int64 *piBlock /* OUT: Block number in next layer down */ +){ + NodeReader reader; /* Reader object */ + Blob prev = {0, 0, 0}; /* Previous term written to new node */ + int rc = SQLITE_OK; /* Return code */ + int bLeaf = aNode[0]=='\0'; /* True for a leaf node */ + + /* Allocate required output space */ + blobGrowBuffer(pNew, nNode, &rc); + if( rc!=SQLITE_OK ) return rc; + pNew->n = 0; + + /* Populate new node buffer */ + for(rc = nodeReaderInit(&reader, aNode, nNode); + rc==SQLITE_OK && reader.aNode; + rc = nodeReaderNext(&reader) + ){ + if( pNew->n==0 ){ + int res = fts3TermCmp(reader.term.a, reader.term.n, zTerm, nTerm); + if( res<0 || (bLeaf==0 && res==0) ) continue; + fts3StartNode(pNew, (int)aNode[0], reader.iChild); + *piBlock = reader.iChild; } + rc = fts3AppendToNode( + pNew, &prev, reader.term.a, reader.term.n, + reader.aDoclist, reader.nDoclist + ); + if( rc!=SQLITE_OK ) break; + } + if( pNew->n==0 ){ + fts3StartNode(pNew, (int)aNode[0], reader.iChild); + *piBlock = reader.iChild; } + assert( pNew->n<=pNew->nAlloc ); + + nodeReaderRelease(&reader); + sqlite3_free(prev.a); return rc; } /* -** Set *pnMax to the largest segment level in the database for the index -** iIndex. -** -** Segment levels are stored in the 'level' column of the %_segdir table. +** Remove all terms smaller than zTerm/nTerm from segment iIdx in absolute +** level iAbsLevel. This may involve deleting entries from the %_segments +** table, and modifying existing entries in both the %_segments and %_segdir +** tables. ** -** Return SQLITE_OK if successful, or an SQLite error code if not. +** SQLITE_OK is returned if the segment is updated successfully. Or an +** SQLite error code otherwise. */ -static int fts3SegmentMaxLevel( - Fts3Table *p, - int iLangid, - int iIndex, - sqlite3_int64 *pnMax +static int fts3TruncateSegment( + Fts3Table *p, /* FTS3 table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level of segment to modify */ + int iIdx, /* Index within level of segment to modify */ + const char *zTerm, /* Remove terms smaller than this */ + int nTerm /* Number of bytes in buffer zTerm */ ){ - sqlite3_stmt *pStmt; - int rc; - assert( iIndex>=0 && iIndexnIndex ); + int rc = SQLITE_OK; /* Return code */ + Blob root = {0,0,0}; /* New root page image */ + Blob block = {0,0,0}; /* Buffer used for any other block */ + sqlite3_int64 iBlock = 0; /* Block id */ + sqlite3_int64 iNewStart = 0; /* New value for iStartBlock */ + sqlite3_int64 iOldStart = 0; /* Old value for iStartBlock */ + sqlite3_stmt *pFetch = 0; /* Statement used to fetch segdir */ - /* Set pStmt to the compiled version of: - ** - ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? - ** - ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). - */ - rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); - if( rc!=SQLITE_OK ) return rc; - sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); - sqlite3_bind_int64(pStmt, 2, - getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) - ); - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - *pnMax = sqlite3_column_int64(pStmt, 0); + rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pFetch, 0); + if( rc==SQLITE_OK ){ + int rc2; /* sqlite3_reset() return code */ + sqlite3_bind_int64(pFetch, 1, iAbsLevel); + sqlite3_bind_int(pFetch, 2, iIdx); + if( SQLITE_ROW==sqlite3_step(pFetch) ){ + const char *aRoot = sqlite3_column_blob(pFetch, 4); + int nRoot = sqlite3_column_bytes(pFetch, 4); + iOldStart = sqlite3_column_int64(pFetch, 1); + rc = fts3TruncateNode(aRoot, nRoot, &root, zTerm, nTerm, &iBlock); + } + rc2 = sqlite3_reset(pFetch); + if( rc==SQLITE_OK ) rc = rc2; } - return sqlite3_reset(pStmt); -} - -/* -** iAbsLevel is an absolute level that may be assumed to exist within -** the database. This function checks if it is the largest level number -** within its index. Assuming no error occurs, *pbMax is set to 1 if -** iAbsLevel is indeed the largest level, or 0 otherwise, and SQLITE_OK -** is returned. If an error occurs, an error code is returned and the -** final value of *pbMax is undefined. -*/ -static int fts3SegmentIsMaxLevel(Fts3Table *p, i64 iAbsLevel, int *pbMax){ - /* Set pStmt to the compiled version of: - ** - ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? - ** - ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). - */ - sqlite3_stmt *pStmt; - int rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); - if( rc!=SQLITE_OK ) return rc; - sqlite3_bind_int64(pStmt, 1, iAbsLevel+1); - sqlite3_bind_int64(pStmt, 2, - ((iAbsLevel/FTS3_SEGDIR_MAXLEVEL)+1) * FTS3_SEGDIR_MAXLEVEL - ); + while( rc==SQLITE_OK && iBlock ){ + char *aBlock = 0; + int nBlock = 0; + iNewStart = iBlock; - *pbMax = 0; - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - *pbMax = sqlite3_column_type(pStmt, 0)==SQLITE_NULL; + rc = sqlite3Fts3ReadBlock(p, iBlock, &aBlock, &nBlock, 0); + if( rc==SQLITE_OK ){ + rc = fts3TruncateNode(aBlock, nBlock, &block, zTerm, nTerm, &iBlock); + } + if( rc==SQLITE_OK ){ + rc = fts3WriteSegment(p, iNewStart, block.a, block.n); + } + sqlite3_free(aBlock); } - return sqlite3_reset(pStmt); -} -/* -** Delete all entries in the %_segments table associated with the segment -** opened with seg-reader pSeg. This function does not affect the contents -** of the %_segdir table. -*/ -static int fts3DeleteSegment( - Fts3Table *p, /* FTS table handle */ - Fts3SegReader *pSeg /* Segment to delete */ -){ - int rc = SQLITE_OK; /* Return code */ - if( pSeg->iStartBlock ){ - sqlite3_stmt *pDelete; /* SQL statement to delete rows */ - rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDelete, 0); + /* Variable iNewStart now contains the first valid leaf node. */ + if( rc==SQLITE_OK && iNewStart ){ + sqlite3_stmt *pDel = 0; + rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDel, 0); if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pDelete, 1, pSeg->iStartBlock); - sqlite3_bind_int64(pDelete, 2, pSeg->iEndBlock); - sqlite3_step(pDelete); - rc = sqlite3_reset(pDelete); + sqlite3_bind_int64(pDel, 1, iOldStart); + sqlite3_bind_int64(pDel, 2, iNewStart-1); + sqlite3_step(pDel); + rc = sqlite3_reset(pDel); + } + } + + if( rc==SQLITE_OK ){ + sqlite3_stmt *pChomp = 0; + rc = fts3SqlStmt(p, SQL_CHOMP_SEGDIR, &pChomp, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pChomp, 1, iNewStart); + sqlite3_bind_blob(pChomp, 2, root.a, root.n, SQLITE_STATIC); + sqlite3_bind_int64(pChomp, 3, iAbsLevel); + sqlite3_bind_int(pChomp, 4, iIdx); + sqlite3_step(pChomp); + rc = sqlite3_reset(pChomp); } } + + sqlite3_free(root.a); + sqlite3_free(block.a); return rc; } /* -** This function is used after merging multiple segments into a single large -** segment to delete the old, now redundant, segment b-trees. Specifically, -** it: -** -** 1) Deletes all %_segments entries for the segments associated with -** each of the SegReader objects in the array passed as the third -** argument, and -** -** 2) deletes all %_segdir entries with level iLevel, or all %_segdir -** entries regardless of level if (iLevel<0). +** This function is called after an incrmental-merge operation has run to +** merge (or partially merge) two or more segments from absolute level +** iAbsLevel. ** -** SQLITE_OK is returned if successful, otherwise an SQLite error code. +** Each input segment is either removed from the db completely (if all of +** its data was copied to the output segment by the incrmerge operation) +** or modified in place so that it no longer contains those entries that +** have been duplicated in the output segment. */ -static int fts3DeleteSegdir( - Fts3Table *p, /* Virtual table handle */ - int iLangid, /* Language id */ - int iIndex, /* Index for p->aIndex */ - int iLevel, /* Level of %_segdir entries to delete */ - Fts3SegReader **apSegment, /* Array of SegReader objects */ - int nReader /* Size of array apSegment */ +static int fts3IncrmergeChomp( + Fts3Table *p, /* FTS table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level containing segments */ + Fts3MultiSegReader *pCsr, /* Chomp all segments opened by this cursor */ + int *pnRem /* Number of segments not deleted */ ){ - int rc = SQLITE_OK; /* Return Code */ - int i; /* Iterator variable */ - sqlite3_stmt *pDelete = 0; /* SQL statement to delete rows */ + int i; + int nRem = 0; + int rc = SQLITE_OK; - for(i=0; rc==SQLITE_OK && inSegment-1; i>=0 && rc==SQLITE_OK; i--){ + Fts3SegReader *pSeg = 0; + int j; - assert( iLevel>=0 || iLevel==FTS3_SEGCURSOR_ALL ); - if( iLevel==FTS3_SEGCURSOR_ALL ){ - rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); - sqlite3_bind_int64(pDelete, 2, - getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) - ); + /* Find the Fts3SegReader object with Fts3SegReader.iIdx==i. It is hiding + ** somewhere in the pCsr->apSegment[] array. */ + for(j=0; ALWAYS(jnSegment); j++){ + pSeg = pCsr->apSegment[j]; + if( pSeg->iIdx==i ) break; } - }else{ - rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64( - pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel) - ); + assert( jnSegment && pSeg->iIdx==i ); + + if( pSeg->aNode==0 ){ + /* Seg-reader is at EOF. Remove the entire input segment. */ + rc = fts3DeleteSegment(p, pSeg); + if( rc==SQLITE_OK ){ + rc = fts3RemoveSegdirEntry(p, iAbsLevel, pSeg->iIdx); + } + *pnRem = 0; + }else{ + /* The incremental merge did not copy all the data from this + ** segment to the upper level. The segment is modified in place + ** so that it contains no keys smaller than zTerm/nTerm. */ + const char *zTerm = pSeg->zTerm; + int nTerm = pSeg->nTerm; + rc = fts3TruncateSegment(p, iAbsLevel, pSeg->iIdx, zTerm, nTerm); + nRem++; } } + if( rc==SQLITE_OK && nRem!=pCsr->nSegment ){ + rc = fts3RepackSegdirLevel(p, iAbsLevel); + } + + *pnRem = nRem; + return rc; +} + +/* +** Store an incr-merge hint in the database. +*/ +static int fts3IncrmergeHintStore(Fts3Table *p, Blob *pHint){ + sqlite3_stmt *pReplace = 0; + int rc; /* Return code */ + + rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pReplace, 0); if( rc==SQLITE_OK ){ - sqlite3_step(pDelete); - rc = sqlite3_reset(pDelete); + sqlite3_bind_int(pReplace, 1, FTS_STAT_INCRMERGEHINT); + sqlite3_bind_blob(pReplace, 2, pHint->a, pHint->n, SQLITE_STATIC); + sqlite3_step(pReplace); + rc = sqlite3_reset(pReplace); } return rc; } /* -** When this function is called, buffer *ppList (size *pnList bytes) contains -** a position list that may (or may not) feature multiple columns. This -** function adjusts the pointer *ppList and the length *pnList so that they -** identify the subset of the position list that corresponds to column iCol. -** -** If there are no entries in the input position list for column iCol, then -** *pnList is set to zero before returning. +** Load an incr-merge hint from the database. The incr-merge hint, if one +** exists, is stored in the rowid==1 row of the %_stat table. ** -** If parameter bZero is non-zero, then any part of the input list following -** the end of the output list is zeroed before returning. +** If successful, populate blob *pHint with the value read from the %_stat +** table and return SQLITE_OK. Otherwise, if an error occurs, return an +** SQLite error code. */ -static void fts3ColumnFilter( - int iCol, /* Column to filter on */ - int bZero, /* Zero out anything following *ppList */ - char **ppList, /* IN/OUT: Pointer to position list */ - int *pnList /* IN/OUT: Size of buffer *ppList in bytes */ -){ - char *pList = *ppList; - int nList = *pnList; - char *pEnd = &pList[nList]; - int iCurrent = 0; - char *p = pList; - - assert( iCol>=0 ); - while( 1 ){ - char c = 0; - while( pn = 0; + rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pSelect, 0); + if( rc==SQLITE_OK ){ + int rc2; + sqlite3_bind_int(pSelect, 1, FTS_STAT_INCRMERGEHINT); + if( SQLITE_ROW==sqlite3_step(pSelect) ){ + const char *aHint = sqlite3_column_blob(pSelect, 0); + int nHint = sqlite3_column_bytes(pSelect, 0); + if( aHint ){ + blobGrowBuffer(pHint, nHint, &rc); + if( rc==SQLITE_OK ){ + memcpy(pHint->a, aHint, nHint); + pHint->n = nHint; + } + } } - p = &pList[1]; - p += fts3GetVarint32(p, &iCurrent); + rc2 = sqlite3_reset(pSelect); + if( rc==SQLITE_OK ) rc = rc2; } - if( bZero && &pList[nList]!=pEnd ){ - memset(&pList[nList], 0, pEnd - &pList[nList]); - } - *ppList = pList; - *pnList = nList; + return rc; } /* -** Cache data in the Fts3MultiSegReader.aBuffer[] buffer (overwriting any -** existing data). Grow the buffer if required. +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** Otherwise, append an entry to the hint stored in blob *pHint. Each entry +** consists of two varints, the absolute level number of the input segments +** and the number of input segments. ** -** If successful, return SQLITE_OK. Otherwise, if an OOM error is encountered -** trying to resize the buffer, return SQLITE_NOMEM. +** If successful, leave *pRc set to SQLITE_OK and return. If an error occurs, +** set *pRc to an SQLite error code before returning. */ -static int fts3MsrBufferData( - Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */ - char *pList, - int nList +static void fts3IncrmergeHintPush( + Blob *pHint, /* Hint blob to append to */ + i64 iAbsLevel, /* First varint to store in hint */ + int nInput, /* Second varint to store in hint */ + int *pRc /* IN/OUT: Error code */ ){ - if( nList>pMsr->nBuffer ){ - char *pNew; - pMsr->nBuffer = nList*2; - pNew = (char *)sqlite3_realloc(pMsr->aBuffer, pMsr->nBuffer); - if( !pNew ) return SQLITE_NOMEM; - pMsr->aBuffer = pNew; + blobGrowBuffer(pHint, pHint->n + 2*FTS3_VARINT_MAX, pRc); + if( *pRc==SQLITE_OK ){ + pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], iAbsLevel); + pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], (i64)nInput); } +} + +/* +** Read the last entry (most recently pushed) from the hint blob *pHint +** and then remove the entry. Write the two values read to *piAbsLevel and +** *pnInput before returning. +** +** If no error occurs, return SQLITE_OK. If the hint blob in *pHint does +** not contain at least two valid varints, return SQLITE_CORRUPT_VTAB. +*/ +static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){ + const int nHint = pHint->n; + int i; + + i = pHint->n-2; + while( i>0 && (pHint->a[i-1] & 0x80) ) i--; + while( i>0 && (pHint->a[i-1] & 0x80) ) i--; + + pHint->n = i; + i += sqlite3Fts3GetVarint(&pHint->a[i], piAbsLevel); + i += fts3GetVarint32(&pHint->a[i], pnInput); + if( i!=nHint ) return FTS_CORRUPT_VTAB; - memcpy(pMsr->aBuffer, pList, nList); return SQLITE_OK; } -SQLITE_PRIVATE int sqlite3Fts3MsrIncrNext( - Fts3Table *p, /* Virtual table handle */ - Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */ - sqlite3_int64 *piDocid, /* OUT: Docid value */ - char **paPoslist, /* OUT: Pointer to position list */ - int *pnPoslist /* OUT: Size of position list in bytes */ -){ - int nMerge = pMsr->nAdvance; - Fts3SegReader **apSegment = pMsr->apSegment; - int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( - p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp - ); - if( nMerge==0 ){ - *paPoslist = 0; - return SQLITE_OK; - } +/* +** Attempt an incremental merge that writes nMerge leaf blocks. +** +** Incremental merges happen nMin segments at a time. The segments +** to be merged are the nMin oldest segments (the ones with the smallest +** values for the _segdir.idx field) in the highest level that contains +** at least nMin segments. Multiple merges might occur in an attempt to +** write the quota of nMerge leaf blocks. +*/ +SQLITE_PRIVATE int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ + int rc; /* Return code */ + int nRem = nMerge; /* Number of leaf pages yet to be written */ + Fts3MultiSegReader *pCsr; /* Cursor used to read input data */ + Fts3SegFilter *pFilter; /* Filter used with cursor pCsr */ + IncrmergeWriter *pWriter; /* Writer object */ + int nSeg = 0; /* Number of input segments */ + sqlite3_int64 iAbsLevel = 0; /* Absolute level number to work on */ + Blob hint = {0, 0, 0}; /* Hint read from %_stat table */ + int bDirtyHint = 0; /* True if blob 'hint' has been modified */ - while( 1 ){ - Fts3SegReader *pSeg; - pSeg = pMsr->apSegment[0]; + /* Allocate space for the cursor, filter and writer objects */ + const int nAlloc = sizeof(*pCsr) + sizeof(*pFilter) + sizeof(*pWriter); + pWriter = (IncrmergeWriter *)sqlite3_malloc(nAlloc); + if( !pWriter ) return SQLITE_NOMEM; + pFilter = (Fts3SegFilter *)&pWriter[1]; + pCsr = (Fts3MultiSegReader *)&pFilter[1]; - if( pSeg->pOffsetList==0 ){ - *paPoslist = 0; - break; + rc = fts3IncrmergeHintLoad(p, &hint); + while( rc==SQLITE_OK && nRem>0 ){ + const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex; + sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */ + int bUseHint = 0; /* True if attempting to append */ + int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */ + + /* Search the %_segdir table for the absolute level with the smallest + ** relative level number that contains at least nMin segments, if any. + ** If one is found, set iAbsLevel to the absolute level number and + ** nSeg to nMin. If no level with at least nMin segments can be found, + ** set nSeg to -1. + */ + rc = fts3SqlStmt(p, SQL_FIND_MERGE_LEVEL, &pFindLevel, 0); + sqlite3_bind_int(pFindLevel, 1, nMin); + if( sqlite3_step(pFindLevel)==SQLITE_ROW ){ + iAbsLevel = sqlite3_column_int64(pFindLevel, 0); + nSeg = nMin; }else{ - int rc; - char *pList; - int nList; - int j; - sqlite3_int64 iDocid = apSegment[0]->iDocid; + nSeg = -1; + } + rc = sqlite3_reset(pFindLevel); - rc = fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); - j = 1; - while( rc==SQLITE_OK - && jpOffsetList - && apSegment[j]->iDocid==iDocid - ){ - rc = fts3SegReaderNextDocid(p, apSegment[j], 0, 0); - j++; - } - if( rc!=SQLITE_OK ) return rc; - fts3SegReaderSort(pMsr->apSegment, nMerge, j, xCmp); + /* If the hint read from the %_stat table is not empty, check if the + ** last entry in it specifies a relative level smaller than or equal + ** to the level identified by the block above (if any). If so, this + ** iteration of the loop will work on merging at the hinted level. + */ + if( rc==SQLITE_OK && hint.n ){ + int nHint = hint.n; + sqlite3_int64 iHintAbsLevel = 0; /* Hint level */ + int nHintSeg = 0; /* Hint number of segments */ - if( nList>0 && fts3SegReaderIsPending(apSegment[0]) ){ - rc = fts3MsrBufferData(pMsr, pList, nList+1); - if( rc!=SQLITE_OK ) return rc; - assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 ); - pList = pMsr->aBuffer; + rc = fts3IncrmergeHintPop(&hint, &iHintAbsLevel, &nHintSeg); + if( nSeg<0 || (iAbsLevel % nMod) >= (iHintAbsLevel % nMod) ){ + iAbsLevel = iHintAbsLevel; + nSeg = nHintSeg; + bUseHint = 1; + bDirtyHint = 1; + }else{ + /* This undoes the effect of the HintPop() above - so that no entry + ** is removed from the hint blob. */ + hint.n = nHint; } + } - if( pMsr->iColFilter>=0 ){ - fts3ColumnFilter(pMsr->iColFilter, 1, &pList, &nList); - } + /* If nSeg is less that zero, then there is no level with at least + ** nMin segments and no hint in the %_stat table. No work to do. + ** Exit early in this case. */ + if( nSeg<0 ) break; - if( nList>0 ){ - *paPoslist = pList; - *piDocid = iDocid; - *pnPoslist = nList; - break; + /* Open a cursor to iterate through the contents of the oldest nSeg + ** indexes of absolute level iAbsLevel. If this cursor is opened using + ** the 'hint' parameters, it is possible that there are less than nSeg + ** segments available in level iAbsLevel. In this case, no work is + ** done on iAbsLevel - fall through to the next iteration of the loop + ** to start work on some other level. */ + memset(pWriter, 0, nAlloc); + pFilter->flags = FTS3_SEGMENT_REQUIRE_POS; + + if( rc==SQLITE_OK ){ + rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx); + assert( bUseHint==1 || bUseHint==0 ); + if( iIdx==0 || (bUseHint && iIdx==1) ){ + int bIgnore = 0; + rc = fts3SegmentIsMaxLevel(p, iAbsLevel+1, &bIgnore); + if( bIgnore ){ + pFilter->flags |= FTS3_SEGMENT_IGNORE_EMPTY; + } } } - } - return SQLITE_OK; -} + if( rc==SQLITE_OK ){ + rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr); + } + if( SQLITE_OK==rc && pCsr->nSegment==nSeg + && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter)) + && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr)) + ){ + if( bUseHint && iIdx>0 ){ + const char *zKey = pCsr->zTerm; + int nKey = pCsr->nTerm; + rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter); + }else{ + rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter); + } -static int fts3SegReaderStart( - Fts3Table *p, /* Virtual table handle */ - Fts3MultiSegReader *pCsr, /* Cursor object */ - const char *zTerm, /* Term searched for (or NULL) */ - int nTerm /* Length of zTerm in bytes */ -){ - int i; - int nSeg = pCsr->nSegment; + if( rc==SQLITE_OK && pWriter->nLeafEst ){ + fts3LogMerge(nSeg, iAbsLevel); + do { + rc = fts3IncrmergeAppend(p, pWriter, pCsr); + if( rc==SQLITE_OK ) rc = sqlite3Fts3SegReaderStep(p, pCsr); + if( pWriter->nWork>=nRem && rc==SQLITE_ROW ) rc = SQLITE_OK; + }while( rc==SQLITE_ROW ); - /* If the Fts3SegFilter defines a specific term (or term prefix) to search - ** for, then advance each segment iterator until it points to a term of - ** equal or greater value than the specified term. This prevents many - ** unnecessary merge/sort operations for the case where single segment - ** b-tree leaf nodes contain more than one term. - */ - for(i=0; pCsr->bRestart==0 && inSegment; i++){ - int res = 0; - Fts3SegReader *pSeg = pCsr->apSegment[i]; - do { - int rc = fts3SegReaderNext(p, pSeg, 0); - if( rc!=SQLITE_OK ) return rc; - }while( zTerm && (res = fts3SegReaderTermCmp(pSeg, zTerm, nTerm))<0 ); + /* Update or delete the input segments */ + if( rc==SQLITE_OK ){ + nRem -= (1 + pWriter->nWork); + rc = fts3IncrmergeChomp(p, iAbsLevel, pCsr, &nSeg); + if( nSeg!=0 ){ + bDirtyHint = 1; + fts3IncrmergeHintPush(&hint, iAbsLevel, nSeg, &rc); + } + } + } - if( pSeg->bLookup && res!=0 ){ - fts3SegReaderSetEof(pSeg); + if( nSeg!=0 ){ + pWriter->nLeafData = pWriter->nLeafData * -1; + } + fts3IncrmergeRelease(p, pWriter, &rc); + if( nSeg==0 && pWriter->bNoLeafData==0 ){ + fts3PromoteSegments(p, iAbsLevel+1, pWriter->nLeafData); + } } + + sqlite3Fts3SegReaderFinish(pCsr); } - fts3SegReaderSort(pCsr->apSegment, nSeg, nSeg, fts3SegReaderCmp); - return SQLITE_OK; + /* Write the hint values into the %_stat table for the next incr-merger */ + if( bDirtyHint && rc==SQLITE_OK ){ + rc = fts3IncrmergeHintStore(p, &hint); + } + + sqlite3_free(pWriter); + sqlite3_free(hint.a); + return rc; } -SQLITE_PRIVATE int sqlite3Fts3SegReaderStart( - Fts3Table *p, /* Virtual table handle */ - Fts3MultiSegReader *pCsr, /* Cursor object */ - Fts3SegFilter *pFilter /* Restrictions on range of iteration */ -){ - pCsr->pFilter = pFilter; - return fts3SegReaderStart(p, pCsr, pFilter->zTerm, pFilter->nTerm); +/* +** Convert the text beginning at *pz into an integer and return +** its value. Advance *pz to point to the first character past +** the integer. +*/ +static int fts3Getint(const char **pz){ + const char *z = *pz; + int i = 0; + while( (*z)>='0' && (*z)<='9' ) i = 10*i + *(z++) - '0'; + *pz = z; + return i; } -SQLITE_PRIVATE int sqlite3Fts3MsrIncrStart( - Fts3Table *p, /* Virtual table handle */ - Fts3MultiSegReader *pCsr, /* Cursor object */ - int iCol, /* Column to match on. */ - const char *zTerm, /* Term to iterate through a doclist for */ - int nTerm /* Number of bytes in zTerm */ +/* +** Process statements of the form: +** +** INSERT INTO table(table) VALUES('merge=A,B'); +** +** A and B are integers that decode to be the number of leaf pages +** written for the merge, and the minimum number of segments on a level +** before it will be selected for a merge, respectively. +*/ +static int fts3DoIncrmerge( + Fts3Table *p, /* FTS3 table handle */ + const char *zParam /* Nul-terminated string containing "A,B" */ ){ - int i; int rc; - int nSegment = pCsr->nSegment; - int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( - p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp - ); - - assert( pCsr->pFilter==0 ); - assert( zTerm && nTerm>0 ); + int nMin = (FTS3_MERGE_COUNT / 2); + int nMerge = 0; + const char *z = zParam; - /* Advance each segment iterator until it points to the term zTerm/nTerm. */ - rc = fts3SegReaderStart(p, pCsr, zTerm, nTerm); - if( rc!=SQLITE_OK ) return rc; + /* Read the first integer value */ + nMerge = fts3Getint(&z); - /* Determine how many of the segments actually point to zTerm/nTerm. */ - for(i=0; iapSegment[i]; - if( !pSeg->aNode || fts3SegReaderTermCmp(pSeg, zTerm, nTerm) ){ - break; - } + /* If the first integer value is followed by a ',', read the second + ** integer value. */ + if( z[0]==',' && z[1]!='\0' ){ + z++; + nMin = fts3Getint(&z); } - pCsr->nAdvance = i; - /* Advance each of the segments to point to the first docid. */ - for(i=0; inAdvance; i++){ - rc = fts3SegReaderFirstDocid(p, pCsr->apSegment[i]); - if( rc!=SQLITE_OK ) return rc; + if( z[0]!='\0' || nMin<2 ){ + rc = SQLITE_ERROR; + }else{ + rc = SQLITE_OK; + if( !p->bHasStat ){ + assert( p->bFts4==0 ); + sqlite3Fts3CreateStatTable(&rc, p); + } + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3Incrmerge(p, nMerge, nMin); + } + sqlite3Fts3SegmentsClose(p); } - fts3SegReaderSort(pCsr->apSegment, i, i, xCmp); - - assert( iCol<0 || iColnColumn ); - pCsr->iColFilter = iCol; - - return SQLITE_OK; + return rc; } /* -** This function is called on a MultiSegReader that has been started using -** sqlite3Fts3MsrIncrStart(). One or more calls to MsrIncrNext() may also -** have been made. Calling this function puts the MultiSegReader in such -** a state that if the next two calls are: +** Process statements of the form: ** -** sqlite3Fts3SegReaderStart() -** sqlite3Fts3SegReaderStep() +** INSERT INTO table(table) VALUES('automerge=X'); ** -** then the entire doclist for the term is available in -** MultiSegReader.aDoclist/nDoclist. +** where X is an integer. X==0 means to turn automerge off. X!=0 means +** turn it on. The setting is persistent. */ -SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr){ - int i; /* Used to iterate through segment-readers */ +static int fts3DoAutoincrmerge( + Fts3Table *p, /* FTS3 table handle */ + const char *zParam /* Nul-terminated string containing boolean */ +){ + int rc = SQLITE_OK; + sqlite3_stmt *pStmt = 0; + p->nAutoincrmerge = fts3Getint(&zParam); + if( p->nAutoincrmerge==1 || p->nAutoincrmerge>FTS3_MERGE_COUNT ){ + p->nAutoincrmerge = 8; + } + if( !p->bHasStat ){ + assert( p->bFts4==0 ); + sqlite3Fts3CreateStatTable(&rc, p); + if( rc ) return rc; + } + rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); + if( rc ) return rc; + sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); + sqlite3_bind_int(pStmt, 2, p->nAutoincrmerge); + sqlite3_step(pStmt); + rc = sqlite3_reset(pStmt); + return rc; +} - assert( pCsr->zTerm==0 ); - assert( pCsr->nTerm==0 ); - assert( pCsr->aDoclist==0 ); - assert( pCsr->nDoclist==0 ); +/* +** Return a 64-bit checksum for the FTS index entry specified by the +** arguments to this function. +*/ +static u64 fts3ChecksumEntry( + const char *zTerm, /* Pointer to buffer containing term */ + int nTerm, /* Size of zTerm in bytes */ + int iLangid, /* Language id for current row */ + int iIndex, /* Index (0..Fts3Table.nIndex-1) */ + i64 iDocid, /* Docid for current row. */ + int iCol, /* Column number */ + int iPos /* Position */ +){ + int i; + u64 ret = (u64)iDocid; - pCsr->nAdvance = 0; - pCsr->bRestart = 1; - for(i=0; inSegment; i++){ - pCsr->apSegment[i]->pOffsetList = 0; - pCsr->apSegment[i]->nOffsetList = 0; - pCsr->apSegment[i]->iDocid = 0; - } + ret += (ret<<3) + iLangid; + ret += (ret<<3) + iIndex; + ret += (ret<<3) + iCol; + ret += (ret<<3) + iPos; + for(i=0; inIndex-1) */ + int *pRc /* OUT: Return code */ ){ - int rc = SQLITE_OK; + Fts3SegFilter filter; + Fts3MultiSegReader csr; + int rc; + u64 cksum = 0; - int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY); - int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS); - int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER); - int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX); - int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN); - int isFirst = (pCsr->pFilter->flags & FTS3_SEGMENT_FIRST); + assert( *pRc==SQLITE_OK ); - Fts3SegReader **apSegment = pCsr->apSegment; - int nSegment = pCsr->nSegment; - Fts3SegFilter *pFilter = pCsr->pFilter; - int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( - p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp + memset(&filter, 0, sizeof(filter)); + memset(&csr, 0, sizeof(csr)); + filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; + filter.flags |= FTS3_SEGMENT_SCAN; + + rc = sqlite3Fts3SegReaderCursor( + p, iLangid, iIndex, FTS3_SEGCURSOR_ALL, 0, 0, 0, 1,&csr ); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); + } - if( pCsr->nSegment==0 ) return SQLITE_OK; + if( rc==SQLITE_OK ){ + while( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, &csr)) ){ + char *pCsr = csr.aDoclist; + char *pEnd = &pCsr[csr.nDoclist]; - do { - int nMerge; - int i; - - /* Advance the first pCsr->nAdvance entries in the apSegment[] array - ** forward. Then sort the list in order of current term again. - */ - for(i=0; inAdvance; i++){ - Fts3SegReader *pSeg = apSegment[i]; - if( pSeg->bLookup ){ - fts3SegReaderSetEof(pSeg); - }else{ - rc = fts3SegReaderNext(p, pSeg, 0); + i64 iDocid = 0; + i64 iCol = 0; + i64 iPos = 0; + + pCsr += sqlite3Fts3GetVarint(pCsr, &iDocid); + while( pCsrnAdvance, fts3SegReaderCmp); - pCsr->nAdvance = 0; + } + sqlite3Fts3SegReaderFinish(&csr); - /* If all the seg-readers are at EOF, we're finished. return SQLITE_OK. */ - assert( rc==SQLITE_OK ); - if( apSegment[0]->aNode==0 ) break; + *pRc = rc; + return cksum; +} - pCsr->nTerm = apSegment[0]->nTerm; - pCsr->zTerm = apSegment[0]->zTerm; +/* +** Check if the contents of the FTS index match the current contents of the +** content table. If no error occurs and the contents do match, set *pbOk +** to true and return SQLITE_OK. Or if the contents do not match, set *pbOk +** to false before returning. +** +** If an error occurs (e.g. an OOM or IO error), return an SQLite error +** code. The final value of *pbOk is undefined in this case. +*/ +static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){ + int rc = SQLITE_OK; /* Return code */ + u64 cksum1 = 0; /* Checksum based on FTS index contents */ + u64 cksum2 = 0; /* Checksum based on %_content contents */ + sqlite3_stmt *pAllLangid = 0; /* Statement to return all language-ids */ - /* If this is a prefix-search, and if the term that apSegment[0] points - ** to does not share a suffix with pFilter->zTerm/nTerm, then all - ** required callbacks have been made. In this case exit early. - ** - ** Similarly, if this is a search for an exact match, and the first term - ** of segment apSegment[0] is not a match, exit early. - */ - if( pFilter->zTerm && !isScan ){ - if( pCsr->nTermnTerm - || (!isPrefix && pCsr->nTerm>pFilter->nTerm) - || memcmp(pCsr->zTerm, pFilter->zTerm, pFilter->nTerm) - ){ - break; + /* This block calculates the checksum according to the FTS index. */ + rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); + if( rc==SQLITE_OK ){ + int rc2; + sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid); + sqlite3_bind_int(pAllLangid, 2, p->nIndex); + while( rc==SQLITE_OK && sqlite3_step(pAllLangid)==SQLITE_ROW ){ + int iLangid = sqlite3_column_int(pAllLangid, 0); + int i; + for(i=0; inIndex; i++){ + cksum1 = cksum1 ^ fts3ChecksumIndex(p, iLangid, i, &rc); } } + rc2 = sqlite3_reset(pAllLangid); + if( rc==SQLITE_OK ) rc = rc2; + } - nMerge = 1; - while( nMergeaNode - && apSegment[nMerge]->nTerm==pCsr->nTerm - && 0==memcmp(pCsr->zTerm, apSegment[nMerge]->zTerm, pCsr->nTerm) - ){ - nMerge++; + /* This block calculates the checksum according to the %_content table */ + if( rc==SQLITE_OK ){ + sqlite3_tokenizer_module const *pModule = p->pTokenizer->pModule; + sqlite3_stmt *pStmt = 0; + char *zSql; + + zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); + sqlite3_free(zSql); } - assert( isIgnoreEmpty || (isRequirePos && !isColFilter) ); - if( nMerge==1 - && !isIgnoreEmpty - && !isFirst - && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0) - ){ - pCsr->nDoclist = apSegment[0]->nDoclist; - if( fts3SegReaderIsPending(apSegment[0]) ){ - rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist); - pCsr->aDoclist = pCsr->aBuffer; - }else{ - pCsr->aDoclist = apSegment[0]->aDoclist; - } - if( rc==SQLITE_OK ) rc = SQLITE_ROW; - }else{ - int nDoclist = 0; /* Size of doclist */ - sqlite3_int64 iPrev = 0; /* Previous docid stored in doclist */ + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + i64 iDocid = sqlite3_column_int64(pStmt, 0); + int iLang = langidFromSelect(p, pStmt); + int iCol; - /* The current term of the first nMerge entries in the array - ** of Fts3SegReader objects is the same. The doclists must be merged - ** and a single term returned with the merged doclist. - */ - for(i=0; ipOffsetList ){ - int j; /* Number of segments that share a docid */ - char *pList = 0; - int nList = 0; - int nByte; - sqlite3_int64 iDocid = apSegment[0]->iDocid; - fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); - j = 1; - while( jpOffsetList - && apSegment[j]->iDocid==iDocid - ){ - fts3SegReaderNextDocid(p, apSegment[j], 0, 0); - j++; - } + for(iCol=0; rc==SQLITE_OK && iColnColumn; iCol++){ + if( p->abNotindexed[iCol]==0 ){ + const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1); + int nText = sqlite3_column_bytes(pStmt, iCol+1); + sqlite3_tokenizer_cursor *pT = 0; - if( isColFilter ){ - fts3ColumnFilter(pFilter->iCol, 0, &pList, &nList); + rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText,&pT); + while( rc==SQLITE_OK ){ + char const *zToken; /* Buffer containing token */ + int nToken = 0; /* Number of bytes in token */ + int iDum1 = 0, iDum2 = 0; /* Dummy variables */ + int iPos = 0; /* Position of token in zText */ + + rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos); + if( rc==SQLITE_OK ){ + int i; + cksum2 = cksum2 ^ fts3ChecksumEntry( + zToken, nToken, iLang, 0, iDocid, iCol, iPos + ); + for(i=1; inIndex; i++){ + if( p->aIndex[i].nPrefix<=nToken ){ + cksum2 = cksum2 ^ fts3ChecksumEntry( + zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos + ); + } + } + } + } + if( pT ) pModule->xClose(pT); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; } + } + } - if( !isIgnoreEmpty || nList>0 ){ + sqlite3_finalize(pStmt); + } - /* Calculate the 'docid' delta value to write into the merged - ** doclist. */ - sqlite3_int64 iDelta; - if( p->bDescIdx && nDoclist>0 ){ - iDelta = iPrev - iDocid; - }else{ - iDelta = iDocid - iPrev; - } - assert( iDelta>0 || (nDoclist==0 && iDelta==iDocid) ); - assert( nDoclist>0 || iDelta==iDocid ); + *pbOk = (cksum1==cksum2); + return rc; +} - nByte = sqlite3Fts3VarintLen(iDelta) + (isRequirePos?nList+1:0); - if( nDoclist+nByte>pCsr->nBuffer ){ - char *aNew; - pCsr->nBuffer = (nDoclist+nByte)*2; - aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer); - if( !aNew ){ - return SQLITE_NOMEM; - } - pCsr->aBuffer = aNew; - } +/* +** Run the integrity-check. If no error occurs and the current contents of +** the FTS index are correct, return SQLITE_OK. Or, if the contents of the +** FTS index are incorrect, return SQLITE_CORRUPT_VTAB. +** +** Or, if an error (e.g. an OOM or IO error) occurs, return an SQLite +** error code. +** +** The integrity-check works as follows. For each token and indexed token +** prefix in the document set, a 64-bit checksum is calculated (by code +** in fts3ChecksumEntry()) based on the following: +** +** + The index number (0 for the main index, 1 for the first prefix +** index etc.), +** + The token (or token prefix) text itself, +** + The language-id of the row it appears in, +** + The docid of the row it appears in, +** + The column it appears in, and +** + The tokens position within that column. +** +** The checksums for all entries in the index are XORed together to create +** a single checksum for the entire index. +** +** The integrity-check code calculates the same checksum in two ways: +** +** 1. By scanning the contents of the FTS index, and +** 2. By scanning and tokenizing the content table. +** +** If the two checksums are identical, the integrity-check is deemed to have +** passed. +*/ +static int fts3DoIntegrityCheck( + Fts3Table *p /* FTS3 table handle */ +){ + int rc; + int bOk = 0; + rc = fts3IntegrityCheck(p, &bOk); + if( rc==SQLITE_OK && bOk==0 ) rc = FTS_CORRUPT_VTAB; + return rc; +} - if( isFirst ){ - char *a = &pCsr->aBuffer[nDoclist]; - int nWrite; - - nWrite = sqlite3Fts3FirstFilter(iDelta, pList, nList, a); - if( nWrite ){ - iPrev = iDocid; - nDoclist += nWrite; - } - }else{ - nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta); - iPrev = iDocid; - if( isRequirePos ){ - memcpy(&pCsr->aBuffer[nDoclist], pList, nList); - nDoclist += nList; - pCsr->aBuffer[nDoclist++] = '\0'; +/* +** Handle a 'special' INSERT of the form: +** +** "INSERT INTO tbl(tbl) VALUES()" +** +** Argument pVal contains the result of . Currently the only +** meaningful value to insert is the text 'optimize'. +*/ +static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){ + int rc; /* Return Code */ + const char *zVal = (const char *)sqlite3_value_text(pVal); + int nVal = sqlite3_value_bytes(pVal); + + if( !zVal ){ + return SQLITE_NOMEM; + }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){ + rc = fts3DoOptimize(p, 0); + }else if( nVal==7 && 0==sqlite3_strnicmp(zVal, "rebuild", 7) ){ + rc = fts3DoRebuild(p); + }else if( nVal==15 && 0==sqlite3_strnicmp(zVal, "integrity-check", 15) ){ + rc = fts3DoIntegrityCheck(p); + }else if( nVal>6 && 0==sqlite3_strnicmp(zVal, "merge=", 6) ){ + rc = fts3DoIncrmerge(p, &zVal[6]); + }else if( nVal>10 && 0==sqlite3_strnicmp(zVal, "automerge=", 10) ){ + rc = fts3DoAutoincrmerge(p, &zVal[10]); +#ifdef SQLITE_TEST + }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){ + p->nNodeSize = atoi(&zVal[9]); + rc = SQLITE_OK; + }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){ + p->nMaxPendingData = atoi(&zVal[11]); + rc = SQLITE_OK; + }else if( nVal>21 && 0==sqlite3_strnicmp(zVal, "test-no-incr-doclist=", 21) ){ + p->bNoIncrDoclist = atoi(&zVal[21]); + rc = SQLITE_OK; +#endif + }else{ + rc = SQLITE_ERROR; + } + + return rc; +} + +#ifndef SQLITE_DISABLE_FTS4_DEFERRED +/* +** Delete all cached deferred doclists. Deferred doclists are cached +** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function. +*/ +SQLITE_PRIVATE void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){ + Fts3DeferredToken *pDef; + for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){ + fts3PendingListDelete(pDef->pList); + pDef->pList = 0; + } +} + +/* +** Free all entries in the pCsr->pDeffered list. Entries are added to +** this list using sqlite3Fts3DeferToken(). +*/ +SQLITE_PRIVATE void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){ + Fts3DeferredToken *pDef; + Fts3DeferredToken *pNext; + for(pDef=pCsr->pDeferred; pDef; pDef=pNext){ + pNext = pDef->pNext; + fts3PendingListDelete(pDef->pList); + sqlite3_free(pDef); + } + pCsr->pDeferred = 0; +} + +/* +** Generate deferred-doclists for all tokens in the pCsr->pDeferred list +** based on the row that pCsr currently points to. +** +** A deferred-doclist is like any other doclist with position information +** included, except that it only contains entries for a single row of the +** table, not for all rows. +*/ +SQLITE_PRIVATE int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){ + int rc = SQLITE_OK; /* Return code */ + if( pCsr->pDeferred ){ + int i; /* Used to iterate through table columns */ + sqlite3_int64 iDocid; /* Docid of the row pCsr points to */ + Fts3DeferredToken *pDef; /* Used to iterate through deferred tokens */ + + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + sqlite3_tokenizer *pT = p->pTokenizer; + sqlite3_tokenizer_module const *pModule = pT->pModule; + + assert( pCsr->isRequireSeek==0 ); + iDocid = sqlite3_column_int64(pCsr->pStmt, 0); + + for(i=0; inColumn && rc==SQLITE_OK; i++){ + if( p->abNotindexed[i]==0 ){ + const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1); + sqlite3_tokenizer_cursor *pTC = 0; + + rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC); + while( rc==SQLITE_OK ){ + char const *zToken; /* Buffer containing token */ + int nToken = 0; /* Number of bytes in token */ + int iDum1 = 0, iDum2 = 0; /* Dummy variables */ + int iPos = 0; /* Position of token in zText */ + + rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); + for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ + Fts3PhraseToken *pPT = pDef->pToken; + if( (pDef->iCol>=p->nColumn || pDef->iCol==i) + && (pPT->bFirst==0 || iPos==0) + && (pPT->n==nToken || (pPT->isPrefix && pPT->nz, pPT->n)) + ){ + fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); } } } - - fts3SegReaderSort(apSegment, nMerge, j, xCmp); + if( pTC ) pModule->xClose(pTC); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; } - if( nDoclist>0 ){ - pCsr->aDoclist = pCsr->aBuffer; - pCsr->nDoclist = nDoclist; - rc = SQLITE_ROW; + } + + for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ + if( pDef->pList ){ + rc = fts3PendingListAppendVarint(&pDef->pList, 0); } } - pCsr->nAdvance = nMerge; - }while( rc==SQLITE_OK ); + } return rc; } - -SQLITE_PRIVATE void sqlite3Fts3SegReaderFinish( - Fts3MultiSegReader *pCsr /* Cursor object */ +SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList( + Fts3DeferredToken *p, + char **ppData, + int *pnData ){ - if( pCsr ){ - int i; - for(i=0; inSegment; i++){ - sqlite3Fts3SegReaderFree(pCsr->apSegment[i]); - } - sqlite3_free(pCsr->apSegment); - sqlite3_free(pCsr->aBuffer); + char *pRet; + int nSkip; + sqlite3_int64 dummy; - pCsr->nSegment = 0; - pCsr->apSegment = 0; - pCsr->aBuffer = 0; + *ppData = 0; + *pnData = 0; + + if( p->pList==0 ){ + return SQLITE_OK; } + + pRet = (char *)sqlite3_malloc(p->pList->nData); + if( !pRet ) return SQLITE_NOMEM; + + nSkip = sqlite3Fts3GetVarint(p->pList->aData, &dummy); + *pnData = p->pList->nData - nSkip; + *ppData = pRet; + + memcpy(pRet, &p->pList->aData[nSkip], *pnData); + return SQLITE_OK; } /* -** Decode the "end_block" field, selected by column iCol of the SELECT -** statement passed as the first argument. -** -** The "end_block" field may contain either an integer, or a text field -** containing the text representation of two non-negative integers separated -** by one or more space (0x20) characters. In the first case, set *piEndBlock -** to the integer value and *pnByte to zero before returning. In the second, -** set *piEndBlock to the first value and *pnByte to the second. +** Add an entry for token pToken to the pCsr->pDeferred list. */ -static void fts3ReadEndBlockField( - sqlite3_stmt *pStmt, - int iCol, - i64 *piEndBlock, - i64 *pnByte +SQLITE_PRIVATE int sqlite3Fts3DeferToken( + Fts3Cursor *pCsr, /* Fts3 table cursor */ + Fts3PhraseToken *pToken, /* Token to defer */ + int iCol /* Column that token must appear in (or -1) */ ){ - const unsigned char *zText = sqlite3_column_text(pStmt, iCol); - if( zText ){ - int i; - int iMul = 1; - i64 iVal = 0; - for(i=0; zText[i]>='0' && zText[i]<='9'; i++){ - iVal = iVal*10 + (zText[i] - '0'); - } - *piEndBlock = iVal; - while( zText[i]==' ' ) i++; - iVal = 0; - if( zText[i]=='-' ){ - i++; - iMul = -1; - } - for(/* no-op */; zText[i]>='0' && zText[i]<='9'; i++){ - iVal = iVal*10 + (zText[i] - '0'); - } - *pnByte = (iVal * (i64)iMul); + Fts3DeferredToken *pDeferred; + pDeferred = sqlite3_malloc(sizeof(*pDeferred)); + if( !pDeferred ){ + return SQLITE_NOMEM; } -} + memset(pDeferred, 0, sizeof(*pDeferred)); + pDeferred->pToken = pToken; + pDeferred->pNext = pCsr->pDeferred; + pDeferred->iCol = iCol; + pCsr->pDeferred = pDeferred; + + assert( pToken->pDeferred==0 ); + pToken->pDeferred = pDeferred; + return SQLITE_OK; +} +#endif /* -** A segment of size nByte bytes has just been written to absolute level -** iAbsLevel. Promote any segments that should be promoted as a result. +** SQLite value pRowid contains the rowid of a row that may or may not be +** present in the FTS3 table. If it is, delete it and adjust the contents +** of subsiduary data structures accordingly. */ -static int fts3PromoteSegments( - Fts3Table *p, /* FTS table handle */ - sqlite3_int64 iAbsLevel, /* Absolute level just updated */ - sqlite3_int64 nByte /* Size of new segment at iAbsLevel */ +static int fts3DeleteByRowid( + Fts3Table *p, + sqlite3_value *pRowid, + int *pnChng, /* IN/OUT: Decrement if row is deleted */ + u32 *aSzDel ){ - int rc = SQLITE_OK; - sqlite3_stmt *pRange; - - rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE2, &pRange, 0); - - if( rc==SQLITE_OK ){ - int bOk = 0; - i64 iLast = (iAbsLevel/FTS3_SEGDIR_MAXLEVEL + 1) * FTS3_SEGDIR_MAXLEVEL - 1; - i64 nLimit = (nByte*3)/2; - - /* Loop through all entries in the %_segdir table corresponding to - ** segments in this index on levels greater than iAbsLevel. If there is - ** at least one such segment, and it is possible to determine that all - ** such segments are smaller than nLimit bytes in size, they will be - ** promoted to level iAbsLevel. */ - sqlite3_bind_int64(pRange, 1, iAbsLevel+1); - sqlite3_bind_int64(pRange, 2, iLast); - while( SQLITE_ROW==sqlite3_step(pRange) ){ - i64 nSize = 0, dummy; - fts3ReadEndBlockField(pRange, 2, &dummy, &nSize); - if( nSize<=0 || nSize>nLimit ){ - /* If nSize==0, then the %_segdir.end_block field does not not - ** contain a size value. This happens if it was written by an - ** old version of FTS. In this case it is not possible to determine - ** the size of the segment, and so segment promotion does not - ** take place. */ - bOk = 0; - break; - } - bOk = 1; - } - rc = sqlite3_reset(pRange); - - if( bOk ){ - int iIdx = 0; - sqlite3_stmt *pUpdate1 = 0; - sqlite3_stmt *pUpdate2 = 0; - - if( rc==SQLITE_OK ){ - rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL_IDX, &pUpdate1, 0); - } - if( rc==SQLITE_OK ){ - rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL, &pUpdate2, 0); - } - - if( rc==SQLITE_OK ){ + int rc = SQLITE_OK; /* Return code */ + int bFound = 0; /* True if *pRowid really is in the table */ - /* Loop through all %_segdir entries for segments in this index with - ** levels equal to or greater than iAbsLevel. As each entry is visited, - ** updated it to set (level = -1) and (idx = N), where N is 0 for the - ** oldest segment in the range, 1 for the next oldest, and so on. - ** - ** In other words, move all segments being promoted to level -1, - ** setting the "idx" fields as appropriate to keep them in the same - ** order. The contents of level -1 (which is never used, except - ** transiently here), will be moved back to level iAbsLevel below. */ - sqlite3_bind_int64(pRange, 1, iAbsLevel); - while( SQLITE_ROW==sqlite3_step(pRange) ){ - sqlite3_bind_int(pUpdate1, 1, iIdx++); - sqlite3_bind_int(pUpdate1, 2, sqlite3_column_int(pRange, 0)); - sqlite3_bind_int(pUpdate1, 3, sqlite3_column_int(pRange, 1)); - sqlite3_step(pUpdate1); - rc = sqlite3_reset(pUpdate1); - if( rc!=SQLITE_OK ){ - sqlite3_reset(pRange); - break; - } + fts3DeleteTerms(&rc, p, pRowid, aSzDel, &bFound); + if( bFound && rc==SQLITE_OK ){ + int isEmpty = 0; /* Deleting *pRowid leaves the table empty */ + rc = fts3IsEmpty(p, pRowid, &isEmpty); + if( rc==SQLITE_OK ){ + if( isEmpty ){ + /* Deleting this row means the whole table is empty. In this case + ** delete the contents of all three tables and throw away any + ** data in the pendingTerms hash table. */ + rc = fts3DeleteAll(p, 1); + *pnChng = 0; + memset(aSzDel, 0, sizeof(u32) * (p->nColumn+1) * 2); + }else{ + *pnChng = *pnChng - 1; + if( p->zContentTbl==0 ){ + fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid); + } + if( p->bHasDocsize ){ + fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid); } - } - if( rc==SQLITE_OK ){ - rc = sqlite3_reset(pRange); - } - - /* Move level -1 to level iAbsLevel */ - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pUpdate2, 1, iAbsLevel); - sqlite3_step(pUpdate2); - rc = sqlite3_reset(pUpdate2); } } } - return rc; } /* -** Merge all level iLevel segments in the database into a single -** iLevel+1 segment. Or, if iLevel<0, merge all segments into a -** single segment with a level equal to the numerically largest level -** currently present in the database. +** This function does the work for the xUpdate method of FTS3 virtual +** tables. The schema of the virtual table being: ** -** If this function is called with iLevel<0, but there is only one -** segment in the database, SQLITE_DONE is returned immediately. -** Otherwise, if successful, SQLITE_OK is returned. If an error occurs, -** an SQLite error code is returned. +** CREATE TABLE
    ( +** , +**
    HIDDEN, +** docid HIDDEN, +** HIDDEN +** ); +** +** */ -static int fts3SegmentMerge( - Fts3Table *p, - int iLangid, /* Language id to merge */ - int iIndex, /* Index in p->aIndex[] to merge */ - int iLevel /* Level to merge */ +SQLITE_PRIVATE int sqlite3Fts3UpdateMethod( + sqlite3_vtab *pVtab, /* FTS3 vtab object */ + int nArg, /* Size of argument array */ + sqlite3_value **apVal, /* Array of arguments */ + sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ ){ - int rc; /* Return code */ - int iIdx = 0; /* Index of new segment */ - sqlite3_int64 iNewLevel = 0; /* Level/index to create new segment at */ - SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */ - Fts3SegFilter filter; /* Segment term filter condition */ - Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */ - int bIgnoreEmpty = 0; /* True to ignore empty segments */ - i64 iMaxLevel = 0; /* Max level number for this index/langid */ + Fts3Table *p = (Fts3Table *)pVtab; + int rc = SQLITE_OK; /* Return Code */ + int isRemove = 0; /* True for an UPDATE or DELETE */ + u32 *aSzIns = 0; /* Sizes of inserted documents */ + u32 *aSzDel = 0; /* Sizes of deleted documents */ + int nChng = 0; /* Net change in number of documents */ + int bInsertDone = 0; - assert( iLevel==FTS3_SEGCURSOR_ALL - || iLevel==FTS3_SEGCURSOR_PENDING - || iLevel>=0 - ); - assert( iLevel=0 && iIndexnIndex ); + /* At this point it must be known if the %_stat table exists or not. + ** So bHasStat may not be 2. */ + assert( p->bHasStat==0 || p->bHasStat==1 ); - rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr); - if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; + assert( p->pSegments==0 ); + assert( + nArg==1 /* DELETE operations */ + || nArg==(2 + p->nColumn + 3) /* INSERT or UPDATE operations */ + ); - if( iLevel!=FTS3_SEGCURSOR_PENDING ){ - rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel); - if( rc!=SQLITE_OK ) goto finished; + /* Check for a "special" INSERT operation. One of the form: + ** + ** INSERT INTO xyz(xyz) VALUES('command'); + */ + if( nArg>1 + && sqlite3_value_type(apVal[0])==SQLITE_NULL + && sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL + ){ + rc = fts3SpecialInsert(p, apVal[p->nColumn+2]); + goto update_out; } - if( iLevel==FTS3_SEGCURSOR_ALL ){ - /* This call is to merge all segments in the database to a single - ** segment. The level of the new segment is equal to the numerically - ** greatest segment level currently present in the database for this - ** index. The idx of the new segment is always 0. */ - if( csr.nSegment==1 ){ - rc = SQLITE_DONE; - goto finished; - } - iNewLevel = iMaxLevel; - bIgnoreEmpty = 1; + if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){ + rc = SQLITE_CONSTRAINT; + goto update_out; + } - }else{ - /* This call is to merge all segments at level iLevel. find the next - ** available segment index at level iLevel+1. The call to - ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to - ** a single iLevel+2 segment if necessary. */ - assert( FTS3_SEGCURSOR_PENDING==-1 ); - iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1); - rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx); - bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel); + /* Allocate space to hold the change in document sizes */ + aSzDel = sqlite3_malloc( sizeof(aSzDel[0])*(p->nColumn+1)*2 ); + if( aSzDel==0 ){ + rc = SQLITE_NOMEM; + goto update_out; } - if( rc!=SQLITE_OK ) goto finished; + aSzIns = &aSzDel[p->nColumn+1]; + memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2); - assert( csr.nSegment>0 ); - assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) ); - assert( iNewLevel1 && p->zContentTbl==0 ){ + /* Find the value object that holds the new rowid value. */ + sqlite3_value *pNewRowid = apVal[3+p->nColumn]; + if( sqlite3_value_type(pNewRowid)==SQLITE_NULL ){ + pNewRowid = apVal[1]; + } - rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); - while( SQLITE_OK==rc ){ - rc = sqlite3Fts3SegReaderStep(p, &csr); - if( rc!=SQLITE_ROW ) break; - rc = fts3SegWriterAdd(p, &pWriter, 1, - csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist); + if( sqlite3_value_type(pNewRowid)!=SQLITE_NULL && ( + sqlite3_value_type(apVal[0])==SQLITE_NULL + || sqlite3_value_int64(apVal[0])!=sqlite3_value_int64(pNewRowid) + )){ + /* The new rowid is not NULL (in this case the rowid will be + ** automatically assigned and there is no chance of a conflict), and + ** the statement is either an INSERT or an UPDATE that modifies the + ** rowid column. So if the conflict mode is REPLACE, then delete any + ** existing row with rowid=pNewRowid. + ** + ** Or, if the conflict mode is not REPLACE, insert the new record into + ** the %_content table. If we hit the duplicate rowid constraint (or any + ** other error) while doing so, return immediately. + ** + ** This branch may also run if pNewRowid contains a value that cannot + ** be losslessly converted to an integer. In this case, the eventual + ** call to fts3InsertData() (either just below or further on in this + ** function) will return SQLITE_MISMATCH. If fts3DeleteByRowid is + ** invoked, it will delete zero rows (since no row will have + ** docid=$pNewRowid if $pNewRowid is not an integer value). + */ + if( sqlite3_vtab_on_conflict(p->db)==SQLITE_REPLACE ){ + rc = fts3DeleteByRowid(p, pNewRowid, &nChng, aSzDel); + }else{ + rc = fts3InsertData(p, apVal, pRowid); + bInsertDone = 1; + } + } + } + if( rc!=SQLITE_OK ){ + goto update_out; } - if( rc!=SQLITE_OK ) goto finished; - assert( pWriter || bIgnoreEmpty ); - if( iLevel!=FTS3_SEGCURSOR_PENDING ){ - rc = fts3DeleteSegdir( - p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment - ); - if( rc!=SQLITE_OK ) goto finished; + /* If this is a DELETE or UPDATE operation, remove the old record. */ + if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ + assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER ); + rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel); + isRemove = 1; } - if( pWriter ){ - rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx); - if( rc==SQLITE_OK ){ - if( iLevel==FTS3_SEGCURSOR_PENDING || iNewLevelnLeafData); + + /* If this is an INSERT or UPDATE operation, insert the new record. */ + if( nArg>1 && rc==SQLITE_OK ){ + int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]); + if( bInsertDone==0 ){ + rc = fts3InsertData(p, apVal, pRowid); + if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){ + rc = FTS_CORRUPT_VTAB; } } + if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){ + rc = fts3PendingTermsDocid(p, iLangid, *pRowid); + } + if( rc==SQLITE_OK ){ + assert( p->iPrevDocid==*pRowid ); + rc = fts3InsertTerms(p, iLangid, apVal, aSzIns); + } + if( p->bHasDocsize ){ + fts3InsertDocsize(&rc, p, aSzIns); + } + nChng++; } - finished: - fts3SegWriterFree(pWriter); - sqlite3Fts3SegReaderFinish(&csr); + if( p->bFts4 ){ + fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng); + } + + update_out: + sqlite3_free(aSzDel); + sqlite3Fts3SegmentsClose(p); return rc; } - /* -** Flush the contents of pendingTerms to level 0 segments. +** Flush any data in the pending-terms hash table to disk. If successful, +** merge all segments in the database (including the new segment, if +** there was any data to flush) into a single segment. */ -SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ - int rc = SQLITE_OK; - int i; - - for(i=0; rc==SQLITE_OK && inIndex; i++){ - rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING); - if( rc==SQLITE_DONE ) rc = SQLITE_OK; - } - sqlite3Fts3PendingTermsClear(p); - - /* Determine the auto-incr-merge setting if unknown. If enabled, - ** estimate the number of leaf blocks of content to be written - */ - if( rc==SQLITE_OK && p->bHasStat - && p->nAutoincrmerge==0xff && p->nLeafAdd>0 - ){ - sqlite3_stmt *pStmt = 0; - rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); - rc = sqlite3_step(pStmt); - if( rc==SQLITE_ROW ){ - p->nAutoincrmerge = sqlite3_column_int(pStmt, 0); - if( p->nAutoincrmerge==1 ) p->nAutoincrmerge = 8; - }else if( rc==SQLITE_DONE ){ - p->nAutoincrmerge = 0; - } - rc = sqlite3_reset(pStmt); +SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *p){ + int rc; + rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0); + if( rc==SQLITE_OK ){ + rc = fts3DoOptimize(p, 1); + if( rc==SQLITE_OK || rc==SQLITE_DONE ){ + int rc2 = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); + if( rc2!=SQLITE_OK ) rc = rc2; + }else{ + sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0); + sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); } } + sqlite3Fts3SegmentsClose(p); return rc; } +#endif + +/************** End of fts3_write.c ******************************************/ +/************** Begin file fts3_snippet.c ************************************/ /* -** Encode N integers as varints into a blob. +** 2009 Oct 23 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** */ -static void fts3EncodeIntArray( - int N, /* The number of integers to encode */ - u32 *a, /* The integer values */ - char *zBuf, /* Write the BLOB here */ - int *pNBuf /* Write number of bytes if zBuf[] used here */ -){ - int i, j; - for(i=j=0; i */ +/* #include */ /* -** Decode a blob of varints into N integers +** Characters that may appear in the second argument to matchinfo(). */ -static void fts3DecodeIntArray( - int N, /* The number of integers to decode */ - u32 *a, /* Write the integer values */ - const char *zBuf, /* The BLOB containing the varints */ - int nBuf /* size of the BLOB */ -){ - int i, j; - UNUSED_PARAMETER(nBuf); - for(i=j=0; iiPrevDocid. The sizes are encoded as -** a blob of varints. +** The default value for the second argument to matchinfo(). */ -static void fts3InsertDocsize( - int *pRC, /* Result code */ - Fts3Table *p, /* Table into which to insert */ - u32 *aSz /* Sizes of each column, in tokens */ -){ - char *pBlob; /* The BLOB encoding of the document size */ - int nBlob; /* Number of bytes in the BLOB */ - sqlite3_stmt *pStmt; /* Statement used to insert the encoding */ - int rc; /* Result code from subfunctions */ +#define FTS3_MATCHINFO_DEFAULT "pcx" - if( *pRC ) return; - pBlob = sqlite3_malloc( 10*p->nColumn ); - if( pBlob==0 ){ - *pRC = SQLITE_NOMEM; - return; - } - fts3EncodeIntArray(p->nColumn, aSz, pBlob, &nBlob); - rc = fts3SqlStmt(p, SQL_REPLACE_DOCSIZE, &pStmt, 0); - if( rc ){ - sqlite3_free(pBlob); - *pRC = rc; - return; - } - sqlite3_bind_int64(pStmt, 1, p->iPrevDocid); - sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free); - sqlite3_step(pStmt); - *pRC = sqlite3_reset(pStmt); -} /* -** Record 0 of the %_stat table contains a blob consisting of N varints, -** where N is the number of user defined columns in the fts3 table plus -** two. If nCol is the number of user defined columns, then values of the -** varints are set as follows: -** -** Varint 0: Total number of rows in the table. -** -** Varint 1..nCol: For each column, the total number of tokens stored in -** the column for all rows of the table. -** -** Varint 1+nCol: The total size, in bytes, of all text values in all -** columns of all rows of the table. -** +** Used as an fts3ExprIterate() context when loading phrase doclists to +** Fts3Expr.aDoclist[]/nDoclist. */ -static void fts3UpdateDocTotals( - int *pRC, /* The result code */ - Fts3Table *p, /* Table being updated */ - u32 *aSzIns, /* Size increases */ - u32 *aSzDel, /* Size decreases */ - int nChng /* Change in the number of documents */ -){ - char *pBlob; /* Storage for BLOB written into %_stat */ - int nBlob; /* Size of BLOB written into %_stat */ - u32 *a; /* Array of integers that becomes the BLOB */ - sqlite3_stmt *pStmt; /* Statement for reading and writing */ - int i; /* Loop counter */ - int rc; /* Result code from subfunctions */ +typedef struct LoadDoclistCtx LoadDoclistCtx; +struct LoadDoclistCtx { + Fts3Cursor *pCsr; /* FTS3 Cursor */ + int nPhrase; /* Number of phrases seen so far */ + int nToken; /* Number of tokens seen so far */ +}; - const int nStat = p->nColumn+2; +/* +** The following types are used as part of the implementation of the +** fts3BestSnippet() routine. +*/ +typedef struct SnippetIter SnippetIter; +typedef struct SnippetPhrase SnippetPhrase; +typedef struct SnippetFragment SnippetFragment; - if( *pRC ) return; - a = sqlite3_malloc( (sizeof(u32)+10)*nStat ); - if( a==0 ){ - *pRC = SQLITE_NOMEM; - return; - } - pBlob = (char*)&a[nStat]; - rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0); - if( rc ){ - sqlite3_free(a); - *pRC = rc; - return; +struct SnippetIter { + Fts3Cursor *pCsr; /* Cursor snippet is being generated from */ + int iCol; /* Extract snippet from this column */ + int nSnippet; /* Requested snippet length (in tokens) */ + int nPhrase; /* Number of phrases in query */ + SnippetPhrase *aPhrase; /* Array of size nPhrase */ + int iCurrent; /* First token of current snippet */ +}; + +struct SnippetPhrase { + int nToken; /* Number of tokens in phrase */ + char *pList; /* Pointer to start of phrase position list */ + int iHead; /* Next value in position list */ + char *pHead; /* Position list data following iHead */ + int iTail; /* Next value in trailing position list */ + char *pTail; /* Position list data following iTail */ +}; + +struct SnippetFragment { + int iCol; /* Column snippet is extracted from */ + int iPos; /* Index of first token in snippet */ + u64 covered; /* Mask of query phrases covered */ + u64 hlmask; /* Mask of snippet terms to highlight */ +}; + +/* +** This type is used as an fts3ExprIterate() context object while +** accumulating the data returned by the matchinfo() function. +*/ +typedef struct MatchInfo MatchInfo; +struct MatchInfo { + Fts3Cursor *pCursor; /* FTS3 Cursor */ + int nCol; /* Number of columns in table */ + int nPhrase; /* Number of matchable phrases in query */ + sqlite3_int64 nDoc; /* Number of docs in database */ + char flag; + u32 *aMatchinfo; /* Pre-allocated buffer */ +}; + +/* +** An instance of this structure is used to manage a pair of buffers, each +** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below +** for details. +*/ +struct MatchinfoBuffer { + u8 aRef[3]; + int nElem; + int bGlobal; /* Set if global data is loaded */ + char *zMatchinfo; + u32 aMatchinfo[1]; +}; + + +/* +** The snippet() and offsets() functions both return text values. An instance +** of the following structure is used to accumulate those values while the +** functions are running. See fts3StringAppend() for details. +*/ +typedef struct StrBuffer StrBuffer; +struct StrBuffer { + char *z; /* Pointer to buffer containing string */ + int n; /* Length of z in bytes (excl. nul-term) */ + int nAlloc; /* Allocated size of buffer z in bytes */ +}; + + +/************************************************************************* +** Start of MatchinfoBuffer code. +*/ + +/* +** Allocate a two-slot MatchinfoBuffer object. +*/ +static MatchinfoBuffer *fts3MIBufferNew(int nElem, const char *zMatchinfo){ + MatchinfoBuffer *pRet; + int nByte = sizeof(u32) * (2*nElem + 1) + sizeof(MatchinfoBuffer); + int nStr = (int)strlen(zMatchinfo); + + pRet = sqlite3_malloc(nByte + nStr+1); + if( pRet ){ + memset(pRet, 0, nByte); + pRet->aMatchinfo[0] = (u8*)(&pRet->aMatchinfo[1]) - (u8*)pRet; + pRet->aMatchinfo[1+nElem] = pRet->aMatchinfo[0] + sizeof(u32)*(nElem+1); + pRet->nElem = nElem; + pRet->zMatchinfo = ((char*)pRet) + nByte; + memcpy(pRet->zMatchinfo, zMatchinfo, nStr+1); + pRet->aRef[0] = 1; } - sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); - if( sqlite3_step(pStmt)==SQLITE_ROW ){ - fts3DecodeIntArray(nStat, a, - sqlite3_column_blob(pStmt, 0), - sqlite3_column_bytes(pStmt, 0)); + + return pRet; +} + +static void fts3MIBufferFree(void *p){ + MatchinfoBuffer *pBuf = (MatchinfoBuffer*)((u8*)p - ((u32*)p)[-1]); + + assert( (u32*)p==&pBuf->aMatchinfo[1] + || (u32*)p==&pBuf->aMatchinfo[pBuf->nElem+2] + ); + if( (u32*)p==&pBuf->aMatchinfo[1] ){ + pBuf->aRef[1] = 0; }else{ - memset(a, 0, sizeof(u32)*(nStat) ); + pBuf->aRef[2] = 0; } - rc = sqlite3_reset(pStmt); - if( rc!=SQLITE_OK ){ - sqlite3_free(a); - *pRC = rc; - return; + + if( pBuf->aRef[0]==0 && pBuf->aRef[1]==0 && pBuf->aRef[2]==0 ){ + sqlite3_free(pBuf); } - if( nChng<0 && a[0]<(u32)(-nChng) ){ - a[0] = 0; - }else{ - a[0] += nChng; +} + +static void (*fts3MIBufferAlloc(MatchinfoBuffer *p, u32 **paOut))(void*){ + void (*xRet)(void*) = 0; + u32 *aOut = 0; + + if( p->aRef[1]==0 ){ + p->aRef[1] = 1; + aOut = &p->aMatchinfo[1]; + xRet = fts3MIBufferFree; } - for(i=0; inColumn+1; i++){ - u32 x = a[i+1]; - if( x+aSzIns[i] < aSzDel[i] ){ - x = 0; - }else{ - x = x + aSzIns[i] - aSzDel[i]; + else if( p->aRef[2]==0 ){ + p->aRef[2] = 1; + aOut = &p->aMatchinfo[p->nElem+2]; + xRet = fts3MIBufferFree; + }else{ + aOut = (u32*)sqlite3_malloc(p->nElem * sizeof(u32)); + if( aOut ){ + xRet = sqlite3_free; + if( p->bGlobal ) memcpy(aOut, &p->aMatchinfo[1], p->nElem*sizeof(u32)); } - a[i+1] = x; - } - fts3EncodeIntArray(nStat, a, pBlob, &nBlob); - rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); - if( rc ){ - sqlite3_free(a); - *pRC = rc; - return; } - sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); - sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, SQLITE_STATIC); - sqlite3_step(pStmt); - *pRC = sqlite3_reset(pStmt); - sqlite3_free(a); + + *paOut = aOut; + return xRet; +} + +static void fts3MIBufferSetGlobal(MatchinfoBuffer *p){ + p->bGlobal = 1; + memcpy(&p->aMatchinfo[2+p->nElem], &p->aMatchinfo[1], p->nElem*sizeof(u32)); } /* -** Merge the entire database so that there is one segment for each -** iIndex/iLangid combination. +** Free a MatchinfoBuffer object allocated using fts3MIBufferNew() */ -static int fts3DoOptimize(Fts3Table *p, int bReturnDone){ - int bSeenDone = 0; - int rc; - sqlite3_stmt *pAllLangid = 0; - - rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); - if( rc==SQLITE_OK ){ - int rc2; - sqlite3_bind_int(pAllLangid, 1, p->nIndex); - while( sqlite3_step(pAllLangid)==SQLITE_ROW ){ - int i; - int iLangid = sqlite3_column_int(pAllLangid, 0); - for(i=0; rc==SQLITE_OK && inIndex; i++){ - rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL); - if( rc==SQLITE_DONE ){ - bSeenDone = 1; - rc = SQLITE_OK; - } - } +SQLITE_PRIVATE void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p){ + if( p ){ + assert( p->aRef[0]==1 ); + p->aRef[0] = 0; + if( p->aRef[0]==0 && p->aRef[1]==0 && p->aRef[2]==0 ){ + sqlite3_free(p); } - rc2 = sqlite3_reset(pAllLangid); - if( rc==SQLITE_OK ) rc = rc2; } +} - sqlite3Fts3SegmentsClose(p); - sqlite3Fts3PendingTermsClear(p); +/* +** End of MatchinfoBuffer code. +*************************************************************************/ - return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc; -} /* -** This function is called when the user executes the following statement: +** This function is used to help iterate through a position-list. A position +** list is a list of unique integers, sorted from smallest to largest. Each +** element of the list is represented by an FTS3 varint that takes the value +** of the difference between the current element and the previous one plus +** two. For example, to store the position-list: ** -** INSERT INTO () VALUES('rebuild'); +** 4 9 113 ** -** The entire FTS index is discarded and rebuilt. If the table is one -** created using the content=xxx option, then the new index is based on -** the current contents of the xxx table. Otherwise, it is rebuilt based -** on the contents of the %_content table. +** the three varints: +** +** 6 7 106 +** +** are encoded. +** +** When this function is called, *pp points to the start of an element of +** the list. *piPos contains the value of the previous entry in the list. +** After it returns, *piPos contains the value of the next element of the +** list and *pp is advanced to the following varint. */ -static int fts3DoRebuild(Fts3Table *p){ - int rc; /* Return Code */ - - rc = fts3DeleteAll(p, 0); - if( rc==SQLITE_OK ){ - u32 *aSz = 0; - u32 *aSzIns = 0; - u32 *aSzDel = 0; - sqlite3_stmt *pStmt = 0; - int nEntry = 0; - - /* Compose and prepare an SQL statement to loop through the content table */ - char *zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist); - if( !zSql ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); - sqlite3_free(zSql); - } - - if( rc==SQLITE_OK ){ - int nByte = sizeof(u32) * (p->nColumn+1)*3; - aSz = (u32 *)sqlite3_malloc(nByte); - if( aSz==0 ){ - rc = SQLITE_NOMEM; - }else{ - memset(aSz, 0, nByte); - aSzIns = &aSz[p->nColumn+1]; - aSzDel = &aSzIns[p->nColumn+1]; - } - } - - while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ - int iCol; - int iLangid = langidFromSelect(p, pStmt); - rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0)); - memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1)); - for(iCol=0; rc==SQLITE_OK && iColnColumn; iCol++){ - if( p->abNotindexed[iCol]==0 ){ - const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1); - rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]); - aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1); - } - } - if( p->bHasDocsize ){ - fts3InsertDocsize(&rc, p, aSz); - } - if( rc!=SQLITE_OK ){ - sqlite3_finalize(pStmt); - pStmt = 0; - }else{ - nEntry++; - for(iCol=0; iCol<=p->nColumn; iCol++){ - aSzIns[iCol] += aSz[iCol]; - } - } - } - if( p->bFts4 ){ - fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nEntry); - } - sqlite3_free(aSz); - - if( pStmt ){ - int rc2 = sqlite3_finalize(pStmt); - if( rc==SQLITE_OK ){ - rc = rc2; - } - } - } - - return rc; +static void fts3GetDeltaPosition(char **pp, int *piPos){ + int iVal; + *pp += fts3GetVarint32(*pp, &iVal); + *piPos += (iVal-2); } - /* -** This function opens a cursor used to read the input data for an -** incremental merge operation. Specifically, it opens a cursor to scan -** the oldest nSeg segments (idx=0 through idx=(nSeg-1)) in absolute -** level iAbsLevel. +** Helper function for fts3ExprIterate() (see below). */ -static int fts3IncrmergeCsr( - Fts3Table *p, /* FTS3 table handle */ - sqlite3_int64 iAbsLevel, /* Absolute level to open */ - int nSeg, /* Number of segments to merge */ - Fts3MultiSegReader *pCsr /* Cursor object to populate */ +static int fts3ExprIterate2( + Fts3Expr *pExpr, /* Expression to iterate phrases of */ + int *piPhrase, /* Pointer to phrase counter */ + int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ + void *pCtx /* Second argument to pass to callback */ ){ - int rc; /* Return Code */ - sqlite3_stmt *pStmt = 0; /* Statement used to read %_segdir entry */ - int nByte; /* Bytes allocated at pCsr->apSegment[] */ - - /* Allocate space for the Fts3MultiSegReader.aCsr[] array */ - memset(pCsr, 0, sizeof(*pCsr)); - nByte = sizeof(Fts3SegReader *) * nSeg; - pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte); + int rc; /* Return code */ + int eType = pExpr->eType; /* Type of expression node pExpr */ - if( pCsr->apSegment==0 ){ - rc = SQLITE_NOMEM; - }else{ - memset(pCsr->apSegment, 0, nByte); - rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); - } - if( rc==SQLITE_OK ){ - int i; - int rc2; - sqlite3_bind_int64(pStmt, 1, iAbsLevel); - assert( pCsr->nSegment==0 ); - for(i=0; rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW && iapSegment[i] - ); - pCsr->nSegment++; + if( eType!=FTSQUERY_PHRASE ){ + assert( pExpr->pLeft && pExpr->pRight ); + rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx); + if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ + rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx); } - rc2 = sqlite3_reset(pStmt); - if( rc==SQLITE_OK ) rc = rc2; + }else{ + rc = x(pExpr, *piPhrase, pCtx); + (*piPhrase)++; } - return rc; } -typedef struct IncrmergeWriter IncrmergeWriter; -typedef struct NodeWriter NodeWriter; -typedef struct Blob Blob; -typedef struct NodeReader NodeReader; - /* -** An instance of the following structure is used as a dynamic buffer -** to build up nodes or other blobs of data in. +** Iterate through all phrase nodes in an FTS3 query, except those that +** are part of a sub-tree that is the right-hand-side of a NOT operator. +** For each phrase node found, the supplied callback function is invoked. ** -** The function blobGrowBuffer() is used to extend the allocation. +** If the callback function returns anything other than SQLITE_OK, +** the iteration is abandoned and the error code returned immediately. +** Otherwise, SQLITE_OK is returned after a callback has been made for +** all eligible phrase nodes. */ -struct Blob { - char *a; /* Pointer to allocation */ - int n; /* Number of valid bytes of data in a[] */ - int nAlloc; /* Allocated size of a[] (nAlloc>=n) */ -}; +static int fts3ExprIterate( + Fts3Expr *pExpr, /* Expression to iterate phrases of */ + int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ + void *pCtx /* Second argument to pass to callback */ +){ + int iPhrase = 0; /* Variable used as the phrase counter */ + return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); +} -/* -** This structure is used to build up buffers containing segment b-tree -** nodes (blocks). -*/ -struct NodeWriter { - sqlite3_int64 iBlock; /* Current block id */ - Blob key; /* Last key written to the current block */ - Blob block; /* Current block image */ -}; /* -** An object of this type contains the state required to create or append -** to an appendable b-tree segment. +** This is an fts3ExprIterate() callback used while loading the doclists +** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also +** fts3ExprLoadDoclists(). */ -struct IncrmergeWriter { - int nLeafEst; /* Space allocated for leaf blocks */ - int nWork; /* Number of leaf pages flushed */ - sqlite3_int64 iAbsLevel; /* Absolute level of input segments */ - int iIdx; /* Index of *output* segment in iAbsLevel+1 */ - sqlite3_int64 iStart; /* Block number of first allocated block */ - sqlite3_int64 iEnd; /* Block number of last allocated block */ - sqlite3_int64 nLeafData; /* Bytes of leaf page data so far */ - u8 bNoLeafData; /* If true, store 0 for segment size */ - NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT]; -}; +static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ + int rc = SQLITE_OK; + Fts3Phrase *pPhrase = pExpr->pPhrase; + LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; + + UNUSED_PARAMETER(iPhrase); + + p->nPhrase++; + p->nToken += pPhrase->nToken; + + return rc; +} /* -** An object of the following type is used to read data from a single -** FTS segment node. See the following functions: +** Load the doclists for each phrase in the query associated with FTS3 cursor +** pCsr. ** -** nodeReaderInit() -** nodeReaderNext() -** nodeReaderRelease() +** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable +** phrases in the expression (all phrases except those directly or +** indirectly descended from the right-hand-side of a NOT operator). If +** pnToken is not NULL, then it is set to the number of tokens in all +** matchable phrases of the expression. */ -struct NodeReader { - const char *aNode; - int nNode; - int iOff; /* Current offset within aNode[] */ +static int fts3ExprLoadDoclists( + Fts3Cursor *pCsr, /* Fts3 cursor for current query */ + int *pnPhrase, /* OUT: Number of phrases in query */ + int *pnToken /* OUT: Number of tokens in query */ +){ + int rc; /* Return Code */ + LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ + sCtx.pCsr = pCsr; + rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); + if( pnPhrase ) *pnPhrase = sCtx.nPhrase; + if( pnToken ) *pnToken = sCtx.nToken; + return rc; +} - /* Output variables. Containing the current node entry. */ - sqlite3_int64 iChild; /* Pointer to child node */ - Blob term; /* Current term */ - const char *aDoclist; /* Pointer to doclist */ - int nDoclist; /* Size of doclist in bytes */ -}; +static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ + (*(int *)ctx)++; + pExpr->iPhrase = iPhrase; + return SQLITE_OK; +} +static int fts3ExprPhraseCount(Fts3Expr *pExpr){ + int nPhrase = 0; + (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); + return nPhrase; +} /* -** If *pRc is not SQLITE_OK when this function is called, it is a no-op. -** Otherwise, if the allocation at pBlob->a is not already at least nMin -** bytes in size, extend (realloc) it to be so. -** -** If an OOM error occurs, set *pRc to SQLITE_NOMEM and leave pBlob->a -** unmodified. Otherwise, if the allocation succeeds, update pBlob->nAlloc -** to reflect the new size of the pBlob->a[] buffer. +** Advance the position list iterator specified by the first two +** arguments so that it points to the first element with a value greater +** than or equal to parameter iNext. */ -static void blobGrowBuffer(Blob *pBlob, int nMin, int *pRc){ - if( *pRc==SQLITE_OK && nMin>pBlob->nAlloc ){ - int nAlloc = nMin; - char *a = (char *)sqlite3_realloc(pBlob->a, nAlloc); - if( a ){ - pBlob->nAlloc = nAlloc; - pBlob->a = a; - }else{ - *pRc = SQLITE_NOMEM; +static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){ + char *pIter = *ppIter; + if( pIter ){ + int iIter = *piIter; + + while( iIteraNode to -** NULL to indicate EOF. Otherwise, populate the NodeReader structure output -** variables for the new entry. +** Advance the snippet iterator to the next candidate snippet. */ -static int nodeReaderNext(NodeReader *p){ - int bFirst = (p->term.n==0); /* True for first term on the node */ - int nPrefix = 0; /* Bytes to copy from previous term */ - int nSuffix = 0; /* Bytes to append to the prefix */ - int rc = SQLITE_OK; /* Return code */ +static int fts3SnippetNextCandidate(SnippetIter *pIter){ + int i; /* Loop counter */ - assert( p->aNode ); - if( p->iChild && bFirst==0 ) p->iChild++; - if( p->iOff>=p->nNode ){ - /* EOF */ - p->aNode = 0; - }else{ - if( bFirst==0 ){ - p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nPrefix); + if( pIter->iCurrent<0 ){ + /* The SnippetIter object has just been initialized. The first snippet + ** candidate always starts at offset 0 (even if this candidate has a + ** score of 0.0). + */ + pIter->iCurrent = 0; + + /* Advance the 'head' iterator of each phrase to the first offset that + ** is greater than or equal to (iNext+nSnippet). + */ + for(i=0; inPhrase; i++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[i]; + fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet); } - p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nSuffix); + }else{ + int iStart; + int iEnd = 0x7FFFFFFF; - blobGrowBuffer(&p->term, nPrefix+nSuffix, &rc); - if( rc==SQLITE_OK ){ - memcpy(&p->term.a[nPrefix], &p->aNode[p->iOff], nSuffix); - p->term.n = nPrefix+nSuffix; - p->iOff += nSuffix; - if( p->iChild==0 ){ - p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist); - p->aDoclist = &p->aNode[p->iOff]; - p->iOff += p->nDoclist; + for(i=0; inPhrase; i++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[i]; + if( pPhrase->pHead && pPhrase->iHeadiHead; } } - } + if( iEnd==0x7FFFFFFF ){ + return 1; + } - assert( p->iOff<=p->nNode ); + pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1; + for(i=0; inPhrase; i++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[i]; + fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1); + fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart); + } + } - return rc; + return 0; } /* -** Release all dynamic resources held by node-reader object *p. +** Retrieve information about the current candidate snippet of snippet +** iterator pIter. */ -static void nodeReaderRelease(NodeReader *p){ - sqlite3_free(p->term.a); +static void fts3SnippetDetails( + SnippetIter *pIter, /* Snippet iterator */ + u64 mCovered, /* Bitmask of phrases already covered */ + int *piToken, /* OUT: First token of proposed snippet */ + int *piScore, /* OUT: "Score" for this snippet */ + u64 *pmCover, /* OUT: Bitmask of phrases covered */ + u64 *pmHighlight /* OUT: Bitmask of terms to highlight */ +){ + int iStart = pIter->iCurrent; /* First token of snippet */ + int iScore = 0; /* Score of this snippet */ + int i; /* Loop counter */ + u64 mCover = 0; /* Mask of phrases covered by this snippet */ + u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */ + + for(i=0; inPhrase; i++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[i]; + if( pPhrase->pTail ){ + char *pCsr = pPhrase->pTail; + int iCsr = pPhrase->iTail; + + while( iCsr<(iStart+pIter->nSnippet) ){ + int j; + u64 mPhrase = (u64)1 << i; + u64 mPos = (u64)1 << (iCsr - iStart); + assert( iCsr>=iStart ); + if( (mCover|mCovered)&mPhrase ){ + iScore++; + }else{ + iScore += 1000; + } + mCover |= mPhrase; + + for(j=0; jnToken; j++){ + mHighlight |= (mPos>>j); + } + + if( 0==(*pCsr & 0x0FE) ) break; + fts3GetDeltaPosition(&pCsr, &iCsr); + } + } + } + + /* Set the output variables before returning. */ + *piToken = iStart; + *piScore = iScore; + *pmCover = mCover; + *pmHighlight = mHighlight; } /* -** Initialize a node-reader object to read the node in buffer aNode/nNode. -** -** If successful, SQLITE_OK is returned and the NodeReader object set to -** point to the first entry on the node (if any). Otherwise, an SQLite -** error code is returned. +** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). +** Each invocation populates an element of the SnippetIter.aPhrase[] array. */ -static int nodeReaderInit(NodeReader *p, const char *aNode, int nNode){ - memset(p, 0, sizeof(NodeReader)); - p->aNode = aNode; - p->nNode = nNode; +static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ + SnippetIter *p = (SnippetIter *)ctx; + SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; + char *pCsr; + int rc; - /* Figure out if this is a leaf or an internal node. */ - if( p->aNode[0] ){ - /* An internal node. */ - p->iOff = 1 + sqlite3Fts3GetVarint(&p->aNode[1], &p->iChild); + pPhrase->nToken = pExpr->pPhrase->nToken; + rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr); + assert( rc==SQLITE_OK || pCsr==0 ); + if( pCsr ){ + int iFirst = 0; + pPhrase->pList = pCsr; + fts3GetDeltaPosition(&pCsr, &iFirst); + assert( iFirst>=0 ); + pPhrase->pHead = pCsr; + pPhrase->pTail = pCsr; + pPhrase->iHead = iFirst; + pPhrase->iTail = iFirst; }else{ - p->iOff = 1; + assert( rc!=SQLITE_OK || ( + pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 + )); } - return nodeReaderNext(p); + return rc; } /* -** This function is called while writing an FTS segment each time a leaf o -** node is finished and written to disk. The key (zTerm/nTerm) is guaranteed -** to be greater than the largest key on the node just written, but smaller -** than or equal to the first key that will be written to the next leaf -** node. +** Select the fragment of text consisting of nFragment contiguous tokens +** from column iCol that represent the "best" snippet. The best snippet +** is the snippet with the highest score, where scores are calculated +** by adding: ** -** The block id of the leaf node just written to disk may be found in -** (pWriter->aNodeWriter[0].iBlock) when this function is called. +** (a) +1 point for each occurrence of a matchable phrase in the snippet. +** +** (b) +1000 points for the first occurrence of each matchable phrase in +** the snippet for which the corresponding mCovered bit is not set. +** +** The selected snippet parameters are stored in structure *pFragment before +** returning. The score of the selected snippet is stored in *piScore +** before returning. */ -static int fts3IncrmergePush( - Fts3Table *p, /* Fts3 table handle */ - IncrmergeWriter *pWriter, /* Writer object */ - const char *zTerm, /* Term to write to internal node */ - int nTerm /* Bytes at zTerm */ +static int fts3BestSnippet( + int nSnippet, /* Desired snippet length */ + Fts3Cursor *pCsr, /* Cursor to create snippet for */ + int iCol, /* Index of column to create snippet from */ + u64 mCovered, /* Mask of phrases already covered */ + u64 *pmSeen, /* IN/OUT: Mask of phrases seen */ + SnippetFragment *pFragment, /* OUT: Best snippet found */ + int *piScore /* OUT: Score of snippet pFragment */ ){ - sqlite3_int64 iPtr = pWriter->aNodeWriter[0].iBlock; - int iLayer; - - assert( nTerm>0 ); - for(iLayer=1; ALWAYS(iLayeraNodeWriter[iLayer]; - int rc = SQLITE_OK; - int nPrefix; - int nSuffix; - int nSpace; + int rc; /* Return Code */ + int nList; /* Number of phrases in expression */ + SnippetIter sIter; /* Iterates through snippet candidates */ + int nByte; /* Number of bytes of space to allocate */ + int iBestScore = -1; /* Best snippet score found so far */ + int i; /* Loop counter */ - /* Figure out how much space the key will consume if it is written to - ** the current node of layer iLayer. Due to the prefix compression, - ** the space required changes depending on which node the key is to - ** be added to. */ - nPrefix = fts3PrefixCompress(pNode->key.a, pNode->key.n, zTerm, nTerm); - nSuffix = nTerm - nPrefix; - nSpace = sqlite3Fts3VarintLen(nPrefix); - nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; + memset(&sIter, 0, sizeof(sIter)); - if( pNode->key.n==0 || (pNode->block.n + nSpace)<=p->nNodeSize ){ - /* If the current node of layer iLayer contains zero keys, or if adding - ** the key to it will not cause it to grow to larger than nNodeSize - ** bytes in size, write the key here. */ + /* Iterate through the phrases in the expression to count them. The same + ** callback makes sure the doclists are loaded for each phrase. + */ + rc = fts3ExprLoadDoclists(pCsr, &nList, 0); + if( rc!=SQLITE_OK ){ + return rc; + } - Blob *pBlk = &pNode->block; - if( pBlk->n==0 ){ - blobGrowBuffer(pBlk, p->nNodeSize, &rc); - if( rc==SQLITE_OK ){ - pBlk->a[0] = (char)iLayer; - pBlk->n = 1 + sqlite3Fts3PutVarint(&pBlk->a[1], iPtr); - } - } - blobGrowBuffer(pBlk, pBlk->n + nSpace, &rc); - blobGrowBuffer(&pNode->key, nTerm, &rc); + /* Now that it is known how many phrases there are, allocate and zero + ** the required space using malloc(). + */ + nByte = sizeof(SnippetPhrase) * nList; + sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte); + if( !sIter.aPhrase ){ + return SQLITE_NOMEM; + } + memset(sIter.aPhrase, 0, nByte); - if( rc==SQLITE_OK ){ - if( pNode->key.n ){ - pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nPrefix); - } - pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nSuffix); - memcpy(&pBlk->a[pBlk->n], &zTerm[nPrefix], nSuffix); - pBlk->n += nSuffix; + /* Initialize the contents of the SnippetIter object. Then iterate through + ** the set of phrases in the expression to populate the aPhrase[] array. + */ + sIter.pCsr = pCsr; + sIter.iCol = iCol; + sIter.nSnippet = nSnippet; + sIter.nPhrase = nList; + sIter.iCurrent = -1; + rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter); + if( rc==SQLITE_OK ){ - memcpy(pNode->key.a, zTerm, nTerm); - pNode->key.n = nTerm; + /* Set the *pmSeen output variable. */ + for(i=0; iiBlock, pNode->block.a, pNode->block.n); - - assert( pNode->block.nAlloc>=p->nNodeSize ); - pNode->block.a[0] = (char)iLayer; - pNode->block.n = 1 + sqlite3Fts3PutVarint(&pNode->block.a[1], iPtr+1); + } - iNextPtr = pNode->iBlock; - pNode->iBlock++; - pNode->key.n = 0; + /* Loop through all candidate snippets. Store the best snippet in + ** *pFragment. Store its associated 'score' in iBestScore. + */ + pFragment->iCol = iCol; + while( !fts3SnippetNextCandidate(&sIter) ){ + int iPos; + int iScore; + u64 mCover; + u64 mHighlite; + fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover,&mHighlite); + assert( iScore>=0 ); + if( iScore>iBestScore ){ + pFragment->iPos = iPos; + pFragment->hlmask = mHighlite; + pFragment->covered = mCover; + iBestScore = iScore; + } } - if( rc!=SQLITE_OK || iNextPtr==0 ) return rc; - iPtr = iNextPtr; + *piScore = iBestScore; } - - assert( 0 ); - return 0; + sqlite3_free(sIter.aPhrase); + return rc; } + /* -** Append a term and (optionally) doclist to the FTS segment node currently -** stored in blob *pNode. The node need not contain any terms, but the -** header must be written before this function is called. -** -** A node header is a single 0x00 byte for a leaf node, or a height varint -** followed by the left-hand-child varint for an internal node. -** -** The term to be appended is passed via arguments zTerm/nTerm. For a -** leaf node, the doclist is passed as aDoclist/nDoclist. For an internal -** node, both aDoclist and nDoclist must be passed 0. -** -** If the size of the value in blob pPrev is zero, then this is the first -** term written to the node. Otherwise, pPrev contains a copy of the -** previous term. Before this function returns, it is updated to contain a -** copy of zTerm/nTerm. -** -** It is assumed that the buffer associated with pNode is already large -** enough to accommodate the new entry. The buffer associated with pPrev -** is extended by this function if requrired. +** Append a string to the string-buffer passed as the first argument. ** -** If an error (i.e. OOM condition) occurs, an SQLite error code is -** returned. Otherwise, SQLITE_OK. +** If nAppend is negative, then the length of the string zAppend is +** determined using strlen(). */ -static int fts3AppendToNode( - Blob *pNode, /* Current node image to append to */ - Blob *pPrev, /* Buffer containing previous term written */ - const char *zTerm, /* New term to write */ - int nTerm, /* Size of zTerm in bytes */ - const char *aDoclist, /* Doclist (or NULL) to write */ - int nDoclist /* Size of aDoclist in bytes */ +static int fts3StringAppend( + StrBuffer *pStr, /* Buffer to append to */ + const char *zAppend, /* Pointer to data to append to buffer */ + int nAppend /* Size of zAppend in bytes (or -1) */ ){ - int rc = SQLITE_OK; /* Return code */ - int bFirst = (pPrev->n==0); /* True if this is the first term written */ - int nPrefix; /* Size of term prefix in bytes */ - int nSuffix; /* Size of term suffix in bytes */ - - /* Node must have already been started. There must be a doclist for a - ** leaf node, and there must not be a doclist for an internal node. */ - assert( pNode->n>0 ); - assert( (pNode->a[0]=='\0')==(aDoclist!=0) ); - - blobGrowBuffer(pPrev, nTerm, &rc); - if( rc!=SQLITE_OK ) return rc; - - nPrefix = fts3PrefixCompress(pPrev->a, pPrev->n, zTerm, nTerm); - nSuffix = nTerm - nPrefix; - memcpy(pPrev->a, zTerm, nTerm); - pPrev->n = nTerm; - - if( bFirst==0 ){ - pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nPrefix); + if( nAppend<0 ){ + nAppend = (int)strlen(zAppend); } - pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nSuffix); - memcpy(&pNode->a[pNode->n], &zTerm[nPrefix], nSuffix); - pNode->n += nSuffix; - if( aDoclist ){ - pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nDoclist); - memcpy(&pNode->a[pNode->n], aDoclist, nDoclist); - pNode->n += nDoclist; + /* If there is insufficient space allocated at StrBuffer.z, use realloc() + ** to grow the buffer until so that it is big enough to accomadate the + ** appended data. + */ + if( pStr->n+nAppend+1>=pStr->nAlloc ){ + int nAlloc = pStr->nAlloc+nAppend+100; + char *zNew = sqlite3_realloc(pStr->z, nAlloc); + if( !zNew ){ + return SQLITE_NOMEM; + } + pStr->z = zNew; + pStr->nAlloc = nAlloc; } + assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) ); - assert( pNode->n<=pNode->nAlloc ); + /* Append the data to the string buffer. */ + memcpy(&pStr->z[pStr->n], zAppend, nAppend); + pStr->n += nAppend; + pStr->z[pStr->n] = '\0'; return SQLITE_OK; } /* -** Append the current term and doclist pointed to by cursor pCsr to the -** appendable b-tree segment opened for writing by pWriter. +** The fts3BestSnippet() function often selects snippets that end with a +** query term. That is, the final term of the snippet is always a term +** that requires highlighting. For example, if 'X' is a highlighted term +** and '.' is a non-highlighted term, BestSnippet() may select: ** -** Return SQLITE_OK if successful, or an SQLite error code otherwise. +** ........X.....X +** +** This function "shifts" the beginning of the snippet forward in the +** document so that there are approximately the same number of +** non-highlighted terms to the right of the final highlighted term as there +** are to the left of the first highlighted term. For example, to this: +** +** ....X.....X.... +** +** This is done as part of extracting the snippet text, not when selecting +** the snippet. Snippet selection is done based on doclists only, so there +** is no way for fts3BestSnippet() to know whether or not the document +** actually contains terms that follow the final highlighted term. */ -static int fts3IncrmergeAppend( - Fts3Table *p, /* Fts3 table handle */ - IncrmergeWriter *pWriter, /* Writer object */ - Fts3MultiSegReader *pCsr /* Cursor containing term and doclist */ +static int fts3SnippetShift( + Fts3Table *pTab, /* FTS3 table snippet comes from */ + int iLangid, /* Language id to use in tokenizing */ + int nSnippet, /* Number of tokens desired for snippet */ + const char *zDoc, /* Document text to extract snippet from */ + int nDoc, /* Size of buffer zDoc in bytes */ + int *piPos, /* IN/OUT: First token of snippet */ + u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ ){ - const char *zTerm = pCsr->zTerm; - int nTerm = pCsr->nTerm; - const char *aDoclist = pCsr->aDoclist; - int nDoclist = pCsr->nDoclist; - int rc = SQLITE_OK; /* Return code */ - int nSpace; /* Total space in bytes required on leaf */ - int nPrefix; /* Size of prefix shared with previous term */ - int nSuffix; /* Size of suffix (nTerm - nPrefix) */ - NodeWriter *pLeaf; /* Object used to write leaf nodes */ - - pLeaf = &pWriter->aNodeWriter[0]; - nPrefix = fts3PrefixCompress(pLeaf->key.a, pLeaf->key.n, zTerm, nTerm); - nSuffix = nTerm - nPrefix; + u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ - nSpace = sqlite3Fts3VarintLen(nPrefix); - nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; - nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; + if( hlmask ){ + int nLeft; /* Tokens to the left of first highlight */ + int nRight; /* Tokens to the right of last highlight */ + int nDesired; /* Ideal number of tokens to shift forward */ - /* If the current block is not empty, and if adding this term/doclist - ** to the current block would make it larger than Fts3Table.nNodeSize - ** bytes, write this block out to the database. */ - if( pLeaf->block.n>0 && (pLeaf->block.n + nSpace)>p->nNodeSize ){ - rc = fts3WriteSegment(p, pLeaf->iBlock, pLeaf->block.a, pLeaf->block.n); - pWriter->nWork++; + for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++); + for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++); + nDesired = (nLeft-nRight)/2; - /* Add the current term to the parent node. The term added to the - ** parent must: - ** - ** a) be greater than the largest term on the leaf node just written - ** to the database (still available in pLeaf->key), and - ** - ** b) be less than or equal to the term about to be added to the new - ** leaf node (zTerm/nTerm). - ** - ** In other words, it must be the prefix of zTerm 1 byte longer than - ** the common prefix (if any) of zTerm and pWriter->zTerm. + /* Ideally, the start of the snippet should be pushed forward in the + ** document nDesired tokens. This block checks if there are actually + ** nDesired tokens to the right of the snippet. If so, *piPos and + ** *pHlMask are updated to shift the snippet nDesired tokens to the + ** right. Otherwise, the snippet is shifted by the number of tokens + ** available. */ - if( rc==SQLITE_OK ){ - rc = fts3IncrmergePush(p, pWriter, zTerm, nPrefix+1); - } - - /* Advance to the next output block */ - pLeaf->iBlock++; - pLeaf->key.n = 0; - pLeaf->block.n = 0; + if( nDesired>0 ){ + int nShift; /* Number of tokens to shift snippet by */ + int iCurrent = 0; /* Token counter */ + int rc; /* Return Code */ + sqlite3_tokenizer_module *pMod; + sqlite3_tokenizer_cursor *pC; + pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; - nSuffix = nTerm; - nSpace = 1; - nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; - nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; - } + /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) + ** or more tokens in zDoc/nDoc. + */ + rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC); + if( rc!=SQLITE_OK ){ + return rc; + } + while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ + const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0; + rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); + } + pMod->xClose(pC); + if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } - pWriter->nLeafData += nSpace; - blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc); - if( rc==SQLITE_OK ){ - if( pLeaf->block.n==0 ){ - pLeaf->block.n = 1; - pLeaf->block.a[0] = '\0'; + nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; + assert( nShift<=nDesired ); + if( nShift>0 ){ + *piPos += nShift; + *pHlmask = hlmask >> nShift; + } } - rc = fts3AppendToNode( - &pLeaf->block, &pLeaf->key, zTerm, nTerm, aDoclist, nDoclist - ); } - - return rc; + return SQLITE_OK; } /* -** This function is called to release all dynamic resources held by the -** merge-writer object pWriter, and if no error has occurred, to flush -** all outstanding node buffers held by pWriter to disk. -** -** If *pRc is not SQLITE_OK when this function is called, then no attempt -** is made to write any data to disk. Instead, this function serves only -** to release outstanding resources. -** -** Otherwise, if *pRc is initially SQLITE_OK and an error occurs while -** flushing buffers to disk, *pRc is set to an SQLite error code before -** returning. +** Extract the snippet text for fragment pFragment from cursor pCsr and +** append it to string buffer pOut. */ -static void fts3IncrmergeRelease( - Fts3Table *p, /* FTS3 table handle */ - IncrmergeWriter *pWriter, /* Merge-writer object */ - int *pRc /* IN/OUT: Error code */ +static int fts3SnippetText( + Fts3Cursor *pCsr, /* FTS3 Cursor */ + SnippetFragment *pFragment, /* Snippet to extract */ + int iFragment, /* Fragment number */ + int isLast, /* True for final fragment in snippet */ + int nSnippet, /* Number of tokens in extracted snippet */ + const char *zOpen, /* String inserted before highlighted term */ + const char *zClose, /* String inserted after highlighted term */ + const char *zEllipsis, /* String inserted between snippets */ + StrBuffer *pOut /* Write output here */ ){ - int i; /* Used to iterate through non-root layers */ - int iRoot; /* Index of root in pWriter->aNodeWriter */ - NodeWriter *pRoot; /* NodeWriter for root node */ - int rc = *pRc; /* Error code */ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc; /* Return code */ + const char *zDoc; /* Document text to extract snippet from */ + int nDoc; /* Size of zDoc in bytes */ + int iCurrent = 0; /* Current token number of document */ + int iEnd = 0; /* Byte offset of end of current token */ + int isShiftDone = 0; /* True after snippet is shifted */ + int iPos = pFragment->iPos; /* First token of snippet */ + u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ + int iCol = pFragment->iCol+1; /* Query column to extract text from */ + sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ + sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ + + zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); + if( zDoc==0 ){ + if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ + return SQLITE_NOMEM; + } + return SQLITE_OK; + } + nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); - /* Set iRoot to the index in pWriter->aNodeWriter[] of the output segment - ** root node. If the segment fits entirely on a single leaf node, iRoot - ** will be set to 0. If the root node is the parent of the leaves, iRoot - ** will be 1. And so on. */ - for(iRoot=FTS_MAX_APPENDABLE_HEIGHT-1; iRoot>=0; iRoot--){ - NodeWriter *pNode = &pWriter->aNodeWriter[iRoot]; - if( pNode->block.n>0 ) break; - assert( *pRc || pNode->block.nAlloc==0 ); - assert( *pRc || pNode->key.nAlloc==0 ); - sqlite3_free(pNode->block.a); - sqlite3_free(pNode->key.a); + /* Open a token cursor on the document. */ + pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; + rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC); + if( rc!=SQLITE_OK ){ + return rc; } - /* Empty output segment. This is a no-op. */ - if( iRoot<0 ) return; + while( rc==SQLITE_OK ){ + const char *ZDUMMY; /* Dummy argument used with tokenizer */ + int DUMMY1 = -1; /* Dummy argument used with tokenizer */ + int iBegin = 0; /* Offset in zDoc of start of token */ + int iFin = 0; /* Offset in zDoc of end of token */ + int isHighlight = 0; /* True for highlighted terms */ - /* The entire output segment fits on a single node. Normally, this means - ** the node would be stored as a blob in the "root" column of the %_segdir - ** table. However, this is not permitted in this case. The problem is that - ** space has already been reserved in the %_segments table, and so the - ** start_block and end_block fields of the %_segdir table must be populated. - ** And, by design or by accident, released versions of FTS cannot handle - ** segments that fit entirely on the root node with start_block!=0. - ** - ** Instead, create a synthetic root node that contains nothing but a - ** pointer to the single content node. So that the segment consists of a - ** single leaf and a single interior (root) node. - ** - ** Todo: Better might be to defer allocating space in the %_segments - ** table until we are sure it is needed. - */ - if( iRoot==0 ){ - Blob *pBlock = &pWriter->aNodeWriter[1].block; - blobGrowBuffer(pBlock, 1 + FTS3_VARINT_MAX, &rc); - if( rc==SQLITE_OK ){ - pBlock->a[0] = 0x01; - pBlock->n = 1 + sqlite3Fts3PutVarint( - &pBlock->a[1], pWriter->aNodeWriter[0].iBlock + /* Variable DUMMY1 is initialized to a negative value above. Elsewhere + ** in the FTS code the variable that the third argument to xNext points to + ** is initialized to zero before the first (*but not necessarily + ** subsequent*) call to xNext(). This is done for a particular application + ** that needs to know whether or not the tokenizer is being used for + ** snippet generation or for some other purpose. + ** + ** Extreme care is required when writing code to depend on this + ** initialization. It is not a documented part of the tokenizer interface. + ** If a tokenizer is used directly by any code outside of FTS, this + ** convention might not be respected. */ + rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_DONE ){ + /* Special case - the last token of the snippet is also the last token + ** of the column. Append any punctuation that occurred between the end + ** of the previous token and the end of the document to the output. + ** Then break out of the loop. */ + rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); + } + break; + } + if( iCurrentiLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask ); + isShiftDone = 1; + + /* Now that the shift has been done, check if the initial "..." are + ** required. They are required if (a) this is not the first fragment, + ** or (b) this fragment does not begin at position 0 of its column. + */ + if( rc==SQLITE_OK ){ + if( iPos>0 || iFragment>0 ){ + rc = fts3StringAppend(pOut, zEllipsis, -1); + }else if( iBegin ){ + rc = fts3StringAppend(pOut, zDoc, iBegin); + } + } + if( rc!=SQLITE_OK || iCurrentaNodeWriter[iRoot]; - /* Flush all currently outstanding nodes to disk. */ - for(i=0; iaNodeWriter[i]; - if( pNode->block.n>0 && rc==SQLITE_OK ){ - rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n); + if( iCurrent>=(iPos+nSnippet) ){ + if( isLast ){ + rc = fts3StringAppend(pOut, zEllipsis, -1); + } + break; } - sqlite3_free(pNode->block.a); - sqlite3_free(pNode->key.a); - } - /* Write the %_segdir record. */ - if( rc==SQLITE_OK ){ - rc = fts3WriteSegdir(p, - pWriter->iAbsLevel+1, /* level */ - pWriter->iIdx, /* idx */ - pWriter->iStart, /* start_block */ - pWriter->aNodeWriter[0].iBlock, /* leaves_end_block */ - pWriter->iEnd, /* end_block */ - (pWriter->bNoLeafData==0 ? pWriter->nLeafData : 0), /* end_block */ - pRoot->block.a, pRoot->block.n /* root */ - ); + /* Set isHighlight to true if this term should be highlighted. */ + isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0; + + if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd); + if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1); + if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin); + if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1); + + iEnd = iFin; } - sqlite3_free(pRoot->block.a); - sqlite3_free(pRoot->key.a); - *pRc = rc; + pMod->xClose(pC); + return rc; } + /* -** Compare the term in buffer zLhs (size in bytes nLhs) with that in -** zRhs (size in bytes nRhs) using memcmp. If one term is a prefix of -** the other, it is considered to be smaller than the other. +** This function is used to count the entries in a column-list (a +** delta-encoded list of term offsets within a single column of a single +** row). When this function is called, *ppCollist should point to the +** beginning of the first varint in the column-list (the varint that +** contains the position of the first matching term in the column data). +** Before returning, *ppCollist is set to point to the first byte after +** the last varint in the column-list (either the 0x00 signifying the end +** of the position-list, or the 0x01 that precedes the column number of +** the next column in the position-list). ** -** Return -ve if zLhs is smaller than zRhs, 0 if it is equal, or +ve -** if it is greater. +** The number of elements in the column-list is returned. */ -static int fts3TermCmp( - const char *zLhs, int nLhs, /* LHS of comparison */ - const char *zRhs, int nRhs /* RHS of comparison */ -){ - int nCmp = MIN(nLhs, nRhs); - int res; +static int fts3ColumnlistCount(char **ppCollist){ + char *pEnd = *ppCollist; + char c = 0; + int nEntry = 0; - res = memcmp(zLhs, zRhs, nCmp); - if( res==0 ) res = nLhs - nRhs; + /* A column-list is terminated by either a 0x01 or 0x00. */ + while( 0xFE & (*pEnd | c) ){ + c = *pEnd++ & 0x80; + if( !c ) nEntry++; + } - return res; + *ppCollist = pEnd; + return nEntry; } - /* -** Query to see if the entry in the %_segments table with blockid iEnd is -** NULL. If no error occurs and the entry is NULL, set *pbRes 1 before -** returning. Otherwise, set *pbRes to 0. -** -** Or, if an error occurs while querying the database, return an SQLite -** error code. The final value of *pbRes is undefined in this case. -** -** This is used to test if a segment is an "appendable" segment. If it -** is, then a NULL entry has been inserted into the %_segments table -** with blockid %_segdir.end_block. +** This function gathers 'y' or 'b' data for a single phrase. */ -static int fts3IsAppendable(Fts3Table *p, sqlite3_int64 iEnd, int *pbRes){ - int bRes = 0; /* Result to set *pbRes to */ - sqlite3_stmt *pCheck = 0; /* Statement to query database with */ - int rc; /* Return code */ +static void fts3ExprLHits( + Fts3Expr *pExpr, /* Phrase expression node */ + MatchInfo *p /* Matchinfo context */ +){ + Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab; + int iStart; + Fts3Phrase *pPhrase = pExpr->pPhrase; + char *pIter = pPhrase->doclist.pList; + int iCol = 0; - rc = fts3SqlStmt(p, SQL_SEGMENT_IS_APPENDABLE, &pCheck, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pCheck, 1, iEnd); - if( SQLITE_ROW==sqlite3_step(pCheck) ) bRes = 1; - rc = sqlite3_reset(pCheck); + assert( p->flag==FTS3_MATCHINFO_LHITS_BM || p->flag==FTS3_MATCHINFO_LHITS ); + if( p->flag==FTS3_MATCHINFO_LHITS ){ + iStart = pExpr->iPhrase * p->nCol; + }else{ + iStart = pExpr->iPhrase * ((p->nCol + 31) / 32); + } + + while( 1 ){ + int nHit = fts3ColumnlistCount(&pIter); + if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){ + if( p->flag==FTS3_MATCHINFO_LHITS ){ + p->aMatchinfo[iStart + iCol] = (u32)nHit; + }else if( nHit ){ + p->aMatchinfo[iStart + (iCol+1)/32] |= (1 << (iCol&0x1F)); + } + } + assert( *pIter==0x00 || *pIter==0x01 ); + if( *pIter!=0x01 ) break; + pIter++; + pIter += fts3GetVarint32(pIter, &iCol); } - - *pbRes = bRes; - return rc; } /* -** This function is called when initializing an incremental-merge operation. -** It checks if the existing segment with index value iIdx at absolute level -** (iAbsLevel+1) can be appended to by the incremental merge. If it can, the -** merge-writer object *pWriter is initialized to write to it. +** Gather the results for matchinfo directives 'y' and 'b'. +*/ +static void fts3ExprLHitGather( + Fts3Expr *pExpr, + MatchInfo *p +){ + assert( (pExpr->pLeft==0)==(pExpr->pRight==0) ); + if( pExpr->bEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){ + if( pExpr->pLeft ){ + fts3ExprLHitGather(pExpr->pLeft, p); + fts3ExprLHitGather(pExpr->pRight, p); + }else{ + fts3ExprLHits(pExpr, p); + } + } +} + +/* +** fts3ExprIterate() callback used to collect the "global" matchinfo stats +** for a single query. ** -** An existing segment can be appended to by an incremental merge if: +** fts3ExprIterate() callback to load the 'global' elements of a +** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements +** of the matchinfo array that are constant for all rows returned by the +** current query. ** -** * It was initially created as an appendable segment (with all required -** space pre-allocated), and +** Argument pCtx is actually a pointer to a struct of type MatchInfo. This +** function populates Matchinfo.aMatchinfo[] as follows: ** -** * The first key read from the input (arguments zKey and nKey) is -** greater than the largest key currently stored in the potential -** output segment. +** for(iCol=0; iColpCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol] + ); +} - rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pSelect, 0); - if( rc==SQLITE_OK ){ - sqlite3_int64 iStart = 0; /* Value of %_segdir.start_block */ - sqlite3_int64 iLeafEnd = 0; /* Value of %_segdir.leaves_end_block */ - sqlite3_int64 iEnd = 0; /* Value of %_segdir.end_block */ - const char *aRoot = 0; /* Pointer to %_segdir.root buffer */ - int nRoot = 0; /* Size of aRoot[] in bytes */ - int rc2; /* Return code from sqlite3_reset() */ - int bAppendable = 0; /* Set to true if segment is appendable */ +/* +** fts3ExprIterate() callback used to collect the "local" part of the +** FTS3_MATCHINFO_HITS array. The local stats are those elements of the +** array that are different for each row returned by the query. +*/ +static int fts3ExprLocalHitsCb( + Fts3Expr *pExpr, /* Phrase expression node */ + int iPhrase, /* Phrase number */ + void *pCtx /* Pointer to MatchInfo structure */ +){ + int rc = SQLITE_OK; + MatchInfo *p = (MatchInfo *)pCtx; + int iStart = iPhrase * p->nCol * 3; + int i; - /* Read the %_segdir entry for index iIdx absolute level (iAbsLevel+1) */ - sqlite3_bind_int64(pSelect, 1, iAbsLevel+1); - sqlite3_bind_int(pSelect, 2, iIdx); - if( sqlite3_step(pSelect)==SQLITE_ROW ){ - iStart = sqlite3_column_int64(pSelect, 1); - iLeafEnd = sqlite3_column_int64(pSelect, 2); - fts3ReadEndBlockField(pSelect, 3, &iEnd, &pWriter->nLeafData); - if( pWriter->nLeafData<0 ){ - pWriter->nLeafData = pWriter->nLeafData * -1; - } - pWriter->bNoLeafData = (pWriter->nLeafData==0); - nRoot = sqlite3_column_bytes(pSelect, 4); - aRoot = sqlite3_column_blob(pSelect, 4); + for(i=0; inCol && rc==SQLITE_OK; i++){ + char *pCsr; + rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr); + if( pCsr ){ + p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr); }else{ - return sqlite3_reset(pSelect); + p->aMatchinfo[iStart+i*3] = 0; } + } - /* Check for the zero-length marker in the %_segments table */ - rc = fts3IsAppendable(p, iEnd, &bAppendable); + return rc; +} - /* Check that zKey/nKey is larger than the largest key the candidate */ - if( rc==SQLITE_OK && bAppendable ){ - char *aLeaf = 0; - int nLeaf = 0; +static int fts3MatchinfoCheck( + Fts3Table *pTab, + char cArg, + char **pzErr +){ + if( (cArg==FTS3_MATCHINFO_NPHRASE) + || (cArg==FTS3_MATCHINFO_NCOL) + || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4) + || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4) + || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) + || (cArg==FTS3_MATCHINFO_LCS) + || (cArg==FTS3_MATCHINFO_HITS) + || (cArg==FTS3_MATCHINFO_LHITS) + || (cArg==FTS3_MATCHINFO_LHITS_BM) + ){ + return SQLITE_OK; + } + sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg); + return SQLITE_ERROR; +} - rc = sqlite3Fts3ReadBlock(p, iLeafEnd, &aLeaf, &nLeaf, 0); - if( rc==SQLITE_OK ){ - NodeReader reader; - for(rc = nodeReaderInit(&reader, aLeaf, nLeaf); - rc==SQLITE_OK && reader.aNode; - rc = nodeReaderNext(&reader) - ){ - assert( reader.aNode ); - } - if( fts3TermCmp(zKey, nKey, reader.term.a, reader.term.n)<=0 ){ - bAppendable = 0; - } - nodeReaderRelease(&reader); - } - sqlite3_free(aLeaf); - } +static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ + int nVal; /* Number of integers output by cArg */ - if( rc==SQLITE_OK && bAppendable ){ - /* It is possible to append to this segment. Set up the IncrmergeWriter - ** object to do so. */ - int i; - int nHeight = (int)aRoot[0]; - NodeWriter *pNode; + switch( cArg ){ + case FTS3_MATCHINFO_NDOC: + case FTS3_MATCHINFO_NPHRASE: + case FTS3_MATCHINFO_NCOL: + nVal = 1; + break; - pWriter->nLeafEst = (int)((iEnd - iStart) + 1)/FTS_MAX_APPENDABLE_HEIGHT; - pWriter->iStart = iStart; - pWriter->iEnd = iEnd; - pWriter->iAbsLevel = iAbsLevel; - pWriter->iIdx = iIdx; + case FTS3_MATCHINFO_AVGLENGTH: + case FTS3_MATCHINFO_LENGTH: + case FTS3_MATCHINFO_LCS: + nVal = pInfo->nCol; + break; - for(i=nHeight+1; iaNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst; - } + case FTS3_MATCHINFO_LHITS: + nVal = pInfo->nCol * pInfo->nPhrase; + break; - pNode = &pWriter->aNodeWriter[nHeight]; - pNode->iBlock = pWriter->iStart + pWriter->nLeafEst*nHeight; - blobGrowBuffer(&pNode->block, MAX(nRoot, p->nNodeSize), &rc); - if( rc==SQLITE_OK ){ - memcpy(pNode->block.a, aRoot, nRoot); - pNode->block.n = nRoot; - } + case FTS3_MATCHINFO_LHITS_BM: + nVal = pInfo->nPhrase * ((pInfo->nCol + 31) / 32); + break; - for(i=nHeight; i>=0 && rc==SQLITE_OK; i--){ - NodeReader reader; - pNode = &pWriter->aNodeWriter[i]; + default: + assert( cArg==FTS3_MATCHINFO_HITS ); + nVal = pInfo->nCol * pInfo->nPhrase * 3; + break; + } - rc = nodeReaderInit(&reader, pNode->block.a, pNode->block.n); - while( reader.aNode && rc==SQLITE_OK ) rc = nodeReaderNext(&reader); - blobGrowBuffer(&pNode->key, reader.term.n, &rc); - if( rc==SQLITE_OK ){ - memcpy(pNode->key.a, reader.term.a, reader.term.n); - pNode->key.n = reader.term.n; - if( i>0 ){ - char *aBlock = 0; - int nBlock = 0; - pNode = &pWriter->aNodeWriter[i-1]; - pNode->iBlock = reader.iChild; - rc = sqlite3Fts3ReadBlock(p, reader.iChild, &aBlock, &nBlock, 0); - blobGrowBuffer(&pNode->block, MAX(nBlock, p->nNodeSize), &rc); - if( rc==SQLITE_OK ){ - memcpy(pNode->block.a, aBlock, nBlock); - pNode->block.n = nBlock; - } - sqlite3_free(aBlock); - } - } - nodeReaderRelease(&reader); - } - } + return nVal; +} - rc2 = sqlite3_reset(pSelect); - if( rc==SQLITE_OK ) rc = rc2; +static int fts3MatchinfoSelectDoctotal( + Fts3Table *pTab, + sqlite3_stmt **ppStmt, + sqlite3_int64 *pnDoc, + const char **paLen +){ + sqlite3_stmt *pStmt; + const char *a; + sqlite3_int64 nDoc; + + if( !*ppStmt ){ + int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); + if( rc!=SQLITE_OK ) return rc; } + pStmt = *ppStmt; + assert( sqlite3_data_count(pStmt)==1 ); - return rc; + a = sqlite3_column_blob(pStmt, 0); + a += sqlite3Fts3GetVarint(a, &nDoc); + if( nDoc==0 ) return FTS_CORRUPT_VTAB; + *pnDoc = (u32)nDoc; + + if( paLen ) *paLen = a; + return SQLITE_OK; } /* -** Determine the largest segment index value that exists within absolute -** level iAbsLevel+1. If no error occurs, set *piIdx to this value plus -** one before returning SQLITE_OK. Or, if there are no segments at all -** within level iAbsLevel, set *piIdx to zero. -** -** If an error occurs, return an SQLite error code. The final value of -** *piIdx is undefined in this case. +** An instance of the following structure is used to store state while +** iterating through a multi-column position-list corresponding to the +** hits for a single phrase on a single row in order to calculate the +** values for a matchinfo() FTS3_MATCHINFO_LCS request. */ -static int fts3IncrmergeOutputIdx( - Fts3Table *p, /* FTS Table handle */ - sqlite3_int64 iAbsLevel, /* Absolute index of input segments */ - int *piIdx /* OUT: Next free index at iAbsLevel+1 */ +typedef struct LcsIterator LcsIterator; +struct LcsIterator { + Fts3Expr *pExpr; /* Pointer to phrase expression */ + int iPosOffset; /* Tokens count up to end of this phrase */ + char *pRead; /* Cursor used to iterate through aDoclist */ + int iPos; /* Current position */ +}; + +/* +** If LcsIterator.iCol is set to the following value, the iterator has +** finished iterating through all offsets for all columns. +*/ +#define LCS_ITERATOR_FINISHED 0x7FFFFFFF; + +static int fts3MatchinfoLcsCb( + Fts3Expr *pExpr, /* Phrase expression node */ + int iPhrase, /* Phrase number (numbered from zero) */ + void *pCtx /* Pointer to MatchInfo structure */ ){ - int rc; - sqlite3_stmt *pOutputIdx = 0; /* SQL used to find output index */ + LcsIterator *aIter = (LcsIterator *)pCtx; + aIter[iPhrase].pExpr = pExpr; + return SQLITE_OK; +} - rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pOutputIdx, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pOutputIdx, 1, iAbsLevel+1); - sqlite3_step(pOutputIdx); - *piIdx = sqlite3_column_int(pOutputIdx, 0); - rc = sqlite3_reset(pOutputIdx); +/* +** Advance the iterator passed as an argument to the next position. Return +** 1 if the iterator is at EOF or if it now points to the start of the +** position list for the next column. +*/ +static int fts3LcsIteratorAdvance(LcsIterator *pIter){ + char *pRead = pIter->pRead; + sqlite3_int64 iRead; + int rc = 0; + + pRead += sqlite3Fts3GetVarint(pRead, &iRead); + if( iRead==0 || iRead==1 ){ + pRead = 0; + rc = 1; + }else{ + pIter->iPos += (int)(iRead-2); } + pIter->pRead = pRead; return rc; } - -/* -** Allocate an appendable output segment on absolute level iAbsLevel+1 -** with idx value iIdx. -** -** In the %_segdir table, a segment is defined by the values in three -** columns: -** -** start_block -** leaves_end_block -** end_block -** -** When an appendable segment is allocated, it is estimated that the -** maximum number of leaf blocks that may be required is the sum of the -** number of leaf blocks consumed by the input segments, plus the number -** of input segments, multiplied by two. This value is stored in stack -** variable nLeafEst. + +/* +** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. ** -** A total of 16*nLeafEst blocks are allocated when an appendable segment -** is created ((1 + end_block - start_block)==16*nLeafEst). The contiguous -** array of leaf nodes starts at the first block allocated. The array -** of interior nodes that are parents of the leaf nodes start at block -** (start_block + (1 + end_block - start_block) / 16). And so on. +** If the call is successful, the longest-common-substring lengths for each +** column are written into the first nCol elements of the pInfo->aMatchinfo[] +** array before returning. SQLITE_OK is returned in this case. ** -** In the actual code below, the value "16" is replaced with the -** pre-processor macro FTS_MAX_APPENDABLE_HEIGHT. +** Otherwise, if an error occurs, an SQLite error code is returned and the +** data written to the first nCol elements of pInfo->aMatchinfo[] is +** undefined. */ -static int fts3IncrmergeWriter( - Fts3Table *p, /* Fts3 table handle */ - sqlite3_int64 iAbsLevel, /* Absolute level of input segments */ - int iIdx, /* Index of new output segment */ - Fts3MultiSegReader *pCsr, /* Cursor that data will be read from */ - IncrmergeWriter *pWriter /* Populate this object */ -){ - int rc; /* Return Code */ - int i; /* Iterator variable */ - int nLeafEst = 0; /* Blocks allocated for leaf nodes */ - sqlite3_stmt *pLeafEst = 0; /* SQL used to determine nLeafEst */ - sqlite3_stmt *pFirstBlock = 0; /* SQL used to determine first block */ +static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ + LcsIterator *aIter; + int i; + int iCol; + int nToken = 0; - /* Calculate nLeafEst. */ - rc = fts3SqlStmt(p, SQL_MAX_LEAF_NODE_ESTIMATE, &pLeafEst, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pLeafEst, 1, iAbsLevel); - sqlite3_bind_int64(pLeafEst, 2, pCsr->nSegment); - if( SQLITE_ROW==sqlite3_step(pLeafEst) ){ - nLeafEst = sqlite3_column_int(pLeafEst, 0); - } - rc = sqlite3_reset(pLeafEst); + /* Allocate and populate the array of LcsIterator objects. The array + ** contains one element for each matchable phrase in the query. + **/ + aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); + if( !aIter ) return SQLITE_NOMEM; + memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); + (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); + + for(i=0; inPhrase; i++){ + LcsIterator *pIter = &aIter[i]; + nToken -= pIter->pExpr->pPhrase->nToken; + pIter->iPosOffset = nToken; } - if( rc!=SQLITE_OK ) return rc; - /* Calculate the first block to use in the output segment */ - rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pFirstBlock, 0); - if( rc==SQLITE_OK ){ - if( SQLITE_ROW==sqlite3_step(pFirstBlock) ){ - pWriter->iStart = sqlite3_column_int64(pFirstBlock, 0); - pWriter->iEnd = pWriter->iStart - 1; - pWriter->iEnd += nLeafEst * FTS_MAX_APPENDABLE_HEIGHT; + for(iCol=0; iColnCol; iCol++){ + int nLcs = 0; /* LCS value for this column */ + int nLive = 0; /* Number of iterators in aIter not at EOF */ + + for(i=0; inPhrase; i++){ + int rc; + LcsIterator *pIt = &aIter[i]; + rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead); + if( rc!=SQLITE_OK ) return rc; + if( pIt->pRead ){ + pIt->iPos = pIt->iPosOffset; + fts3LcsIteratorAdvance(&aIter[i]); + nLive++; + } } - rc = sqlite3_reset(pFirstBlock); - } - if( rc!=SQLITE_OK ) return rc; - /* Insert the marker in the %_segments table to make sure nobody tries - ** to steal the space just allocated. This is also used to identify - ** appendable segments. */ - rc = fts3WriteSegment(p, pWriter->iEnd, 0, 0); - if( rc!=SQLITE_OK ) return rc; + while( nLive>0 ){ + LcsIterator *pAdv = 0; /* The iterator to advance by one position */ + int nThisLcs = 0; /* LCS for the current iterator positions */ - pWriter->iAbsLevel = iAbsLevel; - pWriter->nLeafEst = nLeafEst; - pWriter->iIdx = iIdx; + for(i=0; inPhrase; i++){ + LcsIterator *pIter = &aIter[i]; + if( pIter->pRead==0 ){ + /* This iterator is already at EOF for this column. */ + nThisLcs = 0; + }else{ + if( pAdv==0 || pIter->iPosiPos ){ + pAdv = pIter; + } + if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ + nThisLcs++; + }else{ + nThisLcs = 1; + } + if( nThisLcs>nLcs ) nLcs = nThisLcs; + } + } + if( fts3LcsIteratorAdvance(pAdv) ) nLive--; + } - /* Set up the array of NodeWriter objects */ - for(i=0; iaNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst; + pInfo->aMatchinfo[iCol] = nLcs; } + + sqlite3_free(aIter); return SQLITE_OK; } /* -** Remove an entry from the %_segdir table. This involves running the -** following two statements: +** Populate the buffer pInfo->aMatchinfo[] with an array of integers to +** be returned by the matchinfo() function. Argument zArg contains the +** format string passed as the second argument to matchinfo (or the +** default value "pcx" if no second argument was specified). The format +** string has already been validated and the pInfo->aMatchinfo[] array +** is guaranteed to be large enough for the output. ** -** DELETE FROM %_segdir WHERE level = :iAbsLevel AND idx = :iIdx -** UPDATE %_segdir SET idx = idx - 1 WHERE level = :iAbsLevel AND idx > :iIdx +** If bGlobal is true, then populate all fields of the matchinfo() output. +** If it is false, then assume that those fields that do not change between +** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS) +** have already been populated. ** -** The DELETE statement removes the specific %_segdir level. The UPDATE -** statement ensures that the remaining segments have contiguously allocated -** idx values. +** Return SQLITE_OK if successful, or an SQLite error code if an error +** occurs. If a value other than SQLITE_OK is returned, the state the +** pInfo->aMatchinfo[] buffer is left in is undefined. */ -static int fts3RemoveSegdirEntry( - Fts3Table *p, /* FTS3 table handle */ - sqlite3_int64 iAbsLevel, /* Absolute level to delete from */ - int iIdx /* Index of %_segdir entry to delete */ +static int fts3MatchinfoValues( + Fts3Cursor *pCsr, /* FTS3 cursor object */ + int bGlobal, /* True to grab the global stats */ + MatchInfo *pInfo, /* Matchinfo context object */ + const char *zArg /* Matchinfo format string */ ){ - int rc; /* Return code */ - sqlite3_stmt *pDelete = 0; /* DELETE statement */ + int rc = SQLITE_OK; + int i; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + sqlite3_stmt *pSelect = 0; - rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_ENTRY, &pDelete, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pDelete, 1, iAbsLevel); - sqlite3_bind_int(pDelete, 2, iIdx); - sqlite3_step(pDelete); - rc = sqlite3_reset(pDelete); - } + for(i=0; rc==SQLITE_OK && zArg[i]; i++){ + pInfo->flag = zArg[i]; + switch( zArg[i] ){ + case FTS3_MATCHINFO_NPHRASE: + if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; + break; - return rc; -} + case FTS3_MATCHINFO_NCOL: + if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; + break; + + case FTS3_MATCHINFO_NDOC: + if( bGlobal ){ + sqlite3_int64 nDoc = 0; + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); + pInfo->aMatchinfo[0] = (u32)nDoc; + } + break; -/* -** One or more segments have just been removed from absolute level iAbsLevel. -** Update the 'idx' values of the remaining segments in the level so that -** the idx values are a contiguous sequence starting from 0. -*/ -static int fts3RepackSegdirLevel( - Fts3Table *p, /* FTS3 table handle */ - sqlite3_int64 iAbsLevel /* Absolute level to repack */ -){ - int rc; /* Return code */ - int *aIdx = 0; /* Array of remaining idx values */ - int nIdx = 0; /* Valid entries in aIdx[] */ - int nAlloc = 0; /* Allocated size of aIdx[] */ - int i; /* Iterator variable */ - sqlite3_stmt *pSelect = 0; /* Select statement to read idx values */ - sqlite3_stmt *pUpdate = 0; /* Update statement to modify idx values */ + case FTS3_MATCHINFO_AVGLENGTH: + if( bGlobal ){ + sqlite3_int64 nDoc; /* Number of rows in table */ + const char *a; /* Aggregate column length array */ - rc = fts3SqlStmt(p, SQL_SELECT_INDEXES, &pSelect, 0); - if( rc==SQLITE_OK ){ - int rc2; - sqlite3_bind_int64(pSelect, 1, iAbsLevel); - while( SQLITE_ROW==sqlite3_step(pSelect) ){ - if( nIdx>=nAlloc ){ - int *aNew; - nAlloc += 16; - aNew = sqlite3_realloc(aIdx, nAlloc*sizeof(int)); - if( !aNew ){ - rc = SQLITE_NOMEM; - break; + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a); + if( rc==SQLITE_OK ){ + int iCol; + for(iCol=0; iColnCol; iCol++){ + u32 iVal; + sqlite3_int64 nToken; + a += sqlite3Fts3GetVarint(a, &nToken); + iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc); + pInfo->aMatchinfo[iCol] = iVal; + } + } } - aIdx = aNew; + break; + + case FTS3_MATCHINFO_LENGTH: { + sqlite3_stmt *pSelectDocsize = 0; + rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize); + if( rc==SQLITE_OK ){ + int iCol; + const char *a = sqlite3_column_blob(pSelectDocsize, 0); + for(iCol=0; iColnCol; iCol++){ + sqlite3_int64 nToken; + a += sqlite3Fts3GetVarint(a, &nToken); + pInfo->aMatchinfo[iCol] = (u32)nToken; + } + } + sqlite3_reset(pSelectDocsize); + break; } - aIdx[nIdx++] = sqlite3_column_int(pSelect, 0); - } - rc2 = sqlite3_reset(pSelect); - if( rc==SQLITE_OK ) rc = rc2; - } - if( rc==SQLITE_OK ){ - rc = fts3SqlStmt(p, SQL_SHIFT_SEGDIR_ENTRY, &pUpdate, 0); - } - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pUpdate, 2, iAbsLevel); - } + case FTS3_MATCHINFO_LCS: + rc = fts3ExprLoadDoclists(pCsr, 0, 0); + if( rc==SQLITE_OK ){ + rc = fts3MatchinfoLcs(pCsr, pInfo); + } + break; - assert( p->bIgnoreSavepoint==0 ); - p->bIgnoreSavepoint = 1; - for(i=0; rc==SQLITE_OK && iaMatchinfo, 0, nZero); + fts3ExprLHitGather(pCsr->pExpr, pInfo); + break; + } + + default: { + Fts3Expr *pExpr; + assert( zArg[i]==FTS3_MATCHINFO_HITS ); + pExpr = pCsr->pExpr; + rc = fts3ExprLoadDoclists(pCsr, 0, 0); + if( rc!=SQLITE_OK ) break; + if( bGlobal ){ + if( pCsr->pDeferred ){ + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0); + if( rc!=SQLITE_OK ) break; + } + rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); + sqlite3Fts3EvalTestDeferred(pCsr, &rc); + if( rc!=SQLITE_OK ) break; + } + (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); + break; + } } + + pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); } - p->bIgnoreSavepoint = 0; - sqlite3_free(aIdx); + sqlite3_reset(pSelect); return rc; } -static void fts3StartNode(Blob *pNode, int iHeight, sqlite3_int64 iChild){ - pNode->a[0] = (char)iHeight; - if( iChild ){ - assert( pNode->nAlloc>=1+sqlite3Fts3VarintLen(iChild) ); - pNode->n = 1 + sqlite3Fts3PutVarint(&pNode->a[1], iChild); - }else{ - assert( pNode->nAlloc>=1 ); - pNode->n = 1; - } -} /* -** The first two arguments are a pointer to and the size of a segment b-tree -** node. The node may be a leaf or an internal node. -** -** This function creates a new node image in blob object *pNew by copying -** all terms that are greater than or equal to zTerm/nTerm (for leaf nodes) -** or greater than zTerm/nTerm (for internal nodes) from aNode/nNode. +** Populate pCsr->aMatchinfo[] with data for the current row. The +** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32). */ -static int fts3TruncateNode( - const char *aNode, /* Current node image */ - int nNode, /* Size of aNode in bytes */ - Blob *pNew, /* OUT: Write new node image here */ - const char *zTerm, /* Omit all terms smaller than this */ - int nTerm, /* Size of zTerm in bytes */ - sqlite3_int64 *piBlock /* OUT: Block number in next layer down */ +static void fts3GetMatchinfo( + sqlite3_context *pCtx, /* Return results here */ + Fts3Cursor *pCsr, /* FTS3 Cursor object */ + const char *zArg /* Second argument to matchinfo() function */ ){ - NodeReader reader; /* Reader object */ - Blob prev = {0, 0, 0}; /* Previous term written to new node */ - int rc = SQLITE_OK; /* Return code */ - int bLeaf = aNode[0]=='\0'; /* True for a leaf node */ + MatchInfo sInfo; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int bGlobal = 0; /* Collect 'global' stats as well as local */ - /* Allocate required output space */ - blobGrowBuffer(pNew, nNode, &rc); - if( rc!=SQLITE_OK ) return rc; - pNew->n = 0; + u32 *aOut = 0; + void (*xDestroyOut)(void*) = 0; - /* Populate new node buffer */ - for(rc = nodeReaderInit(&reader, aNode, nNode); - rc==SQLITE_OK && reader.aNode; - rc = nodeReaderNext(&reader) - ){ - if( pNew->n==0 ){ - int res = fts3TermCmp(reader.term.a, reader.term.n, zTerm, nTerm); - if( res<0 || (bLeaf==0 && res==0) ) continue; - fts3StartNode(pNew, (int)aNode[0], reader.iChild); - *piBlock = reader.iChild; - } - rc = fts3AppendToNode( - pNew, &prev, reader.term.a, reader.term.n, - reader.aDoclist, reader.nDoclist - ); - if( rc!=SQLITE_OK ) break; - } - if( pNew->n==0 ){ - fts3StartNode(pNew, (int)aNode[0], reader.iChild); - *piBlock = reader.iChild; + memset(&sInfo, 0, sizeof(MatchInfo)); + sInfo.pCursor = pCsr; + sInfo.nCol = pTab->nColumn; + + /* If there is cached matchinfo() data, but the format string for the + ** cache does not match the format string for this request, discard + ** the cached data. */ + if( pCsr->pMIBuffer && strcmp(pCsr->pMIBuffer->zMatchinfo, zArg) ){ + sqlite3Fts3MIBufferFree(pCsr->pMIBuffer); + pCsr->pMIBuffer = 0; } - assert( pNew->n<=pNew->nAlloc ); - nodeReaderRelease(&reader); - sqlite3_free(prev.a); - return rc; -} + /* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the + ** matchinfo function has been called for this query. In this case + ** allocate the array used to accumulate the matchinfo data and + ** initialize those elements that are constant for every row. + */ + if( pCsr->pMIBuffer==0 ){ + int nMatchinfo = 0; /* Number of u32 elements in match-info */ + int i; /* Used to iterate through zArg */ -/* -** Remove all terms smaller than zTerm/nTerm from segment iIdx in absolute -** level iAbsLevel. This may involve deleting entries from the %_segments -** table, and modifying existing entries in both the %_segments and %_segdir -** tables. -** -** SQLITE_OK is returned if the segment is updated successfully. Or an -** SQLite error code otherwise. -*/ -static int fts3TruncateSegment( - Fts3Table *p, /* FTS3 table handle */ - sqlite3_int64 iAbsLevel, /* Absolute level of segment to modify */ - int iIdx, /* Index within level of segment to modify */ - const char *zTerm, /* Remove terms smaller than this */ - int nTerm /* Number of bytes in buffer zTerm */ -){ - int rc = SQLITE_OK; /* Return code */ - Blob root = {0,0,0}; /* New root page image */ - Blob block = {0,0,0}; /* Buffer used for any other block */ - sqlite3_int64 iBlock = 0; /* Block id */ - sqlite3_int64 iNewStart = 0; /* New value for iStartBlock */ - sqlite3_int64 iOldStart = 0; /* Old value for iStartBlock */ - sqlite3_stmt *pFetch = 0; /* Statement used to fetch segdir */ + /* Determine the number of phrases in the query */ + pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr); + sInfo.nPhrase = pCsr->nPhrase; - rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pFetch, 0); - if( rc==SQLITE_OK ){ - int rc2; /* sqlite3_reset() return code */ - sqlite3_bind_int64(pFetch, 1, iAbsLevel); - sqlite3_bind_int(pFetch, 2, iIdx); - if( SQLITE_ROW==sqlite3_step(pFetch) ){ - const char *aRoot = sqlite3_column_blob(pFetch, 4); - int nRoot = sqlite3_column_bytes(pFetch, 4); - iOldStart = sqlite3_column_int64(pFetch, 1); - rc = fts3TruncateNode(aRoot, nRoot, &root, zTerm, nTerm, &iBlock); + /* Determine the number of integers in the buffer returned by this call. */ + for(i=0; zArg[i]; i++){ + char *zErr = 0; + if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){ + sqlite3_result_error(pCtx, zErr, -1); + sqlite3_free(zErr); + return; + } + nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]); } - rc2 = sqlite3_reset(pFetch); - if( rc==SQLITE_OK ) rc = rc2; - } - while( rc==SQLITE_OK && iBlock ){ - char *aBlock = 0; - int nBlock = 0; - iNewStart = iBlock; + /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */ + pCsr->pMIBuffer = fts3MIBufferNew(nMatchinfo, zArg); + if( !pCsr->pMIBuffer ) rc = SQLITE_NOMEM; - rc = sqlite3Fts3ReadBlock(p, iBlock, &aBlock, &nBlock, 0); - if( rc==SQLITE_OK ){ - rc = fts3TruncateNode(aBlock, nBlock, &block, zTerm, nTerm, &iBlock); - } - if( rc==SQLITE_OK ){ - rc = fts3WriteSegment(p, iNewStart, block.a, block.n); - } - sqlite3_free(aBlock); + pCsr->isMatchinfoNeeded = 1; + bGlobal = 1; } - /* Variable iNewStart now contains the first valid leaf node. */ - if( rc==SQLITE_OK && iNewStart ){ - sqlite3_stmt *pDel = 0; - rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDel, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pDel, 1, iOldStart); - sqlite3_bind_int64(pDel, 2, iNewStart-1); - sqlite3_step(pDel); - rc = sqlite3_reset(pDel); + if( rc==SQLITE_OK ){ + xDestroyOut = fts3MIBufferAlloc(pCsr->pMIBuffer, &aOut); + if( xDestroyOut==0 ){ + rc = SQLITE_NOMEM; } } if( rc==SQLITE_OK ){ - sqlite3_stmt *pChomp = 0; - rc = fts3SqlStmt(p, SQL_CHOMP_SEGDIR, &pChomp, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pChomp, 1, iNewStart); - sqlite3_bind_blob(pChomp, 2, root.a, root.n, SQLITE_STATIC); - sqlite3_bind_int64(pChomp, 3, iAbsLevel); - sqlite3_bind_int(pChomp, 4, iIdx); - sqlite3_step(pChomp); - rc = sqlite3_reset(pChomp); + sInfo.aMatchinfo = aOut; + sInfo.nPhrase = pCsr->nPhrase; + rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg); + if( bGlobal ){ + fts3MIBufferSetGlobal(pCsr->pMIBuffer); } } - sqlite3_free(root.a); - sqlite3_free(block.a); - return rc; + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + if( xDestroyOut ) xDestroyOut(aOut); + }else{ + int n = pCsr->pMIBuffer->nElem * sizeof(u32); + sqlite3_result_blob(pCtx, aOut, n, xDestroyOut); + } } /* -** This function is called after an incrmental-merge operation has run to -** merge (or partially merge) two or more segments from absolute level -** iAbsLevel. -** -** Each input segment is either removed from the db completely (if all of -** its data was copied to the output segment by the incrmerge operation) -** or modified in place so that it no longer contains those entries that -** have been duplicated in the output segment. +** Implementation of snippet() function. */ -static int fts3IncrmergeChomp( - Fts3Table *p, /* FTS table handle */ - sqlite3_int64 iAbsLevel, /* Absolute level containing segments */ - Fts3MultiSegReader *pCsr, /* Chomp all segments opened by this cursor */ - int *pnRem /* Number of segments not deleted */ +SQLITE_PRIVATE void sqlite3Fts3Snippet( + sqlite3_context *pCtx, /* SQLite function call context */ + Fts3Cursor *pCsr, /* Cursor object */ + const char *zStart, /* Snippet start text - "" */ + const char *zEnd, /* Snippet end text - "" */ + const char *zEllipsis, /* Snippet ellipsis text - "..." */ + int iCol, /* Extract snippet from this column */ + int nToken /* Approximate number of tokens in snippet */ ){ - int i; - int nRem = 0; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int rc = SQLITE_OK; + int i; + StrBuffer res = {0, 0, 0}; - for(i=pCsr->nSegment-1; i>=0 && rc==SQLITE_OK; i--){ - Fts3SegReader *pSeg = 0; - int j; + /* The returned text includes up to four fragments of text extracted from + ** the data in the current row. The first iteration of the for(...) loop + ** below attempts to locate a single fragment of text nToken tokens in + ** size that contains at least one instance of all phrases in the query + ** expression that appear in the current row. If such a fragment of text + ** cannot be found, the second iteration of the loop attempts to locate + ** a pair of fragments, and so on. + */ + int nSnippet = 0; /* Number of fragments in this snippet */ + SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */ + int nFToken = -1; /* Number of tokens in each fragment */ - /* Find the Fts3SegReader object with Fts3SegReader.iIdx==i. It is hiding - ** somewhere in the pCsr->apSegment[] array. */ - for(j=0; ALWAYS(jnSegment); j++){ - pSeg = pCsr->apSegment[j]; - if( pSeg->iIdx==i ) break; + if( !pCsr->pExpr ){ + sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); + return; + } + + for(nSnippet=1; 1; nSnippet++){ + + int iSnip; /* Loop counter 0..nSnippet-1 */ + u64 mCovered = 0; /* Bitmask of phrases covered by snippet */ + u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */ + + if( nToken>=0 ){ + nFToken = (nToken+nSnippet-1) / nSnippet; + }else{ + nFToken = -1 * nToken; } - assert( jnSegment && pSeg->iIdx==i ); - if( pSeg->aNode==0 ){ - /* Seg-reader is at EOF. Remove the entire input segment. */ - rc = fts3DeleteSegment(p, pSeg); - if( rc==SQLITE_OK ){ - rc = fts3RemoveSegdirEntry(p, iAbsLevel, pSeg->iIdx); + for(iSnip=0; iSnipnColumn; iRead++){ + SnippetFragment sF = {0, 0, 0, 0}; + int iS = 0; + if( iCol>=0 && iRead!=iCol ) continue; + + /* Find the best snippet of nFToken tokens in column iRead. */ + rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS); + if( rc!=SQLITE_OK ){ + goto snippet_out; + } + if( iS>iBestScore ){ + *pFragment = sF; + iBestScore = iS; + } } - *pnRem = 0; - }else{ - /* The incremental merge did not copy all the data from this - ** segment to the upper level. The segment is modified in place - ** so that it contains no keys smaller than zTerm/nTerm. */ - const char *zTerm = pSeg->zTerm; - int nTerm = pSeg->nTerm; - rc = fts3TruncateSegment(p, iAbsLevel, pSeg->iIdx, zTerm, nTerm); - nRem++; + + mCovered |= pFragment->covered; } + + /* If all query phrases seen by fts3BestSnippet() are present in at least + ** one of the nSnippet snippet fragments, break out of the loop. + */ + assert( (mCovered&mSeen)==mCovered ); + if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break; } - if( rc==SQLITE_OK && nRem!=pCsr->nSegment ){ - rc = fts3RepackSegdirLevel(p, iAbsLevel); + assert( nFToken>0 ); + + for(i=0; ia, pHint->n, SQLITE_STATIC); - sqlite3_step(pReplace); - rc = sqlite3_reset(pReplace); - } +typedef struct TermOffset TermOffset; +typedef struct TermOffsetCtx TermOffsetCtx; - return rc; -} +struct TermOffset { + char *pList; /* Position-list */ + int iPos; /* Position just read from pList */ + int iOff; /* Offset of this term from read positions */ +}; + +struct TermOffsetCtx { + Fts3Cursor *pCsr; + int iCol; /* Column of table to populate aTerm for */ + int iTerm; + sqlite3_int64 iDocid; + TermOffset *aTerm; +}; /* -** Load an incr-merge hint from the database. The incr-merge hint, if one -** exists, is stored in the rowid==1 row of the %_stat table. -** -** If successful, populate blob *pHint with the value read from the %_stat -** table and return SQLITE_OK. Otherwise, if an error occurs, return an -** SQLite error code. +** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). */ -static int fts3IncrmergeHintLoad(Fts3Table *p, Blob *pHint){ - sqlite3_stmt *pSelect = 0; +static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ + TermOffsetCtx *p = (TermOffsetCtx *)ctx; + int nTerm; /* Number of tokens in phrase */ + int iTerm; /* For looping through nTerm phrase terms */ + char *pList; /* Pointer to position list for phrase */ + int iPos = 0; /* First position in position-list */ int rc; - pHint->n = 0; - rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pSelect, 0); - if( rc==SQLITE_OK ){ - int rc2; - sqlite3_bind_int(pSelect, 1, FTS_STAT_INCRMERGEHINT); - if( SQLITE_ROW==sqlite3_step(pSelect) ){ - const char *aHint = sqlite3_column_blob(pSelect, 0); - int nHint = sqlite3_column_bytes(pSelect, 0); - if( aHint ){ - blobGrowBuffer(pHint, nHint, &rc); - if( rc==SQLITE_OK ){ - memcpy(pHint->a, aHint, nHint); - pHint->n = nHint; - } - } - } - rc2 = sqlite3_reset(pSelect); - if( rc==SQLITE_OK ) rc = rc2; + UNUSED_PARAMETER(iPhrase); + rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList); + nTerm = pExpr->pPhrase->nToken; + if( pList ){ + fts3GetDeltaPosition(&pList, &iPos); + assert( iPos>=0 ); + } + + for(iTerm=0; iTermaTerm[p->iTerm++]; + pT->iOff = nTerm-iTerm-1; + pT->pList = pList; + pT->iPos = iPos; } return rc; } /* -** If *pRc is not SQLITE_OK when this function is called, it is a no-op. -** Otherwise, append an entry to the hint stored in blob *pHint. Each entry -** consists of two varints, the absolute level number of the input segments -** and the number of input segments. -** -** If successful, leave *pRc set to SQLITE_OK and return. If an error occurs, -** set *pRc to an SQLite error code before returning. +** Implementation of offsets() function. */ -static void fts3IncrmergeHintPush( - Blob *pHint, /* Hint blob to append to */ - i64 iAbsLevel, /* First varint to store in hint */ - int nInput, /* Second varint to store in hint */ - int *pRc /* IN/OUT: Error code */ +SQLITE_PRIVATE void sqlite3Fts3Offsets( + sqlite3_context *pCtx, /* SQLite function call context */ + Fts3Cursor *pCsr /* Cursor object */ ){ - blobGrowBuffer(pHint, pHint->n + 2*FTS3_VARINT_MAX, pRc); - if( *pRc==SQLITE_OK ){ - pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], iAbsLevel); - pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], (i64)nInput); - } -} - -/* -** Read the last entry (most recently pushed) from the hint blob *pHint -** and then remove the entry. Write the two values read to *piAbsLevel and -** *pnInput before returning. -** -** If no error occurs, return SQLITE_OK. If the hint blob in *pHint does -** not contain at least two valid varints, return SQLITE_CORRUPT_VTAB. -*/ -static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){ - const int nHint = pHint->n; - int i; - - i = pHint->n-2; - while( i>0 && (pHint->a[i-1] & 0x80) ) i--; - while( i>0 && (pHint->a[i-1] & 0x80) ) i--; - - pHint->n = i; - i += sqlite3Fts3GetVarint(&pHint->a[i], piAbsLevel); - i += fts3GetVarint32(&pHint->a[i], pnInput); - if( i!=nHint ) return SQLITE_CORRUPT_VTAB; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; + int rc; /* Return Code */ + int nToken; /* Number of tokens in query */ + int iCol; /* Column currently being processed */ + StrBuffer res = {0, 0, 0}; /* Result string */ + TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ - return SQLITE_OK; -} + if( !pCsr->pExpr ){ + sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); + return; + } + memset(&sCtx, 0, sizeof(sCtx)); + assert( pCsr->isRequireSeek==0 ); -/* -** Attempt an incremental merge that writes nMerge leaf blocks. -** -** Incremental merges happen nMin segments at a time. The segments -** to be merged are the nMin oldest segments (the ones with the smallest -** values for the _segdir.idx field) in the highest level that contains -** at least nMin segments. Multiple merges might occur in an attempt to -** write the quota of nMerge leaf blocks. -*/ -SQLITE_PRIVATE int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ - int rc; /* Return code */ - int nRem = nMerge; /* Number of leaf pages yet to be written */ - Fts3MultiSegReader *pCsr; /* Cursor used to read input data */ - Fts3SegFilter *pFilter; /* Filter used with cursor pCsr */ - IncrmergeWriter *pWriter; /* Writer object */ - int nSeg = 0; /* Number of input segments */ - sqlite3_int64 iAbsLevel = 0; /* Absolute level number to work on */ - Blob hint = {0, 0, 0}; /* Hint read from %_stat table */ - int bDirtyHint = 0; /* True if blob 'hint' has been modified */ + /* Count the number of terms in the query */ + rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); + if( rc!=SQLITE_OK ) goto offsets_out; - /* Allocate space for the cursor, filter and writer objects */ - const int nAlloc = sizeof(*pCsr) + sizeof(*pFilter) + sizeof(*pWriter); - pWriter = (IncrmergeWriter *)sqlite3_malloc(nAlloc); - if( !pWriter ) return SQLITE_NOMEM; - pFilter = (Fts3SegFilter *)&pWriter[1]; - pCsr = (Fts3MultiSegReader *)&pFilter[1]; + /* Allocate the array of TermOffset iterators. */ + sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); + if( 0==sCtx.aTerm ){ + rc = SQLITE_NOMEM; + goto offsets_out; + } + sCtx.iDocid = pCsr->iPrevId; + sCtx.pCsr = pCsr; - rc = fts3IncrmergeHintLoad(p, &hint); - while( rc==SQLITE_OK && nRem>0 ){ - const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex; - sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */ - int bUseHint = 0; /* True if attempting to append */ - int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */ + /* Loop through the table columns, appending offset information to + ** string-buffer res for each column. + */ + for(iCol=0; iColnColumn; iCol++){ + sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ + const char *ZDUMMY; /* Dummy argument used with xNext() */ + int NDUMMY = 0; /* Dummy argument used with xNext() */ + int iStart = 0; + int iEnd = 0; + int iCurrent = 0; + const char *zDoc; + int nDoc; - /* Search the %_segdir table for the absolute level with the smallest - ** relative level number that contains at least nMin segments, if any. - ** If one is found, set iAbsLevel to the absolute level number and - ** nSeg to nMin. If no level with at least nMin segments can be found, - ** set nSeg to -1. + /* Initialize the contents of sCtx.aTerm[] for column iCol. There is + ** no way that this operation can fail, so the return code from + ** fts3ExprIterate() can be discarded. */ - rc = fts3SqlStmt(p, SQL_FIND_MERGE_LEVEL, &pFindLevel, 0); - sqlite3_bind_int(pFindLevel, 1, nMin); - if( sqlite3_step(pFindLevel)==SQLITE_ROW ){ - iAbsLevel = sqlite3_column_int64(pFindLevel, 0); - nSeg = nMin; - }else{ - nSeg = -1; - } - rc = sqlite3_reset(pFindLevel); + sCtx.iCol = iCol; + sCtx.iTerm = 0; + (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx); - /* If the hint read from the %_stat table is not empty, check if the - ** last entry in it specifies a relative level smaller than or equal - ** to the level identified by the block above (if any). If so, this - ** iteration of the loop will work on merging at the hinted level. + /* Retreive the text stored in column iCol. If an SQL NULL is stored + ** in column iCol, jump immediately to the next iteration of the loop. + ** If an OOM occurs while retrieving the data (this can happen if SQLite + ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM + ** to the caller. */ - if( rc==SQLITE_OK && hint.n ){ - int nHint = hint.n; - sqlite3_int64 iHintAbsLevel = 0; /* Hint level */ - int nHintSeg = 0; /* Hint number of segments */ - - rc = fts3IncrmergeHintPop(&hint, &iHintAbsLevel, &nHintSeg); - if( nSeg<0 || (iAbsLevel % nMod) >= (iHintAbsLevel % nMod) ){ - iAbsLevel = iHintAbsLevel; - nSeg = nHintSeg; - bUseHint = 1; - bDirtyHint = 1; - }else{ - /* This undoes the effect of the HintPop() above - so that no entry - ** is removed from the hint blob. */ - hint.n = nHint; + zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); + nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); + if( zDoc==0 ){ + if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ + continue; } + rc = SQLITE_NOMEM; + goto offsets_out; } - /* If nSeg is less that zero, then there is no level with at least - ** nMin segments and no hint in the %_stat table. No work to do. - ** Exit early in this case. */ - if( nSeg<0 ) break; + /* Initialize a tokenizer iterator to iterate through column iCol. */ + rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, + zDoc, nDoc, &pC + ); + if( rc!=SQLITE_OK ) goto offsets_out; - /* Open a cursor to iterate through the contents of the oldest nSeg - ** indexes of absolute level iAbsLevel. If this cursor is opened using - ** the 'hint' parameters, it is possible that there are less than nSeg - ** segments available in level iAbsLevel. In this case, no work is - ** done on iAbsLevel - fall through to the next iteration of the loop - ** to start work on some other level. */ - memset(pWriter, 0, nAlloc); - pFilter->flags = FTS3_SEGMENT_REQUIRE_POS; + rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); + while( rc==SQLITE_OK ){ + int i; /* Used to loop through terms */ + int iMinPos = 0x7FFFFFFF; /* Position of next token */ + TermOffset *pTerm = 0; /* TermOffset associated with next token */ - if( rc==SQLITE_OK ){ - rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx); - assert( bUseHint==1 || bUseHint==0 ); - if( iIdx==0 || (bUseHint && iIdx==1) ){ - int bIgnore = 0; - rc = fts3SegmentIsMaxLevel(p, iAbsLevel+1, &bIgnore); - if( bIgnore ){ - pFilter->flags |= FTS3_SEGMENT_IGNORE_EMPTY; + for(i=0; ipList && (pT->iPos-pT->iOff)iPos-pT->iOff; + pTerm = pT; } } - } - if( rc==SQLITE_OK ){ - rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr); - } - if( SQLITE_OK==rc && pCsr->nSegment==nSeg - && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter)) - && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr)) - ){ - if( bUseHint && iIdx>0 ){ - const char *zKey = pCsr->zTerm; - int nKey = pCsr->nTerm; - rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter); + if( !pTerm ){ + /* All offsets for this column have been gathered. */ + rc = SQLITE_DONE; }else{ - rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter); - } - - if( rc==SQLITE_OK && pWriter->nLeafEst ){ - fts3LogMerge(nSeg, iAbsLevel); - do { - rc = fts3IncrmergeAppend(p, pWriter, pCsr); - if( rc==SQLITE_OK ) rc = sqlite3Fts3SegReaderStep(p, pCsr); - if( pWriter->nWork>=nRem && rc==SQLITE_ROW ) rc = SQLITE_OK; - }while( rc==SQLITE_ROW ); - - /* Update or delete the input segments */ + assert( iCurrent<=iMinPos ); + if( 0==(0xFE&*pTerm->pList) ){ + pTerm->pList = 0; + }else{ + fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); + } + while( rc==SQLITE_OK && iCurrentxNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); + } if( rc==SQLITE_OK ){ - nRem -= (1 + pWriter->nWork); - rc = fts3IncrmergeChomp(p, iAbsLevel, pCsr, &nSeg); - if( nSeg!=0 ){ - bDirtyHint = 1; - fts3IncrmergeHintPush(&hint, iAbsLevel, nSeg, &rc); - } + char aBuffer[64]; + sqlite3_snprintf(sizeof(aBuffer), aBuffer, + "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart + ); + rc = fts3StringAppend(&res, aBuffer, -1); + }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){ + rc = FTS_CORRUPT_VTAB; } } - - if( nSeg!=0 ){ - pWriter->nLeafData = pWriter->nLeafData * -1; - } - fts3IncrmergeRelease(p, pWriter, &rc); - if( nSeg==0 && pWriter->bNoLeafData==0 ){ - fts3PromoteSegments(p, iAbsLevel+1, pWriter->nLeafData); - } + } + if( rc==SQLITE_DONE ){ + rc = SQLITE_OK; } - sqlite3Fts3SegReaderFinish(pCsr); - } - - /* Write the hint values into the %_stat table for the next incr-merger */ - if( bDirtyHint && rc==SQLITE_OK ){ - rc = fts3IncrmergeHintStore(p, &hint); + pMod->xClose(pC); + if( rc!=SQLITE_OK ) goto offsets_out; } - sqlite3_free(pWriter); - sqlite3_free(hint.a); - return rc; -} - -/* -** Convert the text beginning at *pz into an integer and return -** its value. Advance *pz to point to the first character past -** the integer. -*/ -static int fts3Getint(const char **pz){ - const char *z = *pz; - int i = 0; - while( (*z)>='0' && (*z)<='9' ) i = 10*i + *(z++) - '0'; - *pz = z; - return i; + offsets_out: + sqlite3_free(sCtx.aTerm); + assert( rc!=SQLITE_DONE ); + sqlite3Fts3SegmentsClose(pTab); + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + sqlite3_free(res.z); + }else{ + sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); + } + return; } /* -** Process statements of the form: -** -** INSERT INTO table(table) VALUES('merge=A,B'); -** -** A and B are integers that decode to be the number of leaf pages -** written for the merge, and the minimum number of segments on a level -** before it will be selected for a merge, respectively. +** Implementation of matchinfo() function. */ -static int fts3DoIncrmerge( - Fts3Table *p, /* FTS3 table handle */ - const char *zParam /* Nul-terminated string containing "A,B" */ +SQLITE_PRIVATE void sqlite3Fts3Matchinfo( + sqlite3_context *pContext, /* Function call context */ + Fts3Cursor *pCsr, /* FTS3 table cursor */ + const char *zArg /* Second arg to matchinfo() function */ ){ - int rc; - int nMin = (FTS3_MERGE_COUNT / 2); - int nMerge = 0; - const char *z = zParam; - - /* Read the first integer value */ - nMerge = fts3Getint(&z); + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + const char *zFormat; - /* If the first integer value is followed by a ',', read the second - ** integer value. */ - if( z[0]==',' && z[1]!='\0' ){ - z++; - nMin = fts3Getint(&z); + if( zArg ){ + zFormat = zArg; + }else{ + zFormat = FTS3_MATCHINFO_DEFAULT; } - if( z[0]!='\0' || nMin<2 ){ - rc = SQLITE_ERROR; + if( !pCsr->pExpr ){ + sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); + return; }else{ - rc = SQLITE_OK; - if( !p->bHasStat ){ - assert( p->bFts4==0 ); - sqlite3Fts3CreateStatTable(&rc, p); - } - if( rc==SQLITE_OK ){ - rc = sqlite3Fts3Incrmerge(p, nMerge, nMin); - } - sqlite3Fts3SegmentsClose(p); + /* Retrieve matchinfo() data. */ + fts3GetMatchinfo(pContext, pCsr, zFormat); + sqlite3Fts3SegmentsClose(pTab); } - return rc; } +#endif + +/************** End of fts3_snippet.c ****************************************/ +/************** Begin file fts3_unicode.c ************************************/ /* -** Process statements of the form: +** 2012 May 24 ** -** INSERT INTO table(table) VALUES('automerge=X'); +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: ** -** where X is an integer. X==0 means to turn automerge off. X!=0 means -** turn it on. The setting is persistent. +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** Implementation of the "unicode" full-text-search tokenizer. */ -static int fts3DoAutoincrmerge( - Fts3Table *p, /* FTS3 table handle */ - const char *zParam /* Nul-terminated string containing boolean */ -){ - int rc = SQLITE_OK; - sqlite3_stmt *pStmt = 0; - p->nAutoincrmerge = fts3Getint(&zParam); - if( p->nAutoincrmerge==1 || p->nAutoincrmerge>FTS3_MERGE_COUNT ){ - p->nAutoincrmerge = 8; - } - if( !p->bHasStat ){ - assert( p->bFts4==0 ); - sqlite3Fts3CreateStatTable(&rc, p); - if( rc ) return rc; - } - rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); - if( rc ) return rc; - sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); - sqlite3_bind_int(pStmt, 2, p->nAutoincrmerge); - sqlite3_step(pStmt); - rc = sqlite3_reset(pStmt); - return rc; -} + +#ifndef SQLITE_DISABLE_FTS3_UNICODE + +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include */ +/* #include */ +/* #include */ +/* #include */ + +/* #include "fts3_tokenizer.h" */ /* -** Return a 64-bit checksum for the FTS index entry specified by the -** arguments to this function. +** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied +** from the sqlite3 source file utf.c. If this file is compiled as part +** of the amalgamation, they are not required. */ -static u64 fts3ChecksumEntry( - const char *zTerm, /* Pointer to buffer containing term */ - int nTerm, /* Size of zTerm in bytes */ - int iLangid, /* Language id for current row */ - int iIndex, /* Index (0..Fts3Table.nIndex-1) */ - i64 iDocid, /* Docid for current row. */ - int iCol, /* Column number */ - int iPos /* Position */ -){ - int i; - u64 ret = (u64)iDocid; +#ifndef SQLITE_AMALGAMATION - ret += (ret<<3) + iLangid; - ret += (ret<<3) + iIndex; - ret += (ret<<3) + iCol; - ret += (ret<<3) + iPos; - for(i=0; i=0xc0 ){ \ + c = sqlite3Utf8Trans1[c-0xc0]; \ + while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \ + c = (c<<6) + (0x3f & *(zIn++)); \ + } \ + if( c<0x80 \ + || (c&0xFFFFF800)==0xD800 \ + || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \ + } + +#define WRITE_UTF8(zOut, c) { \ + if( c<0x00080 ){ \ + *zOut++ = (u8)(c&0xFF); \ + } \ + else if( c<0x00800 ){ \ + *zOut++ = 0xC0 + (u8)((c>>6)&0x1F); \ + *zOut++ = 0x80 + (u8)(c & 0x3F); \ + } \ + else if( c<0x10000 ){ \ + *zOut++ = 0xE0 + (u8)((c>>12)&0x0F); \ + *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ + *zOut++ = 0x80 + (u8)(c & 0x3F); \ + }else{ \ + *zOut++ = 0xF0 + (u8)((c>>18) & 0x07); \ + *zOut++ = 0x80 + (u8)((c>>12) & 0x3F); \ + *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ + *zOut++ = 0x80 + (u8)(c & 0x3F); \ + } \ +} + +#endif /* ifndef SQLITE_AMALGAMATION */ + +typedef struct unicode_tokenizer unicode_tokenizer; +typedef struct unicode_cursor unicode_cursor; + +struct unicode_tokenizer { + sqlite3_tokenizer base; + int bRemoveDiacritic; + int nException; + int *aiException; +}; + +struct unicode_cursor { + sqlite3_tokenizer_cursor base; + const unsigned char *aInput; /* Input text being tokenized */ + int nInput; /* Size of aInput[] in bytes */ + int iOff; /* Current offset within aInput[] */ + int iToken; /* Index of next token to be returned */ + char *zToken; /* storage for current token */ + int nAlloc; /* space allocated at zToken */ +}; + + +/* +** Destroy a tokenizer allocated by unicodeCreate(). +*/ +static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){ + if( pTokenizer ){ + unicode_tokenizer *p = (unicode_tokenizer *)pTokenizer; + sqlite3_free(p->aiException); + sqlite3_free(p); + } + return SQLITE_OK; } /* -** Return a checksum of all entries in the FTS index that correspond to -** language id iLangid. The checksum is calculated by XORing the checksums -** of each individual entry (see fts3ChecksumEntry()) together. +** As part of a tokenchars= or separators= option, the CREATE VIRTUAL TABLE +** statement has specified that the tokenizer for this table shall consider +** all characters in string zIn/nIn to be separators (if bAlnum==0) or +** token characters (if bAlnum==1). ** -** If successful, the checksum value is returned and *pRc set to SQLITE_OK. -** Otherwise, if an error occurs, *pRc is set to an SQLite error code. The -** return value is undefined in this case. +** For each codepoint in the zIn/nIn string, this function checks if the +** sqlite3FtsUnicodeIsalnum() function already returns the desired result. +** If so, no action is taken. Otherwise, the codepoint is added to the +** unicode_tokenizer.aiException[] array. For the purposes of tokenization, +** the return value of sqlite3FtsUnicodeIsalnum() is inverted for all +** codepoints in the aiException[] array. +** +** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic() +** identifies as a diacritic) occurs in the zIn/nIn string it is ignored. +** It is not possible to change the behavior of the tokenizer with respect +** to these codepoints. */ -static u64 fts3ChecksumIndex( - Fts3Table *p, /* FTS3 table handle */ - int iLangid, /* Language id to return cksum for */ - int iIndex, /* Index to cksum (0..p->nIndex-1) */ - int *pRc /* OUT: Return code */ +static int unicodeAddExceptions( + unicode_tokenizer *p, /* Tokenizer to add exceptions to */ + int bAlnum, /* Replace Isalnum() return value with this */ + const char *zIn, /* Array of characters to make exceptions */ + int nIn /* Length of z in bytes */ ){ - Fts3SegFilter filter; - Fts3MultiSegReader csr; - int rc; - u64 cksum = 0; - - assert( *pRc==SQLITE_OK ); + const unsigned char *z = (const unsigned char *)zIn; + const unsigned char *zTerm = &z[nIn]; + int iCode; + int nEntry = 0; - memset(&filter, 0, sizeof(filter)); - memset(&csr, 0, sizeof(csr)); - filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; - filter.flags |= FTS3_SEGMENT_SCAN; + assert( bAlnum==0 || bAlnum==1 ); - rc = sqlite3Fts3SegReaderCursor( - p, iLangid, iIndex, FTS3_SEGCURSOR_ALL, 0, 0, 0, 1,&csr - ); - if( rc==SQLITE_OK ){ - rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); + while( zaiException, (p->nException+nEntry)*sizeof(int)); + if( aNew==0 ) return SQLITE_NOMEM; + nNew = p->nException; - pCsr += sqlite3Fts3GetVarint(pCsr, &iDocid); - while( pCsri; j--) aNew[j] = aNew[j-1]; + aNew[i] = iCode; + nNew++; } } + p->aiException = aNew; + p->nException = nNew; } - sqlite3Fts3SegReaderFinish(&csr); - *pRc = rc; - return cksum; + return SQLITE_OK; } /* -** Check if the contents of the FTS index match the current contents of the -** content table. If no error occurs and the contents do match, set *pbOk -** to true and return SQLITE_OK. Or if the contents do not match, set *pbOk -** to false before returning. -** -** If an error occurs (e.g. an OOM or IO error), return an SQLite error -** code. The final value of *pbOk is undefined in this case. +** Return true if the p->aiException[] array contains the value iCode. */ -static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){ - int rc = SQLITE_OK; /* Return code */ - u64 cksum1 = 0; /* Checksum based on FTS index contents */ - u64 cksum2 = 0; /* Checksum based on %_content contents */ - sqlite3_stmt *pAllLangid = 0; /* Statement to return all language-ids */ +static int unicodeIsException(unicode_tokenizer *p, int iCode){ + if( p->nException>0 ){ + int *a = p->aiException; + int iLo = 0; + int iHi = p->nException-1; - /* This block calculates the checksum according to the FTS index. */ - rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); - if( rc==SQLITE_OK ){ - int rc2; - sqlite3_bind_int(pAllLangid, 1, p->nIndex); - while( rc==SQLITE_OK && sqlite3_step(pAllLangid)==SQLITE_ROW ){ - int iLangid = sqlite3_column_int(pAllLangid, 0); - int i; - for(i=0; inIndex; i++){ - cksum1 = cksum1 ^ fts3ChecksumIndex(p, iLangid, i, &rc); + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + if( iCode==a[iTest] ){ + return 1; + }else if( iCode>a[iTest] ){ + iLo = iTest+1; + }else{ + iHi = iTest-1; } } - rc2 = sqlite3_reset(pAllLangid); - if( rc==SQLITE_OK ) rc = rc2; } - /* This block calculates the checksum according to the %_content table */ - rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); - if( rc==SQLITE_OK ){ - sqlite3_tokenizer_module const *pModule = p->pTokenizer->pModule; - sqlite3_stmt *pStmt = 0; - char *zSql; - - zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist); - if( !zSql ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); - sqlite3_free(zSql); - } + return 0; +} - while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ - i64 iDocid = sqlite3_column_int64(pStmt, 0); - int iLang = langidFromSelect(p, pStmt); - int iCol; +/* +** Return true if, for the purposes of tokenization, codepoint iCode is +** considered a token character (not a separator). +*/ +static int unicodeIsAlnum(unicode_tokenizer *p, int iCode){ + assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); + return sqlite3FtsUnicodeIsalnum(iCode) ^ unicodeIsException(p, iCode); +} - for(iCol=0; rc==SQLITE_OK && iColnColumn; iCol++){ - if( p->abNotindexed[iCol]==0 ){ - const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1); - int nText = sqlite3_column_bytes(pStmt, iCol+1); - sqlite3_tokenizer_cursor *pT = 0; +/* +** Create a new tokenizer instance. +*/ +static int unicodeCreate( + int nArg, /* Size of array argv[] */ + const char * const *azArg, /* Tokenizer creation arguments */ + sqlite3_tokenizer **pp /* OUT: New tokenizer handle */ +){ + unicode_tokenizer *pNew; /* New tokenizer object */ + int i; + int rc = SQLITE_OK; - rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText,&pT); - while( rc==SQLITE_OK ){ - char const *zToken; /* Buffer containing token */ - int nToken = 0; /* Number of bytes in token */ - int iDum1 = 0, iDum2 = 0; /* Dummy variables */ - int iPos = 0; /* Position of token in zText */ + pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer)); + if( pNew==NULL ) return SQLITE_NOMEM; + memset(pNew, 0, sizeof(unicode_tokenizer)); + pNew->bRemoveDiacritic = 1; - rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos); - if( rc==SQLITE_OK ){ - int i; - cksum2 = cksum2 ^ fts3ChecksumEntry( - zToken, nToken, iLang, 0, iDocid, iCol, iPos - ); - for(i=1; inIndex; i++){ - if( p->aIndex[i].nPrefix<=nToken ){ - cksum2 = cksum2 ^ fts3ChecksumEntry( - zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos - ); - } - } - } - } - if( pT ) pModule->xClose(pT); - if( rc==SQLITE_DONE ) rc = SQLITE_OK; - } - } - } + for(i=0; rc==SQLITE_OK && ibRemoveDiacritic = 1; + } + else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){ + pNew->bRemoveDiacritic = 0; + } + else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){ + rc = unicodeAddExceptions(pNew, 1, &z[11], n-11); + } + else if( n>=11 && memcmp("separators=", z, 11)==0 ){ + rc = unicodeAddExceptions(pNew, 0, &z[11], n-11); + } + else{ + /* Unrecognized argument */ + rc = SQLITE_ERROR; + } } - *pbOk = (cksum1==cksum2); + if( rc!=SQLITE_OK ){ + unicodeDestroy((sqlite3_tokenizer *)pNew); + pNew = 0; + } + *pp = (sqlite3_tokenizer *)pNew; return rc; } /* -** Run the integrity-check. If no error occurs and the current contents of -** the FTS index are correct, return SQLITE_OK. Or, if the contents of the -** FTS index are incorrect, return SQLITE_CORRUPT_VTAB. -** -** Or, if an error (e.g. an OOM or IO error) occurs, return an SQLite -** error code. -** -** The integrity-check works as follows. For each token and indexed token -** prefix in the document set, a 64-bit checksum is calculated (by code -** in fts3ChecksumEntry()) based on the following: -** -** + The index number (0 for the main index, 1 for the first prefix -** index etc.), -** + The token (or token prefix) text itself, -** + The language-id of the row it appears in, -** + The docid of the row it appears in, -** + The column it appears in, and -** + The tokens position within that column. -** -** The checksums for all entries in the index are XORed together to create -** a single checksum for the entire index. -** -** The integrity-check code calculates the same checksum in two ways: -** -** 1. By scanning the contents of the FTS index, and -** 2. By scanning and tokenizing the content table. -** -** If the two checksums are identical, the integrity-check is deemed to have -** passed. +** Prepare to begin tokenizing a particular string. The input +** string to be tokenized is pInput[0..nBytes-1]. A cursor +** used to incrementally tokenize this string is returned in +** *ppCursor. */ -static int fts3DoIntegrityCheck( - Fts3Table *p /* FTS3 table handle */ +static int unicodeOpen( + sqlite3_tokenizer *p, /* The tokenizer */ + const char *aInput, /* Input string */ + int nInput, /* Size of string aInput in bytes */ + sqlite3_tokenizer_cursor **pp /* OUT: New cursor object */ ){ - int rc; - int bOk = 0; - rc = fts3IntegrityCheck(p, &bOk); - if( rc==SQLITE_OK && bOk==0 ) rc = SQLITE_CORRUPT_VTAB; - return rc; -} - -/* -** Handle a 'special' INSERT of the form: -** -** "INSERT INTO tbl(tbl) VALUES()" -** -** Argument pVal contains the result of . Currently the only -** meaningful value to insert is the text 'optimize'. -*/ -static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){ - int rc; /* Return Code */ - const char *zVal = (const char *)sqlite3_value_text(pVal); - int nVal = sqlite3_value_bytes(pVal); + unicode_cursor *pCsr; - if( !zVal ){ + pCsr = (unicode_cursor *)sqlite3_malloc(sizeof(unicode_cursor)); + if( pCsr==0 ){ return SQLITE_NOMEM; - }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){ - rc = fts3DoOptimize(p, 0); - }else if( nVal==7 && 0==sqlite3_strnicmp(zVal, "rebuild", 7) ){ - rc = fts3DoRebuild(p); - }else if( nVal==15 && 0==sqlite3_strnicmp(zVal, "integrity-check", 15) ){ - rc = fts3DoIntegrityCheck(p); - }else if( nVal>6 && 0==sqlite3_strnicmp(zVal, "merge=", 6) ){ - rc = fts3DoIncrmerge(p, &zVal[6]); - }else if( nVal>10 && 0==sqlite3_strnicmp(zVal, "automerge=", 10) ){ - rc = fts3DoAutoincrmerge(p, &zVal[10]); -#ifdef SQLITE_TEST - }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){ - p->nNodeSize = atoi(&zVal[9]); - rc = SQLITE_OK; - }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){ - p->nMaxPendingData = atoi(&zVal[11]); - rc = SQLITE_OK; - }else if( nVal>21 && 0==sqlite3_strnicmp(zVal, "test-no-incr-doclist=", 21) ){ - p->bNoIncrDoclist = atoi(&zVal[21]); - rc = SQLITE_OK; -#endif + } + memset(pCsr, 0, sizeof(unicode_cursor)); + + pCsr->aInput = (const unsigned char *)aInput; + if( aInput==0 ){ + pCsr->nInput = 0; + }else if( nInput<0 ){ + pCsr->nInput = (int)strlen(aInput); }else{ - rc = SQLITE_ERROR; + pCsr->nInput = nInput; } - return rc; + *pp = &pCsr->base; + UNUSED_PARAMETER(p); + return SQLITE_OK; } -#ifndef SQLITE_DISABLE_FTS4_DEFERRED /* -** Delete all cached deferred doclists. Deferred doclists are cached -** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function. +** Close a tokenization cursor previously opened by a call to +** simpleOpen() above. */ -SQLITE_PRIVATE void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){ - Fts3DeferredToken *pDef; - for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){ - fts3PendingListDelete(pDef->pList); - pDef->pList = 0; - } +static int unicodeClose(sqlite3_tokenizer_cursor *pCursor){ + unicode_cursor *pCsr = (unicode_cursor *) pCursor; + sqlite3_free(pCsr->zToken); + sqlite3_free(pCsr); + return SQLITE_OK; } /* -** Free all entries in the pCsr->pDeffered list. Entries are added to -** this list using sqlite3Fts3DeferToken(). +** Extract the next token from a tokenization cursor. The cursor must +** have been opened by a prior call to simpleOpen(). */ -SQLITE_PRIVATE void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){ - Fts3DeferredToken *pDef; - Fts3DeferredToken *pNext; - for(pDef=pCsr->pDeferred; pDef; pDef=pNext){ - pNext = pDef->pNext; - fts3PendingListDelete(pDef->pList); - sqlite3_free(pDef); - } - pCsr->pDeferred = 0; -} +static int unicodeNext( + sqlite3_tokenizer_cursor *pC, /* Cursor returned by simpleOpen */ + const char **paToken, /* OUT: Token text */ + int *pnToken, /* OUT: Number of bytes at *paToken */ + int *piStart, /* OUT: Starting offset of token */ + int *piEnd, /* OUT: Ending offset of token */ + int *piPos /* OUT: Position integer of token */ +){ + unicode_cursor *pCsr = (unicode_cursor *)pC; + unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer); + int iCode = 0; + char *zOut; + const unsigned char *z = &pCsr->aInput[pCsr->iOff]; + const unsigned char *zStart = z; + const unsigned char *zEnd; + const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput]; -/* -** Generate deferred-doclists for all tokens in the pCsr->pDeferred list -** based on the row that pCsr currently points to. -** -** A deferred-doclist is like any other doclist with position information -** included, except that it only contains entries for a single row of the -** table, not for all rows. -*/ -SQLITE_PRIVATE int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){ - int rc = SQLITE_OK; /* Return code */ - if( pCsr->pDeferred ){ - int i; /* Used to iterate through table columns */ - sqlite3_int64 iDocid; /* Docid of the row pCsr points to */ - Fts3DeferredToken *pDef; /* Used to iterate through deferred tokens */ - - Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; - sqlite3_tokenizer *pT = p->pTokenizer; - sqlite3_tokenizer_module const *pModule = pT->pModule; - - assert( pCsr->isRequireSeek==0 ); - iDocid = sqlite3_column_int64(pCsr->pStmt, 0); - - for(i=0; inColumn && rc==SQLITE_OK; i++){ - if( p->abNotindexed[i]==0 ){ - const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1); - sqlite3_tokenizer_cursor *pTC = 0; + /* Scan past any delimiter characters before the start of the next token. + ** Return SQLITE_DONE early if this takes us all the way to the end of + ** the input. */ + while( z=zTerm ) return SQLITE_DONE; - rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC); - while( rc==SQLITE_OK ){ - char const *zToken; /* Buffer containing token */ - int nToken = 0; /* Number of bytes in token */ - int iDum1 = 0, iDum2 = 0; /* Dummy variables */ - int iPos = 0; /* Position of token in zText */ + zOut = pCsr->zToken; + do { + int iOut; - rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); - for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ - Fts3PhraseToken *pPT = pDef->pToken; - if( (pDef->iCol>=p->nColumn || pDef->iCol==i) - && (pPT->bFirst==0 || iPos==0) - && (pPT->n==nToken || (pPT->isPrefix && pPT->nz, pPT->n)) - ){ - fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); - } - } - } - if( pTC ) pModule->xClose(pTC); - if( rc==SQLITE_DONE ) rc = SQLITE_OK; - } + /* Grow the output buffer if required. */ + if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){ + char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64); + if( !zNew ) return SQLITE_NOMEM; + zOut = &zNew[zOut - pCsr->zToken]; + pCsr->zToken = zNew; + pCsr->nAlloc += 64; } - for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ - if( pDef->pList ){ - rc = fts3PendingListAppendVarint(&pDef->pList, 0); - } + /* Write the folded case of the last character read to the output */ + zEnd = z; + iOut = sqlite3FtsUnicodeFold(iCode, p->bRemoveDiacritic); + if( iOut ){ + WRITE_UTF8(zOut, iOut); } - } - - return rc; -} -SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList( - Fts3DeferredToken *p, - char **ppData, - int *pnData -){ - char *pRet; - int nSkip; - sqlite3_int64 dummy; + /* If the cursor is not at EOF, read the next character */ + if( z>=zTerm ) break; + READ_UTF8(z, zTerm, iCode); + }while( unicodeIsAlnum(p, iCode) + || sqlite3FtsUnicodeIsdiacritic(iCode) + ); - *ppData = 0; - *pnData = 0; + /* Set the output variables and return. */ + pCsr->iOff = (int)(z - pCsr->aInput); + *paToken = pCsr->zToken; + *pnToken = (int)(zOut - pCsr->zToken); + *piStart = (int)(zStart - pCsr->aInput); + *piEnd = (int)(zEnd - pCsr->aInput); + *piPos = pCsr->iToken++; + return SQLITE_OK; +} - if( p->pList==0 ){ - return SQLITE_OK; - } +/* +** Set *ppModule to a pointer to the sqlite3_tokenizer_module +** structure for the unicode tokenizer. +*/ +SQLITE_PRIVATE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){ + static const sqlite3_tokenizer_module module = { + 0, + unicodeCreate, + unicodeDestroy, + unicodeOpen, + unicodeClose, + unicodeNext, + 0, + }; + *ppModule = &module; +} - pRet = (char *)sqlite3_malloc(p->pList->nData); - if( !pRet ) return SQLITE_NOMEM; +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ +#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */ - nSkip = sqlite3Fts3GetVarint(p->pList->aData, &dummy); - *pnData = p->pList->nData - nSkip; - *ppData = pRet; - - memcpy(pRet, &p->pList->aData[nSkip], *pnData); - return SQLITE_OK; -} +/************** End of fts3_unicode.c ****************************************/ +/************** Begin file fts3_unicode2.c ***********************************/ +/* +** 2012 May 25 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +*/ /* -** Add an entry for token pToken to the pCsr->pDeferred list. +** DO NOT EDIT THIS MACHINE GENERATED FILE. */ -SQLITE_PRIVATE int sqlite3Fts3DeferToken( - Fts3Cursor *pCsr, /* Fts3 table cursor */ - Fts3PhraseToken *pToken, /* Token to defer */ - int iCol /* Column that token must appear in (or -1) */ -){ - Fts3DeferredToken *pDeferred; - pDeferred = sqlite3_malloc(sizeof(*pDeferred)); - if( !pDeferred ){ - return SQLITE_NOMEM; - } - memset(pDeferred, 0, sizeof(*pDeferred)); - pDeferred->pToken = pToken; - pDeferred->pNext = pCsr->pDeferred; - pDeferred->iCol = iCol; - pCsr->pDeferred = pDeferred; - assert( pToken->pDeferred==0 ); - pToken->pDeferred = pDeferred; +#ifndef SQLITE_DISABLE_FTS3_UNICODE +#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) - return SQLITE_OK; -} -#endif +/* #include */ /* -** SQLite value pRowid contains the rowid of a row that may or may not be -** present in the FTS3 table. If it is, delete it and adjust the contents -** of subsiduary data structures accordingly. +** Return true if the argument corresponds to a unicode codepoint +** classified as either a letter or a number. Otherwise false. +** +** The results are undefined if the value passed to this function +** is less than zero. */ -static int fts3DeleteByRowid( - Fts3Table *p, - sqlite3_value *pRowid, - int *pnChng, /* IN/OUT: Decrement if row is deleted */ - u32 *aSzDel -){ - int rc = SQLITE_OK; /* Return code */ - int bFound = 0; /* True if *pRowid really is in the table */ +SQLITE_PRIVATE int sqlite3FtsUnicodeIsalnum(int c){ + /* Each unsigned integer in the following array corresponds to a contiguous + ** range of unicode codepoints that are not either letters or numbers (i.e. + ** codepoints for which this function should return 0). + ** + ** The most significant 22 bits in each 32-bit value contain the first + ** codepoint in the range. The least significant 10 bits are used to store + ** the size of the range (always at least 1). In other words, the value + ** ((C<<22) + N) represents a range of N codepoints starting with codepoint + ** C. It is not possible to represent a range larger than 1023 codepoints + ** using this format. + */ + static const unsigned int aEntry[] = { + 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07, + 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01, + 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401, + 0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01, + 0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01, + 0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802, + 0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F, + 0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401, + 0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804, + 0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403, + 0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812, + 0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001, + 0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802, + 0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805, + 0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401, + 0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03, + 0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807, + 0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001, + 0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01, + 0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804, + 0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001, + 0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802, + 0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01, + 0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06, + 0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007, + 0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006, + 0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417, + 0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14, + 0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07, + 0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01, + 0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001, + 0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802, + 0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F, + 0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002, + 0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802, + 0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006, + 0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D, + 0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802, + 0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027, + 0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403, + 0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805, + 0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04, + 0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401, + 0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005, + 0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B, + 0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A, + 0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001, + 0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59, + 0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807, + 0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01, + 0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E, + 0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100, + 0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10, + 0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402, + 0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804, + 0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012, + 0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004, + 0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002, + 0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803, + 0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07, + 0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02, + 0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802, + 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013, + 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06, + 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003, + 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01, + 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403, + 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009, + 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003, + 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003, + 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E, + 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046, + 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401, + 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401, + 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F, + 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C, + 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002, + 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025, + 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6, + 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46, + 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060, + 0x380400F0, + }; + static const unsigned int aAscii[4] = { + 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001, + }; - fts3DeleteTerms(&rc, p, pRowid, aSzDel, &bFound); - if( bFound && rc==SQLITE_OK ){ - int isEmpty = 0; /* Deleting *pRowid leaves the table empty */ - rc = fts3IsEmpty(p, pRowid, &isEmpty); - if( rc==SQLITE_OK ){ - if( isEmpty ){ - /* Deleting this row means the whole table is empty. In this case - ** delete the contents of all three tables and throw away any - ** data in the pendingTerms hash table. */ - rc = fts3DeleteAll(p, 1); - *pnChng = 0; - memset(aSzDel, 0, sizeof(u32) * (p->nColumn+1) * 2); + if( c<128 ){ + return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); + }else if( c<(1<<22) ){ + unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; + int iRes = 0; + int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; + int iLo = 0; + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + if( key >= aEntry[iTest] ){ + iRes = iTest; + iLo = iTest+1; }else{ - *pnChng = *pnChng - 1; - if( p->zContentTbl==0 ){ - fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid); - } - if( p->bHasDocsize ){ - fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid); - } + iHi = iTest-1; } } + assert( aEntry[0]=aEntry[iRes] ); + return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); } - - return rc; + return 1; } + /* -** This function does the work for the xUpdate method of FTS3 virtual -** tables. The schema of the virtual table being: -** -** CREATE TABLE
    ( -** , -**
    HIDDEN, -** docid HIDDEN, -** HIDDEN -** ); -** -** +** If the argument is a codepoint corresponding to a lowercase letter +** in the ASCII range with a diacritic added, return the codepoint +** of the ASCII letter only. For example, if passed 235 - "LATIN +** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER +** E"). The resuls of passing a codepoint that corresponds to an +** uppercase letter are undefined. */ -SQLITE_PRIVATE int sqlite3Fts3UpdateMethod( - sqlite3_vtab *pVtab, /* FTS3 vtab object */ - int nArg, /* Size of argument array */ - sqlite3_value **apVal, /* Array of arguments */ - sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ -){ - Fts3Table *p = (Fts3Table *)pVtab; - int rc = SQLITE_OK; /* Return Code */ - int isRemove = 0; /* True for an UPDATE or DELETE */ - u32 *aSzIns = 0; /* Sizes of inserted documents */ - u32 *aSzDel = 0; /* Sizes of deleted documents */ - int nChng = 0; /* Net change in number of documents */ - int bInsertDone = 0; - - /* At this point it must be known if the %_stat table exists or not. - ** So bHasStat may not be 2. */ - assert( p->bHasStat==0 || p->bHasStat==1 ); - - assert( p->pSegments==0 ); - assert( - nArg==1 /* DELETE operations */ - || nArg==(2 + p->nColumn + 3) /* INSERT or UPDATE operations */ - ); +static int remove_diacritic(int c){ + unsigned short aDia[] = { + 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, + 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, + 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, + 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, + 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928, + 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234, + 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504, + 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529, + 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, + 61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, + 62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, + 62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, + 62924, 63050, 63082, 63274, 63390, + }; + char aChar[] = { + '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c', + 'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r', + 's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o', + 'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r', + 'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h', + 'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't', + 'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a', + 'e', 'i', 'o', 'u', 'y', + }; - /* Check for a "special" INSERT operation. One of the form: - ** - ** INSERT INTO xyz(xyz) VALUES('command'); - */ - if( nArg>1 - && sqlite3_value_type(apVal[0])==SQLITE_NULL - && sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL - ){ - rc = fts3SpecialInsert(p, apVal[p->nColumn+2]); - goto update_out; + unsigned int key = (((unsigned int)c)<<3) | 0x00000007; + int iRes = 0; + int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1; + int iLo = 0; + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + if( key >= aDia[iTest] ){ + iRes = iTest; + iLo = iTest+1; + }else{ + iHi = iTest-1; + } } + assert( key>=aDia[iRes] ); + return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]); +} - if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){ - rc = SQLITE_CONSTRAINT; - goto update_out; - } - /* Allocate space to hold the change in document sizes */ - aSzDel = sqlite3_malloc( sizeof(aSzDel[0])*(p->nColumn+1)*2 ); - if( aSzDel==0 ){ - rc = SQLITE_NOMEM; - goto update_out; - } - aSzIns = &aSzDel[p->nColumn+1]; - memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2); +/* +** Return true if the argument interpreted as a unicode codepoint +** is a diacritical modifier character. +*/ +SQLITE_PRIVATE int sqlite3FtsUnicodeIsdiacritic(int c){ + unsigned int mask0 = 0x08029FDF; + unsigned int mask1 = 0x000361F8; + if( c<768 || c>817 ) return 0; + return (c < 768+32) ? + (mask0 & (1 << (c-768))) : + (mask1 & (1 << (c-768-32))); +} - rc = fts3Writelock(p); - if( rc!=SQLITE_OK ) goto update_out; - /* If this is an INSERT operation, or an UPDATE that modifies the rowid - ** value, then this operation requires constraint handling. +/* +** Interpret the argument as a unicode codepoint. If the codepoint +** is an upper case character that has a lower case equivalent, +** return the codepoint corresponding to the lower case version. +** Otherwise, return a copy of the argument. +** +** The results are undefined if the value passed to this function +** is less than zero. +*/ +SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){ + /* Each entry in the following array defines a rule for folding a range + ** of codepoints to lower case. The rule applies to a range of nRange + ** codepoints starting at codepoint iCode. ** - ** If the on-conflict mode is REPLACE, this means that the existing row - ** should be deleted from the database before inserting the new row. Or, - ** if the on-conflict mode is other than REPLACE, then this method must - ** detect the conflict and return SQLITE_CONSTRAINT before beginning to - ** modify the database file. + ** If the least significant bit in flags is clear, then the rule applies + ** to all nRange codepoints (i.e. all nRange codepoints are upper case and + ** need to be folded). Or, if it is set, then the rule only applies to + ** every second codepoint in the range, starting with codepoint C. + ** + ** The 7 most significant bits in flags are an index into the aiOff[] + ** array. If a specific codepoint C does require folding, then its lower + ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF). + ** + ** The contents of this array are generated by parsing the CaseFolding.txt + ** file distributed as part of the "Unicode Character Database". See + ** http://www.unicode.org for details. */ - if( nArg>1 && p->zContentTbl==0 ){ - /* Find the value object that holds the new rowid value. */ - sqlite3_value *pNewRowid = apVal[3+p->nColumn]; - if( sqlite3_value_type(pNewRowid)==SQLITE_NULL ){ - pNewRowid = apVal[1]; - } + static const struct TableEntry { + unsigned short iCode; + unsigned char flags; + unsigned char nRange; + } aEntry[] = { + {65, 14, 26}, {181, 64, 1}, {192, 14, 23}, + {216, 14, 7}, {256, 1, 48}, {306, 1, 6}, + {313, 1, 16}, {330, 1, 46}, {376, 116, 1}, + {377, 1, 6}, {383, 104, 1}, {385, 50, 1}, + {386, 1, 4}, {390, 44, 1}, {391, 0, 1}, + {393, 42, 2}, {395, 0, 1}, {398, 32, 1}, + {399, 38, 1}, {400, 40, 1}, {401, 0, 1}, + {403, 42, 1}, {404, 46, 1}, {406, 52, 1}, + {407, 48, 1}, {408, 0, 1}, {412, 52, 1}, + {413, 54, 1}, {415, 56, 1}, {416, 1, 6}, + {422, 60, 1}, {423, 0, 1}, {425, 60, 1}, + {428, 0, 1}, {430, 60, 1}, {431, 0, 1}, + {433, 58, 2}, {435, 1, 4}, {439, 62, 1}, + {440, 0, 1}, {444, 0, 1}, {452, 2, 1}, + {453, 0, 1}, {455, 2, 1}, {456, 0, 1}, + {458, 2, 1}, {459, 1, 18}, {478, 1, 18}, + {497, 2, 1}, {498, 1, 4}, {502, 122, 1}, + {503, 134, 1}, {504, 1, 40}, {544, 110, 1}, + {546, 1, 18}, {570, 70, 1}, {571, 0, 1}, + {573, 108, 1}, {574, 68, 1}, {577, 0, 1}, + {579, 106, 1}, {580, 28, 1}, {581, 30, 1}, + {582, 1, 10}, {837, 36, 1}, {880, 1, 4}, + {886, 0, 1}, {902, 18, 1}, {904, 16, 3}, + {908, 26, 1}, {910, 24, 2}, {913, 14, 17}, + {931, 14, 9}, {962, 0, 1}, {975, 4, 1}, + {976, 140, 1}, {977, 142, 1}, {981, 146, 1}, + {982, 144, 1}, {984, 1, 24}, {1008, 136, 1}, + {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1}, + {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1}, + {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32}, + {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1}, + {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38}, + {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1}, + {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1}, + {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6}, + {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6}, + {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8}, + {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2}, + {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1}, + {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2}, + {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2}, + {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2}, + {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1}, + {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16}, + {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47}, + {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1}, + {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1}, + {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1}, + {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2}, + {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1}, + {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14}, + {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1}, + {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1}, + {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1}, + {65313, 14, 26}, + }; + static const unsigned short aiOff[] = { + 1, 2, 8, 15, 16, 26, 28, 32, + 37, 38, 40, 48, 63, 64, 69, 71, + 79, 80, 116, 202, 203, 205, 206, 207, + 209, 210, 211, 213, 214, 217, 218, 219, + 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, + 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, + 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, + 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, + 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, + 65514, 65521, 65527, 65528, 65529, + }; - if( sqlite3_value_type(pNewRowid)!=SQLITE_NULL && ( - sqlite3_value_type(apVal[0])==SQLITE_NULL - || sqlite3_value_int64(apVal[0])!=sqlite3_value_int64(pNewRowid) - )){ - /* The new rowid is not NULL (in this case the rowid will be - ** automatically assigned and there is no chance of a conflict), and - ** the statement is either an INSERT or an UPDATE that modifies the - ** rowid column. So if the conflict mode is REPLACE, then delete any - ** existing row with rowid=pNewRowid. - ** - ** Or, if the conflict mode is not REPLACE, insert the new record into - ** the %_content table. If we hit the duplicate rowid constraint (or any - ** other error) while doing so, return immediately. - ** - ** This branch may also run if pNewRowid contains a value that cannot - ** be losslessly converted to an integer. In this case, the eventual - ** call to fts3InsertData() (either just below or further on in this - ** function) will return SQLITE_MISMATCH. If fts3DeleteByRowid is - ** invoked, it will delete zero rows (since no row will have - ** docid=$pNewRowid if $pNewRowid is not an integer value). - */ - if( sqlite3_vtab_on_conflict(p->db)==SQLITE_REPLACE ){ - rc = fts3DeleteByRowid(p, pNewRowid, &nChng, aSzDel); + int ret = c; + + assert( c>=0 ); + assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); + + if( c<128 ){ + if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); + }else if( c<65536 ){ + int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; + int iLo = 0; + int iRes = -1; + + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + int cmp = (c - aEntry[iTest].iCode); + if( cmp>=0 ){ + iRes = iTest; + iLo = iTest+1; }else{ - rc = fts3InsertData(p, apVal, pRowid); - bInsertDone = 1; + iHi = iTest-1; } } - } - if( rc!=SQLITE_OK ){ - goto update_out; - } + assert( iRes<0 || c>=aEntry[iRes].iCode ); - /* If this is a DELETE or UPDATE operation, remove the old record. */ - if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ - assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER ); - rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel); - isRemove = 1; - } - - /* If this is an INSERT or UPDATE operation, insert the new record. */ - if( nArg>1 && rc==SQLITE_OK ){ - int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]); - if( bInsertDone==0 ){ - rc = fts3InsertData(p, apVal, pRowid); - if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){ - rc = FTS_CORRUPT_VTAB; + if( iRes>=0 ){ + const struct TableEntry *p = &aEntry[iRes]; + if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ + ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; + assert( ret>0 ); } } - if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){ - rc = fts3PendingTermsDocid(p, iLangid, *pRowid); - } - if( rc==SQLITE_OK ){ - assert( p->iPrevDocid==*pRowid ); - rc = fts3InsertTerms(p, iLangid, apVal, aSzIns); - } - if( p->bHasDocsize ){ - fts3InsertDocsize(&rc, p, aSzIns); - } - nChng++; - } - if( p->bFts4 ){ - fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng); + if( bRemoveDiacritic ) ret = remove_diacritic(ret); } - - update_out: - sqlite3_free(aSzDel); - sqlite3Fts3SegmentsClose(p); - return rc; -} - -/* -** Flush any data in the pending-terms hash table to disk. If successful, -** merge all segments in the database (including the new segment, if -** there was any data to flush) into a single segment. -*/ -SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *p){ - int rc; - rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0); - if( rc==SQLITE_OK ){ - rc = fts3DoOptimize(p, 1); - if( rc==SQLITE_OK || rc==SQLITE_DONE ){ - int rc2 = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); - if( rc2!=SQLITE_OK ) rc = rc2; - }else{ - sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0); - sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); - } + + else if( c>=66560 && c<66600 ){ + ret = c + 40; } - sqlite3Fts3SegmentsClose(p); - return rc; -} -#endif + return ret; +} +#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */ +#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */ -/************** End of fts3_write.c ******************************************/ -/************** Begin file fts3_snippet.c ************************************/ +/************** End of fts3_unicode2.c ***************************************/ +/************** Begin file rtree.c *******************************************/ /* -** 2009 Oct 23 +** 2001 September 15 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: @@ -148115,6263 +152601,8375 @@ SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *p){ ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** -****************************************************************************** +************************************************************************* +** This file contains code for implementations of the r-tree and r*-tree +** algorithms packaged as an SQLite virtual table module. */ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) +/* +** Database Format of R-Tree Tables +** -------------------------------- +** +** The data structure for a single virtual r-tree table is stored in three +** native SQLite tables declared as follows. In each case, the '%' character +** in the table name is replaced with the user-supplied name of the r-tree +** table. +** +** CREATE TABLE %_node(nodeno INTEGER PRIMARY KEY, data BLOB) +** CREATE TABLE %_parent(nodeno INTEGER PRIMARY KEY, parentnode INTEGER) +** CREATE TABLE %_rowid(rowid INTEGER PRIMARY KEY, nodeno INTEGER) +** +** The data for each node of the r-tree structure is stored in the %_node +** table. For each node that is not the root node of the r-tree, there is +** an entry in the %_parent table associating the node with its parent. +** And for each row of data in the table, there is an entry in the %_rowid +** table that maps from the entries rowid to the id of the node that it +** is stored on. +** +** The root node of an r-tree always exists, even if the r-tree table is +** empty. The nodeno of the root node is always 1. All other nodes in the +** table must be the same size as the root node. The content of each node +** is formatted as follows: +** +** 1. If the node is the root node (node 1), then the first 2 bytes +** of the node contain the tree depth as a big-endian integer. +** For non-root nodes, the first 2 bytes are left unused. +** +** 2. The next 2 bytes contain the number of entries currently +** stored in the node. +** +** 3. The remainder of the node contains the node entries. Each entry +** consists of a single 8-byte integer followed by an even number +** of 4-byte coordinates. For leaf nodes the integer is the rowid +** of a record. For internal nodes it is the node number of a +** child page. +*/ + +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_RTREE) + +#ifndef SQLITE_CORE +/* #include "sqlite3ext.h" */ + SQLITE_EXTENSION_INIT1 +#else +/* #include "sqlite3.h" */ +#endif /* #include */ /* #include */ +/* #include */ -/* -** Characters that may appear in the second argument to matchinfo(). -*/ -#define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */ -#define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ -#define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ -#define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ -#define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */ -#define FTS3_MATCHINFO_LCS 's' /* nCol values */ -#define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */ +#ifndef SQLITE_AMALGAMATION +#include "sqlite3rtree.h" +typedef sqlite3_int64 i64; +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +#endif -/* -** The default value for the second argument to matchinfo(). +/* The following macro is used to suppress compiler warnings. */ -#define FTS3_MATCHINFO_DEFAULT "pcx" +#ifndef UNUSED_PARAMETER +# define UNUSED_PARAMETER(x) (void)(x) +#endif + +typedef struct Rtree Rtree; +typedef struct RtreeCursor RtreeCursor; +typedef struct RtreeNode RtreeNode; +typedef struct RtreeCell RtreeCell; +typedef struct RtreeConstraint RtreeConstraint; +typedef struct RtreeMatchArg RtreeMatchArg; +typedef struct RtreeGeomCallback RtreeGeomCallback; +typedef union RtreeCoord RtreeCoord; +typedef struct RtreeSearchPoint RtreeSearchPoint; +/* The rtree may have between 1 and RTREE_MAX_DIMENSIONS dimensions. */ +#define RTREE_MAX_DIMENSIONS 5 -/* -** Used as an fts3ExprIterate() context when loading phrase doclists to -** Fts3Expr.aDoclist[]/nDoclist. +/* Size of hash table Rtree.aHash. This hash table is not expected to +** ever contain very many entries, so a fixed number of buckets is +** used. */ -typedef struct LoadDoclistCtx LoadDoclistCtx; -struct LoadDoclistCtx { - Fts3Cursor *pCsr; /* FTS3 Cursor */ - int nPhrase; /* Number of phrases seen so far */ - int nToken; /* Number of tokens seen so far */ -}; +#define HASHSIZE 97 -/* -** The following types are used as part of the implementation of the -** fts3BestSnippet() routine. +/* The xBestIndex method of this virtual table requires an estimate of +** the number of rows in the virtual table to calculate the costs of +** various strategies. If possible, this estimate is loaded from the +** sqlite_stat1 table (with RTREE_MIN_ROWEST as a hard-coded minimum). +** Otherwise, if no sqlite_stat1 entry is available, use +** RTREE_DEFAULT_ROWEST. */ -typedef struct SnippetIter SnippetIter; -typedef struct SnippetPhrase SnippetPhrase; -typedef struct SnippetFragment SnippetFragment; +#define RTREE_DEFAULT_ROWEST 1048576 +#define RTREE_MIN_ROWEST 100 -struct SnippetIter { - Fts3Cursor *pCsr; /* Cursor snippet is being generated from */ - int iCol; /* Extract snippet from this column */ - int nSnippet; /* Requested snippet length (in tokens) */ - int nPhrase; /* Number of phrases in query */ - SnippetPhrase *aPhrase; /* Array of size nPhrase */ - int iCurrent; /* First token of current snippet */ -}; +/* +** An rtree virtual-table object. +*/ +struct Rtree { + sqlite3_vtab base; /* Base class. Must be first */ + sqlite3 *db; /* Host database connection */ + int iNodeSize; /* Size in bytes of each node in the node table */ + u8 nDim; /* Number of dimensions */ + u8 eCoordType; /* RTREE_COORD_REAL32 or RTREE_COORD_INT32 */ + u8 nBytesPerCell; /* Bytes consumed per cell */ + int iDepth; /* Current depth of the r-tree structure */ + char *zDb; /* Name of database containing r-tree table */ + char *zName; /* Name of r-tree table */ + int nBusy; /* Current number of users of this structure */ + i64 nRowEst; /* Estimated number of rows in this table */ -struct SnippetPhrase { - int nToken; /* Number of tokens in phrase */ - char *pList; /* Pointer to start of phrase position list */ - int iHead; /* Next value in position list */ - char *pHead; /* Position list data following iHead */ - int iTail; /* Next value in trailing position list */ - char *pTail; /* Position list data following iTail */ -}; + /* List of nodes removed during a CondenseTree operation. List is + ** linked together via the pointer normally used for hash chains - + ** RtreeNode.pNext. RtreeNode.iNode stores the depth of the sub-tree + ** headed by the node (leaf nodes have RtreeNode.iNode==0). + */ + RtreeNode *pDeleted; + int iReinsertHeight; /* Height of sub-trees Reinsert() has run on */ -struct SnippetFragment { - int iCol; /* Column snippet is extracted from */ - int iPos; /* Index of first token in snippet */ - u64 covered; /* Mask of query phrases covered */ - u64 hlmask; /* Mask of snippet terms to highlight */ -}; + /* Statements to read/write/delete a record from xxx_node */ + sqlite3_stmt *pReadNode; + sqlite3_stmt *pWriteNode; + sqlite3_stmt *pDeleteNode; -/* -** This type is used as an fts3ExprIterate() context object while -** accumulating the data returned by the matchinfo() function. -*/ -typedef struct MatchInfo MatchInfo; -struct MatchInfo { - Fts3Cursor *pCursor; /* FTS3 Cursor */ - int nCol; /* Number of columns in table */ - int nPhrase; /* Number of matchable phrases in query */ - sqlite3_int64 nDoc; /* Number of docs in database */ - u32 *aMatchinfo; /* Pre-allocated buffer */ + /* Statements to read/write/delete a record from xxx_rowid */ + sqlite3_stmt *pReadRowid; + sqlite3_stmt *pWriteRowid; + sqlite3_stmt *pDeleteRowid; + + /* Statements to read/write/delete a record from xxx_parent */ + sqlite3_stmt *pReadParent; + sqlite3_stmt *pWriteParent; + sqlite3_stmt *pDeleteParent; + + RtreeNode *aHash[HASHSIZE]; /* Hash table of in-memory nodes. */ }; +/* Possible values for Rtree.eCoordType: */ +#define RTREE_COORD_REAL32 0 +#define RTREE_COORD_INT32 1 +/* +** If SQLITE_RTREE_INT_ONLY is defined, then this virtual table will +** only deal with integer coordinates. No floating point operations +** will be done. +*/ +#ifdef SQLITE_RTREE_INT_ONLY + typedef sqlite3_int64 RtreeDValue; /* High accuracy coordinate */ + typedef int RtreeValue; /* Low accuracy coordinate */ +# define RTREE_ZERO 0 +#else + typedef double RtreeDValue; /* High accuracy coordinate */ + typedef float RtreeValue; /* Low accuracy coordinate */ +# define RTREE_ZERO 0.0 +#endif /* -** The snippet() and offsets() functions both return text values. An instance -** of the following structure is used to accumulate those values while the -** functions are running. See fts3StringAppend() for details. +** When doing a search of an r-tree, instances of the following structure +** record intermediate results from the tree walk. +** +** The id is always a node-id. For iLevel>=1 the id is the node-id of +** the node that the RtreeSearchPoint represents. When iLevel==0, however, +** the id is of the parent node and the cell that RtreeSearchPoint +** represents is the iCell-th entry in the parent node. */ -typedef struct StrBuffer StrBuffer; -struct StrBuffer { - char *z; /* Pointer to buffer containing string */ - int n; /* Length of z in bytes (excl. nul-term) */ - int nAlloc; /* Allocated size of buffer z in bytes */ +struct RtreeSearchPoint { + RtreeDValue rScore; /* The score for this node. Smallest goes first. */ + sqlite3_int64 id; /* Node ID */ + u8 iLevel; /* 0=entries. 1=leaf node. 2+ for higher */ + u8 eWithin; /* PARTLY_WITHIN or FULLY_WITHIN */ + u8 iCell; /* Cell index within the node */ }; - /* -** This function is used to help iterate through a position-list. A position -** list is a list of unique integers, sorted from smallest to largest. Each -** element of the list is represented by an FTS3 varint that takes the value -** of the difference between the current element and the previous one plus -** two. For example, to store the position-list: -** -** 4 9 113 -** -** the three varints: -** -** 6 7 106 +** The minimum number of cells allowed for a node is a third of the +** maximum. In Gutman's notation: ** -** are encoded. +** m = M/3 ** -** When this function is called, *pp points to the start of an element of -** the list. *piPos contains the value of the previous entry in the list. -** After it returns, *piPos contains the value of the next element of the -** list and *pp is advanced to the following varint. +** If an R*-tree "Reinsert" operation is required, the same number of +** cells are removed from the overfull node and reinserted into the tree. */ -static void fts3GetDeltaPosition(char **pp, int *piPos){ - int iVal; - *pp += fts3GetVarint32(*pp, &iVal); - *piPos += (iVal-2); -} +#define RTREE_MINCELLS(p) ((((p)->iNodeSize-4)/(p)->nBytesPerCell)/3) +#define RTREE_REINSERT(p) RTREE_MINCELLS(p) +#define RTREE_MAXCELLS 51 /* -** Helper function for fts3ExprIterate() (see below). +** The smallest possible node-size is (512-64)==448 bytes. And the largest +** supported cell size is 48 bytes (8 byte rowid + ten 4 byte coordinates). +** Therefore all non-root nodes must contain at least 3 entries. Since +** 2^40 is greater than 2^64, an r-tree structure always has a depth of +** 40 or less. */ -static int fts3ExprIterate2( - Fts3Expr *pExpr, /* Expression to iterate phrases of */ - int *piPhrase, /* Pointer to phrase counter */ - int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ - void *pCtx /* Second argument to pass to callback */ -){ - int rc; /* Return code */ - int eType = pExpr->eType; /* Type of expression node pExpr */ +#define RTREE_MAX_DEPTH 40 - if( eType!=FTSQUERY_PHRASE ){ - assert( pExpr->pLeft && pExpr->pRight ); - rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx); - if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ - rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx); - } - }else{ - rc = x(pExpr, *piPhrase, pCtx); - (*piPhrase)++; - } - return rc; -} /* -** Iterate through all phrase nodes in an FTS3 query, except those that -** are part of a sub-tree that is the right-hand-side of a NOT operator. -** For each phrase node found, the supplied callback function is invoked. -** -** If the callback function returns anything other than SQLITE_OK, -** the iteration is abandoned and the error code returned immediately. -** Otherwise, SQLITE_OK is returned after a callback has been made for -** all eligible phrase nodes. +** Number of entries in the cursor RtreeNode cache. The first entry is +** used to cache the RtreeNode for RtreeCursor.sPoint. The remaining +** entries cache the RtreeNode for the first elements of the priority queue. */ -static int fts3ExprIterate( - Fts3Expr *pExpr, /* Expression to iterate phrases of */ - int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ - void *pCtx /* Second argument to pass to callback */ -){ - int iPhrase = 0; /* Variable used as the phrase counter */ - return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); -} +#define RTREE_CACHE_SZ 5 -/* -** This is an fts3ExprIterate() callback used while loading the doclists -** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also -** fts3ExprLoadDoclists(). +/* +** An rtree cursor object. */ -static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ - int rc = SQLITE_OK; - Fts3Phrase *pPhrase = pExpr->pPhrase; - LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; - - UNUSED_PARAMETER(iPhrase); - - p->nPhrase++; - p->nToken += pPhrase->nToken; +struct RtreeCursor { + sqlite3_vtab_cursor base; /* Base class. Must be first */ + u8 atEOF; /* True if at end of search */ + u8 bPoint; /* True if sPoint is valid */ + int iStrategy; /* Copy of idxNum search parameter */ + int nConstraint; /* Number of entries in aConstraint */ + RtreeConstraint *aConstraint; /* Search constraints. */ + int nPointAlloc; /* Number of slots allocated for aPoint[] */ + int nPoint; /* Number of slots used in aPoint[] */ + int mxLevel; /* iLevel value for root of the tree */ + RtreeSearchPoint *aPoint; /* Priority queue for search points */ + RtreeSearchPoint sPoint; /* Cached next search point */ + RtreeNode *aNode[RTREE_CACHE_SZ]; /* Rtree node cache */ + u32 anQueue[RTREE_MAX_DEPTH+1]; /* Number of queued entries by iLevel */ +}; - return rc; -} +/* Return the Rtree of a RtreeCursor */ +#define RTREE_OF_CURSOR(X) ((Rtree*)((X)->base.pVtab)) /* -** Load the doclists for each phrase in the query associated with FTS3 cursor -** pCsr. -** -** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable -** phrases in the expression (all phrases except those directly or -** indirectly descended from the right-hand-side of a NOT operator). If -** pnToken is not NULL, then it is set to the number of tokens in all -** matchable phrases of the expression. +** A coordinate can be either a floating point number or a integer. All +** coordinates within a single R-Tree are always of the same time. */ -static int fts3ExprLoadDoclists( - Fts3Cursor *pCsr, /* Fts3 cursor for current query */ - int *pnPhrase, /* OUT: Number of phrases in query */ - int *pnToken /* OUT: Number of tokens in query */ -){ - int rc; /* Return Code */ - LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ - sCtx.pCsr = pCsr; - rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); - if( pnPhrase ) *pnPhrase = sCtx.nPhrase; - if( pnToken ) *pnToken = sCtx.nToken; - return rc; -} - -static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ - (*(int *)ctx)++; - UNUSED_PARAMETER(pExpr); - UNUSED_PARAMETER(iPhrase); - return SQLITE_OK; -} -static int fts3ExprPhraseCount(Fts3Expr *pExpr){ - int nPhrase = 0; - (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); - return nPhrase; -} +union RtreeCoord { + RtreeValue f; /* Floating point value */ + int i; /* Integer value */ + u32 u; /* Unsigned for byte-order conversions */ +}; /* -** Advance the position list iterator specified by the first two -** arguments so that it points to the first element with a value greater -** than or equal to parameter iNext. +** The argument is an RtreeCoord. Return the value stored within the RtreeCoord +** formatted as a RtreeDValue (double or int64). This macro assumes that local +** variable pRtree points to the Rtree structure associated with the +** RtreeCoord. */ -static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){ - char *pIter = *ppIter; - if( pIter ){ - int iIter = *piIter; - - while( iItereCoordType==RTREE_COORD_REAL32) ? \ + ((double)coord.f) : \ + ((double)coord.i) \ + ) +#endif /* -** Advance the snippet iterator to the next candidate snippet. +** A search constraint. */ -static int fts3SnippetNextCandidate(SnippetIter *pIter){ - int i; /* Loop counter */ +struct RtreeConstraint { + int iCoord; /* Index of constrained coordinate */ + int op; /* Constraining operation */ + union { + RtreeDValue rValue; /* Constraint value. */ + int (*xGeom)(sqlite3_rtree_geometry*,int,RtreeDValue*,int*); + int (*xQueryFunc)(sqlite3_rtree_query_info*); + } u; + sqlite3_rtree_query_info *pInfo; /* xGeom and xQueryFunc argument */ +}; - if( pIter->iCurrent<0 ){ - /* The SnippetIter object has just been initialized. The first snippet - ** candidate always starts at offset 0 (even if this candidate has a - ** score of 0.0). - */ - pIter->iCurrent = 0; +/* Possible values for RtreeConstraint.op */ +#define RTREE_EQ 0x41 /* A */ +#define RTREE_LE 0x42 /* B */ +#define RTREE_LT 0x43 /* C */ +#define RTREE_GE 0x44 /* D */ +#define RTREE_GT 0x45 /* E */ +#define RTREE_MATCH 0x46 /* F: Old-style sqlite3_rtree_geometry_callback() */ +#define RTREE_QUERY 0x47 /* G: New-style sqlite3_rtree_query_callback() */ - /* Advance the 'head' iterator of each phrase to the first offset that - ** is greater than or equal to (iNext+nSnippet). - */ - for(i=0; inPhrase; i++){ - SnippetPhrase *pPhrase = &pIter->aPhrase[i]; - fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet); - } - }else{ - int iStart; - int iEnd = 0x7FFFFFFF; - for(i=0; inPhrase; i++){ - SnippetPhrase *pPhrase = &pIter->aPhrase[i]; - if( pPhrase->pHead && pPhrase->iHeadiHead; - } - } - if( iEnd==0x7FFFFFFF ){ - return 1; - } +/* +** An rtree structure node. +*/ +struct RtreeNode { + RtreeNode *pParent; /* Parent node */ + i64 iNode; /* The node number */ + int nRef; /* Number of references to this node */ + int isDirty; /* True if the node needs to be written to disk */ + u8 *zData; /* Content of the node, as should be on disk */ + RtreeNode *pNext; /* Next node in this hash collision chain */ +}; - pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1; - for(i=0; inPhrase; i++){ - SnippetPhrase *pPhrase = &pIter->aPhrase[i]; - fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1); - fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart); - } - } +/* Return the number of cells in a node */ +#define NCELL(pNode) readInt16(&(pNode)->zData[2]) + +/* +** A single cell from a node, deserialized +*/ +struct RtreeCell { + i64 iRowid; /* Node or entry ID */ + RtreeCoord aCoord[RTREE_MAX_DIMENSIONS*2]; /* Bounding box coordinates */ +}; - return 0; -} /* -** Retrieve information about the current candidate snippet of snippet -** iterator pIter. +** This object becomes the sqlite3_user_data() for the SQL functions +** that are created by sqlite3_rtree_geometry_callback() and +** sqlite3_rtree_query_callback() and which appear on the right of MATCH +** operators in order to constrain a search. +** +** xGeom and xQueryFunc are the callback functions. Exactly one of +** xGeom and xQueryFunc fields is non-NULL, depending on whether the +** SQL function was created using sqlite3_rtree_geometry_callback() or +** sqlite3_rtree_query_callback(). +** +** This object is deleted automatically by the destructor mechanism in +** sqlite3_create_function_v2(). */ -static void fts3SnippetDetails( - SnippetIter *pIter, /* Snippet iterator */ - u64 mCovered, /* Bitmask of phrases already covered */ - int *piToken, /* OUT: First token of proposed snippet */ - int *piScore, /* OUT: "Score" for this snippet */ - u64 *pmCover, /* OUT: Bitmask of phrases covered */ - u64 *pmHighlight /* OUT: Bitmask of terms to highlight */ -){ - int iStart = pIter->iCurrent; /* First token of snippet */ - int iScore = 0; /* Score of this snippet */ - int i; /* Loop counter */ - u64 mCover = 0; /* Mask of phrases covered by this snippet */ - u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */ +struct RtreeGeomCallback { + int (*xGeom)(sqlite3_rtree_geometry*, int, RtreeDValue*, int*); + int (*xQueryFunc)(sqlite3_rtree_query_info*); + void (*xDestructor)(void*); + void *pContext; +}; - for(i=0; inPhrase; i++){ - SnippetPhrase *pPhrase = &pIter->aPhrase[i]; - if( pPhrase->pTail ){ - char *pCsr = pPhrase->pTail; - int iCsr = pPhrase->iTail; - while( iCsr<(iStart+pIter->nSnippet) ){ - int j; - u64 mPhrase = (u64)1 << i; - u64 mPos = (u64)1 << (iCsr - iStart); - assert( iCsr>=iStart ); - if( (mCover|mCovered)&mPhrase ){ - iScore++; - }else{ - iScore += 1000; - } - mCover |= mPhrase; +/* +** Value for the first field of every RtreeMatchArg object. The MATCH +** operator tests that the first field of a blob operand matches this +** value to avoid operating on invalid blobs (which could cause a segfault). +*/ +#define RTREE_GEOMETRY_MAGIC 0x891245AB - for(j=0; jnToken; j++){ - mHighlight |= (mPos>>j); - } +/* +** An instance of this structure (in the form of a BLOB) is returned by +** the SQL functions that sqlite3_rtree_geometry_callback() and +** sqlite3_rtree_query_callback() create, and is read as the right-hand +** operand to the MATCH operator of an R-Tree. +*/ +struct RtreeMatchArg { + u32 magic; /* Always RTREE_GEOMETRY_MAGIC */ + RtreeGeomCallback cb; /* Info about the callback functions */ + int nParam; /* Number of parameters to the SQL function */ + sqlite3_value **apSqlParam; /* Original SQL parameter values */ + RtreeDValue aParam[1]; /* Values for parameters to the SQL function */ +}; - if( 0==(*pCsr & 0x0FE) ) break; - fts3GetDeltaPosition(&pCsr, &iCsr); - } - } - } +#ifndef MAX +# define MAX(x,y) ((x) < (y) ? (y) : (x)) +#endif +#ifndef MIN +# define MIN(x,y) ((x) > (y) ? (y) : (x)) +#endif - /* Set the output variables before returning. */ - *piToken = iStart; - *piScore = iScore; - *pmCover = mCover; - *pmHighlight = mHighlight; +/* +** Functions to deserialize a 16 bit integer, 32 bit real number and +** 64 bit integer. The deserialized value is returned. +*/ +static int readInt16(u8 *p){ + return (p[0]<<8) + p[1]; +} +static void readCoord(u8 *p, RtreeCoord *pCoord){ + pCoord->u = ( + (((u32)p[0]) << 24) + + (((u32)p[1]) << 16) + + (((u32)p[2]) << 8) + + (((u32)p[3]) << 0) + ); +} +static i64 readInt64(u8 *p){ + return ( + (((i64)p[0]) << 56) + + (((i64)p[1]) << 48) + + (((i64)p[2]) << 40) + + (((i64)p[3]) << 32) + + (((i64)p[4]) << 24) + + (((i64)p[5]) << 16) + + (((i64)p[6]) << 8) + + (((i64)p[7]) << 0) + ); } /* -** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). -** Each invocation populates an element of the SnippetIter.aPhrase[] array. +** Functions to serialize a 16 bit integer, 32 bit real number and +** 64 bit integer. The value returned is the number of bytes written +** to the argument buffer (always 2, 4 and 8 respectively). */ -static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ - SnippetIter *p = (SnippetIter *)ctx; - SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; - char *pCsr; - int rc; - - pPhrase->nToken = pExpr->pPhrase->nToken; - rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr); - assert( rc==SQLITE_OK || pCsr==0 ); - if( pCsr ){ - int iFirst = 0; - pPhrase->pList = pCsr; - fts3GetDeltaPosition(&pCsr, &iFirst); - assert( iFirst>=0 ); - pPhrase->pHead = pCsr; - pPhrase->pTail = pCsr; - pPhrase->iHead = iFirst; - pPhrase->iTail = iFirst; - }else{ - assert( rc!=SQLITE_OK || ( - pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 - )); - } - - return rc; +static int writeInt16(u8 *p, int i){ + p[0] = (i>> 8)&0xFF; + p[1] = (i>> 0)&0xFF; + return 2; +} +static int writeCoord(u8 *p, RtreeCoord *pCoord){ + u32 i; + assert( sizeof(RtreeCoord)==4 ); + assert( sizeof(u32)==4 ); + i = pCoord->u; + p[0] = (i>>24)&0xFF; + p[1] = (i>>16)&0xFF; + p[2] = (i>> 8)&0xFF; + p[3] = (i>> 0)&0xFF; + return 4; +} +static int writeInt64(u8 *p, i64 i){ + p[0] = (i>>56)&0xFF; + p[1] = (i>>48)&0xFF; + p[2] = (i>>40)&0xFF; + p[3] = (i>>32)&0xFF; + p[4] = (i>>24)&0xFF; + p[5] = (i>>16)&0xFF; + p[6] = (i>> 8)&0xFF; + p[7] = (i>> 0)&0xFF; + return 8; } /* -** Select the fragment of text consisting of nFragment contiguous tokens -** from column iCol that represent the "best" snippet. The best snippet -** is the snippet with the highest score, where scores are calculated -** by adding: -** -** (a) +1 point for each occurrence of a matchable phrase in the snippet. -** -** (b) +1000 points for the first occurrence of each matchable phrase in -** the snippet for which the corresponding mCovered bit is not set. -** -** The selected snippet parameters are stored in structure *pFragment before -** returning. The score of the selected snippet is stored in *piScore -** before returning. +** Increment the reference count of node p. */ -static int fts3BestSnippet( - int nSnippet, /* Desired snippet length */ - Fts3Cursor *pCsr, /* Cursor to create snippet for */ - int iCol, /* Index of column to create snippet from */ - u64 mCovered, /* Mask of phrases already covered */ - u64 *pmSeen, /* IN/OUT: Mask of phrases seen */ - SnippetFragment *pFragment, /* OUT: Best snippet found */ - int *piScore /* OUT: Score of snippet pFragment */ -){ - int rc; /* Return Code */ - int nList; /* Number of phrases in expression */ - SnippetIter sIter; /* Iterates through snippet candidates */ - int nByte; /* Number of bytes of space to allocate */ - int iBestScore = -1; /* Best snippet score found so far */ - int i; /* Loop counter */ - - memset(&sIter, 0, sizeof(sIter)); - - /* Iterate through the phrases in the expression to count them. The same - ** callback makes sure the doclists are loaded for each phrase. - */ - rc = fts3ExprLoadDoclists(pCsr, &nList, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - - /* Now that it is known how many phrases there are, allocate and zero - ** the required space using malloc(). - */ - nByte = sizeof(SnippetPhrase) * nList; - sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte); - if( !sIter.aPhrase ){ - return SQLITE_NOMEM; +static void nodeReference(RtreeNode *p){ + if( p ){ + p->nRef++; } - memset(sIter.aPhrase, 0, nByte); - - /* Initialize the contents of the SnippetIter object. Then iterate through - ** the set of phrases in the expression to populate the aPhrase[] array. - */ - sIter.pCsr = pCsr; - sIter.iCol = iCol; - sIter.nSnippet = nSnippet; - sIter.nPhrase = nList; - sIter.iCurrent = -1; - rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter); - if( rc==SQLITE_OK ){ - - /* Set the *pmSeen output variable. */ - for(i=0; iiCol = iCol; - while( !fts3SnippetNextCandidate(&sIter) ){ - int iPos; - int iScore; - u64 mCover; - u64 mHighlite; - fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover,&mHighlite); - assert( iScore>=0 ); - if( iScore>iBestScore ){ - pFragment->iPos = iPos; - pFragment->hlmask = mHighlite; - pFragment->covered = mCover; - iBestScore = iScore; - } - } +/* +** Clear the content of node p (set all bytes to 0x00). +*/ +static void nodeZero(Rtree *pRtree, RtreeNode *p){ + memset(&p->zData[2], 0, pRtree->iNodeSize-2); + p->isDirty = 1; +} - *piScore = iBestScore; - } - sqlite3_free(sIter.aPhrase); - return rc; +/* +** Given a node number iNode, return the corresponding key to use +** in the Rtree.aHash table. +*/ +static int nodeHash(i64 iNode){ + return iNode % HASHSIZE; } +/* +** Search the node hash table for node iNode. If found, return a pointer +** to it. Otherwise, return 0. +*/ +static RtreeNode *nodeHashLookup(Rtree *pRtree, i64 iNode){ + RtreeNode *p; + for(p=pRtree->aHash[nodeHash(iNode)]; p && p->iNode!=iNode; p=p->pNext); + return p; +} /* -** Append a string to the string-buffer passed as the first argument. -** -** If nAppend is negative, then the length of the string zAppend is -** determined using strlen(). +** Add node pNode to the node hash table. */ -static int fts3StringAppend( - StrBuffer *pStr, /* Buffer to append to */ - const char *zAppend, /* Pointer to data to append to buffer */ - int nAppend /* Size of zAppend in bytes (or -1) */ -){ - if( nAppend<0 ){ - nAppend = (int)strlen(zAppend); - } +static void nodeHashInsert(Rtree *pRtree, RtreeNode *pNode){ + int iHash; + assert( pNode->pNext==0 ); + iHash = nodeHash(pNode->iNode); + pNode->pNext = pRtree->aHash[iHash]; + pRtree->aHash[iHash] = pNode; +} - /* If there is insufficient space allocated at StrBuffer.z, use realloc() - ** to grow the buffer until so that it is big enough to accomadate the - ** appended data. - */ - if( pStr->n+nAppend+1>=pStr->nAlloc ){ - int nAlloc = pStr->nAlloc+nAppend+100; - char *zNew = sqlite3_realloc(pStr->z, nAlloc); - if( !zNew ){ - return SQLITE_NOMEM; - } - pStr->z = zNew; - pStr->nAlloc = nAlloc; +/* +** Remove node pNode from the node hash table. +*/ +static void nodeHashDelete(Rtree *pRtree, RtreeNode *pNode){ + RtreeNode **pp; + if( pNode->iNode!=0 ){ + pp = &pRtree->aHash[nodeHash(pNode->iNode)]; + for( ; (*pp)!=pNode; pp = &(*pp)->pNext){ assert(*pp); } + *pp = pNode->pNext; + pNode->pNext = 0; } - assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) ); - - /* Append the data to the string buffer. */ - memcpy(&pStr->z[pStr->n], zAppend, nAppend); - pStr->n += nAppend; - pStr->z[pStr->n] = '\0'; - - return SQLITE_OK; } /* -** The fts3BestSnippet() function often selects snippets that end with a -** query term. That is, the final term of the snippet is always a term -** that requires highlighting. For example, if 'X' is a highlighted term -** and '.' is a non-highlighted term, BestSnippet() may select: -** -** ........X.....X -** -** This function "shifts" the beginning of the snippet forward in the -** document so that there are approximately the same number of -** non-highlighted terms to the right of the final highlighted term as there -** are to the left of the first highlighted term. For example, to this: -** -** ....X.....X.... -** -** This is done as part of extracting the snippet text, not when selecting -** the snippet. Snippet selection is done based on doclists only, so there -** is no way for fts3BestSnippet() to know whether or not the document -** actually contains terms that follow the final highlighted term. +** Allocate and return new r-tree node. Initially, (RtreeNode.iNode==0), +** indicating that node has not yet been assigned a node number. It is +** assigned a node number when nodeWrite() is called to write the +** node contents out to the database. */ -static int fts3SnippetShift( - Fts3Table *pTab, /* FTS3 table snippet comes from */ - int iLangid, /* Language id to use in tokenizing */ - int nSnippet, /* Number of tokens desired for snippet */ - const char *zDoc, /* Document text to extract snippet from */ - int nDoc, /* Size of buffer zDoc in bytes */ - int *piPos, /* IN/OUT: First token of snippet */ - u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ -){ - u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ - - if( hlmask ){ - int nLeft; /* Tokens to the left of first highlight */ - int nRight; /* Tokens to the right of last highlight */ - int nDesired; /* Ideal number of tokens to shift forward */ - - for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++); - for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++); - nDesired = (nLeft-nRight)/2; - - /* Ideally, the start of the snippet should be pushed forward in the - ** document nDesired tokens. This block checks if there are actually - ** nDesired tokens to the right of the snippet. If so, *piPos and - ** *pHlMask are updated to shift the snippet nDesired tokens to the - ** right. Otherwise, the snippet is shifted by the number of tokens - ** available. - */ - if( nDesired>0 ){ - int nShift; /* Number of tokens to shift snippet by */ - int iCurrent = 0; /* Token counter */ - int rc; /* Return Code */ - sqlite3_tokenizer_module *pMod; - sqlite3_tokenizer_cursor *pC; - pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; - - /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) - ** or more tokens in zDoc/nDoc. - */ - rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC); - if( rc!=SQLITE_OK ){ - return rc; - } - while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ - const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0; - rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); - } - pMod->xClose(pC); - if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } - - nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; - assert( nShift<=nDesired ); - if( nShift>0 ){ - *piPos += nShift; - *pHlmask = hlmask >> nShift; - } - } +static RtreeNode *nodeNew(Rtree *pRtree, RtreeNode *pParent){ + RtreeNode *pNode; + pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode) + pRtree->iNodeSize); + if( pNode ){ + memset(pNode, 0, sizeof(RtreeNode) + pRtree->iNodeSize); + pNode->zData = (u8 *)&pNode[1]; + pNode->nRef = 1; + pNode->pParent = pParent; + pNode->isDirty = 1; + nodeReference(pParent); } - return SQLITE_OK; + return pNode; } /* -** Extract the snippet text for fragment pFragment from cursor pCsr and -** append it to string buffer pOut. +** Obtain a reference to an r-tree node. */ -static int fts3SnippetText( - Fts3Cursor *pCsr, /* FTS3 Cursor */ - SnippetFragment *pFragment, /* Snippet to extract */ - int iFragment, /* Fragment number */ - int isLast, /* True for final fragment in snippet */ - int nSnippet, /* Number of tokens in extracted snippet */ - const char *zOpen, /* String inserted before highlighted term */ - const char *zClose, /* String inserted after highlighted term */ - const char *zEllipsis, /* String inserted between snippets */ - StrBuffer *pOut /* Write output here */ +static int nodeAcquire( + Rtree *pRtree, /* R-tree structure */ + i64 iNode, /* Node number to load */ + RtreeNode *pParent, /* Either the parent node or NULL */ + RtreeNode **ppNode /* OUT: Acquired node */ ){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int rc; /* Return code */ - const char *zDoc; /* Document text to extract snippet from */ - int nDoc; /* Size of zDoc in bytes */ - int iCurrent = 0; /* Current token number of document */ - int iEnd = 0; /* Byte offset of end of current token */ - int isShiftDone = 0; /* True after snippet is shifted */ - int iPos = pFragment->iPos; /* First token of snippet */ - u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ - int iCol = pFragment->iCol+1; /* Query column to extract text from */ - sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ - sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ - - zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); - if( zDoc==0 ){ - if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ - return SQLITE_NOMEM; + int rc; + int rc2 = SQLITE_OK; + RtreeNode *pNode; + + /* Check if the requested node is already in the hash table. If so, + ** increase its reference count and return it. + */ + if( (pNode = nodeHashLookup(pRtree, iNode)) ){ + assert( !pParent || !pNode->pParent || pNode->pParent==pParent ); + if( pParent && !pNode->pParent ){ + nodeReference(pParent); + pNode->pParent = pParent; } + pNode->nRef++; + *ppNode = pNode; return SQLITE_OK; } - nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); - - /* Open a token cursor on the document. */ - pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; - rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC); - if( rc!=SQLITE_OK ){ - return rc; - } - - while( rc==SQLITE_OK ){ - const char *ZDUMMY; /* Dummy argument used with tokenizer */ - int DUMMY1 = -1; /* Dummy argument used with tokenizer */ - int iBegin = 0; /* Offset in zDoc of start of token */ - int iFin = 0; /* Offset in zDoc of end of token */ - int isHighlight = 0; /* True for highlighted terms */ - /* Variable DUMMY1 is initialized to a negative value above. Elsewhere - ** in the FTS code the variable that the third argument to xNext points to - ** is initialized to zero before the first (*but not necessarily - ** subsequent*) call to xNext(). This is done for a particular application - ** that needs to know whether or not the tokenizer is being used for - ** snippet generation or for some other purpose. - ** - ** Extreme care is required when writing code to depend on this - ** initialization. It is not a documented part of the tokenizer interface. - ** If a tokenizer is used directly by any code outside of FTS, this - ** convention might not be respected. */ - rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); - if( rc!=SQLITE_OK ){ - if( rc==SQLITE_DONE ){ - /* Special case - the last token of the snippet is also the last token - ** of the column. Append any punctuation that occurred between the end - ** of the previous token and the end of the document to the output. - ** Then break out of the loop. */ - rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); + sqlite3_bind_int64(pRtree->pReadNode, 1, iNode); + rc = sqlite3_step(pRtree->pReadNode); + if( rc==SQLITE_ROW ){ + const u8 *zBlob = sqlite3_column_blob(pRtree->pReadNode, 0); + if( pRtree->iNodeSize==sqlite3_column_bytes(pRtree->pReadNode, 0) ){ + pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode)+pRtree->iNodeSize); + if( !pNode ){ + rc2 = SQLITE_NOMEM; + }else{ + pNode->pParent = pParent; + pNode->zData = (u8 *)&pNode[1]; + pNode->nRef = 1; + pNode->iNode = iNode; + pNode->isDirty = 0; + pNode->pNext = 0; + memcpy(pNode->zData, zBlob, pRtree->iNodeSize); + nodeReference(pParent); } - break; } - if( iCurrentiLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask - ); - isShiftDone = 1; + } + rc = sqlite3_reset(pRtree->pReadNode); + if( rc==SQLITE_OK ) rc = rc2; - /* Now that the shift has been done, check if the initial "..." are - ** required. They are required if (a) this is not the first fragment, - ** or (b) this fragment does not begin at position 0 of its column. - */ - if( rc==SQLITE_OK ){ - if( iPos>0 || iFragment>0 ){ - rc = fts3StringAppend(pOut, zEllipsis, -1); - }else if( iBegin ){ - rc = fts3StringAppend(pOut, zDoc, iBegin); - } - } - if( rc!=SQLITE_OK || iCurrentiDepth to the height + ** of the r-tree structure. A height of zero means all data is stored on + ** the root node. A height of one means the children of the root node + ** are the leaves, and so on. If the depth as specified on the root node + ** is greater than RTREE_MAX_DEPTH, the r-tree structure must be corrupt. + */ + if( pNode && iNode==1 ){ + pRtree->iDepth = readInt16(pNode->zData); + if( pRtree->iDepth>RTREE_MAX_DEPTH ){ + rc = SQLITE_CORRUPT_VTAB; } + } - if( iCurrent>=(iPos+nSnippet) ){ - if( isLast ){ - rc = fts3StringAppend(pOut, zEllipsis, -1); - } - break; + /* If no error has occurred so far, check if the "number of entries" + ** field on the node is too large. If so, set the return code to + ** SQLITE_CORRUPT_VTAB. + */ + if( pNode && rc==SQLITE_OK ){ + if( NCELL(pNode)>((pRtree->iNodeSize-4)/pRtree->nBytesPerCell) ){ + rc = SQLITE_CORRUPT_VTAB; } + } - /* Set isHighlight to true if this term should be highlighted. */ - isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0; - - if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd); - if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1); - if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin); - if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1); - - iEnd = iFin; + if( rc==SQLITE_OK ){ + if( pNode!=0 ){ + nodeHashInsert(pRtree, pNode); + }else{ + rc = SQLITE_CORRUPT_VTAB; + } + *ppNode = pNode; + }else{ + sqlite3_free(pNode); + *ppNode = 0; } - pMod->xClose(pC); return rc; } - /* -** This function is used to count the entries in a column-list (a -** delta-encoded list of term offsets within a single column of a single -** row). When this function is called, *ppCollist should point to the -** beginning of the first varint in the column-list (the varint that -** contains the position of the first matching term in the column data). -** Before returning, *ppCollist is set to point to the first byte after -** the last varint in the column-list (either the 0x00 signifying the end -** of the position-list, or the 0x01 that precedes the column number of -** the next column in the position-list). -** -** The number of elements in the column-list is returned. +** Overwrite cell iCell of node pNode with the contents of pCell. */ -static int fts3ColumnlistCount(char **ppCollist){ - char *pEnd = *ppCollist; - char c = 0; - int nEntry = 0; - - /* A column-list is terminated by either a 0x01 or 0x00. */ - while( 0xFE & (*pEnd | c) ){ - c = *pEnd++ & 0x80; - if( !c ) nEntry++; +static void nodeOverwriteCell( + Rtree *pRtree, /* The overall R-Tree */ + RtreeNode *pNode, /* The node into which the cell is to be written */ + RtreeCell *pCell, /* The cell to write */ + int iCell /* Index into pNode into which pCell is written */ +){ + int ii; + u8 *p = &pNode->zData[4 + pRtree->nBytesPerCell*iCell]; + p += writeInt64(p, pCell->iRowid); + for(ii=0; ii<(pRtree->nDim*2); ii++){ + p += writeCoord(p, &pCell->aCoord[ii]); } + pNode->isDirty = 1; +} - *ppCollist = pEnd; - return nEntry; +/* +** Remove the cell with index iCell from node pNode. +*/ +static void nodeDeleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell){ + u8 *pDst = &pNode->zData[4 + pRtree->nBytesPerCell*iCell]; + u8 *pSrc = &pDst[pRtree->nBytesPerCell]; + int nByte = (NCELL(pNode) - iCell - 1) * pRtree->nBytesPerCell; + memmove(pDst, pSrc, nByte); + writeInt16(&pNode->zData[2], NCELL(pNode)-1); + pNode->isDirty = 1; } /* -** fts3ExprIterate() callback used to collect the "global" matchinfo stats -** for a single query. -** -** fts3ExprIterate() callback to load the 'global' elements of a -** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements -** of the matchinfo array that are constant for all rows returned by the -** current query. -** -** Argument pCtx is actually a pointer to a struct of type MatchInfo. This -** function populates Matchinfo.aMatchinfo[] as follows: -** -** for(iCol=0; iColpCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol] - ); + int nCell; /* Current number of cells in pNode */ + int nMaxCell; /* Maximum number of cells for pNode */ + + nMaxCell = (pRtree->iNodeSize-4)/pRtree->nBytesPerCell; + nCell = NCELL(pNode); + + assert( nCell<=nMaxCell ); + if( nCellzData[2], nCell+1); + pNode->isDirty = 1; + } + + return (nCell==nMaxCell); } /* -** fts3ExprIterate() callback used to collect the "local" part of the -** FTS3_MATCHINFO_HITS array. The local stats are those elements of the -** array that are different for each row returned by the query. +** If the node is dirty, write it out to the database. */ -static int fts3ExprLocalHitsCb( - Fts3Expr *pExpr, /* Phrase expression node */ - int iPhrase, /* Phrase number */ - void *pCtx /* Pointer to MatchInfo structure */ -){ +static int nodeWrite(Rtree *pRtree, RtreeNode *pNode){ int rc = SQLITE_OK; - MatchInfo *p = (MatchInfo *)pCtx; - int iStart = iPhrase * p->nCol * 3; - int i; - - for(i=0; inCol && rc==SQLITE_OK; i++){ - char *pCsr; - rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr); - if( pCsr ){ - p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr); + if( pNode->isDirty ){ + sqlite3_stmt *p = pRtree->pWriteNode; + if( pNode->iNode ){ + sqlite3_bind_int64(p, 1, pNode->iNode); }else{ - p->aMatchinfo[iStart+i*3] = 0; + sqlite3_bind_null(p, 1); + } + sqlite3_bind_blob(p, 2, pNode->zData, pRtree->iNodeSize, SQLITE_STATIC); + sqlite3_step(p); + pNode->isDirty = 0; + rc = sqlite3_reset(p); + if( pNode->iNode==0 && rc==SQLITE_OK ){ + pNode->iNode = sqlite3_last_insert_rowid(pRtree->db); + nodeHashInsert(pRtree, pNode); } } + return rc; +} +/* +** Release a reference to a node. If the node is dirty and the reference +** count drops to zero, the node data is written to the database. +*/ +static int nodeRelease(Rtree *pRtree, RtreeNode *pNode){ + int rc = SQLITE_OK; + if( pNode ){ + assert( pNode->nRef>0 ); + pNode->nRef--; + if( pNode->nRef==0 ){ + if( pNode->iNode==1 ){ + pRtree->iDepth = -1; + } + if( pNode->pParent ){ + rc = nodeRelease(pRtree, pNode->pParent); + } + if( rc==SQLITE_OK ){ + rc = nodeWrite(pRtree, pNode); + } + nodeHashDelete(pRtree, pNode); + sqlite3_free(pNode); + } + } return rc; } -static int fts3MatchinfoCheck( - Fts3Table *pTab, - char cArg, - char **pzErr +/* +** Return the 64-bit integer value associated with cell iCell of +** node pNode. If pNode is a leaf node, this is a rowid. If it is +** an internal node, then the 64-bit integer is a child page number. +*/ +static i64 nodeGetRowid( + Rtree *pRtree, /* The overall R-Tree */ + RtreeNode *pNode, /* The node from which to extract the ID */ + int iCell /* The cell index from which to extract the ID */ ){ - if( (cArg==FTS3_MATCHINFO_NPHRASE) - || (cArg==FTS3_MATCHINFO_NCOL) - || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4) - || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4) - || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) - || (cArg==FTS3_MATCHINFO_LCS) - || (cArg==FTS3_MATCHINFO_HITS) - ){ - return SQLITE_OK; - } - *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg); - return SQLITE_ERROR; + assert( iCellzData[4 + pRtree->nBytesPerCell*iCell]); } -static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ - int nVal; /* Number of integers output by cArg */ +/* +** Return coordinate iCoord from cell iCell in node pNode. +*/ +static void nodeGetCoord( + Rtree *pRtree, /* The overall R-Tree */ + RtreeNode *pNode, /* The node from which to extract a coordinate */ + int iCell, /* The index of the cell within the node */ + int iCoord, /* Which coordinate to extract */ + RtreeCoord *pCoord /* OUT: Space to write result to */ +){ + readCoord(&pNode->zData[12 + pRtree->nBytesPerCell*iCell + 4*iCoord], pCoord); +} - switch( cArg ){ - case FTS3_MATCHINFO_NDOC: - case FTS3_MATCHINFO_NPHRASE: - case FTS3_MATCHINFO_NCOL: - nVal = 1; - break; +/* +** Deserialize cell iCell of node pNode. Populate the structure pointed +** to by pCell with the results. +*/ +static void nodeGetCell( + Rtree *pRtree, /* The overall R-Tree */ + RtreeNode *pNode, /* The node containing the cell to be read */ + int iCell, /* Index of the cell within the node */ + RtreeCell *pCell /* OUT: Write the cell contents here */ +){ + u8 *pData; + RtreeCoord *pCoord; + int ii; + pCell->iRowid = nodeGetRowid(pRtree, pNode, iCell); + pData = pNode->zData + (12 + pRtree->nBytesPerCell*iCell); + pCoord = pCell->aCoord; + for(ii=0; iinDim*2; ii++){ + readCoord(&pData[ii*4], &pCoord[ii]); + } +} - case FTS3_MATCHINFO_AVGLENGTH: - case FTS3_MATCHINFO_LENGTH: - case FTS3_MATCHINFO_LCS: - nVal = pInfo->nCol; - break; - default: - assert( cArg==FTS3_MATCHINFO_HITS ); - nVal = pInfo->nCol * pInfo->nPhrase * 3; - break; - } +/* Forward declaration for the function that does the work of +** the virtual table module xCreate() and xConnect() methods. +*/ +static int rtreeInit( + sqlite3 *, void *, int, const char *const*, sqlite3_vtab **, char **, int +); - return nVal; +/* +** Rtree virtual table module xCreate method. +*/ +static int rtreeCreate( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + return rtreeInit(db, pAux, argc, argv, ppVtab, pzErr, 1); } -static int fts3MatchinfoSelectDoctotal( - Fts3Table *pTab, - sqlite3_stmt **ppStmt, - sqlite3_int64 *pnDoc, - const char **paLen +/* +** Rtree virtual table module xConnect method. +*/ +static int rtreeConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr ){ - sqlite3_stmt *pStmt; - const char *a; - sqlite3_int64 nDoc; - - if( !*ppStmt ){ - int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); - if( rc!=SQLITE_OK ) return rc; - } - pStmt = *ppStmt; - assert( sqlite3_data_count(pStmt)==1 ); - - a = sqlite3_column_blob(pStmt, 0); - a += sqlite3Fts3GetVarint(a, &nDoc); - if( nDoc==0 ) return FTS_CORRUPT_VTAB; - *pnDoc = (u32)nDoc; + return rtreeInit(db, pAux, argc, argv, ppVtab, pzErr, 0); +} - if( paLen ) *paLen = a; - return SQLITE_OK; +/* +** Increment the r-tree reference count. +*/ +static void rtreeReference(Rtree *pRtree){ + pRtree->nBusy++; } /* -** An instance of the following structure is used to store state while -** iterating through a multi-column position-list corresponding to the -** hits for a single phrase on a single row in order to calculate the -** values for a matchinfo() FTS3_MATCHINFO_LCS request. +** Decrement the r-tree reference count. When the reference count reaches +** zero the structure is deleted. */ -typedef struct LcsIterator LcsIterator; -struct LcsIterator { - Fts3Expr *pExpr; /* Pointer to phrase expression */ - int iPosOffset; /* Tokens count up to end of this phrase */ - char *pRead; /* Cursor used to iterate through aDoclist */ - int iPos; /* Current position */ -}; +static void rtreeRelease(Rtree *pRtree){ + pRtree->nBusy--; + if( pRtree->nBusy==0 ){ + sqlite3_finalize(pRtree->pReadNode); + sqlite3_finalize(pRtree->pWriteNode); + sqlite3_finalize(pRtree->pDeleteNode); + sqlite3_finalize(pRtree->pReadRowid); + sqlite3_finalize(pRtree->pWriteRowid); + sqlite3_finalize(pRtree->pDeleteRowid); + sqlite3_finalize(pRtree->pReadParent); + sqlite3_finalize(pRtree->pWriteParent); + sqlite3_finalize(pRtree->pDeleteParent); + sqlite3_free(pRtree); + } +} /* -** If LcsIterator.iCol is set to the following value, the iterator has -** finished iterating through all offsets for all columns. +** Rtree virtual table module xDisconnect method. */ -#define LCS_ITERATOR_FINISHED 0x7FFFFFFF; - -static int fts3MatchinfoLcsCb( - Fts3Expr *pExpr, /* Phrase expression node */ - int iPhrase, /* Phrase number (numbered from zero) */ - void *pCtx /* Pointer to MatchInfo structure */ -){ - LcsIterator *aIter = (LcsIterator *)pCtx; - aIter[iPhrase].pExpr = pExpr; +static int rtreeDisconnect(sqlite3_vtab *pVtab){ + rtreeRelease((Rtree *)pVtab); return SQLITE_OK; } -/* -** Advance the iterator passed as an argument to the next position. Return -** 1 if the iterator is at EOF or if it now points to the start of the -** position list for the next column. +/* +** Rtree virtual table module xDestroy method. */ -static int fts3LcsIteratorAdvance(LcsIterator *pIter){ - char *pRead = pIter->pRead; - sqlite3_int64 iRead; - int rc = 0; - - pRead += sqlite3Fts3GetVarint(pRead, &iRead); - if( iRead==0 || iRead==1 ){ - pRead = 0; - rc = 1; +static int rtreeDestroy(sqlite3_vtab *pVtab){ + Rtree *pRtree = (Rtree *)pVtab; + int rc; + char *zCreate = sqlite3_mprintf( + "DROP TABLE '%q'.'%q_node';" + "DROP TABLE '%q'.'%q_rowid';" + "DROP TABLE '%q'.'%q_parent';", + pRtree->zDb, pRtree->zName, + pRtree->zDb, pRtree->zName, + pRtree->zDb, pRtree->zName + ); + if( !zCreate ){ + rc = SQLITE_NOMEM; }else{ - pIter->iPos += (int)(iRead-2); + rc = sqlite3_exec(pRtree->db, zCreate, 0, 0, 0); + sqlite3_free(zCreate); + } + if( rc==SQLITE_OK ){ + rtreeRelease(pRtree); } - pIter->pRead = pRead; return rc; } - -/* -** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. -** -** If the call is successful, the longest-common-substring lengths for each -** column are written into the first nCol elements of the pInfo->aMatchinfo[] -** array before returning. SQLITE_OK is returned in this case. -** -** Otherwise, if an error occurs, an SQLite error code is returned and the -** data written to the first nCol elements of pInfo->aMatchinfo[] is -** undefined. -*/ -static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ - LcsIterator *aIter; - int i; - int iCol; - int nToken = 0; - /* Allocate and populate the array of LcsIterator objects. The array - ** contains one element for each matchable phrase in the query. - **/ - aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); - if( !aIter ) return SQLITE_NOMEM; - memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); - (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); +/* +** Rtree virtual table module xOpen method. +*/ +static int rtreeOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ + int rc = SQLITE_NOMEM; + RtreeCursor *pCsr; - for(i=0; inPhrase; i++){ - LcsIterator *pIter = &aIter[i]; - nToken -= pIter->pExpr->pPhrase->nToken; - pIter->iPosOffset = nToken; + pCsr = (RtreeCursor *)sqlite3_malloc(sizeof(RtreeCursor)); + if( pCsr ){ + memset(pCsr, 0, sizeof(RtreeCursor)); + pCsr->base.pVtab = pVTab; + rc = SQLITE_OK; } + *ppCursor = (sqlite3_vtab_cursor *)pCsr; - for(iCol=0; iColnCol; iCol++){ - int nLcs = 0; /* LCS value for this column */ - int nLive = 0; /* Number of iterators in aIter not at EOF */ - - for(i=0; inPhrase; i++){ - int rc; - LcsIterator *pIt = &aIter[i]; - rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead); - if( rc!=SQLITE_OK ) return rc; - if( pIt->pRead ){ - pIt->iPos = pIt->iPosOffset; - fts3LcsIteratorAdvance(&aIter[i]); - nLive++; - } - } + return rc; +} - while( nLive>0 ){ - LcsIterator *pAdv = 0; /* The iterator to advance by one position */ - int nThisLcs = 0; /* LCS for the current iterator positions */ - for(i=0; inPhrase; i++){ - LcsIterator *pIter = &aIter[i]; - if( pIter->pRead==0 ){ - /* This iterator is already at EOF for this column. */ - nThisLcs = 0; - }else{ - if( pAdv==0 || pIter->iPosiPos ){ - pAdv = pIter; - } - if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ - nThisLcs++; - }else{ - nThisLcs = 1; - } - if( nThisLcs>nLcs ) nLcs = nThisLcs; - } +/* +** Free the RtreeCursor.aConstraint[] array and its contents. +*/ +static void freeCursorConstraints(RtreeCursor *pCsr){ + if( pCsr->aConstraint ){ + int i; /* Used to iterate through constraint array */ + for(i=0; inConstraint; i++){ + sqlite3_rtree_query_info *pInfo = pCsr->aConstraint[i].pInfo; + if( pInfo ){ + if( pInfo->xDelUser ) pInfo->xDelUser(pInfo->pUser); + sqlite3_free(pInfo); } - if( fts3LcsIteratorAdvance(pAdv) ) nLive--; } - - pInfo->aMatchinfo[iCol] = nLcs; + sqlite3_free(pCsr->aConstraint); + pCsr->aConstraint = 0; } +} - sqlite3_free(aIter); +/* +** Rtree virtual table module xClose method. +*/ +static int rtreeClose(sqlite3_vtab_cursor *cur){ + Rtree *pRtree = (Rtree *)(cur->pVtab); + int ii; + RtreeCursor *pCsr = (RtreeCursor *)cur; + freeCursorConstraints(pCsr); + sqlite3_free(pCsr->aPoint); + for(ii=0; iiaNode[ii]); + sqlite3_free(pCsr); return SQLITE_OK; } /* -** Populate the buffer pInfo->aMatchinfo[] with an array of integers to -** be returned by the matchinfo() function. Argument zArg contains the -** format string passed as the second argument to matchinfo (or the -** default value "pcx" if no second argument was specified). The format -** string has already been validated and the pInfo->aMatchinfo[] array -** is guaranteed to be large enough for the output. +** Rtree virtual table module xEof method. ** -** If bGlobal is true, then populate all fields of the matchinfo() output. -** If it is false, then assume that those fields that do not change between -** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS) -** have already been populated. +** Return non-zero if the cursor does not currently point to a valid +** record (i.e if the scan has finished), or zero otherwise. +*/ +static int rtreeEof(sqlite3_vtab_cursor *cur){ + RtreeCursor *pCsr = (RtreeCursor *)cur; + return pCsr->atEOF; +} + +/* +** Convert raw bits from the on-disk RTree record into a coordinate value. +** The on-disk format is big-endian and needs to be converted for little- +** endian platforms. The on-disk record stores integer coordinates if +** eInt is true and it stores 32-bit floating point records if eInt is +** false. a[] is the four bytes of the on-disk record to be decoded. +** Store the results in "r". ** -** Return SQLITE_OK if successful, or an SQLite error code if an error -** occurs. If a value other than SQLITE_OK is returned, the state the -** pInfo->aMatchinfo[] buffer is left in is undefined. +** There are three versions of this macro, one each for little-endian and +** big-endian processors and a third generic implementation. The endian- +** specific implementations are much faster and are preferred if the +** processor endianness is known at compile-time. The SQLITE_BYTEORDER +** macro is part of sqliteInt.h and hence the endian-specific +** implementation will only be used if this module is compiled as part +** of the amalgamation. */ -static int fts3MatchinfoValues( - Fts3Cursor *pCsr, /* FTS3 cursor object */ - int bGlobal, /* True to grab the global stats */ - MatchInfo *pInfo, /* Matchinfo context object */ - const char *zArg /* Matchinfo format string */ -){ - int rc = SQLITE_OK; - int i; - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - sqlite3_stmt *pSelect = 0; +#if defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==1234 +#define RTREE_DECODE_COORD(eInt, a, r) { \ + RtreeCoord c; /* Coordinate decoded */ \ + memcpy(&c.u,a,4); \ + c.u = ((c.u>>24)&0xff)|((c.u>>8)&0xff00)| \ + ((c.u&0xff)<<24)|((c.u&0xff00)<<8); \ + r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ +} +#elif defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==4321 +#define RTREE_DECODE_COORD(eInt, a, r) { \ + RtreeCoord c; /* Coordinate decoded */ \ + memcpy(&c.u,a,4); \ + r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ +} +#else +#define RTREE_DECODE_COORD(eInt, a, r) { \ + RtreeCoord c; /* Coordinate decoded */ \ + c.u = ((u32)a[0]<<24) + ((u32)a[1]<<16) \ + +((u32)a[2]<<8) + a[3]; \ + r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ +} +#endif - for(i=0; rc==SQLITE_OK && zArg[i]; i++){ +/* +** Check the RTree node or entry given by pCellData and p against the MATCH +** constraint pConstraint. +*/ +static int rtreeCallbackConstraint( + RtreeConstraint *pConstraint, /* The constraint to test */ + int eInt, /* True if RTree holding integer coordinates */ + u8 *pCellData, /* Raw cell content */ + RtreeSearchPoint *pSearch, /* Container of this cell */ + sqlite3_rtree_dbl *prScore, /* OUT: score for the cell */ + int *peWithin /* OUT: visibility of the cell */ +){ + int i; /* Loop counter */ + sqlite3_rtree_query_info *pInfo = pConstraint->pInfo; /* Callback info */ + int nCoord = pInfo->nCoord; /* No. of coordinates */ + int rc; /* Callback return code */ + sqlite3_rtree_dbl aCoord[RTREE_MAX_DIMENSIONS*2]; /* Decoded coordinates */ - switch( zArg[i] ){ - case FTS3_MATCHINFO_NPHRASE: - if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; - break; + assert( pConstraint->op==RTREE_MATCH || pConstraint->op==RTREE_QUERY ); + assert( nCoord==2 || nCoord==4 || nCoord==6 || nCoord==8 || nCoord==10 ); - case FTS3_MATCHINFO_NCOL: - if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; - break; - - case FTS3_MATCHINFO_NDOC: - if( bGlobal ){ - sqlite3_int64 nDoc = 0; - rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); - pInfo->aMatchinfo[0] = (u32)nDoc; - } - break; + if( pConstraint->op==RTREE_QUERY && pSearch->iLevel==1 ){ + pInfo->iRowid = readInt64(pCellData); + } + pCellData += 8; + for(i=0; iop==RTREE_MATCH ){ + rc = pConstraint->u.xGeom((sqlite3_rtree_geometry*)pInfo, + nCoord, aCoord, &i); + if( i==0 ) *peWithin = NOT_WITHIN; + *prScore = RTREE_ZERO; + }else{ + pInfo->aCoord = aCoord; + pInfo->iLevel = pSearch->iLevel - 1; + pInfo->rScore = pInfo->rParentScore = pSearch->rScore; + pInfo->eWithin = pInfo->eParentWithin = pSearch->eWithin; + rc = pConstraint->u.xQueryFunc(pInfo); + if( pInfo->eWithin<*peWithin ) *peWithin = pInfo->eWithin; + if( pInfo->rScore<*prScore || *prScorerScore; + } + } + return rc; +} - case FTS3_MATCHINFO_AVGLENGTH: - if( bGlobal ){ - sqlite3_int64 nDoc; /* Number of rows in table */ - const char *a; /* Aggregate column length array */ +/* +** Check the internal RTree node given by pCellData against constraint p. +** If this constraint cannot be satisfied by any child within the node, +** set *peWithin to NOT_WITHIN. +*/ +static void rtreeNonleafConstraint( + RtreeConstraint *p, /* The constraint to test */ + int eInt, /* True if RTree holds integer coordinates */ + u8 *pCellData, /* Raw cell content as appears on disk */ + int *peWithin /* Adjust downward, as appropriate */ +){ + sqlite3_rtree_dbl val; /* Coordinate value convert to a double */ - rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a); - if( rc==SQLITE_OK ){ - int iCol; - for(iCol=0; iColnCol; iCol++){ - u32 iVal; - sqlite3_int64 nToken; - a += sqlite3Fts3GetVarint(a, &nToken); - iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc); - pInfo->aMatchinfo[iCol] = iVal; - } - } - } - break; + /* p->iCoord might point to either a lower or upper bound coordinate + ** in a coordinate pair. But make pCellData point to the lower bound. + */ + pCellData += 8 + 4*(p->iCoord&0xfe); - case FTS3_MATCHINFO_LENGTH: { - sqlite3_stmt *pSelectDocsize = 0; - rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize); - if( rc==SQLITE_OK ){ - int iCol; - const char *a = sqlite3_column_blob(pSelectDocsize, 0); - for(iCol=0; iColnCol; iCol++){ - sqlite3_int64 nToken; - a += sqlite3Fts3GetVarint(a, &nToken); - pInfo->aMatchinfo[iCol] = (u32)nToken; - } - } - sqlite3_reset(pSelectDocsize); - break; - } + assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE + || p->op==RTREE_GT || p->op==RTREE_EQ ); + switch( p->op ){ + case RTREE_LE: + case RTREE_LT: + case RTREE_EQ: + RTREE_DECODE_COORD(eInt, pCellData, val); + /* val now holds the lower bound of the coordinate pair */ + if( p->u.rValue>=val ) return; + if( p->op!=RTREE_EQ ) break; /* RTREE_LE and RTREE_LT end here */ + /* Fall through for the RTREE_EQ case */ - case FTS3_MATCHINFO_LCS: - rc = fts3ExprLoadDoclists(pCsr, 0, 0); - if( rc==SQLITE_OK ){ - rc = fts3MatchinfoLcs(pCsr, pInfo); - } - break; + default: /* RTREE_GT or RTREE_GE, or fallthrough of RTREE_EQ */ + pCellData += 4; + RTREE_DECODE_COORD(eInt, pCellData, val); + /* val now holds the upper bound of the coordinate pair */ + if( p->u.rValue<=val ) return; + } + *peWithin = NOT_WITHIN; +} - default: { - Fts3Expr *pExpr; - assert( zArg[i]==FTS3_MATCHINFO_HITS ); - pExpr = pCsr->pExpr; - rc = fts3ExprLoadDoclists(pCsr, 0, 0); - if( rc!=SQLITE_OK ) break; - if( bGlobal ){ - if( pCsr->pDeferred ){ - rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0); - if( rc!=SQLITE_OK ) break; - } - rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); - if( rc!=SQLITE_OK ) break; - } - (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); - break; - } - } +/* +** Check the leaf RTree cell given by pCellData against constraint p. +** If this constraint is not satisfied, set *peWithin to NOT_WITHIN. +** If the constraint is satisfied, leave *peWithin unchanged. +** +** The constraint is of the form: xN op $val +** +** The op is given by p->op. The xN is p->iCoord-th coordinate in +** pCellData. $val is given by p->u.rValue. +*/ +static void rtreeLeafConstraint( + RtreeConstraint *p, /* The constraint to test */ + int eInt, /* True if RTree holds integer coordinates */ + u8 *pCellData, /* Raw cell content as appears on disk */ + int *peWithin /* Adjust downward, as appropriate */ +){ + RtreeDValue xN; /* Coordinate value converted to a double */ - pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); + assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE + || p->op==RTREE_GT || p->op==RTREE_EQ ); + pCellData += 8 + p->iCoord*4; + RTREE_DECODE_COORD(eInt, pCellData, xN); + switch( p->op ){ + case RTREE_LE: if( xN <= p->u.rValue ) return; break; + case RTREE_LT: if( xN < p->u.rValue ) return; break; + case RTREE_GE: if( xN >= p->u.rValue ) return; break; + case RTREE_GT: if( xN > p->u.rValue ) return; break; + default: if( xN == p->u.rValue ) return; break; } - - sqlite3_reset(pSelect); - return rc; + *peWithin = NOT_WITHIN; } - /* -** Populate pCsr->aMatchinfo[] with data for the current row. The -** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32). +** One of the cells in node pNode is guaranteed to have a 64-bit +** integer value equal to iRowid. Return the index of this cell. */ -static int fts3GetMatchinfo( - Fts3Cursor *pCsr, /* FTS3 Cursor object */ - const char *zArg /* Second argument to matchinfo() function */ +static int nodeRowidIndex( + Rtree *pRtree, + RtreeNode *pNode, + i64 iRowid, + int *piIndex ){ - MatchInfo sInfo; - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int rc = SQLITE_OK; - int bGlobal = 0; /* Collect 'global' stats as well as local */ - - memset(&sInfo, 0, sizeof(MatchInfo)); - sInfo.pCursor = pCsr; - sInfo.nCol = pTab->nColumn; - - /* If there is cached matchinfo() data, but the format string for the - ** cache does not match the format string for this request, discard - ** the cached data. */ - if( pCsr->zMatchinfo && strcmp(pCsr->zMatchinfo, zArg) ){ - assert( pCsr->aMatchinfo ); - sqlite3_free(pCsr->aMatchinfo); - pCsr->zMatchinfo = 0; - pCsr->aMatchinfo = 0; + int ii; + int nCell = NCELL(pNode); + assert( nCell<200 ); + for(ii=0; iiaMatchinfo==0 ){ - int nMatchinfo = 0; /* Number of u32 elements in match-info */ - int nArg; /* Bytes in zArg */ - int i; /* Used to iterate through zArg */ +/* +** Return the index of the cell containing a pointer to node pNode +** in its parent. If pNode is the root node, return -1. +*/ +static int nodeParentIndex(Rtree *pRtree, RtreeNode *pNode, int *piIndex){ + RtreeNode *pParent = pNode->pParent; + if( pParent ){ + return nodeRowidIndex(pRtree, pParent, pNode->iNode, piIndex); + } + *piIndex = -1; + return SQLITE_OK; +} - /* Determine the number of phrases in the query */ - pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr); - sInfo.nPhrase = pCsr->nPhrase; +/* +** Compare two search points. Return negative, zero, or positive if the first +** is less than, equal to, or greater than the second. +** +** The rScore is the primary key. Smaller rScore values come first. +** If the rScore is a tie, then use iLevel as the tie breaker with smaller +** iLevel values coming first. In this way, if rScore is the same for all +** SearchPoints, then iLevel becomes the deciding factor and the result +** is a depth-first search, which is the desired default behavior. +*/ +static int rtreeSearchPointCompare( + const RtreeSearchPoint *pA, + const RtreeSearchPoint *pB +){ + if( pA->rScorerScore ) return -1; + if( pA->rScore>pB->rScore ) return +1; + if( pA->iLeveliLevel ) return -1; + if( pA->iLevel>pB->iLevel ) return +1; + return 0; +} - /* Determine the number of integers in the buffer returned by this call. */ - for(i=0; zArg[i]; i++){ - nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]); +/* +** Interchange to search points in a cursor. +*/ +static void rtreeSearchPointSwap(RtreeCursor *p, int i, int j){ + RtreeSearchPoint t = p->aPoint[i]; + assert( iaPoint[i] = p->aPoint[j]; + p->aPoint[j] = t; + i++; j++; + if( i=RTREE_CACHE_SZ ){ + nodeRelease(RTREE_OF_CURSOR(p), p->aNode[i]); + p->aNode[i] = 0; + }else{ + RtreeNode *pTemp = p->aNode[i]; + p->aNode[i] = p->aNode[j]; + p->aNode[j] = pTemp; } - - /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */ - nArg = (int)strlen(zArg); - pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1); - if( !pCsr->aMatchinfo ) return SQLITE_NOMEM; - - pCsr->zMatchinfo = (char *)&pCsr->aMatchinfo[nMatchinfo]; - pCsr->nMatchinfo = nMatchinfo; - memcpy(pCsr->zMatchinfo, zArg, nArg+1); - memset(pCsr->aMatchinfo, 0, sizeof(u32)*nMatchinfo); - pCsr->isMatchinfoNeeded = 1; - bGlobal = 1; } +} - sInfo.aMatchinfo = pCsr->aMatchinfo; - sInfo.nPhrase = pCsr->nPhrase; - if( pCsr->isMatchinfoNeeded ){ - rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg); - pCsr->isMatchinfoNeeded = 0; - } +/* +** Return the search point with the lowest current score. +*/ +static RtreeSearchPoint *rtreeSearchPointFirst(RtreeCursor *pCur){ + return pCur->bPoint ? &pCur->sPoint : pCur->nPoint ? pCur->aPoint : 0; +} - return rc; +/* +** Get the RtreeNode for the search point with the lowest score. +*/ +static RtreeNode *rtreeNodeOfFirstSearchPoint(RtreeCursor *pCur, int *pRC){ + sqlite3_int64 id; + int ii = 1 - pCur->bPoint; + assert( ii==0 || ii==1 ); + assert( pCur->bPoint || pCur->nPoint ); + if( pCur->aNode[ii]==0 ){ + assert( pRC!=0 ); + id = ii ? pCur->aPoint[0].id : pCur->sPoint.id; + *pRC = nodeAcquire(RTREE_OF_CURSOR(pCur), id, 0, &pCur->aNode[ii]); + } + return pCur->aNode[ii]; } /* -** Implementation of snippet() function. +** Push a new element onto the priority queue */ -SQLITE_PRIVATE void sqlite3Fts3Snippet( - sqlite3_context *pCtx, /* SQLite function call context */ - Fts3Cursor *pCsr, /* Cursor object */ - const char *zStart, /* Snippet start text - "" */ - const char *zEnd, /* Snippet end text - "" */ - const char *zEllipsis, /* Snippet ellipsis text - "..." */ - int iCol, /* Extract snippet from this column */ - int nToken /* Approximate number of tokens in snippet */ +static RtreeSearchPoint *rtreeEnqueue( + RtreeCursor *pCur, /* The cursor */ + RtreeDValue rScore, /* Score for the new search point */ + u8 iLevel /* Level for the new search point */ ){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int rc = SQLITE_OK; - int i; - StrBuffer res = {0, 0, 0}; - - /* The returned text includes up to four fragments of text extracted from - ** the data in the current row. The first iteration of the for(...) loop - ** below attempts to locate a single fragment of text nToken tokens in - ** size that contains at least one instance of all phrases in the query - ** expression that appear in the current row. If such a fragment of text - ** cannot be found, the second iteration of the loop attempts to locate - ** a pair of fragments, and so on. - */ - int nSnippet = 0; /* Number of fragments in this snippet */ - SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */ - int nFToken = -1; /* Number of tokens in each fragment */ - - if( !pCsr->pExpr ){ - sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); - return; + int i, j; + RtreeSearchPoint *pNew; + if( pCur->nPoint>=pCur->nPointAlloc ){ + int nNew = pCur->nPointAlloc*2 + 8; + pNew = sqlite3_realloc(pCur->aPoint, nNew*sizeof(pCur->aPoint[0])); + if( pNew==0 ) return 0; + pCur->aPoint = pNew; + pCur->nPointAlloc = nNew; } + i = pCur->nPoint++; + pNew = pCur->aPoint + i; + pNew->rScore = rScore; + pNew->iLevel = iLevel; + assert( iLevel<=RTREE_MAX_DEPTH ); + while( i>0 ){ + RtreeSearchPoint *pParent; + j = (i-1)/2; + pParent = pCur->aPoint + j; + if( rtreeSearchPointCompare(pNew, pParent)>=0 ) break; + rtreeSearchPointSwap(pCur, j, i); + i = j; + pNew = pParent; + } + return pNew; +} - for(nSnippet=1; 1; nSnippet++){ - - int iSnip; /* Loop counter 0..nSnippet-1 */ - u64 mCovered = 0; /* Bitmask of phrases covered by snippet */ - u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */ - - if( nToken>=0 ){ - nFToken = (nToken+nSnippet-1) / nSnippet; - }else{ - nFToken = -1 * nToken; +/* +** Allocate a new RtreeSearchPoint and return a pointer to it. Return +** NULL if malloc fails. +*/ +static RtreeSearchPoint *rtreeSearchPointNew( + RtreeCursor *pCur, /* The cursor */ + RtreeDValue rScore, /* Score for the new search point */ + u8 iLevel /* Level for the new search point */ +){ + RtreeSearchPoint *pNew, *pFirst; + pFirst = rtreeSearchPointFirst(pCur); + pCur->anQueue[iLevel]++; + if( pFirst==0 + || pFirst->rScore>rScore + || (pFirst->rScore==rScore && pFirst->iLevel>iLevel) + ){ + if( pCur->bPoint ){ + int ii; + pNew = rtreeEnqueue(pCur, rScore, iLevel); + if( pNew==0 ) return 0; + ii = (int)(pNew - pCur->aPoint) + 1; + if( iiaNode[ii]==0 ); + pCur->aNode[ii] = pCur->aNode[0]; + }else{ + nodeRelease(RTREE_OF_CURSOR(pCur), pCur->aNode[0]); + } + pCur->aNode[0] = 0; + *pNew = pCur->sPoint; } + pCur->sPoint.rScore = rScore; + pCur->sPoint.iLevel = iLevel; + pCur->bPoint = 1; + return &pCur->sPoint; + }else{ + return rtreeEnqueue(pCur, rScore, iLevel); + } +} - for(iSnip=0; iSnipnColumn; iRead++){ - SnippetFragment sF = {0, 0, 0, 0}; - int iS = 0; - if( iCol>=0 && iRead!=iCol ) continue; +#if 0 +/* Tracing routines for the RtreeSearchPoint queue */ +static void tracePoint(RtreeSearchPoint *p, int idx, RtreeCursor *pCur){ + if( idx<0 ){ printf(" s"); }else{ printf("%2d", idx); } + printf(" %d.%05lld.%02d %g %d", + p->iLevel, p->id, p->iCell, p->rScore, p->eWithin + ); + idx++; + if( idxaNode[idx]); + }else{ + printf("\n"); + } +} +static void traceQueue(RtreeCursor *pCur, const char *zPrefix){ + int ii; + printf("=== %9s ", zPrefix); + if( pCur->bPoint ){ + tracePoint(&pCur->sPoint, -1, pCur); + } + for(ii=0; iinPoint; ii++){ + if( ii>0 || pCur->bPoint ) printf(" "); + tracePoint(&pCur->aPoint[ii], ii, pCur); + } +} +# define RTREE_QUEUE_TRACE(A,B) traceQueue(A,B) +#else +# define RTREE_QUEUE_TRACE(A,B) /* no-op */ +#endif - /* Find the best snippet of nFToken tokens in column iRead. */ - rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS); - if( rc!=SQLITE_OK ){ - goto snippet_out; +/* Remove the search point with the lowest current score. +*/ +static void rtreeSearchPointPop(RtreeCursor *p){ + int i, j, k, n; + i = 1 - p->bPoint; + assert( i==0 || i==1 ); + if( p->aNode[i] ){ + nodeRelease(RTREE_OF_CURSOR(p), p->aNode[i]); + p->aNode[i] = 0; + } + if( p->bPoint ){ + p->anQueue[p->sPoint.iLevel]--; + p->bPoint = 0; + }else if( p->nPoint ){ + p->anQueue[p->aPoint[0].iLevel]--; + n = --p->nPoint; + p->aPoint[0] = p->aPoint[n]; + if( naNode[1] = p->aNode[n+1]; + p->aNode[n+1] = 0; + } + i = 0; + while( (j = i*2+1)aPoint[k], &p->aPoint[j])<0 ){ + if( rtreeSearchPointCompare(&p->aPoint[k], &p->aPoint[i])<0 ){ + rtreeSearchPointSwap(p, i, k); + i = k; + }else{ + break; } - if( iS>iBestScore ){ - *pFragment = sF; - iBestScore = iS; + }else{ + if( rtreeSearchPointCompare(&p->aPoint[j], &p->aPoint[i])<0 ){ + rtreeSearchPointSwap(p, i, j); + i = j; + }else{ + break; } } - - mCovered |= pFragment->covered; } - - /* If all query phrases seen by fts3BestSnippet() are present in at least - ** one of the nSnippet snippet fragments, break out of the loop. - */ - assert( (mCovered&mSeen)==mCovered ); - if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break; } +} - assert( nFToken>0 ); - for(i=0; inConstraint; + int ii; + int eInt; + RtreeSearchPoint x; - snippet_out: - sqlite3Fts3SegmentsClose(pTab); - if( rc!=SQLITE_OK ){ - sqlite3_result_error_code(pCtx, rc); - sqlite3_free(res.z); - }else{ - sqlite3_result_text(pCtx, res.z, -1, sqlite3_free); + eInt = pRtree->eCoordType==RTREE_COORD_INT32; + while( (p = rtreeSearchPointFirst(pCur))!=0 && p->iLevel>0 ){ + pNode = rtreeNodeOfFirstSearchPoint(pCur, &rc); + if( rc ) return rc; + nCell = NCELL(pNode); + assert( nCell<200 ); + while( p->iCellzData + (4+pRtree->nBytesPerCell*p->iCell); + eWithin = FULLY_WITHIN; + for(ii=0; iiaConstraint + ii; + if( pConstraint->op>=RTREE_MATCH ){ + rc = rtreeCallbackConstraint(pConstraint, eInt, pCellData, p, + &rScore, &eWithin); + if( rc ) return rc; + }else if( p->iLevel==1 ){ + rtreeLeafConstraint(pConstraint, eInt, pCellData, &eWithin); + }else{ + rtreeNonleafConstraint(pConstraint, eInt, pCellData, &eWithin); + } + if( eWithin==NOT_WITHIN ) break; + } + p->iCell++; + if( eWithin==NOT_WITHIN ) continue; + x.iLevel = p->iLevel - 1; + if( x.iLevel ){ + x.id = readInt64(pCellData); + x.iCell = 0; + }else{ + x.id = p->id; + x.iCell = p->iCell - 1; + } + if( p->iCell>=nCell ){ + RTREE_QUEUE_TRACE(pCur, "POP-S:"); + rtreeSearchPointPop(pCur); + } + if( rScoreeWithin = eWithin; + p->id = x.id; + p->iCell = x.iCell; + RTREE_QUEUE_TRACE(pCur, "PUSH-S:"); + break; + } + if( p->iCell>=nCell ){ + RTREE_QUEUE_TRACE(pCur, "POP-Se:"); + rtreeSearchPointPop(pCur); + } } + pCur->atEOF = p==0; + return SQLITE_OK; } +/* +** Rtree virtual table module xNext method. +*/ +static int rtreeNext(sqlite3_vtab_cursor *pVtabCursor){ + RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; + int rc = SQLITE_OK; -typedef struct TermOffset TermOffset; -typedef struct TermOffsetCtx TermOffsetCtx; - -struct TermOffset { - char *pList; /* Position-list */ - int iPos; /* Position just read from pList */ - int iOff; /* Offset of this term from read positions */ -}; + /* Move to the next entry that matches the configured constraints. */ + RTREE_QUEUE_TRACE(pCsr, "POP-Nx:"); + rtreeSearchPointPop(pCsr); + rc = rtreeStepToLeaf(pCsr); + return rc; +} -struct TermOffsetCtx { - Fts3Cursor *pCsr; - int iCol; /* Column of table to populate aTerm for */ - int iTerm; - sqlite3_int64 iDocid; - TermOffset *aTerm; -}; +/* +** Rtree virtual table module xRowid method. +*/ +static int rtreeRowid(sqlite3_vtab_cursor *pVtabCursor, sqlite_int64 *pRowid){ + RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; + RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr); + int rc = SQLITE_OK; + RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc); + if( rc==SQLITE_OK && p ){ + *pRowid = nodeGetRowid(RTREE_OF_CURSOR(pCsr), pNode, p->iCell); + } + return rc; +} -/* -** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). +/* +** Rtree virtual table module xColumn method. */ -static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ - TermOffsetCtx *p = (TermOffsetCtx *)ctx; - int nTerm; /* Number of tokens in phrase */ - int iTerm; /* For looping through nTerm phrase terms */ - char *pList; /* Pointer to position list for phrase */ - int iPos = 0; /* First position in position-list */ - int rc; +static int rtreeColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ + Rtree *pRtree = (Rtree *)cur->pVtab; + RtreeCursor *pCsr = (RtreeCursor *)cur; + RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr); + RtreeCoord c; + int rc = SQLITE_OK; + RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc); - UNUSED_PARAMETER(iPhrase); - rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList); - nTerm = pExpr->pPhrase->nToken; - if( pList ){ - fts3GetDeltaPosition(&pList, &iPos); - assert( iPos>=0 ); + if( rc ) return rc; + if( p==0 ) return SQLITE_OK; + if( i==0 ){ + sqlite3_result_int64(ctx, nodeGetRowid(pRtree, pNode, p->iCell)); + }else{ + if( rc ) return rc; + nodeGetCoord(pRtree, pNode, p->iCell, i-1, &c); +#ifndef SQLITE_RTREE_INT_ONLY + if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ + sqlite3_result_double(ctx, c.f); + }else +#endif + { + assert( pRtree->eCoordType==RTREE_COORD_INT32 ); + sqlite3_result_int(ctx, c.i); + } } + return SQLITE_OK; +} - for(iTerm=0; iTermaTerm[p->iTerm++]; - pT->iOff = nTerm-iTerm-1; - pT->pList = pList; - pT->iPos = iPos; +/* +** Use nodeAcquire() to obtain the leaf node containing the record with +** rowid iRowid. If successful, set *ppLeaf to point to the node and +** return SQLITE_OK. If there is no such record in the table, set +** *ppLeaf to 0 and return SQLITE_OK. If an error occurs, set *ppLeaf +** to zero and return an SQLite error code. +*/ +static int findLeafNode( + Rtree *pRtree, /* RTree to search */ + i64 iRowid, /* The rowid searching for */ + RtreeNode **ppLeaf, /* Write the node here */ + sqlite3_int64 *piNode /* Write the node-id here */ +){ + int rc; + *ppLeaf = 0; + sqlite3_bind_int64(pRtree->pReadRowid, 1, iRowid); + if( sqlite3_step(pRtree->pReadRowid)==SQLITE_ROW ){ + i64 iNode = sqlite3_column_int64(pRtree->pReadRowid, 0); + if( piNode ) *piNode = iNode; + rc = nodeAcquire(pRtree, iNode, 0, ppLeaf); + sqlite3_reset(pRtree->pReadRowid); + }else{ + rc = sqlite3_reset(pRtree->pReadRowid); } - return rc; } /* -** Implementation of offsets() function. +** This function is called to configure the RtreeConstraint object passed +** as the second argument for a MATCH constraint. The value passed as the +** first argument to this function is the right-hand operand to the MATCH +** operator. */ -SQLITE_PRIVATE void sqlite3Fts3Offsets( - sqlite3_context *pCtx, /* SQLite function call context */ - Fts3Cursor *pCsr /* Cursor object */ -){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; - int rc; /* Return Code */ - int nToken; /* Number of tokens in query */ - int iCol; /* Column currently being processed */ - StrBuffer res = {0, 0, 0}; /* Result string */ - TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ - - if( !pCsr->pExpr ){ - sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); - return; - } - - memset(&sCtx, 0, sizeof(sCtx)); - assert( pCsr->isRequireSeek==0 ); +static int deserializeGeometry(sqlite3_value *pValue, RtreeConstraint *pCons){ + RtreeMatchArg *pBlob; /* BLOB returned by geometry function */ + sqlite3_rtree_query_info *pInfo; /* Callback information */ + int nBlob; /* Size of the geometry function blob */ + int nExpected; /* Expected size of the BLOB */ - /* Count the number of terms in the query */ - rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); - if( rc!=SQLITE_OK ) goto offsets_out; + /* Check that value is actually a blob. */ + if( sqlite3_value_type(pValue)!=SQLITE_BLOB ) return SQLITE_ERROR; - /* Allocate the array of TermOffset iterators. */ - sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); - if( 0==sCtx.aTerm ){ - rc = SQLITE_NOMEM; - goto offsets_out; + /* Check that the blob is roughly the right size. */ + nBlob = sqlite3_value_bytes(pValue); + if( nBlob<(int)sizeof(RtreeMatchArg) ){ + return SQLITE_ERROR; } - sCtx.iDocid = pCsr->iPrevId; - sCtx.pCsr = pCsr; - /* Loop through the table columns, appending offset information to - ** string-buffer res for each column. - */ - for(iCol=0; iColnColumn; iCol++){ - sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ - const char *ZDUMMY; /* Dummy argument used with xNext() */ - int NDUMMY = 0; /* Dummy argument used with xNext() */ - int iStart = 0; - int iEnd = 0; - int iCurrent = 0; - const char *zDoc; - int nDoc; + pInfo = (sqlite3_rtree_query_info*)sqlite3_malloc( sizeof(*pInfo)+nBlob ); + if( !pInfo ) return SQLITE_NOMEM; + memset(pInfo, 0, sizeof(*pInfo)); + pBlob = (RtreeMatchArg*)&pInfo[1]; - /* Initialize the contents of sCtx.aTerm[] for column iCol. There is - ** no way that this operation can fail, so the return code from - ** fts3ExprIterate() can be discarded. - */ - sCtx.iCol = iCol; - sCtx.iTerm = 0; - (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx); + memcpy(pBlob, sqlite3_value_blob(pValue), nBlob); + nExpected = (int)(sizeof(RtreeMatchArg) + + pBlob->nParam*sizeof(sqlite3_value*) + + (pBlob->nParam-1)*sizeof(RtreeDValue)); + if( pBlob->magic!=RTREE_GEOMETRY_MAGIC || nBlob!=nExpected ){ + sqlite3_free(pInfo); + return SQLITE_ERROR; + } + pInfo->pContext = pBlob->cb.pContext; + pInfo->nParam = pBlob->nParam; + pInfo->aParam = pBlob->aParam; + pInfo->apSqlParam = pBlob->apSqlParam; - /* Retreive the text stored in column iCol. If an SQL NULL is stored - ** in column iCol, jump immediately to the next iteration of the loop. - ** If an OOM occurs while retrieving the data (this can happen if SQLite - ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM - ** to the caller. - */ - zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); - nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); - if( zDoc==0 ){ - if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ - continue; - } - rc = SQLITE_NOMEM; - goto offsets_out; - } + if( pBlob->cb.xGeom ){ + pCons->u.xGeom = pBlob->cb.xGeom; + }else{ + pCons->op = RTREE_QUERY; + pCons->u.xQueryFunc = pBlob->cb.xQueryFunc; + } + pCons->pInfo = pInfo; + return SQLITE_OK; +} - /* Initialize a tokenizer iterator to iterate through column iCol. */ - rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, - zDoc, nDoc, &pC - ); - if( rc!=SQLITE_OK ) goto offsets_out; +/* +** Rtree virtual table module xFilter method. +*/ +static int rtreeFilter( + sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + Rtree *pRtree = (Rtree *)pVtabCursor->pVtab; + RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; + RtreeNode *pRoot = 0; + int ii; + int rc = SQLITE_OK; + int iCell = 0; - rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); - while( rc==SQLITE_OK ){ - int i; /* Used to loop through terms */ - int iMinPos = 0x7FFFFFFF; /* Position of next token */ - TermOffset *pTerm = 0; /* TermOffset associated with next token */ + rtreeReference(pRtree); - for(i=0; ipList && (pT->iPos-pT->iOff)iPos-pT->iOff; - pTerm = pT; - } - } + /* Reset the cursor to the same state as rtreeOpen() leaves it in. */ + freeCursorConstraints(pCsr); + sqlite3_free(pCsr->aPoint); + memset(pCsr, 0, sizeof(RtreeCursor)); + pCsr->base.pVtab = (sqlite3_vtab*)pRtree; - if( !pTerm ){ - /* All offsets for this column have been gathered. */ - rc = SQLITE_DONE; + pCsr->iStrategy = idxNum; + if( idxNum==1 ){ + /* Special case - lookup by rowid. */ + RtreeNode *pLeaf; /* Leaf on which the required cell resides */ + RtreeSearchPoint *p; /* Search point for the the leaf */ + i64 iRowid = sqlite3_value_int64(argv[0]); + i64 iNode = 0; + rc = findLeafNode(pRtree, iRowid, &pLeaf, &iNode); + if( rc==SQLITE_OK && pLeaf!=0 ){ + p = rtreeSearchPointNew(pCsr, RTREE_ZERO, 0); + assert( p!=0 ); /* Always returns pCsr->sPoint */ + pCsr->aNode[0] = pLeaf; + p->id = iNode; + p->eWithin = PARTLY_WITHIN; + rc = nodeRowidIndex(pRtree, pLeaf, iRowid, &iCell); + p->iCell = iCell; + RTREE_QUEUE_TRACE(pCsr, "PUSH-F1:"); + }else{ + pCsr->atEOF = 1; + } + }else{ + /* Normal case - r-tree scan. Set up the RtreeCursor.aConstraint array + ** with the configured constraints. + */ + rc = nodeAcquire(pRtree, 1, 0, &pRoot); + if( rc==SQLITE_OK && argc>0 ){ + pCsr->aConstraint = sqlite3_malloc(sizeof(RtreeConstraint)*argc); + pCsr->nConstraint = argc; + if( !pCsr->aConstraint ){ + rc = SQLITE_NOMEM; }else{ - assert( iCurrent<=iMinPos ); - if( 0==(0xFE&*pTerm->pList) ){ - pTerm->pList = 0; - }else{ - fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); - } - while( rc==SQLITE_OK && iCurrentxNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); - } - if( rc==SQLITE_OK ){ - char aBuffer[64]; - sqlite3_snprintf(sizeof(aBuffer), aBuffer, - "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart - ); - rc = fts3StringAppend(&res, aBuffer, -1); - }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){ - rc = FTS_CORRUPT_VTAB; + memset(pCsr->aConstraint, 0, sizeof(RtreeConstraint)*argc); + memset(pCsr->anQueue, 0, sizeof(u32)*(pRtree->iDepth + 1)); + assert( (idxStr==0 && argc==0) + || (idxStr && (int)strlen(idxStr)==argc*2) ); + for(ii=0; iiaConstraint[ii]; + p->op = idxStr[ii*2]; + p->iCoord = idxStr[ii*2+1]-'0'; + if( p->op>=RTREE_MATCH ){ + /* A MATCH operator. The right-hand-side must be a blob that + ** can be cast into an RtreeMatchArg object. One created using + ** an sqlite3_rtree_geometry_callback() SQL user function. + */ + rc = deserializeGeometry(argv[ii], p); + if( rc!=SQLITE_OK ){ + break; + } + p->pInfo->nCoord = pRtree->nDim*2; + p->pInfo->anQueue = pCsr->anQueue; + p->pInfo->mxLevel = pRtree->iDepth + 1; + }else{ +#ifdef SQLITE_RTREE_INT_ONLY + p->u.rValue = sqlite3_value_int64(argv[ii]); +#else + p->u.rValue = sqlite3_value_double(argv[ii]); +#endif + } } } } - if( rc==SQLITE_DONE ){ - rc = SQLITE_OK; + if( rc==SQLITE_OK ){ + RtreeSearchPoint *pNew; + pNew = rtreeSearchPointNew(pCsr, RTREE_ZERO, pRtree->iDepth+1); + if( pNew==0 ) return SQLITE_NOMEM; + pNew->id = 1; + pNew->iCell = 0; + pNew->eWithin = PARTLY_WITHIN; + assert( pCsr->bPoint==1 ); + pCsr->aNode[0] = pRoot; + pRoot = 0; + RTREE_QUEUE_TRACE(pCsr, "PUSH-Fm:"); + rc = rtreeStepToLeaf(pCsr); } - - pMod->xClose(pC); - if( rc!=SQLITE_OK ) goto offsets_out; } - offsets_out: - sqlite3_free(sCtx.aTerm); - assert( rc!=SQLITE_DONE ); - sqlite3Fts3SegmentsClose(pTab); - if( rc!=SQLITE_OK ){ - sqlite3_result_error_code(pCtx, rc); - sqlite3_free(res.z); - }else{ - sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); - } - return; + nodeRelease(pRtree, pRoot); + rtreeRelease(pRtree); + return rc; } /* -** Implementation of matchinfo() function. +** Set the pIdxInfo->estimatedRows variable to nRow. Unless this +** extension is currently being used by a version of SQLite too old to +** support estimatedRows. In that case this function is a no-op. */ -SQLITE_PRIVATE void sqlite3Fts3Matchinfo( - sqlite3_context *pContext, /* Function call context */ - Fts3Cursor *pCsr, /* FTS3 table cursor */ - const char *zArg /* Second arg to matchinfo() function */ -){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int rc; - int i; - const char *zFormat; - - if( zArg ){ - for(i=0; zArg[i]; i++){ - char *zErr = 0; - if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){ - sqlite3_result_error(pContext, zErr, -1); - sqlite3_free(zErr); - return; - } - } - zFormat = zArg; - }else{ - zFormat = FTS3_MATCHINFO_DEFAULT; - } - - if( !pCsr->pExpr ){ - sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); - return; - } - - /* Retrieve matchinfo() data. */ - rc = fts3GetMatchinfo(pCsr, zFormat); - sqlite3Fts3SegmentsClose(pTab); - - if( rc!=SQLITE_OK ){ - sqlite3_result_error_code(pContext, rc); - }else{ - int n = pCsr->nMatchinfo * sizeof(u32); - sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT); +static void setEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){ +#if SQLITE_VERSION_NUMBER>=3008002 + if( sqlite3_libversion_number()>=3008002 ){ + pIdxInfo->estimatedRows = nRow; } -} - #endif +} -/************** End of fts3_snippet.c ****************************************/ -/************** Begin file fts3_unicode.c ************************************/ /* -** 2012 May 24 +** Rtree virtual table module xBestIndex method. There are three +** table scan strategies to choose from (in order from most to +** least desirable): ** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: +** idxNum idxStr Strategy +** ------------------------------------------------ +** 1 Unused Direct lookup by rowid. +** 2 See below R-tree query or full-table scan. +** ------------------------------------------------ ** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. +** If strategy 1 is used, then idxStr is not meaningful. If strategy +** 2 is used, idxStr is formatted to contain 2 bytes for each +** constraint used. The first two bytes of idxStr correspond to +** the constraint in sqlite3_index_info.aConstraintUsage[] with +** (argvIndex==1) etc. ** -****************************************************************************** +** The first of each pair of bytes in idxStr identifies the constraint +** operator as follows: ** -** Implementation of the "unicode" full-text-search tokenizer. +** Operator Byte Value +** ---------------------- +** = 0x41 ('A') +** <= 0x42 ('B') +** < 0x43 ('C') +** >= 0x44 ('D') +** > 0x45 ('E') +** MATCH 0x46 ('F') +** ---------------------- +** +** The second of each pair of bytes identifies the coordinate column +** to which the constraint applies. The leftmost coordinate column +** is 'a', the second from the left 'b' etc. */ +static int rtreeBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ + Rtree *pRtree = (Rtree*)tab; + int rc = SQLITE_OK; + int ii; + int bMatch = 0; /* True if there exists a MATCH constraint */ + i64 nRow; /* Estimated rows returned by this scan */ -#ifndef SQLITE_DISABLE_FTS3_UNICODE - -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + int iIdx = 0; + char zIdxStr[RTREE_MAX_DIMENSIONS*8+1]; + memset(zIdxStr, 0, sizeof(zIdxStr)); -/* #include */ -/* #include */ -/* #include */ -/* #include */ + /* Check if there exists a MATCH constraint - even an unusable one. If there + ** is, do not consider the lookup-by-rowid plan as using such a plan would + ** require the VDBE to evaluate the MATCH constraint, which is not currently + ** possible. */ + for(ii=0; iinConstraint; ii++){ + if( pIdxInfo->aConstraint[ii].op==SQLITE_INDEX_CONSTRAINT_MATCH ){ + bMatch = 1; + } + } + assert( pIdxInfo->idxStr==0 ); + for(ii=0; iinConstraint && iIdx<(int)(sizeof(zIdxStr)-1); ii++){ + struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[ii]; -/* -** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied -** from the sqlite3 source file utf.c. If this file is compiled as part -** of the amalgamation, they are not required. -*/ -#ifndef SQLITE_AMALGAMATION + if( bMatch==0 && p->usable + && p->iColumn==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + /* We have an equality constraint on the rowid. Use strategy 1. */ + int jj; + for(jj=0; jjaConstraintUsage[jj].argvIndex = 0; + pIdxInfo->aConstraintUsage[jj].omit = 0; + } + pIdxInfo->idxNum = 1; + pIdxInfo->aConstraintUsage[ii].argvIndex = 1; + pIdxInfo->aConstraintUsage[jj].omit = 1; -static const unsigned char sqlite3Utf8Trans1[] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, -}; + /* This strategy involves a two rowid lookups on an B-Tree structures + ** and then a linear search of an R-Tree node. This should be + ** considered almost as quick as a direct rowid lookup (for which + ** sqlite uses an internal cost of 0.0). It is expected to return + ** a single row. + */ + pIdxInfo->estimatedCost = 30.0; + setEstimatedRows(pIdxInfo, 1); + return SQLITE_OK; + } -#define READ_UTF8(zIn, zTerm, c) \ - c = *(zIn++); \ - if( c>=0xc0 ){ \ - c = sqlite3Utf8Trans1[c-0xc0]; \ - while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \ - c = (c<<6) + (0x3f & *(zIn++)); \ - } \ - if( c<0x80 \ - || (c&0xFFFFF800)==0xD800 \ - || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \ + if( p->usable && (p->iColumn>0 || p->op==SQLITE_INDEX_CONSTRAINT_MATCH) ){ + u8 op; + switch( p->op ){ + case SQLITE_INDEX_CONSTRAINT_EQ: op = RTREE_EQ; break; + case SQLITE_INDEX_CONSTRAINT_GT: op = RTREE_GT; break; + case SQLITE_INDEX_CONSTRAINT_LE: op = RTREE_LE; break; + case SQLITE_INDEX_CONSTRAINT_LT: op = RTREE_LT; break; + case SQLITE_INDEX_CONSTRAINT_GE: op = RTREE_GE; break; + default: + assert( p->op==SQLITE_INDEX_CONSTRAINT_MATCH ); + op = RTREE_MATCH; + break; + } + zIdxStr[iIdx++] = op; + zIdxStr[iIdx++] = p->iColumn - 1 + '0'; + pIdxInfo->aConstraintUsage[ii].argvIndex = (iIdx/2); + pIdxInfo->aConstraintUsage[ii].omit = 1; + } } -#define WRITE_UTF8(zOut, c) { \ - if( c<0x00080 ){ \ - *zOut++ = (u8)(c&0xFF); \ - } \ - else if( c<0x00800 ){ \ - *zOut++ = 0xC0 + (u8)((c>>6)&0x1F); \ - *zOut++ = 0x80 + (u8)(c & 0x3F); \ - } \ - else if( c<0x10000 ){ \ - *zOut++ = 0xE0 + (u8)((c>>12)&0x0F); \ - *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ - *zOut++ = 0x80 + (u8)(c & 0x3F); \ - }else{ \ - *zOut++ = 0xF0 + (u8)((c>>18) & 0x07); \ - *zOut++ = 0x80 + (u8)((c>>12) & 0x3F); \ - *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ - *zOut++ = 0x80 + (u8)(c & 0x3F); \ - } \ -} - -#endif /* ifndef SQLITE_AMALGAMATION */ + pIdxInfo->idxNum = 2; + pIdxInfo->needToFreeIdxStr = 1; + if( iIdx>0 && 0==(pIdxInfo->idxStr = sqlite3_mprintf("%s", zIdxStr)) ){ + return SQLITE_NOMEM; + } -typedef struct unicode_tokenizer unicode_tokenizer; -typedef struct unicode_cursor unicode_cursor; + nRow = pRtree->nRowEst / (iIdx + 1); + pIdxInfo->estimatedCost = (double)6.0 * (double)nRow; + setEstimatedRows(pIdxInfo, nRow); -struct unicode_tokenizer { - sqlite3_tokenizer base; - int bRemoveDiacritic; - int nException; - int *aiException; -}; + return rc; +} -struct unicode_cursor { - sqlite3_tokenizer_cursor base; - const unsigned char *aInput; /* Input text being tokenized */ - int nInput; /* Size of aInput[] in bytes */ - int iOff; /* Current offset within aInput[] */ - int iToken; /* Index of next token to be returned */ - char *zToken; /* storage for current token */ - int nAlloc; /* space allocated at zToken */ -}; +/* +** Return the N-dimensional volumn of the cell stored in *p. +*/ +static RtreeDValue cellArea(Rtree *pRtree, RtreeCell *p){ + RtreeDValue area = (RtreeDValue)1; + int ii; + for(ii=0; ii<(pRtree->nDim*2); ii+=2){ + area = (area * (DCOORD(p->aCoord[ii+1]) - DCOORD(p->aCoord[ii]))); + } + return area; +} +/* +** Return the margin length of cell p. The margin length is the sum +** of the objects size in each dimension. +*/ +static RtreeDValue cellMargin(Rtree *pRtree, RtreeCell *p){ + RtreeDValue margin = (RtreeDValue)0; + int ii; + for(ii=0; ii<(pRtree->nDim*2); ii+=2){ + margin += (DCOORD(p->aCoord[ii+1]) - DCOORD(p->aCoord[ii])); + } + return margin; +} /* -** Destroy a tokenizer allocated by unicodeCreate(). +** Store the union of cells p1 and p2 in p1. */ -static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){ - if( pTokenizer ){ - unicode_tokenizer *p = (unicode_tokenizer *)pTokenizer; - sqlite3_free(p->aiException); - sqlite3_free(p); +static void cellUnion(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){ + int ii; + if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ + for(ii=0; ii<(pRtree->nDim*2); ii+=2){ + p1->aCoord[ii].f = MIN(p1->aCoord[ii].f, p2->aCoord[ii].f); + p1->aCoord[ii+1].f = MAX(p1->aCoord[ii+1].f, p2->aCoord[ii+1].f); + } + }else{ + for(ii=0; ii<(pRtree->nDim*2); ii+=2){ + p1->aCoord[ii].i = MIN(p1->aCoord[ii].i, p2->aCoord[ii].i); + p1->aCoord[ii+1].i = MAX(p1->aCoord[ii+1].i, p2->aCoord[ii+1].i); + } } - return SQLITE_OK; } /* -** As part of a tokenchars= or separators= option, the CREATE VIRTUAL TABLE -** statement has specified that the tokenizer for this table shall consider -** all characters in string zIn/nIn to be separators (if bAlnum==0) or -** token characters (if bAlnum==1). -** -** For each codepoint in the zIn/nIn string, this function checks if the -** sqlite3FtsUnicodeIsalnum() function already returns the desired result. -** If so, no action is taken. Otherwise, the codepoint is added to the -** unicode_tokenizer.aiException[] array. For the purposes of tokenization, -** the return value of sqlite3FtsUnicodeIsalnum() is inverted for all -** codepoints in the aiException[] array. -** -** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic() -** identifies as a diacritic) occurs in the zIn/nIn string it is ignored. -** It is not possible to change the behavior of the tokenizer with respect -** to these codepoints. +** Return true if the area covered by p2 is a subset of the area covered +** by p1. False otherwise. */ -static int unicodeAddExceptions( - unicode_tokenizer *p, /* Tokenizer to add exceptions to */ - int bAlnum, /* Replace Isalnum() return value with this */ - const char *zIn, /* Array of characters to make exceptions */ - int nIn /* Length of z in bytes */ -){ - const unsigned char *z = (const unsigned char *)zIn; - const unsigned char *zTerm = &z[nIn]; - int iCode; - int nEntry = 0; - - assert( bAlnum==0 || bAlnum==1 ); - - while( zeCoordType==RTREE_COORD_INT32); + for(ii=0; ii<(pRtree->nDim*2); ii+=2){ + RtreeCoord *a1 = &p1->aCoord[ii]; + RtreeCoord *a2 = &p2->aCoord[ii]; + if( (!isInt && (a2[0].fa1[1].f)) + || ( isInt && (a2[0].ia1[1].i)) ){ - nEntry++; - } - } - - if( nEntry ){ - int *aNew; /* New aiException[] array */ - int nNew; /* Number of valid entries in array aNew[] */ - - aNew = sqlite3_realloc(p->aiException, (p->nException+nEntry)*sizeof(int)); - if( aNew==0 ) return SQLITE_NOMEM; - nNew = p->nException; - - z = (const unsigned char *)zIn; - while( zi; j--) aNew[j] = aNew[j-1]; - aNew[i] = iCode; - nNew++; - } + return 0; } - p->aiException = aNew; - p->nException = nNew; } - - return SQLITE_OK; + return 1; } /* -** Return true if the p->aiException[] array contains the value iCode. +** Return the amount cell p would grow by if it were unioned with pCell. */ -static int unicodeIsException(unicode_tokenizer *p, int iCode){ - if( p->nException>0 ){ - int *a = p->aiException; - int iLo = 0; - int iHi = p->nException-1; +static RtreeDValue cellGrowth(Rtree *pRtree, RtreeCell *p, RtreeCell *pCell){ + RtreeDValue area; + RtreeCell cell; + memcpy(&cell, p, sizeof(RtreeCell)); + area = cellArea(pRtree, &cell); + cellUnion(pRtree, &cell, pCell); + return (cellArea(pRtree, &cell)-area); +} - while( iHi>=iLo ){ - int iTest = (iHi + iLo) / 2; - if( iCode==a[iTest] ){ - return 1; - }else if( iCode>a[iTest] ){ - iLo = iTest+1; +static RtreeDValue cellOverlap( + Rtree *pRtree, + RtreeCell *p, + RtreeCell *aCell, + int nCell +){ + int ii; + RtreeDValue overlap = RTREE_ZERO; + for(ii=0; iinDim*2); jj+=2){ + RtreeDValue x1, x2; + x1 = MAX(DCOORD(p->aCoord[jj]), DCOORD(aCell[ii].aCoord[jj])); + x2 = MIN(DCOORD(p->aCoord[jj+1]), DCOORD(aCell[ii].aCoord[jj+1])); + if( x2bRemoveDiacritic = 1; + for(ii=0; rc==SQLITE_OK && ii<(pRtree->iDepth-iHeight); ii++){ + int iCell; + sqlite3_int64 iBest = 0; - for(i=0; rc==SQLITE_OK && ibRemoveDiacritic = 1; - } - else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){ - pNew->bRemoveDiacritic = 0; - } - else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){ - rc = unicodeAddExceptions(pNew, 1, &z[11], n-11); - } - else if( n>=11 && memcmp("separators=", z, 11)==0 ){ - rc = unicodeAddExceptions(pNew, 0, &z[11], n-11); - } - else{ - /* Unrecognized argument */ - rc = SQLITE_ERROR; + int nCell = NCELL(pNode); + RtreeCell cell; + RtreeNode *pChild; + + RtreeCell *aCell = 0; + + /* Select the child node which will be enlarged the least if pCell + ** is inserted into it. Resolve ties by choosing the entry with + ** the smallest area. + */ + for(iCell=0; iCellpParent ){ + RtreeNode *pParent = p->pParent; + RtreeCell cell; + int iCell; - pCsr = (unicode_cursor *)sqlite3_malloc(sizeof(unicode_cursor)); - if( pCsr==0 ){ - return SQLITE_NOMEM; - } - memset(pCsr, 0, sizeof(unicode_cursor)); + if( nodeParentIndex(pRtree, p, &iCell) ){ + return SQLITE_CORRUPT_VTAB; + } - pCsr->aInput = (const unsigned char *)aInput; - if( aInput==0 ){ - pCsr->nInput = 0; - }else if( nInput<0 ){ - pCsr->nInput = (int)strlen(aInput); - }else{ - pCsr->nInput = nInput; + nodeGetCell(pRtree, pParent, iCell, &cell); + if( !cellContains(pRtree, &cell, pCell) ){ + cellUnion(pRtree, &cell, pCell); + nodeOverwriteCell(pRtree, pParent, &cell, iCell); + } + + p = pParent; } - - *pp = &pCsr->base; - UNUSED_PARAMETER(p); return SQLITE_OK; } /* -** Close a tokenization cursor previously opened by a call to -** simpleOpen() above. +** Write mapping (iRowid->iNode) to the _rowid table. */ -static int unicodeClose(sqlite3_tokenizer_cursor *pCursor){ - unicode_cursor *pCsr = (unicode_cursor *) pCursor; - sqlite3_free(pCsr->zToken); - sqlite3_free(pCsr); - return SQLITE_OK; +static int rowidWrite(Rtree *pRtree, sqlite3_int64 iRowid, sqlite3_int64 iNode){ + sqlite3_bind_int64(pRtree->pWriteRowid, 1, iRowid); + sqlite3_bind_int64(pRtree->pWriteRowid, 2, iNode); + sqlite3_step(pRtree->pWriteRowid); + return sqlite3_reset(pRtree->pWriteRowid); } /* -** Extract the next token from a tokenization cursor. The cursor must -** have been opened by a prior call to simpleOpen(). +** Write mapping (iNode->iPar) to the _parent table. */ -static int unicodeNext( - sqlite3_tokenizer_cursor *pC, /* Cursor returned by simpleOpen */ - const char **paToken, /* OUT: Token text */ - int *pnToken, /* OUT: Number of bytes at *paToken */ - int *piStart, /* OUT: Starting offset of token */ - int *piEnd, /* OUT: Ending offset of token */ - int *piPos /* OUT: Position integer of token */ -){ - unicode_cursor *pCsr = (unicode_cursor *)pC; - unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer); - int iCode = 0; - char *zOut; - const unsigned char *z = &pCsr->aInput[pCsr->iOff]; - const unsigned char *zStart = z; - const unsigned char *zEnd; - const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput]; - - /* Scan past any delimiter characters before the start of the next token. - ** Return SQLITE_DONE early if this takes us all the way to the end of - ** the input. */ - while( z=zTerm ) return SQLITE_DONE; - - zOut = pCsr->zToken; - do { - int iOut; - - /* Grow the output buffer if required. */ - if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){ - char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64); - if( !zNew ) return SQLITE_NOMEM; - zOut = &zNew[zOut - pCsr->zToken]; - pCsr->zToken = zNew; - pCsr->nAlloc += 64; - } - - /* Write the folded case of the last character read to the output */ - zEnd = z; - iOut = sqlite3FtsUnicodeFold(iCode, p->bRemoveDiacritic); - if( iOut ){ - WRITE_UTF8(zOut, iOut); - } - - /* If the cursor is not at EOF, read the next character */ - if( z>=zTerm ) break; - READ_UTF8(z, zTerm, iCode); - }while( unicodeIsAlnum(p, iCode) - || sqlite3FtsUnicodeIsdiacritic(iCode) - ); - - /* Set the output variables and return. */ - pCsr->iOff = (int)(z - pCsr->aInput); - *paToken = pCsr->zToken; - *pnToken = (int)(zOut - pCsr->zToken); - *piStart = (int)(zStart - pCsr->aInput); - *piEnd = (int)(zEnd - pCsr->aInput); - *piPos = pCsr->iToken++; - return SQLITE_OK; +static int parentWrite(Rtree *pRtree, sqlite3_int64 iNode, sqlite3_int64 iPar){ + sqlite3_bind_int64(pRtree->pWriteParent, 1, iNode); + sqlite3_bind_int64(pRtree->pWriteParent, 2, iPar); + sqlite3_step(pRtree->pWriteParent); + return sqlite3_reset(pRtree->pWriteParent); } -/* -** Set *ppModule to a pointer to the sqlite3_tokenizer_module -** structure for the unicode tokenizer. -*/ -SQLITE_PRIVATE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){ - static const sqlite3_tokenizer_module module = { - 0, - unicodeCreate, - unicodeDestroy, - unicodeOpen, - unicodeClose, - unicodeNext, - 0, - }; - *ppModule = &module; -} +static int rtreeInsertCell(Rtree *, RtreeNode *, RtreeCell *, int); -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ -#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */ -/************** End of fts3_unicode.c ****************************************/ -/************** Begin file fts3_unicode2.c ***********************************/ /* -** 2012 May 25 +** Arguments aIdx, aDistance and aSpare all point to arrays of size +** nIdx. The aIdx array contains the set of integers from 0 to +** (nIdx-1) in no particular order. This function sorts the values +** in aIdx according to the indexed values in aDistance. For +** example, assuming the inputs: ** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: +** aIdx = { 0, 1, 2, 3 } +** aDistance = { 5.0, 2.0, 7.0, 6.0 } ** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. +** this function sets the aIdx array to contain: ** -****************************************************************************** +** aIdx = { 0, 1, 2, 3 } +** +** The aSpare array is used as temporary working space by the +** sorting algorithm. */ +static void SortByDistance( + int *aIdx, + int nIdx, + RtreeDValue *aDistance, + int *aSpare +){ + if( nIdx>1 ){ + int iLeft = 0; + int iRight = 0; -/* -** DO NOT EDIT THIS MACHINE GENERATED FILE. -*/ + int nLeft = nIdx/2; + int nRight = nIdx-nLeft; + int *aLeft = aIdx; + int *aRight = &aIdx[nLeft]; -#ifndef SQLITE_DISABLE_FTS3_UNICODE -#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) + SortByDistance(aLeft, nLeft, aDistance, aSpare); + SortByDistance(aRight, nRight, aDistance, aSpare); -/* #include */ + memcpy(aSpare, aLeft, sizeof(int)*nLeft); + aLeft = aSpare; + + while( iLeft1 ){ - if( c<128 ){ - return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); - }else if( c<(1<<22) ){ - unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; - int iRes = 0; - int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; - int iLo = 0; - while( iHi>=iLo ){ - int iTest = (iHi + iLo) / 2; - if( key >= aEntry[iTest] ){ - iRes = iTest; - iLo = iTest+1; + int iLeft = 0; + int iRight = 0; + + int nLeft = nIdx/2; + int nRight = nIdx-nLeft; + int *aLeft = aIdx; + int *aRight = &aIdx[nLeft]; + + SortByDimension(pRtree, aLeft, nLeft, iDim, aCell, aSpare); + SortByDimension(pRtree, aRight, nRight, iDim, aCell, aSpare); + + memcpy(aSpare, aLeft, sizeof(int)*nLeft); + aLeft = aSpare; + while( iLeft=aEntry[iRes] ); - return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); + +#if 0 + /* Check that the sort worked */ + { + int jj; + for(jj=1; jj=iLo ){ - int iTest = (iHi + iLo) / 2; - if( key >= aDia[iTest] ){ - iRes = iTest; - iLo = iTest+1; - }else{ - iHi = iTest-1; + int iBestDim = 0; + int iBestSplit = 0; + RtreeDValue fBestMargin = RTREE_ZERO; + + int nByte = (pRtree->nDim+1)*(sizeof(int*)+nCell*sizeof(int)); + + aaSorted = (int **)sqlite3_malloc(nByte); + if( !aaSorted ){ + return SQLITE_NOMEM; + } + + aSpare = &((int *)&aaSorted[pRtree->nDim])[pRtree->nDim*nCell]; + memset(aaSorted, 0, nByte); + for(ii=0; iinDim; ii++){ + int jj; + aaSorted[ii] = &((int *)&aaSorted[pRtree->nDim])[ii*nCell]; + for(jj=0; jj=aDia[iRes] ); - return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]); + + for(ii=0; iinDim; ii++){ + RtreeDValue margin = RTREE_ZERO; + RtreeDValue fBestOverlap = RTREE_ZERO; + RtreeDValue fBestArea = RTREE_ZERO; + int iBestLeft = 0; + int nLeft; + + for( + nLeft=RTREE_MINCELLS(pRtree); + nLeft<=(nCell-RTREE_MINCELLS(pRtree)); + nLeft++ + ){ + RtreeCell left; + RtreeCell right; + int kk; + RtreeDValue overlap; + RtreeDValue area; + + memcpy(&left, &aCell[aaSorted[ii][0]], sizeof(RtreeCell)); + memcpy(&right, &aCell[aaSorted[ii][nCell-1]], sizeof(RtreeCell)); + for(kk=1; kk<(nCell-1); kk++){ + if( kk817 ) return 0; - return (c < 768+32) ? - (mask0 & (1 << (c-768))) : - (mask1 & (1 << (c-768-32))); +static int updateMapping( + Rtree *pRtree, + i64 iRowid, + RtreeNode *pNode, + int iHeight +){ + int (*xSetMapping)(Rtree *, sqlite3_int64, sqlite3_int64); + xSetMapping = ((iHeight==0)?rowidWrite:parentWrite); + if( iHeight>0 ){ + RtreeNode *pChild = nodeHashLookup(pRtree, iRowid); + if( pChild ){ + nodeRelease(pRtree, pChild->pParent); + nodeReference(pNode); + pChild->pParent = pNode; + } + } + return xSetMapping(pRtree, iRowid, pNode->iNode); } +static int SplitNode( + Rtree *pRtree, + RtreeNode *pNode, + RtreeCell *pCell, + int iHeight +){ + int i; + int newCellIsRight = 0; + + int rc = SQLITE_OK; + int nCell = NCELL(pNode); + RtreeCell *aCell; + int *aiUsed; -/* -** Interpret the argument as a unicode codepoint. If the codepoint -** is an upper case character that has a lower case equivalent, -** return the codepoint corresponding to the lower case version. -** Otherwise, return a copy of the argument. -** -** The results are undefined if the value passed to this function -** is less than zero. -*/ -SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){ - /* Each entry in the following array defines a rule for folding a range - ** of codepoints to lower case. The rule applies to a range of nRange - ** codepoints starting at codepoint iCode. - ** - ** If the least significant bit in flags is clear, then the rule applies - ** to all nRange codepoints (i.e. all nRange codepoints are upper case and - ** need to be folded). Or, if it is set, then the rule only applies to - ** every second codepoint in the range, starting with codepoint C. - ** - ** The 7 most significant bits in flags are an index into the aiOff[] - ** array. If a specific codepoint C does require folding, then its lower - ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF). - ** - ** The contents of this array are generated by parsing the CaseFolding.txt - ** file distributed as part of the "Unicode Character Database". See - ** http://www.unicode.org for details. + RtreeNode *pLeft = 0; + RtreeNode *pRight = 0; + + RtreeCell leftbbox; + RtreeCell rightbbox; + + /* Allocate an array and populate it with a copy of pCell and + ** all cells from node pLeft. Then zero the original node. */ - static const struct TableEntry { - unsigned short iCode; - unsigned char flags; - unsigned char nRange; - } aEntry[] = { - {65, 14, 26}, {181, 64, 1}, {192, 14, 23}, - {216, 14, 7}, {256, 1, 48}, {306, 1, 6}, - {313, 1, 16}, {330, 1, 46}, {376, 116, 1}, - {377, 1, 6}, {383, 104, 1}, {385, 50, 1}, - {386, 1, 4}, {390, 44, 1}, {391, 0, 1}, - {393, 42, 2}, {395, 0, 1}, {398, 32, 1}, - {399, 38, 1}, {400, 40, 1}, {401, 0, 1}, - {403, 42, 1}, {404, 46, 1}, {406, 52, 1}, - {407, 48, 1}, {408, 0, 1}, {412, 52, 1}, - {413, 54, 1}, {415, 56, 1}, {416, 1, 6}, - {422, 60, 1}, {423, 0, 1}, {425, 60, 1}, - {428, 0, 1}, {430, 60, 1}, {431, 0, 1}, - {433, 58, 2}, {435, 1, 4}, {439, 62, 1}, - {440, 0, 1}, {444, 0, 1}, {452, 2, 1}, - {453, 0, 1}, {455, 2, 1}, {456, 0, 1}, - {458, 2, 1}, {459, 1, 18}, {478, 1, 18}, - {497, 2, 1}, {498, 1, 4}, {502, 122, 1}, - {503, 134, 1}, {504, 1, 40}, {544, 110, 1}, - {546, 1, 18}, {570, 70, 1}, {571, 0, 1}, - {573, 108, 1}, {574, 68, 1}, {577, 0, 1}, - {579, 106, 1}, {580, 28, 1}, {581, 30, 1}, - {582, 1, 10}, {837, 36, 1}, {880, 1, 4}, - {886, 0, 1}, {902, 18, 1}, {904, 16, 3}, - {908, 26, 1}, {910, 24, 2}, {913, 14, 17}, - {931, 14, 9}, {962, 0, 1}, {975, 4, 1}, - {976, 140, 1}, {977, 142, 1}, {981, 146, 1}, - {982, 144, 1}, {984, 1, 24}, {1008, 136, 1}, - {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1}, - {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1}, - {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32}, - {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1}, - {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38}, - {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1}, - {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1}, - {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6}, - {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6}, - {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8}, - {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2}, - {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1}, - {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2}, - {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2}, - {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2}, - {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1}, - {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16}, - {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47}, - {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1}, - {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1}, - {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1}, - {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2}, - {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1}, - {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14}, - {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1}, - {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1}, - {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1}, - {65313, 14, 26}, - }; - static const unsigned short aiOff[] = { - 1, 2, 8, 15, 16, 26, 28, 32, - 37, 38, 40, 48, 63, 64, 69, 71, - 79, 80, 116, 202, 203, 205, 206, 207, - 209, 210, 211, 213, 214, 217, 218, 219, - 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, - 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, - 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, - 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, - 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, - 65514, 65521, 65527, 65528, 65529, - }; + aCell = sqlite3_malloc((sizeof(RtreeCell)+sizeof(int))*(nCell+1)); + if( !aCell ){ + rc = SQLITE_NOMEM; + goto splitnode_out; + } + aiUsed = (int *)&aCell[nCell+1]; + memset(aiUsed, 0, sizeof(int)*(nCell+1)); + for(i=0; iiNode==1 ){ + pRight = nodeNew(pRtree, pNode); + pLeft = nodeNew(pRtree, pNode); + pRtree->iDepth++; + pNode->isDirty = 1; + writeInt16(pNode->zData, pRtree->iDepth); + }else{ + pLeft = pNode; + pRight = nodeNew(pRtree, pLeft->pParent); + nodeReference(pLeft); + } - assert( c>=0 ); - assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); + if( !pLeft || !pRight ){ + rc = SQLITE_NOMEM; + goto splitnode_out; + } - if( c<128 ){ - if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); - }else if( c<65536 ){ - int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; - int iLo = 0; - int iRes = -1; + memset(pLeft->zData, 0, pRtree->iNodeSize); + memset(pRight->zData, 0, pRtree->iNodeSize); - while( iHi>=iLo ){ - int iTest = (iHi + iLo) / 2; - int cmp = (c - aEntry[iTest].iCode); - if( cmp>=0 ){ - iRes = iTest; - iLo = iTest+1; - }else{ - iHi = iTest-1; - } + rc = splitNodeStartree(pRtree, aCell, nCell, pLeft, pRight, + &leftbbox, &rightbbox); + if( rc!=SQLITE_OK ){ + goto splitnode_out; + } + + /* Ensure both child nodes have node numbers assigned to them by calling + ** nodeWrite(). Node pRight always needs a node number, as it was created + ** by nodeNew() above. But node pLeft sometimes already has a node number. + ** In this case avoid the all to nodeWrite(). + */ + if( SQLITE_OK!=(rc = nodeWrite(pRtree, pRight)) + || (0==pLeft->iNode && SQLITE_OK!=(rc = nodeWrite(pRtree, pLeft))) + ){ + goto splitnode_out; + } + + rightbbox.iRowid = pRight->iNode; + leftbbox.iRowid = pLeft->iNode; + + if( pNode->iNode==1 ){ + rc = rtreeInsertCell(pRtree, pLeft->pParent, &leftbbox, iHeight+1); + if( rc!=SQLITE_OK ){ + goto splitnode_out; } - assert( iRes<0 || c>=aEntry[iRes].iCode ); + }else{ + RtreeNode *pParent = pLeft->pParent; + int iCell; + rc = nodeParentIndex(pRtree, pLeft, &iCell); + if( rc==SQLITE_OK ){ + nodeOverwriteCell(pRtree, pParent, &leftbbox, iCell); + rc = AdjustTree(pRtree, pParent, &leftbbox); + } + if( rc!=SQLITE_OK ){ + goto splitnode_out; + } + } + if( (rc = rtreeInsertCell(pRtree, pRight->pParent, &rightbbox, iHeight+1)) ){ + goto splitnode_out; + } - if( iRes>=0 ){ - const struct TableEntry *p = &aEntry[iRes]; - if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ - ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; - assert( ret>0 ); + for(i=0; iiRowid ){ + newCellIsRight = 1; + } + if( rc!=SQLITE_OK ){ + goto splitnode_out; + } + } + if( pNode->iNode==1 ){ + for(i=0; iiRowid, pLeft, iHeight); + } - if( bRemoveDiacritic ) ret = remove_diacritic(ret); + if( rc==SQLITE_OK ){ + rc = nodeRelease(pRtree, pRight); + pRight = 0; } - - else if( c>=66560 && c<66600 ){ - ret = c + 40; + if( rc==SQLITE_OK ){ + rc = nodeRelease(pRtree, pLeft); + pLeft = 0; } - return ret; +splitnode_out: + nodeRelease(pRtree, pRight); + nodeRelease(pRtree, pLeft); + sqlite3_free(aCell); + return rc; } -#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */ -#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */ -/************** End of fts3_unicode2.c ***************************************/ -/************** Begin file rtree.c *******************************************/ /* -** 2001 September 15 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. +** If node pLeaf is not the root of the r-tree and its pParent pointer is +** still NULL, load all ancestor nodes of pLeaf into memory and populate +** the pLeaf->pParent chain all the way up to the root node. ** -************************************************************************* -** This file contains code for implementations of the r-tree and r*-tree -** algorithms packaged as an SQLite virtual table module. +** This operation is required when a row is deleted (or updated - an update +** is implemented as a delete followed by an insert). SQLite provides the +** rowid of the row to delete, which can be used to find the leaf on which +** the entry resides (argument pLeaf). Once the leaf is located, this +** function is called to determine its ancestry. */ +static int fixLeafParent(Rtree *pRtree, RtreeNode *pLeaf){ + int rc = SQLITE_OK; + RtreeNode *pChild = pLeaf; + while( rc==SQLITE_OK && pChild->iNode!=1 && pChild->pParent==0 ){ + int rc2 = SQLITE_OK; /* sqlite3_reset() return code */ + sqlite3_bind_int64(pRtree->pReadParent, 1, pChild->iNode); + rc = sqlite3_step(pRtree->pReadParent); + if( rc==SQLITE_ROW ){ + RtreeNode *pTest; /* Used to test for reference loops */ + i64 iNode; /* Node number of parent node */ -/* -** Database Format of R-Tree Tables -** -------------------------------- -** -** The data structure for a single virtual r-tree table is stored in three -** native SQLite tables declared as follows. In each case, the '%' character -** in the table name is replaced with the user-supplied name of the r-tree -** table. -** -** CREATE TABLE %_node(nodeno INTEGER PRIMARY KEY, data BLOB) -** CREATE TABLE %_parent(nodeno INTEGER PRIMARY KEY, parentnode INTEGER) -** CREATE TABLE %_rowid(rowid INTEGER PRIMARY KEY, nodeno INTEGER) -** -** The data for each node of the r-tree structure is stored in the %_node -** table. For each node that is not the root node of the r-tree, there is -** an entry in the %_parent table associating the node with its parent. -** And for each row of data in the table, there is an entry in the %_rowid -** table that maps from the entries rowid to the id of the node that it -** is stored on. -** -** The root node of an r-tree always exists, even if the r-tree table is -** empty. The nodeno of the root node is always 1. All other nodes in the -** table must be the same size as the root node. The content of each node -** is formatted as follows: -** -** 1. If the node is the root node (node 1), then the first 2 bytes -** of the node contain the tree depth as a big-endian integer. -** For non-root nodes, the first 2 bytes are left unused. -** -** 2. The next 2 bytes contain the number of entries currently -** stored in the node. -** -** 3. The remainder of the node contains the node entries. Each entry -** consists of a single 8-byte integer followed by an even number -** of 4-byte coordinates. For leaf nodes the integer is the rowid -** of a record. For internal nodes it is the node number of a -** child page. -*/ + /* Before setting pChild->pParent, test that we are not creating a + ** loop of references (as we would if, say, pChild==pParent). We don't + ** want to do this as it leads to a memory leak when trying to delete + ** the referenced counted node structures. + */ + iNode = sqlite3_column_int64(pRtree->pReadParent, 0); + for(pTest=pLeaf; pTest && pTest->iNode!=iNode; pTest=pTest->pParent); + if( !pTest ){ + rc2 = nodeAcquire(pRtree, iNode, 0, &pChild->pParent); + } + } + rc = sqlite3_reset(pRtree->pReadParent); + if( rc==SQLITE_OK ) rc = rc2; + if( rc==SQLITE_OK && !pChild->pParent ) rc = SQLITE_CORRUPT_VTAB; + pChild = pChild->pParent; + } + return rc; +} -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_RTREE) +static int deleteCell(Rtree *, RtreeNode *, int, int); -#ifndef SQLITE_CORE - SQLITE_EXTENSION_INIT1 -#else -#endif +static int removeNode(Rtree *pRtree, RtreeNode *pNode, int iHeight){ + int rc; + int rc2; + RtreeNode *pParent = 0; + int iCell; -/* #include */ -/* #include */ -/* #include */ + assert( pNode->nRef==1 ); -#ifndef SQLITE_AMALGAMATION -#include "sqlite3rtree.h" -typedef sqlite3_int64 i64; -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned int u32; -#endif + /* Remove the entry in the parent cell. */ + rc = nodeParentIndex(pRtree, pNode, &iCell); + if( rc==SQLITE_OK ){ + pParent = pNode->pParent; + pNode->pParent = 0; + rc = deleteCell(pRtree, pParent, iCell, iHeight+1); + } + rc2 = nodeRelease(pRtree, pParent); + if( rc==SQLITE_OK ){ + rc = rc2; + } + if( rc!=SQLITE_OK ){ + return rc; + } -/* The following macro is used to suppress compiler warnings. -*/ -#ifndef UNUSED_PARAMETER -# define UNUSED_PARAMETER(x) (void)(x) -#endif + /* Remove the xxx_node entry. */ + sqlite3_bind_int64(pRtree->pDeleteNode, 1, pNode->iNode); + sqlite3_step(pRtree->pDeleteNode); + if( SQLITE_OK!=(rc = sqlite3_reset(pRtree->pDeleteNode)) ){ + return rc; + } -typedef struct Rtree Rtree; -typedef struct RtreeCursor RtreeCursor; -typedef struct RtreeNode RtreeNode; -typedef struct RtreeCell RtreeCell; -typedef struct RtreeConstraint RtreeConstraint; -typedef struct RtreeMatchArg RtreeMatchArg; -typedef struct RtreeGeomCallback RtreeGeomCallback; -typedef union RtreeCoord RtreeCoord; -typedef struct RtreeSearchPoint RtreeSearchPoint; + /* Remove the xxx_parent entry. */ + sqlite3_bind_int64(pRtree->pDeleteParent, 1, pNode->iNode); + sqlite3_step(pRtree->pDeleteParent); + if( SQLITE_OK!=(rc = sqlite3_reset(pRtree->pDeleteParent)) ){ + return rc; + } + + /* Remove the node from the in-memory hash table and link it into + ** the Rtree.pDeleted list. Its contents will be re-inserted later on. + */ + nodeHashDelete(pRtree, pNode); + pNode->iNode = iHeight; + pNode->pNext = pRtree->pDeleted; + pNode->nRef++; + pRtree->pDeleted = pNode; -/* The rtree may have between 1 and RTREE_MAX_DIMENSIONS dimensions. */ -#define RTREE_MAX_DIMENSIONS 5 + return SQLITE_OK; +} -/* Size of hash table Rtree.aHash. This hash table is not expected to -** ever contain very many entries, so a fixed number of buckets is -** used. -*/ -#define HASHSIZE 97 +static int fixBoundingBox(Rtree *pRtree, RtreeNode *pNode){ + RtreeNode *pParent = pNode->pParent; + int rc = SQLITE_OK; + if( pParent ){ + int ii; + int nCell = NCELL(pNode); + RtreeCell box; /* Bounding box for pNode */ + nodeGetCell(pRtree, pNode, 0, &box); + for(ii=1; iiiNode; + rc = nodeParentIndex(pRtree, pNode, &ii); + if( rc==SQLITE_OK ){ + nodeOverwriteCell(pRtree, pParent, &box, ii); + rc = fixBoundingBox(pRtree, pParent); + } + } + return rc; +} -/* The xBestIndex method of this virtual table requires an estimate of -** the number of rows in the virtual table to calculate the costs of -** various strategies. If possible, this estimate is loaded from the -** sqlite_stat1 table (with RTREE_MIN_ROWEST as a hard-coded minimum). -** Otherwise, if no sqlite_stat1 entry is available, use -** RTREE_DEFAULT_ROWEST. +/* +** Delete the cell at index iCell of node pNode. After removing the +** cell, adjust the r-tree data structure if required. */ -#define RTREE_DEFAULT_ROWEST 1048576 -#define RTREE_MIN_ROWEST 100 +static int deleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell, int iHeight){ + RtreeNode *pParent; + int rc; -/* -** An rtree virtual-table object. -*/ -struct Rtree { - sqlite3_vtab base; /* Base class. Must be first */ - sqlite3 *db; /* Host database connection */ - int iNodeSize; /* Size in bytes of each node in the node table */ - u8 nDim; /* Number of dimensions */ - u8 eCoordType; /* RTREE_COORD_REAL32 or RTREE_COORD_INT32 */ - u8 nBytesPerCell; /* Bytes consumed per cell */ - int iDepth; /* Current depth of the r-tree structure */ - char *zDb; /* Name of database containing r-tree table */ - char *zName; /* Name of r-tree table */ - int nBusy; /* Current number of users of this structure */ - i64 nRowEst; /* Estimated number of rows in this table */ + if( SQLITE_OK!=(rc = fixLeafParent(pRtree, pNode)) ){ + return rc; + } - /* List of nodes removed during a CondenseTree operation. List is - ** linked together via the pointer normally used for hash chains - - ** RtreeNode.pNext. RtreeNode.iNode stores the depth of the sub-tree - ** headed by the node (leaf nodes have RtreeNode.iNode==0). + /* Remove the cell from the node. This call just moves bytes around + ** the in-memory node image, so it cannot fail. */ - RtreeNode *pDeleted; - int iReinsertHeight; /* Height of sub-trees Reinsert() has run on */ + nodeDeleteCell(pRtree, pNode, iCell); - /* Statements to read/write/delete a record from xxx_node */ - sqlite3_stmt *pReadNode; - sqlite3_stmt *pWriteNode; - sqlite3_stmt *pDeleteNode; + /* If the node is not the tree root and now has less than the minimum + ** number of cells, remove it from the tree. Otherwise, update the + ** cell in the parent node so that it tightly contains the updated + ** node. + */ + pParent = pNode->pParent; + assert( pParent || pNode->iNode==1 ); + if( pParent ){ + if( NCELL(pNode)=1 the id is the node-id of -** the node that the RtreeSearchPoint represents. When iLevel==0, however, -** the id is of the parent node and the cell that RtreeSearchPoint -** represents is the iCell-th entry in the parent node. -*/ -struct RtreeSearchPoint { - RtreeDValue rScore; /* The score for this node. Smallest goes first. */ - sqlite3_int64 id; /* Node ID */ - u8 iLevel; /* 0=entries. 1=leaf node. 2+ for higher */ - u8 eWithin; /* PARTLY_WITHIN or FULLY_WITHIN */ - u8 iCell; /* Cell index within the node */ -}; + for(ii=0; iinDim; iDim++){ + aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2]); + aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2+1]); + } + } + for(iDim=0; iDimnDim; iDim++){ + aCenterCoord[iDim] = (aCenterCoord[iDim]/(nCell*(RtreeDValue)2)); + } -/* -** The minimum number of cells allowed for a node is a third of the -** maximum. In Gutman's notation: -** -** m = M/3 -** -** If an R*-tree "Reinsert" operation is required, the same number of -** cells are removed from the overfull node and reinserted into the tree. -*/ -#define RTREE_MINCELLS(p) ((((p)->iNodeSize-4)/(p)->nBytesPerCell)/3) -#define RTREE_REINSERT(p) RTREE_MINCELLS(p) -#define RTREE_MAXCELLS 51 + for(ii=0; iinDim; iDim++){ + RtreeDValue coord = (DCOORD(aCell[ii].aCoord[iDim*2+1]) - + DCOORD(aCell[ii].aCoord[iDim*2])); + aDistance[ii] += (coord-aCenterCoord[iDim])*(coord-aCenterCoord[iDim]); + } + } -/* -** The smallest possible node-size is (512-64)==448 bytes. And the largest -** supported cell size is 48 bytes (8 byte rowid + ten 4 byte coordinates). -** Therefore all non-root nodes must contain at least 3 entries. Since -** 2^40 is greater than 2^64, an r-tree structure always has a depth of -** 40 or less. -*/ -#define RTREE_MAX_DEPTH 40 + SortByDistance(aOrder, nCell, aDistance, aSpare); + nodeZero(pRtree, pNode); + + for(ii=0; rc==SQLITE_OK && ii<(nCell-(RTREE_MINCELLS(pRtree)+1)); ii++){ + RtreeCell *p = &aCell[aOrder[ii]]; + nodeInsertCell(pRtree, pNode, p); + if( p->iRowid==pCell->iRowid ){ + if( iHeight==0 ){ + rc = rowidWrite(pRtree, p->iRowid, pNode->iNode); + }else{ + rc = parentWrite(pRtree, p->iRowid, pNode->iNode); + } + } + } + if( rc==SQLITE_OK ){ + rc = fixBoundingBox(pRtree, pNode); + } + for(; rc==SQLITE_OK && iiiNode currently contains + ** the height of the sub-tree headed by the cell. + */ + RtreeNode *pInsert; + RtreeCell *p = &aCell[aOrder[ii]]; + rc = ChooseLeaf(pRtree, p, iHeight, &pInsert); + if( rc==SQLITE_OK ){ + int rc2; + rc = rtreeInsertCell(pRtree, pInsert, p, iHeight); + rc2 = nodeRelease(pRtree, pInsert); + if( rc==SQLITE_OK ){ + rc = rc2; + } + } + } + sqlite3_free(aCell); + return rc; +} /* -** Number of entries in the cursor RtreeNode cache. The first entry is -** used to cache the RtreeNode for RtreeCursor.sPoint. The remaining -** entries cache the RtreeNode for the first elements of the priority queue. +** Insert cell pCell into node pNode. Node pNode is the head of a +** subtree iHeight high (leaf nodes have iHeight==0). */ -#define RTREE_CACHE_SZ 5 +static int rtreeInsertCell( + Rtree *pRtree, + RtreeNode *pNode, + RtreeCell *pCell, + int iHeight +){ + int rc = SQLITE_OK; + if( iHeight>0 ){ + RtreeNode *pChild = nodeHashLookup(pRtree, pCell->iRowid); + if( pChild ){ + nodeRelease(pRtree, pChild->pParent); + nodeReference(pNode); + pChild->pParent = pNode; + } + } + if( nodeInsertCell(pRtree, pNode, pCell) ){ + if( iHeight<=pRtree->iReinsertHeight || pNode->iNode==1){ + rc = SplitNode(pRtree, pNode, pCell, iHeight); + }else{ + pRtree->iReinsertHeight = iHeight; + rc = Reinsert(pRtree, pNode, pCell, iHeight); + } + }else{ + rc = AdjustTree(pRtree, pNode, pCell); + if( rc==SQLITE_OK ){ + if( iHeight==0 ){ + rc = rowidWrite(pRtree, pCell->iRowid, pNode->iNode); + }else{ + rc = parentWrite(pRtree, pCell->iRowid, pNode->iNode); + } + } + } + return rc; +} -/* -** An rtree cursor object. -*/ -struct RtreeCursor { - sqlite3_vtab_cursor base; /* Base class. Must be first */ - u8 atEOF; /* True if at end of search */ - u8 bPoint; /* True if sPoint is valid */ - int iStrategy; /* Copy of idxNum search parameter */ - int nConstraint; /* Number of entries in aConstraint */ - RtreeConstraint *aConstraint; /* Search constraints. */ - int nPointAlloc; /* Number of slots allocated for aPoint[] */ - int nPoint; /* Number of slots used in aPoint[] */ - int mxLevel; /* iLevel value for root of the tree */ - RtreeSearchPoint *aPoint; /* Priority queue for search points */ - RtreeSearchPoint sPoint; /* Cached next search point */ - RtreeNode *aNode[RTREE_CACHE_SZ]; /* Rtree node cache */ - u32 anQueue[RTREE_MAX_DEPTH+1]; /* Number of queued entries by iLevel */ -}; +static int reinsertNodeContent(Rtree *pRtree, RtreeNode *pNode){ + int ii; + int rc = SQLITE_OK; + int nCell = NCELL(pNode); -/* Return the Rtree of a RtreeCursor */ -#define RTREE_OF_CURSOR(X) ((Rtree*)((X)->base.pVtab)) + for(ii=0; rc==SQLITE_OK && iiiNode currently contains + ** the height of the sub-tree headed by the cell. + */ + rc = ChooseLeaf(pRtree, &cell, (int)pNode->iNode, &pInsert); + if( rc==SQLITE_OK ){ + int rc2; + rc = rtreeInsertCell(pRtree, pInsert, &cell, (int)pNode->iNode); + rc2 = nodeRelease(pRtree, pInsert); + if( rc==SQLITE_OK ){ + rc = rc2; + } + } + } + return rc; +} /* -** The argument is an RtreeCoord. Return the value stored within the RtreeCoord -** formatted as a RtreeDValue (double or int64). This macro assumes that local -** variable pRtree points to the Rtree structure associated with the -** RtreeCoord. +** Select a currently unused rowid for a new r-tree record. */ -#ifdef SQLITE_RTREE_INT_ONLY -# define DCOORD(coord) ((RtreeDValue)coord.i) -#else -# define DCOORD(coord) ( \ - (pRtree->eCoordType==RTREE_COORD_REAL32) ? \ - ((double)coord.f) : \ - ((double)coord.i) \ - ) -#endif +static int newRowid(Rtree *pRtree, i64 *piRowid){ + int rc; + sqlite3_bind_null(pRtree->pWriteRowid, 1); + sqlite3_bind_null(pRtree->pWriteRowid, 2); + sqlite3_step(pRtree->pWriteRowid); + rc = sqlite3_reset(pRtree->pWriteRowid); + *piRowid = sqlite3_last_insert_rowid(pRtree->db); + return rc; +} /* -** A search constraint. +** Remove the entry with rowid=iDelete from the r-tree structure. */ -struct RtreeConstraint { - int iCoord; /* Index of constrained coordinate */ - int op; /* Constraining operation */ - union { - RtreeDValue rValue; /* Constraint value. */ - int (*xGeom)(sqlite3_rtree_geometry*,int,RtreeDValue*,int*); - int (*xQueryFunc)(sqlite3_rtree_query_info*); - } u; - sqlite3_rtree_query_info *pInfo; /* xGeom and xQueryFunc argument */ -}; +static int rtreeDeleteRowid(Rtree *pRtree, sqlite3_int64 iDelete){ + int rc; /* Return code */ + RtreeNode *pLeaf = 0; /* Leaf node containing record iDelete */ + int iCell; /* Index of iDelete cell in pLeaf */ + RtreeNode *pRoot; /* Root node of rtree structure */ -/* Possible values for RtreeConstraint.op */ -#define RTREE_EQ 0x41 /* A */ -#define RTREE_LE 0x42 /* B */ -#define RTREE_LT 0x43 /* C */ -#define RTREE_GE 0x44 /* D */ -#define RTREE_GT 0x45 /* E */ -#define RTREE_MATCH 0x46 /* F: Old-style sqlite3_rtree_geometry_callback() */ -#define RTREE_QUERY 0x47 /* G: New-style sqlite3_rtree_query_callback() */ + /* Obtain a reference to the root node to initialize Rtree.iDepth */ + rc = nodeAcquire(pRtree, 1, 0, &pRoot); -/* -** An rtree structure node. -*/ -struct RtreeNode { - RtreeNode *pParent; /* Parent node */ - i64 iNode; /* The node number */ - int nRef; /* Number of references to this node */ - int isDirty; /* True if the node needs to be written to disk */ - u8 *zData; /* Content of the node, as should be on disk */ - RtreeNode *pNext; /* Next node in this hash collision chain */ -}; + /* Obtain a reference to the leaf node that contains the entry + ** about to be deleted. + */ + if( rc==SQLITE_OK ){ + rc = findLeafNode(pRtree, iDelete, &pLeaf, 0); + } -/* Return the number of cells in a node */ -#define NCELL(pNode) readInt16(&(pNode)->zData[2]) + /* Delete the cell in question from the leaf node. */ + if( rc==SQLITE_OK ){ + int rc2; + rc = nodeRowidIndex(pRtree, pLeaf, iDelete, &iCell); + if( rc==SQLITE_OK ){ + rc = deleteCell(pRtree, pLeaf, iCell, 0); + } + rc2 = nodeRelease(pRtree, pLeaf); + if( rc==SQLITE_OK ){ + rc = rc2; + } + } -/* -** A single cell from a node, deserialized -*/ -struct RtreeCell { - i64 iRowid; /* Node or entry ID */ - RtreeCoord aCoord[RTREE_MAX_DIMENSIONS*2]; /* Bounding box coordinates */ -}; + /* Delete the corresponding entry in the _rowid table. */ + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pRtree->pDeleteRowid, 1, iDelete); + sqlite3_step(pRtree->pDeleteRowid); + rc = sqlite3_reset(pRtree->pDeleteRowid); + } + /* Check if the root node now has exactly one child. If so, remove + ** it, schedule the contents of the child for reinsertion and + ** reduce the tree height by one. + ** + ** This is equivalent to copying the contents of the child into + ** the root node (the operation that Gutman's paper says to perform + ** in this scenario). + */ + if( rc==SQLITE_OK && pRtree->iDepth>0 && NCELL(pRoot)==1 ){ + int rc2; + RtreeNode *pChild; + i64 iChild = nodeGetRowid(pRtree, pRoot, 0); + rc = nodeAcquire(pRtree, iChild, pRoot, &pChild); + if( rc==SQLITE_OK ){ + rc = removeNode(pRtree, pChild, pRtree->iDepth-1); + } + rc2 = nodeRelease(pRtree, pChild); + if( rc==SQLITE_OK ) rc = rc2; + if( rc==SQLITE_OK ){ + pRtree->iDepth--; + writeInt16(pRoot->zData, pRtree->iDepth); + pRoot->isDirty = 1; + } + } -/* -** This object becomes the sqlite3_user_data() for the SQL functions -** that are created by sqlite3_rtree_geometry_callback() and -** sqlite3_rtree_query_callback() and which appear on the right of MATCH -** operators in order to constrain a search. -** -** xGeom and xQueryFunc are the callback functions. Exactly one of -** xGeom and xQueryFunc fields is non-NULL, depending on whether the -** SQL function was created using sqlite3_rtree_geometry_callback() or -** sqlite3_rtree_query_callback(). -** -** This object is deleted automatically by the destructor mechanism in -** sqlite3_create_function_v2(). -*/ -struct RtreeGeomCallback { - int (*xGeom)(sqlite3_rtree_geometry*, int, RtreeDValue*, int*); - int (*xQueryFunc)(sqlite3_rtree_query_info*); - void (*xDestructor)(void*); - void *pContext; -}; + /* Re-insert the contents of any underfull nodes removed from the tree. */ + for(pLeaf=pRtree->pDeleted; pLeaf; pLeaf=pRtree->pDeleted){ + if( rc==SQLITE_OK ){ + rc = reinsertNodeContent(pRtree, pLeaf); + } + pRtree->pDeleted = pLeaf->pNext; + sqlite3_free(pLeaf); + } + /* Release the reference to the root node. */ + if( rc==SQLITE_OK ){ + rc = nodeRelease(pRtree, pRoot); + }else{ + nodeRelease(pRtree, pRoot); + } -/* -** Value for the first field of every RtreeMatchArg object. The MATCH -** operator tests that the first field of a blob operand matches this -** value to avoid operating on invalid blobs (which could cause a segfault). -*/ -#define RTREE_GEOMETRY_MAGIC 0x891245AB + return rc; +} /* -** An instance of this structure (in the form of a BLOB) is returned by -** the SQL functions that sqlite3_rtree_geometry_callback() and -** sqlite3_rtree_query_callback() create, and is read as the right-hand -** operand to the MATCH operator of an R-Tree. +** Rounding constants for float->double conversion. */ -struct RtreeMatchArg { - u32 magic; /* Always RTREE_GEOMETRY_MAGIC */ - RtreeGeomCallback cb; /* Info about the callback functions */ - int nParam; /* Number of parameters to the SQL function */ - RtreeDValue aParam[1]; /* Values for parameters to the SQL function */ -}; - -#ifndef MAX -# define MAX(x,y) ((x) < (y) ? (y) : (x)) -#endif -#ifndef MIN -# define MIN(x,y) ((x) > (y) ? (y) : (x)) -#endif +#define RNDTOWARDS (1.0 - 1.0/8388608.0) /* Round towards zero */ +#define RNDAWAY (1.0 + 1.0/8388608.0) /* Round away from zero */ +#if !defined(SQLITE_RTREE_INT_ONLY) /* -** Functions to deserialize a 16 bit integer, 32 bit real number and -** 64 bit integer. The deserialized value is returned. +** Convert an sqlite3_value into an RtreeValue (presumably a float) +** while taking care to round toward negative or positive, respectively. */ -static int readInt16(u8 *p){ - return (p[0]<<8) + p[1]; -} -static void readCoord(u8 *p, RtreeCoord *pCoord){ - pCoord->u = ( - (((u32)p[0]) << 24) + - (((u32)p[1]) << 16) + - (((u32)p[2]) << 8) + - (((u32)p[3]) << 0) - ); +static RtreeValue rtreeValueDown(sqlite3_value *v){ + double d = sqlite3_value_double(v); + float f = (float)d; + if( f>d ){ + f = (float)(d*(d<0 ? RNDAWAY : RNDTOWARDS)); + } + return f; } -static i64 readInt64(u8 *p){ - return ( - (((i64)p[0]) << 56) + - (((i64)p[1]) << 48) + - (((i64)p[2]) << 40) + - (((i64)p[3]) << 32) + - (((i64)p[4]) << 24) + - (((i64)p[5]) << 16) + - (((i64)p[6]) << 8) + - (((i64)p[7]) << 0) - ); +static RtreeValue rtreeValueUp(sqlite3_value *v){ + double d = sqlite3_value_double(v); + float f = (float)d; + if( f> 8)&0xFF; - p[1] = (i>> 0)&0xFF; - return 2; -} -static int writeCoord(u8 *p, RtreeCoord *pCoord){ - u32 i; - assert( sizeof(RtreeCoord)==4 ); - assert( sizeof(u32)==4 ); - i = pCoord->u; - p[0] = (i>>24)&0xFF; - p[1] = (i>>16)&0xFF; - p[2] = (i>> 8)&0xFF; - p[3] = (i>> 0)&0xFF; - return 4; -} -static int writeInt64(u8 *p, i64 i){ - p[0] = (i>>56)&0xFF; - p[1] = (i>>48)&0xFF; - p[2] = (i>>40)&0xFF; - p[3] = (i>>32)&0xFF; - p[4] = (i>>24)&0xFF; - p[5] = (i>>16)&0xFF; - p[6] = (i>> 8)&0xFF; - p[7] = (i>> 0)&0xFF; - return 8; -} /* -** Increment the reference count of node p. +** The xUpdate method for rtree module virtual tables. */ -static void nodeReference(RtreeNode *p){ - if( p ){ - p->nRef++; +static int rtreeUpdate( + sqlite3_vtab *pVtab, + int nData, + sqlite3_value **azData, + sqlite_int64 *pRowid +){ + Rtree *pRtree = (Rtree *)pVtab; + int rc = SQLITE_OK; + RtreeCell cell; /* New cell to insert if nData>1 */ + int bHaveRowid = 0; /* Set to 1 after new rowid is determined */ + + rtreeReference(pRtree); + assert(nData>=1); + + cell.iRowid = 0; /* Used only to suppress a compiler warning */ + + /* Constraint handling. A write operation on an r-tree table may return + ** SQLITE_CONSTRAINT for two reasons: + ** + ** 1. A duplicate rowid value, or + ** 2. The supplied data violates the "x2>=x1" constraint. + ** + ** In the first case, if the conflict-handling mode is REPLACE, then + ** the conflicting row can be removed before proceeding. In the second + ** case, SQLITE_CONSTRAINT must be returned regardless of the + ** conflict-handling mode specified by the user. + */ + if( nData>1 ){ + int ii; + + /* Populate the cell.aCoord[] array. The first coordinate is azData[3]. + ** + ** NB: nData can only be less than nDim*2+3 if the rtree is mis-declared + ** with "column" that are interpreted as table constraints. + ** Example: CREATE VIRTUAL TABLE bad USING rtree(x,y,CHECK(y>5)); + ** This problem was discovered after years of use, so we silently ignore + ** these kinds of misdeclared tables to avoid breaking any legacy. + */ + assert( nData<=(pRtree->nDim*2 + 3) ); + +#ifndef SQLITE_RTREE_INT_ONLY + if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ + for(ii=0; iicell.aCoord[ii+1].f ){ + rc = SQLITE_CONSTRAINT; + goto constraint; + } + } + }else +#endif + { + for(ii=0; iicell.aCoord[ii+1].i ){ + rc = SQLITE_CONSTRAINT; + goto constraint; + } + } + } + + /* If a rowid value was supplied, check if it is already present in + ** the table. If so, the constraint has failed. */ + if( sqlite3_value_type(azData[2])!=SQLITE_NULL ){ + cell.iRowid = sqlite3_value_int64(azData[2]); + if( sqlite3_value_type(azData[0])==SQLITE_NULL + || sqlite3_value_int64(azData[0])!=cell.iRowid + ){ + int steprc; + sqlite3_bind_int64(pRtree->pReadRowid, 1, cell.iRowid); + steprc = sqlite3_step(pRtree->pReadRowid); + rc = sqlite3_reset(pRtree->pReadRowid); + if( SQLITE_ROW==steprc ){ + if( sqlite3_vtab_on_conflict(pRtree->db)==SQLITE_REPLACE ){ + rc = rtreeDeleteRowid(pRtree, cell.iRowid); + }else{ + rc = SQLITE_CONSTRAINT; + goto constraint; + } + } + } + bHaveRowid = 1; + } + } + + /* If azData[0] is not an SQL NULL value, it is the rowid of a + ** record to delete from the r-tree table. The following block does + ** just that. + */ + if( sqlite3_value_type(azData[0])!=SQLITE_NULL ){ + rc = rtreeDeleteRowid(pRtree, sqlite3_value_int64(azData[0])); + } + + /* If the azData[] array contains more than one element, elements + ** (azData[2]..azData[argc-1]) contain a new record to insert into + ** the r-tree structure. + */ + if( rc==SQLITE_OK && nData>1 ){ + /* Insert the new record into the r-tree */ + RtreeNode *pLeaf = 0; + + /* Figure out the rowid of the new row. */ + if( bHaveRowid==0 ){ + rc = newRowid(pRtree, &cell.iRowid); + } + *pRowid = cell.iRowid; + + if( rc==SQLITE_OK ){ + rc = ChooseLeaf(pRtree, &cell, 0, &pLeaf); + } + if( rc==SQLITE_OK ){ + int rc2; + pRtree->iReinsertHeight = -1; + rc = rtreeInsertCell(pRtree, pLeaf, &cell, 0); + rc2 = nodeRelease(pRtree, pLeaf); + if( rc==SQLITE_OK ){ + rc = rc2; + } + } } + +constraint: + rtreeRelease(pRtree); + return rc; } /* -** Clear the content of node p (set all bytes to 0x00). +** The xRename method for rtree module virtual tables. */ -static void nodeZero(Rtree *pRtree, RtreeNode *p){ - memset(&p->zData[2], 0, pRtree->iNodeSize-2); - p->isDirty = 1; +static int rtreeRename(sqlite3_vtab *pVtab, const char *zNewName){ + Rtree *pRtree = (Rtree *)pVtab; + int rc = SQLITE_NOMEM; + char *zSql = sqlite3_mprintf( + "ALTER TABLE %Q.'%q_node' RENAME TO \"%w_node\";" + "ALTER TABLE %Q.'%q_parent' RENAME TO \"%w_parent\";" + "ALTER TABLE %Q.'%q_rowid' RENAME TO \"%w_rowid\";" + , pRtree->zDb, pRtree->zName, zNewName + , pRtree->zDb, pRtree->zName, zNewName + , pRtree->zDb, pRtree->zName, zNewName + ); + if( zSql ){ + rc = sqlite3_exec(pRtree->db, zSql, 0, 0, 0); + sqlite3_free(zSql); + } + return rc; } /* -** Given a node number iNode, return the corresponding key to use -** in the Rtree.aHash table. +** This function populates the pRtree->nRowEst variable with an estimate +** of the number of rows in the virtual table. If possible, this is based +** on sqlite_stat1 data. Otherwise, use RTREE_DEFAULT_ROWEST. */ -static int nodeHash(i64 iNode){ - return iNode % HASHSIZE; +static int rtreeQueryStat1(sqlite3 *db, Rtree *pRtree){ + const char *zFmt = "SELECT stat FROM %Q.sqlite_stat1 WHERE tbl = '%q_rowid'"; + char *zSql; + sqlite3_stmt *p; + int rc; + i64 nRow = 0; + + zSql = sqlite3_mprintf(zFmt, pRtree->zDb, pRtree->zName); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(db, zSql, -1, &p, 0); + if( rc==SQLITE_OK ){ + if( sqlite3_step(p)==SQLITE_ROW ) nRow = sqlite3_column_int64(p, 0); + rc = sqlite3_finalize(p); + }else if( rc!=SQLITE_NOMEM ){ + rc = SQLITE_OK; + } + + if( rc==SQLITE_OK ){ + if( nRow==0 ){ + pRtree->nRowEst = RTREE_DEFAULT_ROWEST; + }else{ + pRtree->nRowEst = MAX(nRow, RTREE_MIN_ROWEST); + } + } + sqlite3_free(zSql); + } + + return rc; } -/* -** Search the node hash table for node iNode. If found, return a pointer -** to it. Otherwise, return 0. -*/ -static RtreeNode *nodeHashLookup(Rtree *pRtree, i64 iNode){ - RtreeNode *p; - for(p=pRtree->aHash[nodeHash(iNode)]; p && p->iNode!=iNode; p=p->pNext); - return p; -} +static sqlite3_module rtreeModule = { + 0, /* iVersion */ + rtreeCreate, /* xCreate - create a table */ + rtreeConnect, /* xConnect - connect to an existing table */ + rtreeBestIndex, /* xBestIndex - Determine search strategy */ + rtreeDisconnect, /* xDisconnect - Disconnect from a table */ + rtreeDestroy, /* xDestroy - Drop a table */ + rtreeOpen, /* xOpen - open a cursor */ + rtreeClose, /* xClose - close a cursor */ + rtreeFilter, /* xFilter - configure scan constraints */ + rtreeNext, /* xNext - advance a cursor */ + rtreeEof, /* xEof */ + rtreeColumn, /* xColumn - read data */ + rtreeRowid, /* xRowid - read data */ + rtreeUpdate, /* xUpdate - write data */ + 0, /* xBegin - begin transaction */ + 0, /* xSync - sync transaction */ + 0, /* xCommit - commit transaction */ + 0, /* xRollback - rollback transaction */ + 0, /* xFindFunction - function overloading */ + rtreeRename, /* xRename - rename the table */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0 /* xRollbackTo */ +}; + +static int rtreeSqlInit( + Rtree *pRtree, + sqlite3 *db, + const char *zDb, + const char *zPrefix, + int isCreate +){ + int rc = SQLITE_OK; + + #define N_STATEMENT 9 + static const char *azSql[N_STATEMENT] = { + /* Read and write the xxx_node table */ + "SELECT data FROM '%q'.'%q_node' WHERE nodeno = :1", + "INSERT OR REPLACE INTO '%q'.'%q_node' VALUES(:1, :2)", + "DELETE FROM '%q'.'%q_node' WHERE nodeno = :1", + + /* Read and write the xxx_rowid table */ + "SELECT nodeno FROM '%q'.'%q_rowid' WHERE rowid = :1", + "INSERT OR REPLACE INTO '%q'.'%q_rowid' VALUES(:1, :2)", + "DELETE FROM '%q'.'%q_rowid' WHERE rowid = :1", + + /* Read and write the xxx_parent table */ + "SELECT parentnode FROM '%q'.'%q_parent' WHERE nodeno = :1", + "INSERT OR REPLACE INTO '%q'.'%q_parent' VALUES(:1, :2)", + "DELETE FROM '%q'.'%q_parent' WHERE nodeno = :1" + }; + sqlite3_stmt **appStmt[N_STATEMENT]; + int i; + + pRtree->db = db; + + if( isCreate ){ + char *zCreate = sqlite3_mprintf( +"CREATE TABLE \"%w\".\"%w_node\"(nodeno INTEGER PRIMARY KEY, data BLOB);" +"CREATE TABLE \"%w\".\"%w_rowid\"(rowid INTEGER PRIMARY KEY, nodeno INTEGER);" +"CREATE TABLE \"%w\".\"%w_parent\"(nodeno INTEGER PRIMARY KEY," + " parentnode INTEGER);" +"INSERT INTO '%q'.'%q_node' VALUES(1, zeroblob(%d))", + zDb, zPrefix, zDb, zPrefix, zDb, zPrefix, zDb, zPrefix, pRtree->iNodeSize + ); + if( !zCreate ){ + return SQLITE_NOMEM; + } + rc = sqlite3_exec(db, zCreate, 0, 0, 0); + sqlite3_free(zCreate); + if( rc!=SQLITE_OK ){ + return rc; + } + } + + appStmt[0] = &pRtree->pReadNode; + appStmt[1] = &pRtree->pWriteNode; + appStmt[2] = &pRtree->pDeleteNode; + appStmt[3] = &pRtree->pReadRowid; + appStmt[4] = &pRtree->pWriteRowid; + appStmt[5] = &pRtree->pDeleteRowid; + appStmt[6] = &pRtree->pReadParent; + appStmt[7] = &pRtree->pWriteParent; + appStmt[8] = &pRtree->pDeleteParent; -/* -** Add node pNode to the node hash table. -*/ -static void nodeHashInsert(Rtree *pRtree, RtreeNode *pNode){ - int iHash; - assert( pNode->pNext==0 ); - iHash = nodeHash(pNode->iNode); - pNode->pNext = pRtree->aHash[iHash]; - pRtree->aHash[iHash] = pNode; + rc = rtreeQueryStat1(db, pRtree); + for(i=0; iiNode!=0 ){ - pp = &pRtree->aHash[nodeHash(pNode->iNode)]; - for( ; (*pp)!=pNode; pp = &(*pp)->pNext){ assert(*pp); } - *pp = pNode->pNext; - pNode->pNext = 0; +static int getIntFromStmt(sqlite3 *db, const char *zSql, int *piVal){ + int rc = SQLITE_NOMEM; + if( zSql ){ + sqlite3_stmt *pStmt = 0; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + *piVal = sqlite3_column_int(pStmt, 0); + } + rc = sqlite3_finalize(pStmt); + } } + return rc; } /* -** Allocate and return new r-tree node. Initially, (RtreeNode.iNode==0), -** indicating that node has not yet been assigned a node number. It is -** assigned a node number when nodeWrite() is called to write the -** node contents out to the database. +** This function is called from within the xConnect() or xCreate() method to +** determine the node-size used by the rtree table being created or connected +** to. If successful, pRtree->iNodeSize is populated and SQLITE_OK returned. +** Otherwise, an SQLite error code is returned. +** +** If this function is being called as part of an xConnect(), then the rtree +** table already exists. In this case the node-size is determined by inspecting +** the root node of the tree. +** +** Otherwise, for an xCreate(), use 64 bytes less than the database page-size. +** This ensures that each node is stored on a single database page. If the +** database page-size is so large that more than RTREE_MAXCELLS entries +** would fit in a single node, use a smaller node-size. */ -static RtreeNode *nodeNew(Rtree *pRtree, RtreeNode *pParent){ - RtreeNode *pNode; - pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode) + pRtree->iNodeSize); - if( pNode ){ - memset(pNode, 0, sizeof(RtreeNode) + pRtree->iNodeSize); - pNode->zData = (u8 *)&pNode[1]; - pNode->nRef = 1; - pNode->pParent = pParent; - pNode->isDirty = 1; - nodeReference(pParent); +static int getNodeSize( + sqlite3 *db, /* Database handle */ + Rtree *pRtree, /* Rtree handle */ + int isCreate, /* True for xCreate, false for xConnect */ + char **pzErr /* OUT: Error message, if any */ +){ + int rc; + char *zSql; + if( isCreate ){ + int iPageSize = 0; + zSql = sqlite3_mprintf("PRAGMA %Q.page_size", pRtree->zDb); + rc = getIntFromStmt(db, zSql, &iPageSize); + if( rc==SQLITE_OK ){ + pRtree->iNodeSize = iPageSize-64; + if( (4+pRtree->nBytesPerCell*RTREE_MAXCELLS)iNodeSize ){ + pRtree->iNodeSize = 4+pRtree->nBytesPerCell*RTREE_MAXCELLS; + } + }else{ + *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + } + }else{ + zSql = sqlite3_mprintf( + "SELECT length(data) FROM '%q'.'%q_node' WHERE nodeno = 1", + pRtree->zDb, pRtree->zName + ); + rc = getIntFromStmt(db, zSql, &pRtree->iNodeSize); + if( rc!=SQLITE_OK ){ + *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + } } - return pNode; + + sqlite3_free(zSql); + return rc; } -/* -** Obtain a reference to an r-tree node. +/* +** This function is the implementation of both the xConnect and xCreate +** methods of the r-tree virtual table. +** +** argv[0] -> module name +** argv[1] -> database name +** argv[2] -> table name +** argv[...] -> column names... */ -static int nodeAcquire( - Rtree *pRtree, /* R-tree structure */ - i64 iNode, /* Node number to load */ - RtreeNode *pParent, /* Either the parent node or NULL */ - RtreeNode **ppNode /* OUT: Acquired node */ +static int rtreeInit( + sqlite3 *db, /* Database connection */ + void *pAux, /* One of the RTREE_COORD_* constants */ + int argc, const char *const*argv, /* Parameters to CREATE TABLE statement */ + sqlite3_vtab **ppVtab, /* OUT: New virtual table */ + char **pzErr, /* OUT: Error message, if any */ + int isCreate /* True for xCreate, false for xConnect */ ){ - int rc; - int rc2 = SQLITE_OK; - RtreeNode *pNode; + int rc = SQLITE_OK; + Rtree *pRtree; + int nDb; /* Length of string argv[1] */ + int nName; /* Length of string argv[2] */ + int eCoordType = (pAux ? RTREE_COORD_INT32 : RTREE_COORD_REAL32); - /* Check if the requested node is already in the hash table. If so, - ** increase its reference count and return it. - */ - if( (pNode = nodeHashLookup(pRtree, iNode)) ){ - assert( !pParent || !pNode->pParent || pNode->pParent==pParent ); - if( pParent && !pNode->pParent ){ - nodeReference(pParent); - pNode->pParent = pParent; - } - pNode->nRef++; - *ppNode = pNode; - return SQLITE_OK; - } + const char *aErrMsg[] = { + 0, /* 0 */ + "Wrong number of columns for an rtree table", /* 1 */ + "Too few columns for an rtree table", /* 2 */ + "Too many columns for an rtree table" /* 3 */ + }; - sqlite3_bind_int64(pRtree->pReadNode, 1, iNode); - rc = sqlite3_step(pRtree->pReadNode); - if( rc==SQLITE_ROW ){ - const u8 *zBlob = sqlite3_column_blob(pRtree->pReadNode, 0); - if( pRtree->iNodeSize==sqlite3_column_bytes(pRtree->pReadNode, 0) ){ - pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode)+pRtree->iNodeSize); - if( !pNode ){ - rc2 = SQLITE_NOMEM; - }else{ - pNode->pParent = pParent; - pNode->zData = (u8 *)&pNode[1]; - pNode->nRef = 1; - pNode->iNode = iNode; - pNode->isDirty = 0; - pNode->pNext = 0; - memcpy(pNode->zData, zBlob, pRtree->iNodeSize); - nodeReference(pParent); - } - } + int iErr = (argc<6) ? 2 : argc>(RTREE_MAX_DIMENSIONS*2+4) ? 3 : argc%2; + if( aErrMsg[iErr] ){ + *pzErr = sqlite3_mprintf("%s", aErrMsg[iErr]); + return SQLITE_ERROR; } - rc = sqlite3_reset(pRtree->pReadNode); - if( rc==SQLITE_OK ) rc = rc2; - /* If the root node was just loaded, set pRtree->iDepth to the height - ** of the r-tree structure. A height of zero means all data is stored on - ** the root node. A height of one means the children of the root node - ** are the leaves, and so on. If the depth as specified on the root node - ** is greater than RTREE_MAX_DEPTH, the r-tree structure must be corrupt. - */ - if( pNode && iNode==1 ){ - pRtree->iDepth = readInt16(pNode->zData); - if( pRtree->iDepth>RTREE_MAX_DEPTH ){ - rc = SQLITE_CORRUPT_VTAB; - } + sqlite3_vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); + + /* Allocate the sqlite3_vtab structure */ + nDb = (int)strlen(argv[1]); + nName = (int)strlen(argv[2]); + pRtree = (Rtree *)sqlite3_malloc(sizeof(Rtree)+nDb+nName+2); + if( !pRtree ){ + return SQLITE_NOMEM; } + memset(pRtree, 0, sizeof(Rtree)+nDb+nName+2); + pRtree->nBusy = 1; + pRtree->base.pModule = &rtreeModule; + pRtree->zDb = (char *)&pRtree[1]; + pRtree->zName = &pRtree->zDb[nDb+1]; + pRtree->nDim = (argc-4)/2; + pRtree->nBytesPerCell = 8 + pRtree->nDim*4*2; + pRtree->eCoordType = eCoordType; + memcpy(pRtree->zDb, argv[1], nDb); + memcpy(pRtree->zName, argv[2], nName); - /* If no error has occurred so far, check if the "number of entries" - ** field on the node is too large. If so, set the return code to - ** SQLITE_CORRUPT_VTAB. + /* Figure out the node size to use. */ + rc = getNodeSize(db, pRtree, isCreate, pzErr); + + /* Create/Connect to the underlying relational database schema. If + ** that is successful, call sqlite3_declare_vtab() to configure + ** the r-tree table schema. */ - if( pNode && rc==SQLITE_OK ){ - if( NCELL(pNode)>((pRtree->iNodeSize-4)/pRtree->nBytesPerCell) ){ - rc = SQLITE_CORRUPT_VTAB; + if( rc==SQLITE_OK ){ + if( (rc = rtreeSqlInit(pRtree, db, argv[1], argv[2], isCreate)) ){ + *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + }else{ + char *zSql = sqlite3_mprintf("CREATE TABLE x(%s", argv[3]); + char *zTmp; + int ii; + for(ii=4; zSql && iinBusy==1 ); + rtreeRelease(pRtree); } - return rc; } + /* -** Overwrite cell iCell of node pNode with the contents of pCell. +** Implementation of a scalar function that decodes r-tree nodes to +** human readable strings. This can be used for debugging and analysis. +** +** The scalar function takes two arguments: (1) the number of dimensions +** to the rtree (between 1 and 5, inclusive) and (2) a blob of data containing +** an r-tree node. For a two-dimensional r-tree structure called "rt", to +** deserialize all nodes, a statement like: +** +** SELECT rtreenode(2, data) FROM rt_node; +** +** The human readable string takes the form of a Tcl list with one +** entry for each cell in the r-tree node. Each entry is itself a +** list, containing the 8-byte rowid/pageno followed by the +** *2 coordinates. */ -static void nodeOverwriteCell( - Rtree *pRtree, /* The overall R-Tree */ - RtreeNode *pNode, /* The node into which the cell is to be written */ - RtreeCell *pCell, /* The cell to write */ - int iCell /* Index into pNode into which pCell is written */ -){ +static void rtreenode(sqlite3_context *ctx, int nArg, sqlite3_value **apArg){ + char *zText = 0; + RtreeNode node; + Rtree tree; int ii; - u8 *p = &pNode->zData[4 + pRtree->nBytesPerCell*iCell]; - p += writeInt64(p, pCell->iRowid); - for(ii=0; ii<(pRtree->nDim*2); ii++){ - p += writeCoord(p, &pCell->aCoord[ii]); + + UNUSED_PARAMETER(nArg); + memset(&node, 0, sizeof(RtreeNode)); + memset(&tree, 0, sizeof(Rtree)); + tree.nDim = sqlite3_value_int(apArg[0]); + tree.nBytesPerCell = 8 + 8 * tree.nDim; + node.zData = (u8 *)sqlite3_value_blob(apArg[1]); + + for(ii=0; iiisDirty = 1; + + sqlite3_result_text(ctx, zText, -1, sqlite3_free); } -/* -** Remove the cell with index iCell from node pNode. +/* This routine implements an SQL function that returns the "depth" parameter +** from the front of a blob that is an r-tree node. For example: +** +** SELECT rtreedepth(data) FROM rt_node WHERE nodeno=1; +** +** The depth value is 0 for all nodes other than the root node, and the root +** node always has nodeno=1, so the example above is the primary use for this +** routine. This routine is intended for testing and analysis only. */ -static void nodeDeleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell){ - u8 *pDst = &pNode->zData[4 + pRtree->nBytesPerCell*iCell]; - u8 *pSrc = &pDst[pRtree->nBytesPerCell]; - int nByte = (NCELL(pNode) - iCell - 1) * pRtree->nBytesPerCell; - memmove(pDst, pSrc, nByte); - writeInt16(&pNode->zData[2], NCELL(pNode)-1); - pNode->isDirty = 1; +static void rtreedepth(sqlite3_context *ctx, int nArg, sqlite3_value **apArg){ + UNUSED_PARAMETER(nArg); + if( sqlite3_value_type(apArg[0])!=SQLITE_BLOB + || sqlite3_value_bytes(apArg[0])<2 + ){ + sqlite3_result_error(ctx, "Invalid argument to rtreedepth()", -1); + }else{ + u8 *zBlob = (u8 *)sqlite3_value_blob(apArg[0]); + sqlite3_result_int(ctx, readInt16(zBlob)); + } } /* -** Insert the contents of cell pCell into node pNode. If the insert -** is successful, return SQLITE_OK. -** -** If there is not enough free space in pNode, return SQLITE_FULL. +** Register the r-tree module with database handle db. This creates the +** virtual table module "rtree" and the debugging/analysis scalar +** function "rtreenode". */ -static int nodeInsertCell( - Rtree *pRtree, /* The overall R-Tree */ - RtreeNode *pNode, /* Write new cell into this node */ - RtreeCell *pCell /* The cell to be inserted */ -){ - int nCell; /* Current number of cells in pNode */ - int nMaxCell; /* Maximum number of cells for pNode */ - - nMaxCell = (pRtree->iNodeSize-4)/pRtree->nBytesPerCell; - nCell = NCELL(pNode); +SQLITE_PRIVATE int sqlite3RtreeInit(sqlite3 *db){ + const int utf8 = SQLITE_UTF8; + int rc; - assert( nCell<=nMaxCell ); - if( nCellzData[2], nCell+1); - pNode->isDirty = 1; + rc = sqlite3_create_function(db, "rtreenode", 2, utf8, 0, rtreenode, 0, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function(db, "rtreedepth", 1, utf8, 0,rtreedepth, 0, 0); + } + if( rc==SQLITE_OK ){ +#ifdef SQLITE_RTREE_INT_ONLY + void *c = (void *)RTREE_COORD_INT32; +#else + void *c = (void *)RTREE_COORD_REAL32; +#endif + rc = sqlite3_create_module_v2(db, "rtree", &rtreeModule, c, 0); + } + if( rc==SQLITE_OK ){ + void *c = (void *)RTREE_COORD_INT32; + rc = sqlite3_create_module_v2(db, "rtree_i32", &rtreeModule, c, 0); } - return (nCell==nMaxCell); + return rc; } /* -** If the node is dirty, write it out to the database. +** This routine deletes the RtreeGeomCallback object that was attached +** one of the SQL functions create by sqlite3_rtree_geometry_callback() +** or sqlite3_rtree_query_callback(). In other words, this routine is the +** destructor for an RtreeGeomCallback objecct. This routine is called when +** the corresponding SQL function is deleted. */ -static int nodeWrite(Rtree *pRtree, RtreeNode *pNode){ - int rc = SQLITE_OK; - if( pNode->isDirty ){ - sqlite3_stmt *p = pRtree->pWriteNode; - if( pNode->iNode ){ - sqlite3_bind_int64(p, 1, pNode->iNode); - }else{ - sqlite3_bind_null(p, 1); - } - sqlite3_bind_blob(p, 2, pNode->zData, pRtree->iNodeSize, SQLITE_STATIC); - sqlite3_step(p); - pNode->isDirty = 0; - rc = sqlite3_reset(p); - if( pNode->iNode==0 && rc==SQLITE_OK ){ - pNode->iNode = sqlite3_last_insert_rowid(pRtree->db); - nodeHashInsert(pRtree, pNode); - } - } - return rc; +static void rtreeFreeCallback(void *p){ + RtreeGeomCallback *pInfo = (RtreeGeomCallback*)p; + if( pInfo->xDestructor ) pInfo->xDestructor(pInfo->pContext); + sqlite3_free(p); } /* -** Release a reference to a node. If the node is dirty and the reference -** count drops to zero, the node data is written to the database. +** This routine frees the BLOB that is returned by geomCallback(). */ -static int nodeRelease(Rtree *pRtree, RtreeNode *pNode){ - int rc = SQLITE_OK; - if( pNode ){ - assert( pNode->nRef>0 ); - pNode->nRef--; - if( pNode->nRef==0 ){ - if( pNode->iNode==1 ){ - pRtree->iDepth = -1; - } - if( pNode->pParent ){ - rc = nodeRelease(pRtree, pNode->pParent); - } - if( rc==SQLITE_OK ){ - rc = nodeWrite(pRtree, pNode); - } - nodeHashDelete(pRtree, pNode); - sqlite3_free(pNode); - } +static void rtreeMatchArgFree(void *pArg){ + int i; + RtreeMatchArg *p = (RtreeMatchArg*)pArg; + for(i=0; inParam; i++){ + sqlite3_value_free(p->apSqlParam[i]); } - return rc; + sqlite3_free(p); } /* -** Return the 64-bit integer value associated with cell iCell of -** node pNode. If pNode is a leaf node, this is a rowid. If it is -** an internal node, then the 64-bit integer is a child page number. +** Each call to sqlite3_rtree_geometry_callback() or +** sqlite3_rtree_query_callback() creates an ordinary SQLite +** scalar function that is implemented by this routine. +** +** All this function does is construct an RtreeMatchArg object that +** contains the geometry-checking callback routines and a list of +** parameters to this function, then return that RtreeMatchArg object +** as a BLOB. +** +** The R-Tree MATCH operator will read the returned BLOB, deserialize +** the RtreeMatchArg object, and use the RtreeMatchArg object to figure +** out which elements of the R-Tree should be returned by the query. */ -static i64 nodeGetRowid( - Rtree *pRtree, /* The overall R-Tree */ - RtreeNode *pNode, /* The node from which to extract the ID */ - int iCell /* The cell index from which to extract the ID */ -){ - assert( iCellzData[4 + pRtree->nBytesPerCell*iCell]); +static void geomCallback(sqlite3_context *ctx, int nArg, sqlite3_value **aArg){ + RtreeGeomCallback *pGeomCtx = (RtreeGeomCallback *)sqlite3_user_data(ctx); + RtreeMatchArg *pBlob; + int nBlob; + int memErr = 0; + + nBlob = sizeof(RtreeMatchArg) + (nArg-1)*sizeof(RtreeDValue) + + nArg*sizeof(sqlite3_value*); + pBlob = (RtreeMatchArg *)sqlite3_malloc(nBlob); + if( !pBlob ){ + sqlite3_result_error_nomem(ctx); + }else{ + int i; + pBlob->magic = RTREE_GEOMETRY_MAGIC; + pBlob->cb = pGeomCtx[0]; + pBlob->apSqlParam = (sqlite3_value**)&pBlob->aParam[nArg]; + pBlob->nParam = nArg; + for(i=0; iapSqlParam[i] = sqlite3_value_dup(aArg[i]); + if( pBlob->apSqlParam[i]==0 ) memErr = 1; +#ifdef SQLITE_RTREE_INT_ONLY + pBlob->aParam[i] = sqlite3_value_int64(aArg[i]); +#else + pBlob->aParam[i] = sqlite3_value_double(aArg[i]); +#endif + } + if( memErr ){ + sqlite3_result_error_nomem(ctx); + rtreeMatchArgFree(pBlob); + }else{ + sqlite3_result_blob(ctx, pBlob, nBlob, rtreeMatchArgFree); + } + } } /* -** Return coordinate iCoord from cell iCell in node pNode. +** Register a new geometry function for use with the r-tree MATCH operator. */ -static void nodeGetCoord( - Rtree *pRtree, /* The overall R-Tree */ - RtreeNode *pNode, /* The node from which to extract a coordinate */ - int iCell, /* The index of the cell within the node */ - int iCoord, /* Which coordinate to extract */ - RtreeCoord *pCoord /* OUT: Space to write result to */ +SQLITE_API int SQLITE_STDCALL sqlite3_rtree_geometry_callback( + sqlite3 *db, /* Register SQL function on this connection */ + const char *zGeom, /* Name of the new SQL function */ + int (*xGeom)(sqlite3_rtree_geometry*,int,RtreeDValue*,int*), /* Callback */ + void *pContext /* Extra data associated with the callback */ ){ - readCoord(&pNode->zData[12 + pRtree->nBytesPerCell*iCell + 4*iCoord], pCoord); + RtreeGeomCallback *pGeomCtx; /* Context object for new user-function */ + + /* Allocate and populate the context object. */ + pGeomCtx = (RtreeGeomCallback *)sqlite3_malloc(sizeof(RtreeGeomCallback)); + if( !pGeomCtx ) return SQLITE_NOMEM; + pGeomCtx->xGeom = xGeom; + pGeomCtx->xQueryFunc = 0; + pGeomCtx->xDestructor = 0; + pGeomCtx->pContext = pContext; + return sqlite3_create_function_v2(db, zGeom, -1, SQLITE_ANY, + (void *)pGeomCtx, geomCallback, 0, 0, rtreeFreeCallback + ); } /* -** Deserialize cell iCell of node pNode. Populate the structure pointed -** to by pCell with the results. +** Register a new 2nd-generation geometry function for use with the +** r-tree MATCH operator. */ -static void nodeGetCell( - Rtree *pRtree, /* The overall R-Tree */ - RtreeNode *pNode, /* The node containing the cell to be read */ - int iCell, /* Index of the cell within the node */ - RtreeCell *pCell /* OUT: Write the cell contents here */ +SQLITE_API int SQLITE_STDCALL sqlite3_rtree_query_callback( + sqlite3 *db, /* Register SQL function on this connection */ + const char *zQueryFunc, /* Name of new SQL function */ + int (*xQueryFunc)(sqlite3_rtree_query_info*), /* Callback */ + void *pContext, /* Extra data passed into the callback */ + void (*xDestructor)(void*) /* Destructor for the extra data */ ){ - u8 *pData; - RtreeCoord *pCoord; - int ii; - pCell->iRowid = nodeGetRowid(pRtree, pNode, iCell); - pData = pNode->zData + (12 + pRtree->nBytesPerCell*iCell); - pCoord = pCell->aCoord; - for(ii=0; iinDim*2; ii++){ - readCoord(&pData[ii*4], &pCoord[ii]); - } -} - - -/* Forward declaration for the function that does the work of -** the virtual table module xCreate() and xConnect() methods. -*/ -static int rtreeInit( - sqlite3 *, void *, int, const char *const*, sqlite3_vtab **, char **, int -); + RtreeGeomCallback *pGeomCtx; /* Context object for new user-function */ -/* -** Rtree virtual table module xCreate method. -*/ -static int rtreeCreate( - sqlite3 *db, - void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVtab, - char **pzErr -){ - return rtreeInit(db, pAux, argc, argv, ppVtab, pzErr, 1); + /* Allocate and populate the context object. */ + pGeomCtx = (RtreeGeomCallback *)sqlite3_malloc(sizeof(RtreeGeomCallback)); + if( !pGeomCtx ) return SQLITE_NOMEM; + pGeomCtx->xGeom = 0; + pGeomCtx->xQueryFunc = xQueryFunc; + pGeomCtx->xDestructor = xDestructor; + pGeomCtx->pContext = pContext; + return sqlite3_create_function_v2(db, zQueryFunc, -1, SQLITE_ANY, + (void *)pGeomCtx, geomCallback, 0, 0, rtreeFreeCallback + ); } -/* -** Rtree virtual table module xConnect method. -*/ -static int rtreeConnect( +#if !SQLITE_CORE +#ifdef _WIN32 +__declspec(dllexport) +#endif +SQLITE_API int SQLITE_STDCALL sqlite3_rtree_init( sqlite3 *db, - void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVtab, - char **pzErr + char **pzErrMsg, + const sqlite3_api_routines *pApi ){ - return rtreeInit(db, pAux, argc, argv, ppVtab, pzErr, 0); + SQLITE_EXTENSION_INIT2(pApi) + return sqlite3RtreeInit(db); } +#endif + +#endif +/************** End of rtree.c ***********************************************/ +/************** Begin file icu.c *********************************************/ /* -** Increment the r-tree reference count. +** 2007 May 6 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $ +** +** This file implements an integration between the ICU library +** ("International Components for Unicode", an open-source library +** for handling unicode data) and SQLite. The integration uses +** ICU to provide the following to SQLite: +** +** * An implementation of the SQL regexp() function (and hence REGEXP +** operator) using the ICU uregex_XX() APIs. +** +** * Implementations of the SQL scalar upper() and lower() functions +** for case mapping. +** +** * Integration of ICU and SQLite collation sequences. +** +** * An implementation of the LIKE operator that uses ICU to +** provide case-independent matching. */ -static void rtreeReference(Rtree *pRtree){ - pRtree->nBusy++; -} + +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) + +/* Include ICU headers */ +#include +#include +#include +#include + +/* #include */ + +#ifndef SQLITE_CORE +/* #include "sqlite3ext.h" */ + SQLITE_EXTENSION_INIT1 +#else +/* #include "sqlite3.h" */ +#endif /* -** Decrement the r-tree reference count. When the reference count reaches -** zero the structure is deleted. +** Maximum length (in bytes) of the pattern in a LIKE or GLOB +** operator. */ -static void rtreeRelease(Rtree *pRtree){ - pRtree->nBusy--; - if( pRtree->nBusy==0 ){ - sqlite3_finalize(pRtree->pReadNode); - sqlite3_finalize(pRtree->pWriteNode); - sqlite3_finalize(pRtree->pDeleteNode); - sqlite3_finalize(pRtree->pReadRowid); - sqlite3_finalize(pRtree->pWriteRowid); - sqlite3_finalize(pRtree->pDeleteRowid); - sqlite3_finalize(pRtree->pReadParent); - sqlite3_finalize(pRtree->pWriteParent); - sqlite3_finalize(pRtree->pDeleteParent); - sqlite3_free(pRtree); - } -} +#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH +# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000 +#endif -/* -** Rtree virtual table module xDisconnect method. +/* +** Version of sqlite3_free() that is always a function, never a macro. */ -static int rtreeDisconnect(sqlite3_vtab *pVtab){ - rtreeRelease((Rtree *)pVtab); - return SQLITE_OK; +static void xFree(void *p){ + sqlite3_free(p); } -/* -** Rtree virtual table module xDestroy method. +/* +** Compare two UTF-8 strings for equality where the first string is +** a "LIKE" expression. Return true (1) if they are the same and +** false (0) if they are different. */ -static int rtreeDestroy(sqlite3_vtab *pVtab){ - Rtree *pRtree = (Rtree *)pVtab; - int rc; - char *zCreate = sqlite3_mprintf( - "DROP TABLE '%q'.'%q_node';" - "DROP TABLE '%q'.'%q_rowid';" - "DROP TABLE '%q'.'%q_parent';", - pRtree->zDb, pRtree->zName, - pRtree->zDb, pRtree->zName, - pRtree->zDb, pRtree->zName - ); - if( !zCreate ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_exec(pRtree->db, zCreate, 0, 0, 0); - sqlite3_free(zCreate); - } - if( rc==SQLITE_OK ){ - rtreeRelease(pRtree); - } +static int icuLikeCompare( + const uint8_t *zPattern, /* LIKE pattern */ + const uint8_t *zString, /* The UTF-8 string to compare against */ + const UChar32 uEsc /* The escape character */ +){ + static const int MATCH_ONE = (UChar32)'_'; + static const int MATCH_ALL = (UChar32)'%'; - return rc; -} + int iPattern = 0; /* Current byte index in zPattern */ + int iString = 0; /* Current byte index in zString */ -/* -** Rtree virtual table module xOpen method. -*/ -static int rtreeOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ - int rc = SQLITE_NOMEM; - RtreeCursor *pCsr; + int prevEscape = 0; /* True if the previous character was uEsc */ - pCsr = (RtreeCursor *)sqlite3_malloc(sizeof(RtreeCursor)); - if( pCsr ){ - memset(pCsr, 0, sizeof(RtreeCursor)); - pCsr->base.pVtab = pVTab; - rc = SQLITE_OK; - } - *ppCursor = (sqlite3_vtab_cursor *)pCsr; + while( zPattern[iPattern]!=0 ){ - return rc; -} + /* Read (and consume) the next character from the input pattern. */ + UChar32 uPattern; + U8_NEXT_UNSAFE(zPattern, iPattern, uPattern); + /* There are now 4 possibilities: + ** + ** 1. uPattern is an unescaped match-all character "%", + ** 2. uPattern is an unescaped match-one character "_", + ** 3. uPattern is an unescaped escape character, or + ** 4. uPattern is to be handled as an ordinary character + */ + if( !prevEscape && uPattern==MATCH_ALL ){ + /* Case 1. */ + uint8_t c; -/* -** Free the RtreeCursor.aConstraint[] array and its contents. -*/ -static void freeCursorConstraints(RtreeCursor *pCsr){ - if( pCsr->aConstraint ){ - int i; /* Used to iterate through constraint array */ - for(i=0; inConstraint; i++){ - sqlite3_rtree_query_info *pInfo = pCsr->aConstraint[i].pInfo; - if( pInfo ){ - if( pInfo->xDelUser ) pInfo->xDelUser(pInfo->pUser); - sqlite3_free(pInfo); + /* Skip any MATCH_ALL or MATCH_ONE characters that follow a + ** MATCH_ALL. For each MATCH_ONE, skip one character in the + ** test string. + */ + while( (c=zPattern[iPattern]) == MATCH_ALL || c == MATCH_ONE ){ + if( c==MATCH_ONE ){ + if( zString[iString]==0 ) return 0; + U8_FWD_1_UNSAFE(zString, iString); + } + iPattern++; + } + + if( zPattern[iPattern]==0 ) return 1; + + while( zString[iString] ){ + if( icuLikeCompare(&zPattern[iPattern], &zString[iString], uEsc) ){ + return 1; + } + U8_FWD_1_UNSAFE(zString, iString); + } + return 0; + + }else if( !prevEscape && uPattern==MATCH_ONE ){ + /* Case 2. */ + if( zString[iString]==0 ) return 0; + U8_FWD_1_UNSAFE(zString, iString); + + }else if( !prevEscape && uPattern==uEsc){ + /* Case 3. */ + prevEscape = 1; + + }else{ + /* Case 4. */ + UChar32 uString; + U8_NEXT_UNSAFE(zString, iString, uString); + uString = u_foldCase(uString, U_FOLD_CASE_DEFAULT); + uPattern = u_foldCase(uPattern, U_FOLD_CASE_DEFAULT); + if( uString!=uPattern ){ + return 0; } + prevEscape = 0; } - sqlite3_free(pCsr->aConstraint); - pCsr->aConstraint = 0; } -} -/* -** Rtree virtual table module xClose method. -*/ -static int rtreeClose(sqlite3_vtab_cursor *cur){ - Rtree *pRtree = (Rtree *)(cur->pVtab); - int ii; - RtreeCursor *pCsr = (RtreeCursor *)cur; - freeCursorConstraints(pCsr); - sqlite3_free(pCsr->aPoint); - for(ii=0; iiaNode[ii]); - sqlite3_free(pCsr); - return SQLITE_OK; + return zString[iString]==0; } /* -** Rtree virtual table module xEof method. +** Implementation of the like() SQL function. This function implements +** the build-in LIKE operator. The first argument to the function is the +** pattern and the second argument is the string. So, the SQL statements: ** -** Return non-zero if the cursor does not currently point to a valid -** record (i.e if the scan has finished), or zero otherwise. +** A LIKE B +** +** is implemented as like(B, A). If there is an escape character E, +** +** A LIKE B ESCAPE E +** +** is mapped to like(B, A, E). */ -static int rtreeEof(sqlite3_vtab_cursor *cur){ - RtreeCursor *pCsr = (RtreeCursor *)cur; - return pCsr->atEOF; +static void icuLikeFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const unsigned char *zA = sqlite3_value_text(argv[0]); + const unsigned char *zB = sqlite3_value_text(argv[1]); + UChar32 uEsc = 0; + + /* Limit the length of the LIKE or GLOB pattern to avoid problems + ** of deep recursion and N*N behavior in patternCompare(). + */ + if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){ + sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1); + return; + } + + + if( argc==3 ){ + /* The escape character string must consist of a single UTF-8 character. + ** Otherwise, return an error. + */ + int nE= sqlite3_value_bytes(argv[2]); + const unsigned char *zE = sqlite3_value_text(argv[2]); + int i = 0; + if( zE==0 ) return; + U8_NEXT(zE, i, nE, uEsc); + if( i!=nE){ + sqlite3_result_error(context, + "ESCAPE expression must be a single character", -1); + return; + } + } + + if( zA && zB ){ + sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc)); + } } /* -** Convert raw bits from the on-disk RTree record into a coordinate value. -** The on-disk format is big-endian and needs to be converted for little- -** endian platforms. The on-disk record stores integer coordinates if -** eInt is true and it stores 32-bit floating point records if eInt is -** false. a[] is the four bytes of the on-disk record to be decoded. -** Store the results in "r". +** This function is called when an ICU function called from within +** the implementation of an SQL scalar function returns an error. ** -** There are three versions of this macro, one each for little-endian and -** big-endian processors and a third generic implementation. The endian- -** specific implementations are much faster and are preferred if the -** processor endianness is known at compile-time. The SQLITE_BYTEORDER -** macro is part of sqliteInt.h and hence the endian-specific -** implementation will only be used if this module is compiled as part -** of the amalgamation. +** The scalar function context passed as the first argument is +** loaded with an error message based on the following two args. */ -#if defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==1234 -#define RTREE_DECODE_COORD(eInt, a, r) { \ - RtreeCoord c; /* Coordinate decoded */ \ - memcpy(&c.u,a,4); \ - c.u = ((c.u>>24)&0xff)|((c.u>>8)&0xff00)| \ - ((c.u&0xff)<<24)|((c.u&0xff00)<<8); \ - r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ -} -#elif defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==4321 -#define RTREE_DECODE_COORD(eInt, a, r) { \ - RtreeCoord c; /* Coordinate decoded */ \ - memcpy(&c.u,a,4); \ - r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ +static void icuFunctionError( + sqlite3_context *pCtx, /* SQLite scalar function context */ + const char *zName, /* Name of ICU function that failed */ + UErrorCode e /* Error code returned by ICU function */ +){ + char zBuf[128]; + sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e)); + zBuf[127] = '\0'; + sqlite3_result_error(pCtx, zBuf, -1); } -#else -#define RTREE_DECODE_COORD(eInt, a, r) { \ - RtreeCoord c; /* Coordinate decoded */ \ - c.u = ((u32)a[0]<<24) + ((u32)a[1]<<16) \ - +((u32)a[2]<<8) + a[3]; \ - r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ + +/* +** Function to delete compiled regexp objects. Registered as +** a destructor function with sqlite3_set_auxdata(). +*/ +static void icuRegexpDelete(void *p){ + URegularExpression *pExpr = (URegularExpression *)p; + uregex_close(pExpr); } -#endif /* -** Check the RTree node or entry given by pCellData and p against the MATCH -** constraint pConstraint. +** Implementation of SQLite REGEXP operator. This scalar function takes +** two arguments. The first is a regular expression pattern to compile +** the second is a string to match against that pattern. If either +** argument is an SQL NULL, then NULL Is returned. Otherwise, the result +** is 1 if the string matches the pattern, or 0 otherwise. +** +** SQLite maps the regexp() function to the regexp() operator such +** that the following two are equivalent: +** +** zString REGEXP zPattern +** regexp(zPattern, zString) +** +** Uses the following ICU regexp APIs: +** +** uregex_open() +** uregex_matches() +** uregex_close() */ -static int rtreeCallbackConstraint( - RtreeConstraint *pConstraint, /* The constraint to test */ - int eInt, /* True if RTree holding integer coordinates */ - u8 *pCellData, /* Raw cell content */ - RtreeSearchPoint *pSearch, /* Container of this cell */ - sqlite3_rtree_dbl *prScore, /* OUT: score for the cell */ - int *peWithin /* OUT: visibility of the cell */ -){ - int i; /* Loop counter */ - sqlite3_rtree_query_info *pInfo = pConstraint->pInfo; /* Callback info */ - int nCoord = pInfo->nCoord; /* No. of coordinates */ - int rc; /* Callback return code */ - sqlite3_rtree_dbl aCoord[RTREE_MAX_DIMENSIONS*2]; /* Decoded coordinates */ +static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){ + UErrorCode status = U_ZERO_ERROR; + URegularExpression *pExpr; + UBool res; + const UChar *zString = sqlite3_value_text16(apArg[1]); - assert( pConstraint->op==RTREE_MATCH || pConstraint->op==RTREE_QUERY ); - assert( nCoord==2 || nCoord==4 || nCoord==6 || nCoord==8 || nCoord==10 ); + (void)nArg; /* Unused parameter */ - if( pConstraint->op==RTREE_QUERY && pSearch->iLevel==1 ){ - pInfo->iRowid = readInt64(pCellData); - } - pCellData += 8; - for(i=0; iop==RTREE_MATCH ){ - rc = pConstraint->u.xGeom((sqlite3_rtree_geometry*)pInfo, - nCoord, aCoord, &i); - if( i==0 ) *peWithin = NOT_WITHIN; - *prScore = RTREE_ZERO; - }else{ - pInfo->aCoord = aCoord; - pInfo->iLevel = pSearch->iLevel - 1; - pInfo->rScore = pInfo->rParentScore = pSearch->rScore; - pInfo->eWithin = pInfo->eParentWithin = pSearch->eWithin; - rc = pConstraint->u.xQueryFunc(pInfo); - if( pInfo->eWithin<*peWithin ) *peWithin = pInfo->eWithin; - if( pInfo->rScore<*prScore || *prScorerScore; + + pExpr = sqlite3_get_auxdata(p, 0); + if( !pExpr ){ + const UChar *zPattern = sqlite3_value_text16(apArg[0]); + if( !zPattern ){ + return; + } + pExpr = uregex_open(zPattern, -1, 0, 0, &status); + + if( U_SUCCESS(status) ){ + sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete); + }else{ + assert(!pExpr); + icuFunctionError(p, "uregex_open", status); + return; } } - return rc; -} - -/* -** Check the internal RTree node given by pCellData against constraint p. -** If this constraint cannot be satisfied by any child within the node, -** set *peWithin to NOT_WITHIN. -*/ -static void rtreeNonleafConstraint( - RtreeConstraint *p, /* The constraint to test */ - int eInt, /* True if RTree holds integer coordinates */ - u8 *pCellData, /* Raw cell content as appears on disk */ - int *peWithin /* Adjust downward, as appropriate */ -){ - sqlite3_rtree_dbl val; /* Coordinate value convert to a double */ - /* p->iCoord might point to either a lower or upper bound coordinate - ** in a coordinate pair. But make pCellData point to the lower bound. + /* Configure the text that the regular expression operates on. */ + uregex_setText(pExpr, zString, -1, &status); + if( !U_SUCCESS(status) ){ + icuFunctionError(p, "uregex_setText", status); + return; + } + + /* Attempt the match */ + res = uregex_matches(pExpr, 0, &status); + if( !U_SUCCESS(status) ){ + icuFunctionError(p, "uregex_matches", status); + return; + } + + /* Set the text that the regular expression operates on to a NULL + ** pointer. This is not really necessary, but it is tidier than + ** leaving the regular expression object configured with an invalid + ** pointer after this function returns. */ - pCellData += 8 + 4*(p->iCoord&0xfe); - - assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE - || p->op==RTREE_GT || p->op==RTREE_EQ ); - switch( p->op ){ - case RTREE_LE: - case RTREE_LT: - case RTREE_EQ: - RTREE_DECODE_COORD(eInt, pCellData, val); - /* val now holds the lower bound of the coordinate pair */ - if( p->u.rValue>=val ) return; - if( p->op!=RTREE_EQ ) break; /* RTREE_LE and RTREE_LT end here */ - /* Fall through for the RTREE_EQ case */ + uregex_setText(pExpr, 0, 0, &status); - default: /* RTREE_GT or RTREE_GE, or fallthrough of RTREE_EQ */ - pCellData += 4; - RTREE_DECODE_COORD(eInt, pCellData, val); - /* val now holds the upper bound of the coordinate pair */ - if( p->u.rValue<=val ) return; - } - *peWithin = NOT_WITHIN; + /* Return 1 or 0. */ + sqlite3_result_int(p, res ? 1 : 0); } /* -** Check the leaf RTree cell given by pCellData against constraint p. -** If this constraint is not satisfied, set *peWithin to NOT_WITHIN. -** If the constraint is satisfied, leave *peWithin unchanged. +** Implementations of scalar functions for case mapping - upper() and +** lower(). Function upper() converts its input to upper-case (ABC). +** Function lower() converts to lower-case (abc). ** -** The constraint is of the form: xN op $val +** ICU provides two types of case mapping, "general" case mapping and +** "language specific". Refer to ICU documentation for the differences +** between the two. ** -** The op is given by p->op. The xN is p->iCoord-th coordinate in -** pCellData. $val is given by p->u.rValue. +** To utilise "general" case mapping, the upper() or lower() scalar +** functions are invoked with one argument: +** +** upper('ABC') -> 'abc' +** lower('abc') -> 'ABC' +** +** To access ICU "language specific" case mapping, upper() or lower() +** should be invoked with two arguments. The second argument is the name +** of the locale to use. Passing an empty string ("") or SQL NULL value +** as the second argument is the same as invoking the 1 argument version +** of upper() or lower(). +** +** lower('I', 'en_us') -> 'i' +** lower('I', 'tr_tr') -> 'ı' (small dotless i) +** +** http://www.icu-project.org/userguide/posix.html#case_mappings */ -static void rtreeLeafConstraint( - RtreeConstraint *p, /* The constraint to test */ - int eInt, /* True if RTree holds integer coordinates */ - u8 *pCellData, /* Raw cell content as appears on disk */ - int *peWithin /* Adjust downward, as appropriate */ -){ - RtreeDValue xN; /* Coordinate value converted to a double */ +static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){ + const UChar *zInput; + UChar *zOutput; + int nInput; + int nOutput; - assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE - || p->op==RTREE_GT || p->op==RTREE_EQ ); - pCellData += 8 + p->iCoord*4; - RTREE_DECODE_COORD(eInt, pCellData, xN); - switch( p->op ){ - case RTREE_LE: if( xN <= p->u.rValue ) return; break; - case RTREE_LT: if( xN < p->u.rValue ) return; break; - case RTREE_GE: if( xN >= p->u.rValue ) return; break; - case RTREE_GT: if( xN > p->u.rValue ) return; break; - default: if( xN == p->u.rValue ) return; break; + UErrorCode status = U_ZERO_ERROR; + const char *zLocale = 0; + + assert(nArg==1 || nArg==2); + if( nArg==2 ){ + zLocale = (const char *)sqlite3_value_text(apArg[1]); } - *peWithin = NOT_WITHIN; -} -/* -** One of the cells in node pNode is guaranteed to have a 64-bit -** integer value equal to iRowid. Return the index of this cell. -*/ -static int nodeRowidIndex( - Rtree *pRtree, - RtreeNode *pNode, - i64 iRowid, - int *piIndex -){ - int ii; - int nCell = NCELL(pNode); - assert( nCell<200 ); - for(ii=0; iipParent; - if( pParent ){ - return nodeRowidIndex(pRtree, pParent, pNode->iNode, piIndex); + nOutput = nInput * 2 + 2; + zOutput = sqlite3_malloc(nOutput); + if( !zOutput ){ + return; } - *piIndex = -1; - return SQLITE_OK; -} -/* -** Compare two search points. Return negative, zero, or positive if the first -** is less than, equal to, or greater than the second. -** -** The rScore is the primary key. Smaller rScore values come first. -** If the rScore is a tie, then use iLevel as the tie breaker with smaller -** iLevel values coming first. In this way, if rScore is the same for all -** SearchPoints, then iLevel becomes the deciding factor and the result -** is a depth-first search, which is the desired default behavior. -*/ -static int rtreeSearchPointCompare( - const RtreeSearchPoint *pA, - const RtreeSearchPoint *pB -){ - if( pA->rScorerScore ) return -1; - if( pA->rScore>pB->rScore ) return +1; - if( pA->iLeveliLevel ) return -1; - if( pA->iLevel>pB->iLevel ) return +1; - return 0; -} + if( sqlite3_user_data(p) ){ + u_strToUpper(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status); + }else{ + u_strToLower(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status); + } -/* -** Interchange to search points in a cursor. -*/ -static void rtreeSearchPointSwap(RtreeCursor *p, int i, int j){ - RtreeSearchPoint t = p->aPoint[i]; - assert( iaPoint[i] = p->aPoint[j]; - p->aPoint[j] = t; - i++; j++; - if( i=RTREE_CACHE_SZ ){ - nodeRelease(RTREE_OF_CURSOR(p), p->aNode[i]); - p->aNode[i] = 0; - }else{ - RtreeNode *pTemp = p->aNode[i]; - p->aNode[i] = p->aNode[j]; - p->aNode[j] = pTemp; - } + if( !U_SUCCESS(status) ){ + icuFunctionError(p, "u_strToLower()/u_strToUpper", status); + return; } + + sqlite3_result_text16(p, zOutput, -1, xFree); } /* -** Return the search point with the lowest current score. +** Collation sequence destructor function. The pCtx argument points to +** a UCollator structure previously allocated using ucol_open(). */ -static RtreeSearchPoint *rtreeSearchPointFirst(RtreeCursor *pCur){ - return pCur->bPoint ? &pCur->sPoint : pCur->nPoint ? pCur->aPoint : 0; +static void icuCollationDel(void *pCtx){ + UCollator *p = (UCollator *)pCtx; + ucol_close(p); } /* -** Get the RtreeNode for the search point with the lowest score. +** Collation sequence comparison function. The pCtx argument points to +** a UCollator structure previously allocated using ucol_open(). */ -static RtreeNode *rtreeNodeOfFirstSearchPoint(RtreeCursor *pCur, int *pRC){ - sqlite3_int64 id; - int ii = 1 - pCur->bPoint; - assert( ii==0 || ii==1 ); - assert( pCur->bPoint || pCur->nPoint ); - if( pCur->aNode[ii]==0 ){ - assert( pRC!=0 ); - id = ii ? pCur->aPoint[0].id : pCur->sPoint.id; - *pRC = nodeAcquire(RTREE_OF_CURSOR(pCur), id, 0, &pCur->aNode[ii]); +static int icuCollationColl( + void *pCtx, + int nLeft, + const void *zLeft, + int nRight, + const void *zRight +){ + UCollationResult res; + UCollator *p = (UCollator *)pCtx; + res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2); + switch( res ){ + case UCOL_LESS: return -1; + case UCOL_GREATER: return +1; + case UCOL_EQUAL: return 0; } - return pCur->aNode[ii]; + assert(!"Unexpected return value from ucol_strcoll()"); + return 0; } /* -** Push a new element onto the priority queue +** Implementation of the scalar function icu_load_collation(). +** +** This scalar function is used to add ICU collation based collation +** types to an SQLite database connection. It is intended to be called +** as follows: +** +** SELECT icu_load_collation(, ); +** +** Where is a string containing an ICU locale identifier (i.e. +** "en_AU", "tr_TR" etc.) and is the name of the +** collation sequence to create. */ -static RtreeSearchPoint *rtreeEnqueue( - RtreeCursor *pCur, /* The cursor */ - RtreeDValue rScore, /* Score for the new search point */ - u8 iLevel /* Level for the new search point */ +static void icuLoadCollation( + sqlite3_context *p, + int nArg, + sqlite3_value **apArg ){ - int i, j; - RtreeSearchPoint *pNew; - if( pCur->nPoint>=pCur->nPointAlloc ){ - int nNew = pCur->nPointAlloc*2 + 8; - pNew = sqlite3_realloc(pCur->aPoint, nNew*sizeof(pCur->aPoint[0])); - if( pNew==0 ) return 0; - pCur->aPoint = pNew; - pCur->nPointAlloc = nNew; + sqlite3 *db = (sqlite3 *)sqlite3_user_data(p); + UErrorCode status = U_ZERO_ERROR; + const char *zLocale; /* Locale identifier - (eg. "jp_JP") */ + const char *zName; /* SQL Collation sequence name (eg. "japanese") */ + UCollator *pUCollator; /* ICU library collation object */ + int rc; /* Return code from sqlite3_create_collation_x() */ + + assert(nArg==2); + (void)nArg; /* Unused parameter */ + zLocale = (const char *)sqlite3_value_text(apArg[0]); + zName = (const char *)sqlite3_value_text(apArg[1]); + + if( !zLocale || !zName ){ + return; } - i = pCur->nPoint++; - pNew = pCur->aPoint + i; - pNew->rScore = rScore; - pNew->iLevel = iLevel; - assert( iLevel<=RTREE_MAX_DEPTH ); - while( i>0 ){ - RtreeSearchPoint *pParent; - j = (i-1)/2; - pParent = pCur->aPoint + j; - if( rtreeSearchPointCompare(pNew, pParent)>=0 ) break; - rtreeSearchPointSwap(pCur, j, i); - i = j; - pNew = pParent; + + pUCollator = ucol_open(zLocale, &status); + if( !U_SUCCESS(status) ){ + icuFunctionError(p, "ucol_open", status); + return; + } + assert(p); + + rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator, + icuCollationColl, icuCollationDel + ); + if( rc!=SQLITE_OK ){ + ucol_close(pUCollator); + sqlite3_result_error(p, "Error registering collation function", -1); } - return pNew; } /* -** Allocate a new RtreeSearchPoint and return a pointer to it. Return -** NULL if malloc fails. +** Register the ICU extension functions with database db. */ -static RtreeSearchPoint *rtreeSearchPointNew( - RtreeCursor *pCur, /* The cursor */ - RtreeDValue rScore, /* Score for the new search point */ - u8 iLevel /* Level for the new search point */ -){ - RtreeSearchPoint *pNew, *pFirst; - pFirst = rtreeSearchPointFirst(pCur); - pCur->anQueue[iLevel]++; - if( pFirst==0 - || pFirst->rScore>rScore - || (pFirst->rScore==rScore && pFirst->iLevel>iLevel) - ){ - if( pCur->bPoint ){ - int ii; - pNew = rtreeEnqueue(pCur, rScore, iLevel); - if( pNew==0 ) return 0; - ii = (int)(pNew - pCur->aPoint) + 1; - if( iiaNode[ii]==0 ); - pCur->aNode[ii] = pCur->aNode[0]; - }else{ - nodeRelease(RTREE_OF_CURSOR(pCur), pCur->aNode[0]); - } - pCur->aNode[0] = 0; - *pNew = pCur->sPoint; - } - pCur->sPoint.rScore = rScore; - pCur->sPoint.iLevel = iLevel; - pCur->bPoint = 1; - return &pCur->sPoint; - }else{ - return rtreeEnqueue(pCur, rScore, iLevel); - } -} +SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db){ + struct IcuScalar { + const char *zName; /* Function name */ + int nArg; /* Number of arguments */ + int enc; /* Optimal text encoding */ + void *pContext; /* sqlite3_user_data() context */ + void (*xFunc)(sqlite3_context*,int,sqlite3_value**); + } scalars[] = { + {"regexp", 2, SQLITE_ANY, 0, icuRegexpFunc}, -#if 0 -/* Tracing routines for the RtreeSearchPoint queue */ -static void tracePoint(RtreeSearchPoint *p, int idx, RtreeCursor *pCur){ - if( idx<0 ){ printf(" s"); }else{ printf("%2d", idx); } - printf(" %d.%05lld.%02d %g %d", - p->iLevel, p->id, p->iCell, p->rScore, p->eWithin - ); - idx++; - if( idxaNode[idx]); - }else{ - printf("\n"); + {"lower", 1, SQLITE_UTF16, 0, icuCaseFunc16}, + {"lower", 2, SQLITE_UTF16, 0, icuCaseFunc16}, + {"upper", 1, SQLITE_UTF16, (void*)1, icuCaseFunc16}, + {"upper", 2, SQLITE_UTF16, (void*)1, icuCaseFunc16}, + + {"lower", 1, SQLITE_UTF8, 0, icuCaseFunc16}, + {"lower", 2, SQLITE_UTF8, 0, icuCaseFunc16}, + {"upper", 1, SQLITE_UTF8, (void*)1, icuCaseFunc16}, + {"upper", 2, SQLITE_UTF8, (void*)1, icuCaseFunc16}, + + {"like", 2, SQLITE_UTF8, 0, icuLikeFunc}, + {"like", 3, SQLITE_UTF8, 0, icuLikeFunc}, + + {"icu_load_collation", 2, SQLITE_UTF8, (void*)db, icuLoadCollation}, + }; + + int rc = SQLITE_OK; + int i; + + for(i=0; rc==SQLITE_OK && i<(int)(sizeof(scalars)/sizeof(scalars[0])); i++){ + struct IcuScalar *p = &scalars[i]; + rc = sqlite3_create_function( + db, p->zName, p->nArg, p->enc, p->pContext, p->xFunc, 0, 0 + ); } + + return rc; } -static void traceQueue(RtreeCursor *pCur, const char *zPrefix){ - int ii; - printf("=== %9s ", zPrefix); - if( pCur->bPoint ){ - tracePoint(&pCur->sPoint, -1, pCur); - } - for(ii=0; iinPoint; ii++){ - if( ii>0 || pCur->bPoint ) printf(" "); - tracePoint(&pCur->aPoint[ii], ii, pCur); - } + +#if !SQLITE_CORE +#ifdef _WIN32 +__declspec(dllexport) +#endif +SQLITE_API int SQLITE_STDCALL sqlite3_icu_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi +){ + SQLITE_EXTENSION_INIT2(pApi) + return sqlite3IcuInit(db); } -# define RTREE_QUEUE_TRACE(A,B) traceQueue(A,B) -#else -# define RTREE_QUEUE_TRACE(A,B) /* no-op */ #endif -/* Remove the search point with the lowest current score. +#endif + +/************** End of icu.c *************************************************/ +/************** Begin file fts3_icu.c ****************************************/ +/* +** 2007 June 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file implements a tokenizer for fts3 based on the ICU library. */ -static void rtreeSearchPointPop(RtreeCursor *p){ - int i, j, k, n; - i = 1 - p->bPoint; - assert( i==0 || i==1 ); - if( p->aNode[i] ){ - nodeRelease(RTREE_OF_CURSOR(p), p->aNode[i]); - p->aNode[i] = 0; - } - if( p->bPoint ){ - p->anQueue[p->sPoint.iLevel]--; - p->bPoint = 0; - }else if( p->nPoint ){ - p->anQueue[p->aPoint[0].iLevel]--; - n = --p->nPoint; - p->aPoint[0] = p->aPoint[n]; - if( naNode[1] = p->aNode[n+1]; - p->aNode[n+1] = 0; - } - i = 0; - while( (j = i*2+1)aPoint[k], &p->aPoint[j])<0 ){ - if( rtreeSearchPointCompare(&p->aPoint[k], &p->aPoint[i])<0 ){ - rtreeSearchPointSwap(p, i, k); - i = k; - }else{ - break; - } - }else{ - if( rtreeSearchPointCompare(&p->aPoint[j], &p->aPoint[i])<0 ){ - rtreeSearchPointSwap(p, i, j); - i = j; - }else{ - break; - } - } - } - } -} +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) +#ifdef SQLITE_ENABLE_ICU + +/* #include */ +/* #include */ +/* #include "fts3_tokenizer.h" */ + +#include +/* #include */ +/* #include */ +#include + +typedef struct IcuTokenizer IcuTokenizer; +typedef struct IcuCursor IcuCursor; + +struct IcuTokenizer { + sqlite3_tokenizer base; + char *zLocale; +}; + +struct IcuCursor { + sqlite3_tokenizer_cursor base; + + UBreakIterator *pIter; /* ICU break-iterator object */ + int nChar; /* Number of UChar elements in pInput */ + UChar *aChar; /* Copy of input using utf-16 encoding */ + int *aOffset; /* Offsets of each character in utf-8 input */ + + int nBuffer; + char *zBuffer; + int iToken; +}; /* -** Continue the search on cursor pCur until the front of the queue -** contains an entry suitable for returning as a result-set row, -** or until the RtreeSearchPoint queue is empty, indicating that the -** query has completed. +** Create a new tokenizer instance. */ -static int rtreeStepToLeaf(RtreeCursor *pCur){ - RtreeSearchPoint *p; - Rtree *pRtree = RTREE_OF_CURSOR(pCur); - RtreeNode *pNode; - int eWithin; - int rc = SQLITE_OK; - int nCell; - int nConstraint = pCur->nConstraint; - int ii; - int eInt; - RtreeSearchPoint x; +static int icuCreate( + int argc, /* Number of entries in argv[] */ + const char * const *argv, /* Tokenizer creation arguments */ + sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ +){ + IcuTokenizer *p; + int n = 0; - eInt = pRtree->eCoordType==RTREE_COORD_INT32; - while( (p = rtreeSearchPointFirst(pCur))!=0 && p->iLevel>0 ){ - pNode = rtreeNodeOfFirstSearchPoint(pCur, &rc); - if( rc ) return rc; - nCell = NCELL(pNode); - assert( nCell<200 ); - while( p->iCellzData + (4+pRtree->nBytesPerCell*p->iCell); - eWithin = FULLY_WITHIN; - for(ii=0; iiaConstraint + ii; - if( pConstraint->op>=RTREE_MATCH ){ - rc = rtreeCallbackConstraint(pConstraint, eInt, pCellData, p, - &rScore, &eWithin); - if( rc ) return rc; - }else if( p->iLevel==1 ){ - rtreeLeafConstraint(pConstraint, eInt, pCellData, &eWithin); - }else{ - rtreeNonleafConstraint(pConstraint, eInt, pCellData, &eWithin); - } - if( eWithin==NOT_WITHIN ) break; - } - p->iCell++; - if( eWithin==NOT_WITHIN ) continue; - x.iLevel = p->iLevel - 1; - if( x.iLevel ){ - x.id = readInt64(pCellData); - x.iCell = 0; - }else{ - x.id = p->id; - x.iCell = p->iCell - 1; - } - if( p->iCell>=nCell ){ - RTREE_QUEUE_TRACE(pCur, "POP-S:"); - rtreeSearchPointPop(pCur); - } - if( rScoreeWithin = eWithin; - p->id = x.id; - p->iCell = x.iCell; - RTREE_QUEUE_TRACE(pCur, "PUSH-S:"); - break; - } - if( p->iCell>=nCell ){ - RTREE_QUEUE_TRACE(pCur, "POP-Se:"); - rtreeSearchPointPop(pCur); - } + if( argc>0 ){ + n = strlen(argv[0])+1; } - pCur->atEOF = p==0; + p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n); + if( !p ){ + return SQLITE_NOMEM; + } + memset(p, 0, sizeof(IcuTokenizer)); + + if( n ){ + p->zLocale = (char *)&p[1]; + memcpy(p->zLocale, argv[0], n); + } + + *ppTokenizer = (sqlite3_tokenizer *)p; + return SQLITE_OK; } -/* -** Rtree virtual table module xNext method. +/* +** Destroy a tokenizer */ -static int rtreeNext(sqlite3_vtab_cursor *pVtabCursor){ - RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; - int rc = SQLITE_OK; - - /* Move to the next entry that matches the configured constraints. */ - RTREE_QUEUE_TRACE(pCsr, "POP-Nx:"); - rtreeSearchPointPop(pCsr); - rc = rtreeStepToLeaf(pCsr); - return rc; +static int icuDestroy(sqlite3_tokenizer *pTokenizer){ + IcuTokenizer *p = (IcuTokenizer *)pTokenizer; + sqlite3_free(p); + return SQLITE_OK; } -/* -** Rtree virtual table module xRowid method. +/* +** Prepare to begin tokenizing a particular string. The input +** string to be tokenized is pInput[0..nBytes-1]. A cursor +** used to incrementally tokenize this string is returned in +** *ppCursor. */ -static int rtreeRowid(sqlite3_vtab_cursor *pVtabCursor, sqlite_int64 *pRowid){ - RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; - RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr); - int rc = SQLITE_OK; - RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc); - if( rc==SQLITE_OK && p ){ - *pRowid = nodeGetRowid(RTREE_OF_CURSOR(pCsr), pNode, p->iCell); +static int icuOpen( + sqlite3_tokenizer *pTokenizer, /* The tokenizer */ + const char *zInput, /* Input string */ + int nInput, /* Length of zInput in bytes */ + sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ +){ + IcuTokenizer *p = (IcuTokenizer *)pTokenizer; + IcuCursor *pCsr; + + const int32_t opt = U_FOLD_CASE_DEFAULT; + UErrorCode status = U_ZERO_ERROR; + int nChar; + + UChar32 c; + int iInput = 0; + int iOut = 0; + + *ppCursor = 0; + + if( zInput==0 ){ + nInput = 0; + zInput = ""; + }else if( nInput<0 ){ + nInput = strlen(zInput); } - return rc; -} + nChar = nInput+1; + pCsr = (IcuCursor *)sqlite3_malloc( + sizeof(IcuCursor) + /* IcuCursor */ + ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */ + (nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */ + ); + if( !pCsr ){ + return SQLITE_NOMEM; + } + memset(pCsr, 0, sizeof(IcuCursor)); + pCsr->aChar = (UChar *)&pCsr[1]; + pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3]; -/* -** Rtree virtual table module xColumn method. -*/ -static int rtreeColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ - Rtree *pRtree = (Rtree *)cur->pVtab; - RtreeCursor *pCsr = (RtreeCursor *)cur; - RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr); - RtreeCoord c; - int rc = SQLITE_OK; - RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc); + pCsr->aOffset[iOut] = iInput; + U8_NEXT(zInput, iInput, nInput, c); + while( c>0 ){ + int isError = 0; + c = u_foldCase(c, opt); + U16_APPEND(pCsr->aChar, iOut, nChar, c, isError); + if( isError ){ + sqlite3_free(pCsr); + return SQLITE_ERROR; + } + pCsr->aOffset[iOut] = iInput; - if( rc ) return rc; - if( p==0 ) return SQLITE_OK; - if( i==0 ){ - sqlite3_result_int64(ctx, nodeGetRowid(pRtree, pNode, p->iCell)); - }else{ - if( rc ) return rc; - nodeGetCoord(pRtree, pNode, p->iCell, i-1, &c); -#ifndef SQLITE_RTREE_INT_ONLY - if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ - sqlite3_result_double(ctx, c.f); - }else -#endif - { - assert( pRtree->eCoordType==RTREE_COORD_INT32 ); - sqlite3_result_int(ctx, c.i); + if( iInputpIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status); + if( !U_SUCCESS(status) ){ + sqlite3_free(pCsr); + return SQLITE_ERROR; + } + pCsr->nChar = iOut; + + ubrk_first(pCsr->pIter); + *ppCursor = (sqlite3_tokenizer_cursor *)pCsr; return SQLITE_OK; } -/* -** Use nodeAcquire() to obtain the leaf node containing the record with -** rowid iRowid. If successful, set *ppLeaf to point to the node and -** return SQLITE_OK. If there is no such record in the table, set -** *ppLeaf to 0 and return SQLITE_OK. If an error occurs, set *ppLeaf -** to zero and return an SQLite error code. +/* +** Close a tokenization cursor previously opened by a call to icuOpen(). */ -static int findLeafNode( - Rtree *pRtree, /* RTree to search */ - i64 iRowid, /* The rowid searching for */ - RtreeNode **ppLeaf, /* Write the node here */ - sqlite3_int64 *piNode /* Write the node-id here */ -){ - int rc; - *ppLeaf = 0; - sqlite3_bind_int64(pRtree->pReadRowid, 1, iRowid); - if( sqlite3_step(pRtree->pReadRowid)==SQLITE_ROW ){ - i64 iNode = sqlite3_column_int64(pRtree->pReadRowid, 0); - if( piNode ) *piNode = iNode; - rc = nodeAcquire(pRtree, iNode, 0, ppLeaf); - sqlite3_reset(pRtree->pReadRowid); - }else{ - rc = sqlite3_reset(pRtree->pReadRowid); - } - return rc; +static int icuClose(sqlite3_tokenizer_cursor *pCursor){ + IcuCursor *pCsr = (IcuCursor *)pCursor; + ubrk_close(pCsr->pIter); + sqlite3_free(pCsr->zBuffer); + sqlite3_free(pCsr); + return SQLITE_OK; } /* -** This function is called to configure the RtreeConstraint object passed -** as the second argument for a MATCH constraint. The value passed as the -** first argument to this function is the right-hand operand to the MATCH -** operator. +** Extract the next token from a tokenization cursor. */ -static int deserializeGeometry(sqlite3_value *pValue, RtreeConstraint *pCons){ - RtreeMatchArg *pBlob; /* BLOB returned by geometry function */ - sqlite3_rtree_query_info *pInfo; /* Callback information */ - int nBlob; /* Size of the geometry function blob */ - int nExpected; /* Expected size of the BLOB */ +static int icuNext( + sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ + const char **ppToken, /* OUT: *ppToken is the token text */ + int *pnBytes, /* OUT: Number of bytes in token */ + int *piStartOffset, /* OUT: Starting offset of token */ + int *piEndOffset, /* OUT: Ending offset of token */ + int *piPosition /* OUT: Position integer of token */ +){ + IcuCursor *pCsr = (IcuCursor *)pCursor; - /* Check that value is actually a blob. */ - if( sqlite3_value_type(pValue)!=SQLITE_BLOB ) return SQLITE_ERROR; + int iStart = 0; + int iEnd = 0; + int nByte = 0; - /* Check that the blob is roughly the right size. */ - nBlob = sqlite3_value_bytes(pValue); - if( nBlob<(int)sizeof(RtreeMatchArg) - || ((nBlob-sizeof(RtreeMatchArg))%sizeof(RtreeDValue))!=0 - ){ - return SQLITE_ERROR; - } + while( iStart==iEnd ){ + UChar32 c; - pInfo = (sqlite3_rtree_query_info*)sqlite3_malloc( sizeof(*pInfo)+nBlob ); - if( !pInfo ) return SQLITE_NOMEM; - memset(pInfo, 0, sizeof(*pInfo)); - pBlob = (RtreeMatchArg*)&pInfo[1]; + iStart = ubrk_current(pCsr->pIter); + iEnd = ubrk_next(pCsr->pIter); + if( iEnd==UBRK_DONE ){ + return SQLITE_DONE; + } - memcpy(pBlob, sqlite3_value_blob(pValue), nBlob); - nExpected = (int)(sizeof(RtreeMatchArg) + - (pBlob->nParam-1)*sizeof(RtreeDValue)); - if( pBlob->magic!=RTREE_GEOMETRY_MAGIC || nBlob!=nExpected ){ - sqlite3_free(pInfo); - return SQLITE_ERROR; + while( iStartaChar, iWhite, pCsr->nChar, c); + if( u_isspace(c) ){ + iStart = iWhite; + }else{ + break; + } + } + assert(iStart<=iEnd); } - pInfo->pContext = pBlob->cb.pContext; - pInfo->nParam = pBlob->nParam; - pInfo->aParam = pBlob->aParam; - if( pBlob->cb.xGeom ){ - pCons->u.xGeom = pBlob->cb.xGeom; - }else{ - pCons->op = RTREE_QUERY; - pCons->u.xQueryFunc = pBlob->cb.xQueryFunc; - } - pCons->pInfo = pInfo; + do { + UErrorCode status = U_ZERO_ERROR; + if( nByte ){ + char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte); + if( !zNew ){ + return SQLITE_NOMEM; + } + pCsr->zBuffer = zNew; + pCsr->nBuffer = nByte; + } + + u_strToUTF8( + pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */ + &pCsr->aChar[iStart], iEnd-iStart, /* Input vars */ + &status /* Output success/failure */ + ); + } while( nByte>pCsr->nBuffer ); + + *ppToken = pCsr->zBuffer; + *pnBytes = nByte; + *piStartOffset = pCsr->aOffset[iStart]; + *piEndOffset = pCsr->aOffset[iEnd]; + *piPosition = pCsr->iToken++; + return SQLITE_OK; } -/* -** Rtree virtual table module xFilter method. +/* +** The set of routines that implement the simple tokenizer */ -static int rtreeFilter( - sqlite3_vtab_cursor *pVtabCursor, - int idxNum, const char *idxStr, - int argc, sqlite3_value **argv +static const sqlite3_tokenizer_module icuTokenizerModule = { + 0, /* iVersion */ + icuCreate, /* xCreate */ + icuDestroy, /* xCreate */ + icuOpen, /* xOpen */ + icuClose, /* xClose */ + icuNext, /* xNext */ + 0, /* xLanguageid */ +}; + +/* +** Set *ppModule to point at the implementation of the ICU tokenizer. +*/ +SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule( + sqlite3_tokenizer_module const**ppModule ){ - Rtree *pRtree = (Rtree *)pVtabCursor->pVtab; - RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; - RtreeNode *pRoot = 0; - int ii; - int rc = SQLITE_OK; - int iCell = 0; + *ppModule = &icuTokenizerModule; +} - rtreeReference(pRtree); +#endif /* defined(SQLITE_ENABLE_ICU) */ +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ - /* Reset the cursor to the same state as rtreeOpen() leaves it in. */ - freeCursorConstraints(pCsr); - sqlite3_free(pCsr->aPoint); - memset(pCsr, 0, sizeof(RtreeCursor)); - pCsr->base.pVtab = (sqlite3_vtab*)pRtree; +/************** End of fts3_icu.c ********************************************/ +/************** Begin file sqlite3rbu.c **************************************/ +/* +** 2014 August 30 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** +** OVERVIEW +** +** The RBU extension requires that the RBU update be packaged as an +** SQLite database. The tables it expects to find are described in +** sqlite3rbu.h. Essentially, for each table xyz in the target database +** that the user wishes to write to, a corresponding data_xyz table is +** created in the RBU database and populated with one row for each row to +** update, insert or delete from the target table. +** +** The update proceeds in three stages: +** +** 1) The database is updated. The modified database pages are written +** to a *-oal file. A *-oal file is just like a *-wal file, except +** that it is named "-oal" instead of "-wal". +** Because regular SQLite clients do not look for file named +** "-oal", they go on using the original database in +** rollback mode while the *-oal file is being generated. +** +** During this stage RBU does not update the database by writing +** directly to the target tables. Instead it creates "imposter" +** tables using the SQLITE_TESTCTRL_IMPOSTER interface that it uses +** to update each b-tree individually. All updates required by each +** b-tree are completed before moving on to the next, and all +** updates are done in sorted key order. +** +** 2) The "-oal" file is moved to the equivalent "-wal" +** location using a call to rename(2). Before doing this the RBU +** module takes an EXCLUSIVE lock on the database file, ensuring +** that there are no other active readers. +** +** Once the EXCLUSIVE lock is released, any other database readers +** detect the new *-wal file and read the database in wal mode. At +** this point they see the new version of the database - including +** the updates made as part of the RBU update. +** +** 3) The new *-wal file is checkpointed. This proceeds in the same way +** as a regular database checkpoint, except that a single frame is +** checkpointed each time sqlite3rbu_step() is called. If the RBU +** handle is closed before the entire *-wal file is checkpointed, +** the checkpoint progress is saved in the RBU database and the +** checkpoint can be resumed by another RBU client at some point in +** the future. +** +** POTENTIAL PROBLEMS +** +** The rename() call might not be portable. And RBU is not currently +** syncing the directory after renaming the file. +** +** When state is saved, any commit to the *-oal file and the commit to +** the RBU update database are not atomic. So if the power fails at the +** wrong moment they might get out of sync. As the main database will be +** committed before the RBU update database this will likely either just +** pass unnoticed, or result in SQLITE_CONSTRAINT errors (due to UNIQUE +** constraint violations). +** +** If some client does modify the target database mid RBU update, or some +** other error occurs, the RBU extension will keep throwing errors. It's +** not really clear how to get out of this state. The system could just +** by delete the RBU update database and *-oal file and have the device +** download the update again and start over. +** +** At present, for an UPDATE, both the new.* and old.* records are +** collected in the rbu_xyz table. And for both UPDATEs and DELETEs all +** fields are collected. This means we're probably writing a lot more +** data to disk when saving the state of an ongoing update to the RBU +** update database than is strictly necessary. +** +*/ - pCsr->iStrategy = idxNum; - if( idxNum==1 ){ - /* Special case - lookup by rowid. */ - RtreeNode *pLeaf; /* Leaf on which the required cell resides */ - RtreeSearchPoint *p; /* Search point for the the leaf */ - i64 iRowid = sqlite3_value_int64(argv[0]); - i64 iNode = 0; - rc = findLeafNode(pRtree, iRowid, &pLeaf, &iNode); - if( rc==SQLITE_OK && pLeaf!=0 ){ - p = rtreeSearchPointNew(pCsr, RTREE_ZERO, 0); - assert( p!=0 ); /* Always returns pCsr->sPoint */ - pCsr->aNode[0] = pLeaf; - p->id = iNode; - p->eWithin = PARTLY_WITHIN; - rc = nodeRowidIndex(pRtree, pLeaf, iRowid, &iCell); - p->iCell = iCell; - RTREE_QUEUE_TRACE(pCsr, "PUSH-F1:"); - }else{ - pCsr->atEOF = 1; - } - }else{ - /* Normal case - r-tree scan. Set up the RtreeCursor.aConstraint array - ** with the configured constraints. - */ - rc = nodeAcquire(pRtree, 1, 0, &pRoot); - if( rc==SQLITE_OK && argc>0 ){ - pCsr->aConstraint = sqlite3_malloc(sizeof(RtreeConstraint)*argc); - pCsr->nConstraint = argc; - if( !pCsr->aConstraint ){ - rc = SQLITE_NOMEM; - }else{ - memset(pCsr->aConstraint, 0, sizeof(RtreeConstraint)*argc); - memset(pCsr->anQueue, 0, sizeof(u32)*(pRtree->iDepth + 1)); - assert( (idxStr==0 && argc==0) - || (idxStr && (int)strlen(idxStr)==argc*2) ); - for(ii=0; iiaConstraint[ii]; - p->op = idxStr[ii*2]; - p->iCoord = idxStr[ii*2+1]-'0'; - if( p->op>=RTREE_MATCH ){ - /* A MATCH operator. The right-hand-side must be a blob that - ** can be cast into an RtreeMatchArg object. One created using - ** an sqlite3_rtree_geometry_callback() SQL user function. - */ - rc = deserializeGeometry(argv[ii], p); - if( rc!=SQLITE_OK ){ - break; - } - p->pInfo->nCoord = pRtree->nDim*2; - p->pInfo->anQueue = pCsr->anQueue; - p->pInfo->mxLevel = pRtree->iDepth + 1; - }else{ -#ifdef SQLITE_RTREE_INT_ONLY - p->u.rValue = sqlite3_value_int64(argv[ii]); -#else - p->u.rValue = sqlite3_value_double(argv[ii]); +/* #include */ +/* #include */ +/* #include */ + +#if !defined(_WIN32) +/* # include */ #endif - } - } - } - } - if( rc==SQLITE_OK ){ - RtreeSearchPoint *pNew; - pNew = rtreeSearchPointNew(pCsr, RTREE_ZERO, pRtree->iDepth+1); - if( pNew==0 ) return SQLITE_NOMEM; - pNew->id = 1; - pNew->iCell = 0; - pNew->eWithin = PARTLY_WITHIN; - assert( pCsr->bPoint==1 ); - pCsr->aNode[0] = pRoot; - pRoot = 0; - RTREE_QUEUE_TRACE(pCsr, "PUSH-Fm:"); - rc = rtreeStepToLeaf(pCsr); - } - } - nodeRelease(pRtree, pRoot); - rtreeRelease(pRtree); - return rc; -} +/* #include "sqlite3.h" */ + +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_RBU) +/************** Include sqlite3rbu.h in the middle of sqlite3rbu.c ***********/ +/************** Begin file sqlite3rbu.h **************************************/ +/* +** 2014 August 30 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains the public interface for the RBU extension. +*/ + +/* +** SUMMARY +** +** Writing a transaction containing a large number of operations on +** b-tree indexes that are collectively larger than the available cache +** memory can be very inefficient. +** +** The problem is that in order to update a b-tree, the leaf page (at least) +** containing the entry being inserted or deleted must be modified. If the +** working set of leaves is larger than the available cache memory, then a +** single leaf that is modified more than once as part of the transaction +** may be loaded from or written to the persistent media multiple times. +** Additionally, because the index updates are likely to be applied in +** random order, access to pages within the database is also likely to be in +** random order, which is itself quite inefficient. +** +** One way to improve the situation is to sort the operations on each index +** by index key before applying them to the b-tree. This leads to an IO +** pattern that resembles a single linear scan through the index b-tree, +** and all but guarantees each modified leaf page is loaded and stored +** exactly once. SQLite uses this trick to improve the performance of +** CREATE INDEX commands. This extension allows it to be used to improve +** the performance of large transactions on existing databases. +** +** Additionally, this extension allows the work involved in writing the +** large transaction to be broken down into sub-transactions performed +** sequentially by separate processes. This is useful if the system cannot +** guarantee that a single update process will run for long enough to apply +** the entire update, for example because the update is being applied on a +** mobile device that is frequently rebooted. Even after the writer process +** has committed one or more sub-transactions, other database clients continue +** to read from the original database snapshot. In other words, partially +** applied transactions are not visible to other clients. +** +** "RBU" stands for "Resumable Bulk Update". As in a large database update +** transmitted via a wireless network to a mobile device. A transaction +** applied using this extension is hence refered to as an "RBU update". +** +** +** LIMITATIONS +** +** An "RBU update" transaction is subject to the following limitations: +** +** * The transaction must consist of INSERT, UPDATE and DELETE operations +** only. +** +** * INSERT statements may not use any default values. +** +** * UPDATE and DELETE statements must identify their target rows by +** non-NULL PRIMARY KEY values. Rows with NULL values stored in PRIMARY +** KEY fields may not be updated or deleted. If the table being written +** has no PRIMARY KEY, affected rows must be identified by rowid. +** +** * UPDATE statements may not modify PRIMARY KEY columns. +** +** * No triggers will be fired. +** +** * No foreign key violations are detected or reported. +** +** * CHECK constraints are not enforced. +** +** * No constraint handling mode except for "OR ROLLBACK" is supported. +** +** +** PREPARATION +** +** An "RBU update" is stored as a separate SQLite database. A database +** containing an RBU update is an "RBU database". For each table in the +** target database to be updated, the RBU database should contain a table +** named "data_" containing the same set of columns as the +** target table, and one more - "rbu_control". The data_% table should +** have no PRIMARY KEY or UNIQUE constraints, but each column should have +** the same type as the corresponding column in the target database. +** The "rbu_control" column should have no type at all. For example, if +** the target database contains: +** +** CREATE TABLE t1(a INTEGER PRIMARY KEY, b TEXT, c UNIQUE); +** +** Then the RBU database should contain: +** +** CREATE TABLE data_t1(a INTEGER, b TEXT, c, rbu_control); +** +** The order of the columns in the data_% table does not matter. +** +** If the target database table is a virtual table or a table that has no +** PRIMARY KEY declaration, the data_% table must also contain a column +** named "rbu_rowid". This column is mapped to the tables implicit primary +** key column - "rowid". Virtual tables for which the "rowid" column does +** not function like a primary key value cannot be updated using RBU. For +** example, if the target db contains either of the following: +** +** CREATE VIRTUAL TABLE x1 USING fts3(a, b); +** CREATE TABLE x1(a, b) +** +** then the RBU database should contain: +** +** CREATE TABLE data_x1(a, b, rbu_rowid, rbu_control); +** +** All non-hidden columns (i.e. all columns matched by "SELECT *") of the +** target table must be present in the input table. For virtual tables, +** hidden columns are optional - they are updated by RBU if present in +** the input table, or not otherwise. For example, to write to an fts4 +** table with a hidden languageid column such as: +** +** CREATE VIRTUAL TABLE ft1 USING fts4(a, b, languageid='langid'); +** +** Either of the following input table schemas may be used: +** +** CREATE TABLE data_ft1(a, b, langid, rbu_rowid, rbu_control); +** CREATE TABLE data_ft1(a, b, rbu_rowid, rbu_control); +** +** For each row to INSERT into the target database as part of the RBU +** update, the corresponding data_% table should contain a single record +** with the "rbu_control" column set to contain integer value 0. The +** other columns should be set to the values that make up the new record +** to insert. +** +** If the target database table has an INTEGER PRIMARY KEY, it is not +** possible to insert a NULL value into the IPK column. Attempting to +** do so results in an SQLITE_MISMATCH error. +** +** For each row to DELETE from the target database as part of the RBU +** update, the corresponding data_% table should contain a single record +** with the "rbu_control" column set to contain integer value 1. The +** real primary key values of the row to delete should be stored in the +** corresponding columns of the data_% table. The values stored in the +** other columns are not used. +** +** For each row to UPDATE from the target database as part of the RBU +** update, the corresponding data_% table should contain a single record +** with the "rbu_control" column set to contain a value of type text. +** The real primary key values identifying the row to update should be +** stored in the corresponding columns of the data_% table row, as should +** the new values of all columns being update. The text value in the +** "rbu_control" column must contain the same number of characters as +** there are columns in the target database table, and must consist entirely +** of 'x' and '.' characters (or in some special cases 'd' - see below). For +** each column that is being updated, the corresponding character is set to +** 'x'. For those that remain as they are, the corresponding character of the +** rbu_control value should be set to '.'. For example, given the tables +** above, the update statement: +** +** UPDATE t1 SET c = 'usa' WHERE a = 4; +** +** is represented by the data_t1 row created by: +** +** INSERT INTO data_t1(a, b, c, rbu_control) VALUES(4, NULL, 'usa', '..x'); +** +** Instead of an 'x' character, characters of the rbu_control value specified +** for UPDATEs may also be set to 'd'. In this case, instead of updating the +** target table with the value stored in the corresponding data_% column, the +** user-defined SQL function "rbu_delta()" is invoked and the result stored in +** the target table column. rbu_delta() is invoked with two arguments - the +** original value currently stored in the target table column and the +** value specified in the data_xxx table. +** +** For example, this row: +** +** INSERT INTO data_t1(a, b, c, rbu_control) VALUES(4, NULL, 'usa', '..d'); +** +** is similar to an UPDATE statement such as: +** +** UPDATE t1 SET c = rbu_delta(c, 'usa') WHERE a = 4; +** +** If the target database table is a virtual table or a table with no PRIMARY +** KEY, the rbu_control value should not include a character corresponding +** to the rbu_rowid value. For example, this: +** +** INSERT INTO data_ft1(a, b, rbu_rowid, rbu_control) +** VALUES(NULL, 'usa', 12, '.x'); +** +** causes a result similar to: +** +** UPDATE ft1 SET b = 'usa' WHERE rowid = 12; +** +** The data_xxx tables themselves should have no PRIMARY KEY declarations. +** However, RBU is more efficient if reading the rows in from each data_xxx +** table in "rowid" order is roughly the same as reading them sorted by +** the PRIMARY KEY of the corresponding target database table. In other +** words, rows should be sorted using the destination table PRIMARY KEY +** fields before they are inserted into the data_xxx tables. +** +** USAGE +** +** The API declared below allows an application to apply an RBU update +** stored on disk to an existing target database. Essentially, the +** application: +** +** 1) Opens an RBU handle using the sqlite3rbu_open() function. +** +** 2) Registers any required virtual table modules with the database +** handle returned by sqlite3rbu_db(). Also, if required, register +** the rbu_delta() implementation. +** +** 3) Calls the sqlite3rbu_step() function one or more times on +** the new handle. Each call to sqlite3rbu_step() performs a single +** b-tree operation, so thousands of calls may be required to apply +** a complete update. +** +** 4) Calls sqlite3rbu_close() to close the RBU update handle. If +** sqlite3rbu_step() has been called enough times to completely +** apply the update to the target database, then the RBU database +** is marked as fully applied. Otherwise, the state of the RBU +** update application is saved in the RBU database for later +** resumption. +** +** See comments below for more detail on APIs. +** +** If an update is only partially applied to the target database by the +** time sqlite3rbu_close() is called, various state information is saved +** within the RBU database. This allows subsequent processes to automatically +** resume the RBU update from where it left off. +** +** To remove all RBU extension state information, returning an RBU database +** to its original contents, it is sufficient to drop all tables that begin +** with the prefix "rbu_" +** +** DATABASE LOCKING +** +** An RBU update may not be applied to a database in WAL mode. Attempting +** to do so is an error (SQLITE_ERROR). +** +** While an RBU handle is open, a SHARED lock may be held on the target +** database file. This means it is possible for other clients to read the +** database, but not to write it. +** +** If an RBU update is started and then suspended before it is completed, +** then an external client writes to the database, then attempting to resume +** the suspended RBU update is also an error (SQLITE_BUSY). +*/ + +#ifndef _SQLITE3RBU_H +#define _SQLITE3RBU_H + +/* #include "sqlite3.h" ** Required for error code definitions ** */ + +typedef struct sqlite3rbu sqlite3rbu; /* -** Set the pIdxInfo->estimatedRows variable to nRow. Unless this -** extension is currently being used by a version of SQLite too old to -** support estimatedRows. In that case this function is a no-op. +** Open an RBU handle. +** +** Argument zTarget is the path to the target database. Argument zRbu is +** the path to the RBU database. Each call to this function must be matched +** by a call to sqlite3rbu_close(). When opening the databases, RBU passes +** the SQLITE_CONFIG_URI flag to sqlite3_open_v2(). So if either zTarget +** or zRbu begin with "file:", it will be interpreted as an SQLite +** database URI, not a regular file name. +** +** If the zState argument is passed a NULL value, the RBU extension stores +** the current state of the update (how many rows have been updated, which +** indexes are yet to be updated etc.) within the RBU database itself. This +** can be convenient, as it means that the RBU application does not need to +** organize removing a separate state file after the update is concluded. +** Or, if zState is non-NULL, it must be a path to a database file in which +** the RBU extension can store the state of the update. +** +** When resuming an RBU update, the zState argument must be passed the same +** value as when the RBU update was started. +** +** Once the RBU update is finished, the RBU extension does not +** automatically remove any zState database file, even if it created it. +** +** By default, RBU uses the default VFS to access the files on disk. To +** use a VFS other than the default, an SQLite "file:" URI containing a +** "vfs=..." option may be passed as the zTarget option. +** +** IMPORTANT NOTE FOR ZIPVFS USERS: The RBU extension works with all of +** SQLite's built-in VFSs, including the multiplexor VFS. However it does +** not work out of the box with zipvfs. Refer to the comment describing +** the zipvfs_create_vfs() API below for details on using RBU with zipvfs. */ -static void setEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){ -#if SQLITE_VERSION_NUMBER>=3008002 - if( sqlite3_libversion_number()>=3008002 ){ - pIdxInfo->estimatedRows = nRow; - } -#endif -} +SQLITE_API sqlite3rbu *SQLITE_STDCALL sqlite3rbu_open( + const char *zTarget, + const char *zRbu, + const char *zState +); /* -** Rtree virtual table module xBestIndex method. There are three -** table scan strategies to choose from (in order from most to -** least desirable): +** Internally, each RBU connection uses a separate SQLite database +** connection to access the target and rbu update databases. This +** API allows the application direct access to these database handles. ** -** idxNum idxStr Strategy -** ------------------------------------------------ -** 1 Unused Direct lookup by rowid. -** 2 See below R-tree query or full-table scan. -** ------------------------------------------------ +** The first argument passed to this function must be a valid, open, RBU +** handle. The second argument should be passed zero to access the target +** database handle, or non-zero to access the rbu update database handle. +** Accessing the underlying database handles may be useful in the +** following scenarios: ** -** If strategy 1 is used, then idxStr is not meaningful. If strategy -** 2 is used, idxStr is formatted to contain 2 bytes for each -** constraint used. The first two bytes of idxStr correspond to -** the constraint in sqlite3_index_info.aConstraintUsage[] with -** (argvIndex==1) etc. +** * If any target tables are virtual tables, it may be necessary to +** call sqlite3_create_module() on the target database handle to +** register the required virtual table implementations. ** -** The first of each pair of bytes in idxStr identifies the constraint -** operator as follows: +** * If the data_xxx tables in the RBU source database are virtual +** tables, the application may need to call sqlite3_create_module() on +** the rbu update db handle to any required virtual table +** implementations. ** -** Operator Byte Value -** ---------------------- -** = 0x41 ('A') -** <= 0x42 ('B') -** < 0x43 ('C') -** >= 0x44 ('D') -** > 0x45 ('E') -** MATCH 0x46 ('F') -** ---------------------- +** * If the application uses the "rbu_delta()" feature described above, +** it must use sqlite3_create_function() or similar to register the +** rbu_delta() implementation with the target database handle. ** -** The second of each pair of bytes identifies the coordinate column -** to which the constraint applies. The leftmost coordinate column -** is 'a', the second from the left 'b' etc. +** If an error has occurred, either while opening or stepping the RBU object, +** this function may return NULL. The error code and message may be collected +** when sqlite3rbu_close() is called. */ -static int rtreeBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ - Rtree *pRtree = (Rtree*)tab; - int rc = SQLITE_OK; - int ii; - i64 nRow; /* Estimated rows returned by this scan */ +SQLITE_API sqlite3 *SQLITE_STDCALL sqlite3rbu_db(sqlite3rbu*, int bRbu); - int iIdx = 0; - char zIdxStr[RTREE_MAX_DIMENSIONS*8+1]; - memset(zIdxStr, 0, sizeof(zIdxStr)); +/* +** Do some work towards applying the RBU update to the target db. +** +** Return SQLITE_DONE if the update has been completely applied, or +** SQLITE_OK if no error occurs but there remains work to do to apply +** the RBU update. If an error does occur, some other error code is +** returned. +** +** Once a call to sqlite3rbu_step() has returned a value other than +** SQLITE_OK, all subsequent calls on the same RBU handle are no-ops +** that immediately return the same value. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3rbu_step(sqlite3rbu *pRbu); - assert( pIdxInfo->idxStr==0 ); - for(ii=0; iinConstraint && iIdx<(int)(sizeof(zIdxStr)-1); ii++){ - struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[ii]; +/* +** Close an RBU handle. +** +** If the RBU update has been completely applied, mark the RBU database +** as fully applied. Otherwise, assuming no error has occurred, save the +** current state of the RBU update appliation to the RBU database. +** +** If an error has already occurred as part of an sqlite3rbu_step() +** or sqlite3rbu_open() call, or if one occurs within this function, an +** SQLite error code is returned. Additionally, *pzErrmsg may be set to +** point to a buffer containing a utf-8 formatted English language error +** message. It is the responsibility of the caller to eventually free any +** such buffer using sqlite3_free(). +** +** Otherwise, if no error occurs, this function returns SQLITE_OK if the +** update has been partially applied, or SQLITE_DONE if it has been +** completely applied. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3rbu_close(sqlite3rbu *pRbu, char **pzErrmsg); - if( p->usable && p->iColumn==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ ){ - /* We have an equality constraint on the rowid. Use strategy 1. */ - int jj; - for(jj=0; jjaConstraintUsage[jj].argvIndex = 0; - pIdxInfo->aConstraintUsage[jj].omit = 0; - } - pIdxInfo->idxNum = 1; - pIdxInfo->aConstraintUsage[ii].argvIndex = 1; - pIdxInfo->aConstraintUsage[jj].omit = 1; +/* +** Return the total number of key-value operations (inserts, deletes or +** updates) that have been performed on the target database since the +** current RBU update was started. +*/ +SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3rbu_progress(sqlite3rbu *pRbu); - /* This strategy involves a two rowid lookups on an B-Tree structures - ** and then a linear search of an R-Tree node. This should be - ** considered almost as quick as a direct rowid lookup (for which - ** sqlite uses an internal cost of 0.0). It is expected to return - ** a single row. - */ - pIdxInfo->estimatedCost = 30.0; - setEstimatedRows(pIdxInfo, 1); - return SQLITE_OK; - } +/* +** Create an RBU VFS named zName that accesses the underlying file-system +** via existing VFS zParent. Or, if the zParent parameter is passed NULL, +** then the new RBU VFS uses the default system VFS to access the file-system. +** The new object is registered as a non-default VFS with SQLite before +** returning. +** +** Part of the RBU implementation uses a custom VFS object. Usually, this +** object is created and deleted automatically by RBU. +** +** The exception is for applications that also use zipvfs. In this case, +** the custom VFS must be explicitly created by the user before the RBU +** handle is opened. The RBU VFS should be installed so that the zipvfs +** VFS uses the RBU VFS, which in turn uses any other VFS layers in use +** (for example multiplexor) to access the file-system. For example, +** to assemble an RBU enabled VFS stack that uses both zipvfs and +** multiplexor (error checking omitted): +** +** // Create a VFS named "multiplex" (not the default). +** sqlite3_multiplex_initialize(0, 0); +** +** // Create an rbu VFS named "rbu" that uses multiplexor. If the +** // second argument were replaced with NULL, the "rbu" VFS would +** // access the file-system via the system default VFS, bypassing the +** // multiplexor. +** sqlite3rbu_create_vfs("rbu", "multiplex"); +** +** // Create a zipvfs VFS named "zipvfs" that uses rbu. +** zipvfs_create_vfs_v3("zipvfs", "rbu", 0, xCompressorAlgorithmDetector); +** +** // Make zipvfs the default VFS. +** sqlite3_vfs_register(sqlite3_vfs_find("zipvfs"), 1); +** +** Because the default VFS created above includes a RBU functionality, it +** may be used by RBU clients. Attempting to use RBU with a zipvfs VFS stack +** that does not include the RBU layer results in an error. +** +** The overhead of adding the "rbu" VFS to the system is negligible for +** non-RBU users. There is no harm in an application accessing the +** file-system via "rbu" all the time, even if it only uses RBU functionality +** occasionally. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3rbu_create_vfs(const char *zName, const char *zParent); - if( p->usable && (p->iColumn>0 || p->op==SQLITE_INDEX_CONSTRAINT_MATCH) ){ - u8 op; - switch( p->op ){ - case SQLITE_INDEX_CONSTRAINT_EQ: op = RTREE_EQ; break; - case SQLITE_INDEX_CONSTRAINT_GT: op = RTREE_GT; break; - case SQLITE_INDEX_CONSTRAINT_LE: op = RTREE_LE; break; - case SQLITE_INDEX_CONSTRAINT_LT: op = RTREE_LT; break; - case SQLITE_INDEX_CONSTRAINT_GE: op = RTREE_GE; break; - default: - assert( p->op==SQLITE_INDEX_CONSTRAINT_MATCH ); - op = RTREE_MATCH; - break; - } - zIdxStr[iIdx++] = op; - zIdxStr[iIdx++] = p->iColumn - 1 + '0'; - pIdxInfo->aConstraintUsage[ii].argvIndex = (iIdx/2); - pIdxInfo->aConstraintUsage[ii].omit = 1; - } - } +/* +** Deregister and destroy an RBU vfs created by an earlier call to +** sqlite3rbu_create_vfs(). +** +** VFS objects are not reference counted. If a VFS object is destroyed +** before all database handles that use it have been closed, the results +** are undefined. +*/ +SQLITE_API void SQLITE_STDCALL sqlite3rbu_destroy_vfs(const char *zName); - pIdxInfo->idxNum = 2; - pIdxInfo->needToFreeIdxStr = 1; - if( iIdx>0 && 0==(pIdxInfo->idxStr = sqlite3_mprintf("%s", zIdxStr)) ){ - return SQLITE_NOMEM; - } +#endif /* _SQLITE3RBU_H */ - nRow = pRtree->nRowEst / (iIdx + 1); - pIdxInfo->estimatedCost = (double)6.0 * (double)nRow; - setEstimatedRows(pIdxInfo, nRow); +/************** End of sqlite3rbu.h ******************************************/ +/************** Continuing where we left off in sqlite3rbu.c *****************/ - return rc; -} +/* Maximum number of prepared UPDATE statements held by this module */ +#define SQLITE_RBU_UPDATE_CACHESIZE 16 /* -** Return the N-dimensional volumn of the cell stored in *p. +** Swap two objects of type TYPE. */ -static RtreeDValue cellArea(Rtree *pRtree, RtreeCell *p){ - RtreeDValue area = (RtreeDValue)1; - int ii; - for(ii=0; ii<(pRtree->nDim*2); ii+=2){ - area = (area * (DCOORD(p->aCoord[ii+1]) - DCOORD(p->aCoord[ii]))); - } - return area; -} +#if !defined(SQLITE_AMALGAMATION) +# define SWAP(TYPE,A,B) {TYPE t=A; A=B; B=t;} +#endif + +/* +** The rbu_state table is used to save the state of a partially applied +** update so that it can be resumed later. The table consists of integer +** keys mapped to values as follows: +** +** RBU_STATE_STAGE: +** May be set to integer values 1, 2, 4 or 5. As follows: +** 1: the *-rbu file is currently under construction. +** 2: the *-rbu file has been constructed, but not yet moved +** to the *-wal path. +** 4: the checkpoint is underway. +** 5: the rbu update has been checkpointed. +** +** RBU_STATE_TBL: +** Only valid if STAGE==1. The target database name of the table +** currently being written. +** +** RBU_STATE_IDX: +** Only valid if STAGE==1. The target database name of the index +** currently being written, or NULL if the main table is currently being +** updated. +** +** RBU_STATE_ROW: +** Only valid if STAGE==1. Number of rows already processed for the current +** table/index. +** +** RBU_STATE_PROGRESS: +** Trbul number of sqlite3rbu_step() calls made so far as part of this +** rbu update. +** +** RBU_STATE_CKPT: +** Valid if STAGE==4. The 64-bit checksum associated with the wal-index +** header created by recovering the *-wal file. This is used to detect +** cases when another client appends frames to the *-wal file in the +** middle of an incremental checkpoint (an incremental checkpoint cannot +** be continued if this happens). +** +** RBU_STATE_COOKIE: +** Valid if STAGE==1. The current change-counter cookie value in the +** target db file. +** +** RBU_STATE_OALSZ: +** Valid if STAGE==1. The size in bytes of the *-oal file. +*/ +#define RBU_STATE_STAGE 1 +#define RBU_STATE_TBL 2 +#define RBU_STATE_IDX 3 +#define RBU_STATE_ROW 4 +#define RBU_STATE_PROGRESS 5 +#define RBU_STATE_CKPT 6 +#define RBU_STATE_COOKIE 7 +#define RBU_STATE_OALSZ 8 + +#define RBU_STAGE_OAL 1 +#define RBU_STAGE_MOVE 2 +#define RBU_STAGE_CAPTURE 3 +#define RBU_STAGE_CKPT 4 +#define RBU_STAGE_DONE 5 + + +#define RBU_CREATE_STATE \ + "CREATE TABLE IF NOT EXISTS %s.rbu_state(k INTEGER PRIMARY KEY, v)" + +typedef struct RbuFrame RbuFrame; +typedef struct RbuObjIter RbuObjIter; +typedef struct RbuState RbuState; +typedef struct rbu_vfs rbu_vfs; +typedef struct rbu_file rbu_file; +typedef struct RbuUpdateStmt RbuUpdateStmt; + +#if !defined(SQLITE_AMALGAMATION) +typedef unsigned int u32; +typedef unsigned char u8; +typedef sqlite3_int64 i64; +#endif /* -** Return the margin length of cell p. The margin length is the sum -** of the objects size in each dimension. +** These values must match the values defined in wal.c for the equivalent +** locks. These are not magic numbers as they are part of the SQLite file +** format. */ -static RtreeDValue cellMargin(Rtree *pRtree, RtreeCell *p){ - RtreeDValue margin = (RtreeDValue)0; - int ii; - for(ii=0; ii<(pRtree->nDim*2); ii+=2){ - margin += (DCOORD(p->aCoord[ii+1]) - DCOORD(p->aCoord[ii])); - } - return margin; -} +#define WAL_LOCK_WRITE 0 +#define WAL_LOCK_CKPT 1 +#define WAL_LOCK_READ0 3 /* -** Store the union of cells p1 and p2 in p1. +** A structure to store values read from the rbu_state table in memory. */ -static void cellUnion(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){ - int ii; - if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ - for(ii=0; ii<(pRtree->nDim*2); ii+=2){ - p1->aCoord[ii].f = MIN(p1->aCoord[ii].f, p2->aCoord[ii].f); - p1->aCoord[ii+1].f = MAX(p1->aCoord[ii+1].f, p2->aCoord[ii+1].f); - } - }else{ - for(ii=0; ii<(pRtree->nDim*2); ii+=2){ - p1->aCoord[ii].i = MIN(p1->aCoord[ii].i, p2->aCoord[ii].i); - p1->aCoord[ii+1].i = MAX(p1->aCoord[ii+1].i, p2->aCoord[ii+1].i); - } - } -} +struct RbuState { + int eStage; + char *zTbl; + char *zIdx; + i64 iWalCksum; + int nRow; + i64 nProgress; + u32 iCookie; + i64 iOalSz; +}; + +struct RbuUpdateStmt { + char *zMask; /* Copy of update mask used with pUpdate */ + sqlite3_stmt *pUpdate; /* Last update statement (or NULL) */ + RbuUpdateStmt *pNext; +}; /* -** Return true if the area covered by p2 is a subset of the area covered -** by p1. False otherwise. +** An iterator of this type is used to iterate through all objects in +** the target database that require updating. For each such table, the +** iterator visits, in order: +** +** * the table itself, +** * each index of the table (zero or more points to visit), and +** * a special "cleanup table" state. +** +** abIndexed: +** If the table has no indexes on it, abIndexed is set to NULL. Otherwise, +** it points to an array of flags nTblCol elements in size. The flag is +** set for each column that is either a part of the PK or a part of an +** index. Or clear otherwise. +** */ -static int cellContains(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){ - int ii; - int isInt = (pRtree->eCoordType==RTREE_COORD_INT32); - for(ii=0; ii<(pRtree->nDim*2); ii+=2){ - RtreeCoord *a1 = &p1->aCoord[ii]; - RtreeCoord *a2 = &p2->aCoord[ii]; - if( (!isInt && (a2[0].fa1[1].f)) - || ( isInt && (a2[0].ia1[1].i)) - ){ - return 0; - } - } - return 1; -} +struct RbuObjIter { + sqlite3_stmt *pTblIter; /* Iterate through tables */ + sqlite3_stmt *pIdxIter; /* Index iterator */ + int nTblCol; /* Size of azTblCol[] array */ + char **azTblCol; /* Array of unquoted target column names */ + char **azTblType; /* Array of target column types */ + int *aiSrcOrder; /* src table col -> target table col */ + u8 *abTblPk; /* Array of flags, set on target PK columns */ + u8 *abNotNull; /* Array of flags, set on NOT NULL columns */ + u8 *abIndexed; /* Array of flags, set on indexed & PK cols */ + int eType; /* Table type - an RBU_PK_XXX value */ + + /* Output variables. zTbl==0 implies EOF. */ + int bCleanup; /* True in "cleanup" state */ + const char *zTbl; /* Name of target db table */ + const char *zIdx; /* Name of target db index (or null) */ + int iTnum; /* Root page of current object */ + int iPkTnum; /* If eType==EXTERNAL, root of PK index */ + int bUnique; /* Current index is unique */ + + /* Statements created by rbuObjIterPrepareAll() */ + int nCol; /* Number of columns in current object */ + sqlite3_stmt *pSelect; /* Source data */ + sqlite3_stmt *pInsert; /* Statement for INSERT operations */ + sqlite3_stmt *pDelete; /* Statement for DELETE ops */ + sqlite3_stmt *pTmpInsert; /* Insert into rbu_tmp_$zTbl */ + + /* Last UPDATE used (for PK b-tree updates only), or NULL. */ + RbuUpdateStmt *pRbuUpdate; +}; /* -** Return the amount cell p would grow by if it were unioned with pCell. +** Values for RbuObjIter.eType +** +** 0: Table does not exist (error) +** 1: Table has an implicit rowid. +** 2: Table has an explicit IPK column. +** 3: Table has an external PK index. +** 4: Table is WITHOUT ROWID. +** 5: Table is a virtual table. */ -static RtreeDValue cellGrowth(Rtree *pRtree, RtreeCell *p, RtreeCell *pCell){ - RtreeDValue area; - RtreeCell cell; - memcpy(&cell, p, sizeof(RtreeCell)); - area = cellArea(pRtree, &cell); - cellUnion(pRtree, &cell, pCell); - return (cellArea(pRtree, &cell)-area); -} +#define RBU_PK_NOTABLE 0 +#define RBU_PK_NONE 1 +#define RBU_PK_IPK 2 +#define RBU_PK_EXTERNAL 3 +#define RBU_PK_WITHOUT_ROWID 4 +#define RBU_PK_VTAB 5 -static RtreeDValue cellOverlap( - Rtree *pRtree, - RtreeCell *p, - RtreeCell *aCell, - int nCell -){ - int ii; - RtreeDValue overlap = RTREE_ZERO; - for(ii=0; iinDim*2); jj+=2){ - RtreeDValue x1, x2; - x1 = MAX(DCOORD(p->aCoord[jj]), DCOORD(aCell[ii].aCoord[jj])); - x2 = MIN(DCOORD(p->aCoord[jj+1]), DCOORD(aCell[ii].aCoord[jj+1])); - if( x2iDepth-iHeight); ii++){ - int iCell; - sqlite3_int64 iBest = 0; +/* +** RBU handle. +*/ +struct sqlite3rbu { + int eStage; /* Value of RBU_STATE_STAGE field */ + sqlite3 *dbMain; /* target database handle */ + sqlite3 *dbRbu; /* rbu database handle */ + char *zTarget; /* Path to target db */ + char *zRbu; /* Path to rbu db */ + char *zState; /* Path to state db (or NULL if zRbu) */ + char zStateDb[5]; /* Db name for state ("stat" or "main") */ + int rc; /* Value returned by last rbu_step() call */ + char *zErrmsg; /* Error message if rc!=SQLITE_OK */ + int nStep; /* Rows processed for current object */ + int nProgress; /* Rows processed for all objects */ + RbuObjIter objiter; /* Iterator for skipping through tbl/idx */ + const char *zVfsName; /* Name of automatically created rbu vfs */ + rbu_file *pTargetFd; /* File handle open on target db */ + i64 iOalSz; + + /* The following state variables are used as part of the incremental + ** checkpoint stage (eStage==RBU_STAGE_CKPT). See comments surrounding + ** function rbuSetupCheckpoint() for details. */ + u32 iMaxFrame; /* Largest iWalFrame value in aFrame[] */ + u32 mLock; + int nFrame; /* Entries in aFrame[] array */ + int nFrameAlloc; /* Allocated size of aFrame[] array */ + RbuFrame *aFrame; + int pgsz; + u8 *aBuf; + i64 iWalCksum; +}; - RtreeDValue fMinGrowth = RTREE_ZERO; - RtreeDValue fMinArea = RTREE_ZERO; +/* +** An rbu VFS is implemented using an instance of this structure. +*/ +struct rbu_vfs { + sqlite3_vfs base; /* rbu VFS shim methods */ + sqlite3_vfs *pRealVfs; /* Underlying VFS */ + sqlite3_mutex *mutex; /* Mutex to protect pMain */ + rbu_file *pMain; /* Linked list of main db files */ +}; - int nCell = NCELL(pNode); - RtreeCell cell; - RtreeNode *pChild; +/* +** Each file opened by an rbu VFS is represented by an instance of +** the following structure. +*/ +struct rbu_file { + sqlite3_file base; /* sqlite3_file methods */ + sqlite3_file *pReal; /* Underlying file handle */ + rbu_vfs *pRbuVfs; /* Pointer to the rbu_vfs object */ + sqlite3rbu *pRbu; /* Pointer to rbu object (rbu target only) */ - RtreeCell *aCell = 0; + int openFlags; /* Flags this file was opened with */ + u32 iCookie; /* Cookie value for main db files */ + u8 iWriteVer; /* "write-version" value for main db files */ - /* Select the child node which will be enlarged the least if pCell - ** is inserted into it. Resolve ties by choosing the entry with - ** the smallest area. - */ - for(iCell=0; iCellpParent ){ - RtreeNode *pParent = p->pParent; - RtreeCell cell; - int iCell; - - if( nodeParentIndex(pRtree, p, &iCell) ){ - return SQLITE_CORRUPT_VTAB; - } - - nodeGetCell(pRtree, pParent, iCell, &cell); - if( !cellContains(pRtree, &cell, pCell) ){ - cellUnion(pRtree, &cell, pCell); - nodeOverwriteCell(pRtree, pParent, &cell, iCell); - } - - p = pParent; + int rc; + assert( *pzErrmsg==0 ); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + *ppStmt = 0; + }else{ + rc = prepareAndCollectError(db, ppStmt, pzErrmsg, zSql); + sqlite3_free(zSql); } - return SQLITE_OK; + return rc; } /* -** Write mapping (iRowid->iNode) to the _rowid table. +** Free the RbuObjIter.azTblCol[] and RbuObjIter.abTblPk[] arrays allocated +** by an earlier call to rbuObjIterCacheTableInfo(). */ -static int rowidWrite(Rtree *pRtree, sqlite3_int64 iRowid, sqlite3_int64 iNode){ - sqlite3_bind_int64(pRtree->pWriteRowid, 1, iRowid); - sqlite3_bind_int64(pRtree->pWriteRowid, 2, iNode); - sqlite3_step(pRtree->pWriteRowid); - return sqlite3_reset(pRtree->pWriteRowid); +static void rbuObjIterFreeCols(RbuObjIter *pIter){ + int i; + for(i=0; inTblCol; i++){ + sqlite3_free(pIter->azTblCol[i]); + sqlite3_free(pIter->azTblType[i]); + } + sqlite3_free(pIter->azTblCol); + pIter->azTblCol = 0; + pIter->azTblType = 0; + pIter->aiSrcOrder = 0; + pIter->abTblPk = 0; + pIter->abNotNull = 0; + pIter->nTblCol = 0; + pIter->eType = 0; /* Invalid value */ +} + +/* +** Finalize all statements and free all allocations that are specific to +** the current object (table/index pair). +*/ +static void rbuObjIterClearStatements(RbuObjIter *pIter){ + RbuUpdateStmt *pUp; + + sqlite3_finalize(pIter->pSelect); + sqlite3_finalize(pIter->pInsert); + sqlite3_finalize(pIter->pDelete); + sqlite3_finalize(pIter->pTmpInsert); + pUp = pIter->pRbuUpdate; + while( pUp ){ + RbuUpdateStmt *pTmp = pUp->pNext; + sqlite3_finalize(pUp->pUpdate); + sqlite3_free(pUp); + pUp = pTmp; + } + + pIter->pSelect = 0; + pIter->pInsert = 0; + pIter->pDelete = 0; + pIter->pRbuUpdate = 0; + pIter->pTmpInsert = 0; + pIter->nCol = 0; } /* -** Write mapping (iNode->iPar) to the _parent table. +** Clean up any resources allocated as part of the iterator object passed +** as the only argument. */ -static int parentWrite(Rtree *pRtree, sqlite3_int64 iNode, sqlite3_int64 iPar){ - sqlite3_bind_int64(pRtree->pWriteParent, 1, iNode); - sqlite3_bind_int64(pRtree->pWriteParent, 2, iPar); - sqlite3_step(pRtree->pWriteParent); - return sqlite3_reset(pRtree->pWriteParent); +static void rbuObjIterFinalize(RbuObjIter *pIter){ + rbuObjIterClearStatements(pIter); + sqlite3_finalize(pIter->pTblIter); + sqlite3_finalize(pIter->pIdxIter); + rbuObjIterFreeCols(pIter); + memset(pIter, 0, sizeof(RbuObjIter)); } -static int rtreeInsertCell(Rtree *, RtreeNode *, RtreeCell *, int); - - /* -** Arguments aIdx, aDistance and aSpare all point to arrays of size -** nIdx. The aIdx array contains the set of integers from 0 to -** (nIdx-1) in no particular order. This function sorts the values -** in aIdx according to the indexed values in aDistance. For -** example, assuming the inputs: -** -** aIdx = { 0, 1, 2, 3 } -** aDistance = { 5.0, 2.0, 7.0, 6.0 } -** -** this function sets the aIdx array to contain: -** -** aIdx = { 0, 1, 2, 3 } +** Advance the iterator to the next position. ** -** The aSpare array is used as temporary working space by the -** sorting algorithm. +** If no error occurs, SQLITE_OK is returned and the iterator is left +** pointing to the next entry. Otherwise, an error code and message is +** left in the RBU handle passed as the first argument. A copy of the +** error code is returned. */ -static void SortByDistance( - int *aIdx, - int nIdx, - RtreeDValue *aDistance, - int *aSpare -){ - if( nIdx>1 ){ - int iLeft = 0; - int iRight = 0; - - int nLeft = nIdx/2; - int nRight = nIdx-nLeft; - int *aLeft = aIdx; - int *aRight = &aIdx[nLeft]; - - SortByDistance(aLeft, nLeft, aDistance, aSpare); - SortByDistance(aRight, nRight, aDistance, aSpare); +static int rbuObjIterNext(sqlite3rbu *p, RbuObjIter *pIter){ + int rc = p->rc; + if( rc==SQLITE_OK ){ - memcpy(aSpare, aLeft, sizeof(int)*nLeft); - aLeft = aSpare; + /* Free any SQLite statements used while processing the previous object */ + rbuObjIterClearStatements(pIter); + if( pIter->zIdx==0 ){ + rc = sqlite3_exec(p->dbMain, + "DROP TRIGGER IF EXISTS temp.rbu_insert_tr;" + "DROP TRIGGER IF EXISTS temp.rbu_update1_tr;" + "DROP TRIGGER IF EXISTS temp.rbu_update2_tr;" + "DROP TRIGGER IF EXISTS temp.rbu_delete_tr;" + , 0, 0, &p->zErrmsg + ); + } - while( iLeftbCleanup ){ + rbuObjIterFreeCols(pIter); + pIter->bCleanup = 0; + rc = sqlite3_step(pIter->pTblIter); + if( rc!=SQLITE_ROW ){ + rc = resetAndCollectError(pIter->pTblIter, &p->zErrmsg); + pIter->zTbl = 0; }else{ - aIdx[iLeft+iRight] = aRight[iRight]; - iRight++; + pIter->zTbl = (const char*)sqlite3_column_text(pIter->pTblIter, 0); + rc = pIter->zTbl ? SQLITE_OK : SQLITE_NOMEM; + } + }else{ + if( pIter->zIdx==0 ){ + sqlite3_stmt *pIdx = pIter->pIdxIter; + rc = sqlite3_bind_text(pIdx, 1, pIter->zTbl, -1, SQLITE_STATIC); + } + if( rc==SQLITE_OK ){ + rc = sqlite3_step(pIter->pIdxIter); + if( rc!=SQLITE_ROW ){ + rc = resetAndCollectError(pIter->pIdxIter, &p->zErrmsg); + pIter->bCleanup = 1; + pIter->zIdx = 0; + }else{ + pIter->zIdx = (const char*)sqlite3_column_text(pIter->pIdxIter, 0); + pIter->iTnum = sqlite3_column_int(pIter->pIdxIter, 1); + pIter->bUnique = sqlite3_column_int(pIter->pIdxIter, 2); + rc = pIter->zIdx ? SQLITE_OK : SQLITE_NOMEM; + } } } } + } -#if 0 - /* Check that the sort worked */ - { - int jj; - for(jj=1; jjrc = rc; } + return rc; } /* -** Arguments aIdx, aCell and aSpare all point to arrays of size -** nIdx. The aIdx array contains the set of integers from 0 to -** (nIdx-1) in no particular order. This function sorts the values -** in aIdx according to dimension iDim of the cells in aCell. The -** minimum value of dimension iDim is considered first, the -** maximum used to break ties. +** Initialize the iterator structure passed as the second argument. ** -** The aSpare array is used as temporary working space by the -** sorting algorithm. +** If no error occurs, SQLITE_OK is returned and the iterator is left +** pointing to the first entry. Otherwise, an error code and message is +** left in the RBU handle passed as the first argument. A copy of the +** error code is returned. */ -static void SortByDimension( - Rtree *pRtree, - int *aIdx, - int nIdx, - int iDim, - RtreeCell *aCell, - int *aSpare -){ - if( nIdx>1 ){ +static int rbuObjIterFirst(sqlite3rbu *p, RbuObjIter *pIter){ + int rc; + memset(pIter, 0, sizeof(RbuObjIter)); - int iLeft = 0; - int iRight = 0; + rc = prepareAndCollectError(p->dbRbu, &pIter->pTblIter, &p->zErrmsg, + "SELECT substr(name, 6) FROM sqlite_master " + "WHERE type IN ('table', 'view') AND name LIKE 'data_%'" + ); - int nLeft = nIdx/2; - int nRight = nIdx-nLeft; - int *aLeft = aIdx; - int *aRight = &aIdx[nLeft]; + if( rc==SQLITE_OK ){ + rc = prepareAndCollectError(p->dbMain, &pIter->pIdxIter, &p->zErrmsg, + "SELECT name, rootpage, sql IS NULL OR substr(8, 6)=='UNIQUE' " + " FROM main.sqlite_master " + " WHERE type='index' AND tbl_name = ?" + ); + } - SortByDimension(pRtree, aLeft, nLeft, iDim, aCell, aSpare); - SortByDimension(pRtree, aRight, nRight, iDim, aCell, aSpare); + pIter->bCleanup = 1; + p->rc = rc; + return rbuObjIterNext(p, pIter); +} - memcpy(aSpare, aLeft, sizeof(int)*nLeft); - aLeft = aSpare; - while( iLeftrc is already set to something other +** than SQLITE_OK), then this function returns NULL without modifying the +** stored error code. In this case it still calls sqlite3_free() on any +** printf() parameters associated with %z conversions. +*/ +static char *rbuMPrintf(sqlite3rbu *p, const char *zFmt, ...){ + char *zSql = 0; + va_list ap; + va_start(ap, zFmt); + zSql = sqlite3_vmprintf(zFmt, ap); + if( p->rc==SQLITE_OK ){ + if( zSql==0 ) p->rc = SQLITE_NOMEM; + }else{ + sqlite3_free(zSql); + zSql = 0; + } + va_end(ap); + return zSql; +} -#if 0 - /* Check that the sort worked */ - { - int jj; - for(jj=1; jjrc==SQLITE_OK ){ + if( zSql==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + p->rc = sqlite3_exec(db, zSql, 0, 0, &p->zErrmsg); } -#endif } + sqlite3_free(zSql); + va_end(ap); + return p->rc; } /* -** Implementation of the R*-tree variant of SplitNode from Beckman[1990]. +** Attempt to allocate and return a pointer to a zeroed block of nByte +** bytes. +** +** If an error (i.e. an OOM condition) occurs, return NULL and leave an +** error code in the rbu handle passed as the first argument. Or, if an +** error has already occurred when this function is called, return NULL +** immediately without attempting the allocation or modifying the stored +** error code. */ -static int splitNodeStartree( - Rtree *pRtree, - RtreeCell *aCell, - int nCell, - RtreeNode *pLeft, - RtreeNode *pRight, - RtreeCell *pBboxLeft, - RtreeCell *pBboxRight -){ - int **aaSorted; - int *aSpare; - int ii; +static void *rbuMalloc(sqlite3rbu *p, int nByte){ + void *pRet = 0; + if( p->rc==SQLITE_OK ){ + assert( nByte>0 ); + pRet = sqlite3_malloc(nByte); + if( pRet==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + memset(pRet, 0, nByte); + } + } + return pRet; +} - int iBestDim = 0; - int iBestSplit = 0; - RtreeDValue fBestMargin = RTREE_ZERO; - int nByte = (pRtree->nDim+1)*(sizeof(int*)+nCell*sizeof(int)); +/* +** Allocate and zero the pIter->azTblCol[] and abTblPk[] arrays so that +** there is room for at least nCol elements. If an OOM occurs, store an +** error code in the RBU handle passed as the first argument. +*/ +static void rbuAllocateIterArrays(sqlite3rbu *p, RbuObjIter *pIter, int nCol){ + int nByte = (2*sizeof(char*) + sizeof(int) + 3*sizeof(u8)) * nCol; + char **azNew; - aaSorted = (int **)sqlite3_malloc(nByte); - if( !aaSorted ){ - return SQLITE_NOMEM; + azNew = (char**)rbuMalloc(p, nByte); + if( azNew ){ + pIter->azTblCol = azNew; + pIter->azTblType = &azNew[nCol]; + pIter->aiSrcOrder = (int*)&pIter->azTblType[nCol]; + pIter->abTblPk = (u8*)&pIter->aiSrcOrder[nCol]; + pIter->abNotNull = (u8*)&pIter->abTblPk[nCol]; + pIter->abIndexed = (u8*)&pIter->abNotNull[nCol]; } +} - aSpare = &((int *)&aaSorted[pRtree->nDim])[pRtree->nDim*nCell]; - memset(aaSorted, 0, nByte); - for(ii=0; iinDim; ii++){ - int jj; - aaSorted[ii] = &((int *)&aaSorted[pRtree->nDim])[ii*nCell]; - for(jj=0; jjnDim; ii++){ - RtreeDValue margin = RTREE_ZERO; - RtreeDValue fBestOverlap = RTREE_ZERO; - RtreeDValue fBestArea = RTREE_ZERO; - int iBestLeft = 0; - int nLeft; + return zRet; +} - for( - nLeft=RTREE_MINCELLS(pRtree); - nLeft<=(nCell-RTREE_MINCELLS(pRtree)); - nLeft++ - ){ - RtreeCell left; - RtreeCell right; - int kk; - RtreeDValue overlap; - RtreeDValue area; +/* +** Finalize the statement passed as the second argument. +** +** If the sqlite3_finalize() call indicates that an error occurs, and the +** rbu handle error code is not already set, set the error code and error +** message accordingly. +*/ +static void rbuFinalize(sqlite3rbu *p, sqlite3_stmt *pStmt){ + sqlite3 *db = sqlite3_db_handle(pStmt); + int rc = sqlite3_finalize(pStmt); + if( p->rc==SQLITE_OK && rc!=SQLITE_OK ){ + p->rc = rc; + p->zErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + } +} + +/* Determine the type of a table. +** +** peType is of type (int*), a pointer to an output parameter of type +** (int). This call sets the output parameter as follows, depending +** on the type of the table specified by parameters dbName and zTbl. +** +** RBU_PK_NOTABLE: No such table. +** RBU_PK_NONE: Table has an implicit rowid. +** RBU_PK_IPK: Table has an explicit IPK column. +** RBU_PK_EXTERNAL: Table has an external PK index. +** RBU_PK_WITHOUT_ROWID: Table is WITHOUT ROWID. +** RBU_PK_VTAB: Table is a virtual table. +** +** Argument *piPk is also of type (int*), and also points to an output +** parameter. Unless the table has an external primary key index +** (i.e. unless *peType is set to 3), then *piPk is set to zero. Or, +** if the table does have an external primary key index, then *piPk +** is set to the root page number of the primary key index before +** returning. +** +** ALGORITHM: +** +** if( no entry exists in sqlite_master ){ +** return RBU_PK_NOTABLE +** }else if( sql for the entry starts with "CREATE VIRTUAL" ){ +** return RBU_PK_VTAB +** }else if( "PRAGMA index_list()" for the table contains a "pk" index ){ +** if( the index that is the pk exists in sqlite_master ){ +** *piPK = rootpage of that index. +** return RBU_PK_EXTERNAL +** }else{ +** return RBU_PK_WITHOUT_ROWID +** } +** }else if( "PRAGMA table_info()" lists one or more "pk" columns ){ +** return RBU_PK_IPK +** }else{ +** return RBU_PK_NONE +** } +*/ +static void rbuTableType( + sqlite3rbu *p, + const char *zTab, + int *peType, + int *piTnum, + int *piPk +){ + /* + ** 0) SELECT count(*) FROM sqlite_master where name=%Q AND IsVirtual(%Q) + ** 1) PRAGMA index_list = ? + ** 2) SELECT count(*) FROM sqlite_master where name=%Q + ** 3) PRAGMA table_info = ? + */ + sqlite3_stmt *aStmt[4] = {0, 0, 0, 0}; + + *peType = RBU_PK_NOTABLE; + *piPk = 0; + + assert( p->rc==SQLITE_OK ); + p->rc = prepareFreeAndCollectError(p->dbMain, &aStmt[0], &p->zErrmsg, + sqlite3_mprintf( + "SELECT (sql LIKE 'create virtual%%'), rootpage" + " FROM sqlite_master" + " WHERE name=%Q", zTab + )); + if( p->rc!=SQLITE_OK || sqlite3_step(aStmt[0])!=SQLITE_ROW ){ + /* Either an error, or no such table. */ + goto rbuTableType_end; + } + if( sqlite3_column_int(aStmt[0], 0) ){ + *peType = RBU_PK_VTAB; /* virtual table */ + goto rbuTableType_end; + } + *piTnum = sqlite3_column_int(aStmt[0], 1); - memcpy(&left, &aCell[aaSorted[ii][0]], sizeof(RtreeCell)); - memcpy(&right, &aCell[aaSorted[ii][nCell-1]], sizeof(RtreeCell)); - for(kk=1; kk<(nCell-1); kk++){ - if( kkrc = prepareFreeAndCollectError(p->dbMain, &aStmt[1], &p->zErrmsg, + sqlite3_mprintf("PRAGMA index_list=%Q",zTab) + ); + if( p->rc ) goto rbuTableType_end; + while( sqlite3_step(aStmt[1])==SQLITE_ROW ){ + const u8 *zOrig = sqlite3_column_text(aStmt[1], 3); + const u8 *zIdx = sqlite3_column_text(aStmt[1], 1); + if( zOrig && zIdx && zOrig[0]=='p' ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &aStmt[2], &p->zErrmsg, + sqlite3_mprintf( + "SELECT rootpage FROM sqlite_master WHERE name = %Q", zIdx + )); + if( p->rc==SQLITE_OK ){ + if( sqlite3_step(aStmt[2])==SQLITE_ROW ){ + *piPk = sqlite3_column_int(aStmt[2], 0); + *peType = RBU_PK_EXTERNAL; }else{ - cellUnion(pRtree, &right, &aCell[aaSorted[ii][kk]]); + *peType = RBU_PK_WITHOUT_ROWID; } } - margin += cellMargin(pRtree, &left); - margin += cellMargin(pRtree, &right); - overlap = cellOverlap(pRtree, &left, &right, 1); - area = cellArea(pRtree, &left) + cellArea(pRtree, &right); - if( (nLeft==RTREE_MINCELLS(pRtree)) - || (overlaprc = prepareFreeAndCollectError(p->dbMain, &aStmt[3], &p->zErrmsg, + sqlite3_mprintf("PRAGMA table_info=%Q",zTab) + ); + if( p->rc==SQLITE_OK ){ + while( sqlite3_step(aStmt[3])==SQLITE_ROW ){ + if( sqlite3_column_int(aStmt[3],5)>0 ){ + *peType = RBU_PK_IPK; /* explicit IPK column */ + goto rbuTableType_end; + } } + *peType = RBU_PK_NONE; } - memcpy(pBboxLeft, &aCell[aaSorted[iBestDim][0]], sizeof(RtreeCell)); - memcpy(pBboxRight, &aCell[aaSorted[iBestDim][iBestSplit]], sizeof(RtreeCell)); - for(ii=0; iiabIndexed[] array. +*/ +static void rbuObjIterCacheIndexedCols(sqlite3rbu *p, RbuObjIter *pIter){ + sqlite3_stmt *pList = 0; + int bIndex = 0; -static int updateMapping( - Rtree *pRtree, - i64 iRowid, - RtreeNode *pNode, - int iHeight -){ - int (*xSetMapping)(Rtree *, sqlite3_int64, sqlite3_int64); - xSetMapping = ((iHeight==0)?rowidWrite:parentWrite); - if( iHeight>0 ){ - RtreeNode *pChild = nodeHashLookup(pRtree, iRowid); - if( pChild ){ - nodeRelease(pRtree, pChild->pParent); - nodeReference(pNode); - pChild->pParent = pNode; + if( p->rc==SQLITE_OK ){ + memcpy(pIter->abIndexed, pIter->abTblPk, sizeof(u8)*pIter->nTblCol); + p->rc = prepareFreeAndCollectError(p->dbMain, &pList, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_list = %Q", pIter->zTbl) + ); + } + + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pList) ){ + const char *zIdx = (const char*)sqlite3_column_text(pList, 1); + sqlite3_stmt *pXInfo = 0; + if( zIdx==0 ) break; + p->rc = prepareFreeAndCollectError(p->dbMain, &pXInfo, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_xinfo = %Q", zIdx) + ); + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pXInfo) ){ + int iCid = sqlite3_column_int(pXInfo, 1); + if( iCid>=0 ) pIter->abIndexed[iCid] = 1; } + rbuFinalize(p, pXInfo); + bIndex = 1; } - return xSetMapping(pRtree, iRowid, pNode->iNode); + + rbuFinalize(p, pList); + if( bIndex==0 ) pIter->abIndexed = 0; } -static int SplitNode( - Rtree *pRtree, - RtreeNode *pNode, - RtreeCell *pCell, - int iHeight -){ - int i; - int newCellIsRight = 0; - int rc = SQLITE_OK; - int nCell = NCELL(pNode); - RtreeCell *aCell; - int *aiUsed; +/* +** If they are not already populated, populate the pIter->azTblCol[], +** pIter->abTblPk[], pIter->nTblCol and pIter->bRowid variables according to +** the table (not index) that the iterator currently points to. +** +** Return SQLITE_OK if successful, or an SQLite error code otherwise. If +** an error does occur, an error code and error message are also left in +** the RBU handle. +*/ +static int rbuObjIterCacheTableInfo(sqlite3rbu *p, RbuObjIter *pIter){ + if( pIter->azTblCol==0 ){ + sqlite3_stmt *pStmt = 0; + int nCol = 0; + int i; /* for() loop iterator variable */ + int bRbuRowid = 0; /* If input table has column "rbu_rowid" */ + int iOrder = 0; + int iTnum = 0; + + /* Figure out the type of table this step will deal with. */ + assert( pIter->eType==0 ); + rbuTableType(p, pIter->zTbl, &pIter->eType, &iTnum, &pIter->iPkTnum); + if( p->rc==SQLITE_OK && pIter->eType==RBU_PK_NOTABLE ){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("no such table: %s", pIter->zTbl); + } + if( p->rc ) return p->rc; + if( pIter->zIdx==0 ) pIter->iTnum = iTnum; + + assert( pIter->eType==RBU_PK_NONE || pIter->eType==RBU_PK_IPK + || pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_WITHOUT_ROWID + || pIter->eType==RBU_PK_VTAB + ); - RtreeNode *pLeft = 0; - RtreeNode *pRight = 0; + /* Populate the azTblCol[] and nTblCol variables based on the columns + ** of the input table. Ignore any input table columns that begin with + ** "rbu_". */ + p->rc = prepareFreeAndCollectError(p->dbRbu, &pStmt, &p->zErrmsg, + sqlite3_mprintf("SELECT * FROM 'data_%q'", pIter->zTbl) + ); + if( p->rc==SQLITE_OK ){ + nCol = sqlite3_column_count(pStmt); + rbuAllocateIterArrays(p, pIter, nCol); + } + for(i=0; p->rc==SQLITE_OK && irc); + pIter->aiSrcOrder[pIter->nTblCol] = pIter->nTblCol; + pIter->azTblCol[pIter->nTblCol++] = zCopy; + } + else if( 0==sqlite3_stricmp("rbu_rowid", zName) ){ + bRbuRowid = 1; + } + } + sqlite3_finalize(pStmt); + pStmt = 0; - RtreeCell leftbbox; - RtreeCell rightbbox; + if( p->rc==SQLITE_OK + && bRbuRowid!=(pIter->eType==RBU_PK_VTAB || pIter->eType==RBU_PK_NONE) + ){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf( + "table data_%q %s rbu_rowid column", pIter->zTbl, + (bRbuRowid ? "may not have" : "requires") + ); + } - /* Allocate an array and populate it with a copy of pCell and - ** all cells from node pLeft. Then zero the original node. - */ - aCell = sqlite3_malloc((sizeof(RtreeCell)+sizeof(int))*(nCell+1)); - if( !aCell ){ - rc = SQLITE_NOMEM; - goto splitnode_out; - } - aiUsed = (int *)&aCell[nCell+1]; - memset(aiUsed, 0, sizeof(int)*(nCell+1)); - for(i=0; irc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pStmt, &p->zErrmsg, + sqlite3_mprintf("PRAGMA table_info(%Q)", pIter->zTbl) + ); + } + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + const char *zName = (const char*)sqlite3_column_text(pStmt, 1); + if( zName==0 ) break; /* An OOM - finalize() below returns S_NOMEM */ + for(i=iOrder; inTblCol; i++){ + if( 0==strcmp(zName, pIter->azTblCol[i]) ) break; + } + if( i==pIter->nTblCol ){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("column missing from data_%q: %s", + pIter->zTbl, zName + ); + }else{ + int iPk = sqlite3_column_int(pStmt, 5); + int bNotNull = sqlite3_column_int(pStmt, 3); + const char *zType = (const char*)sqlite3_column_text(pStmt, 2); - if( pNode->iNode==1 ){ - pRight = nodeNew(pRtree, pNode); - pLeft = nodeNew(pRtree, pNode); - pRtree->iDepth++; - pNode->isDirty = 1; - writeInt16(pNode->zData, pRtree->iDepth); - }else{ - pLeft = pNode; - pRight = nodeNew(pRtree, pLeft->pParent); - nodeReference(pLeft); - } + if( i!=iOrder ){ + SWAP(int, pIter->aiSrcOrder[i], pIter->aiSrcOrder[iOrder]); + SWAP(char*, pIter->azTblCol[i], pIter->azTblCol[iOrder]); + } - if( !pLeft || !pRight ){ - rc = SQLITE_NOMEM; - goto splitnode_out; + pIter->azTblType[iOrder] = rbuStrndup(zType, &p->rc); + pIter->abTblPk[iOrder] = (iPk!=0); + pIter->abNotNull[iOrder] = (u8)bNotNull || (iPk!=0); + iOrder++; + } + } + + rbuFinalize(p, pStmt); + rbuObjIterCacheIndexedCols(p, pIter); + assert( pIter->eType!=RBU_PK_VTAB || pIter->abIndexed==0 ); } - memset(pLeft->zData, 0, pRtree->iNodeSize); - memset(pRight->zData, 0, pRtree->iNodeSize); + return p->rc; +} - rc = splitNodeStartree(pRtree, aCell, nCell, pLeft, pRight, - &leftbbox, &rightbbox); - if( rc!=SQLITE_OK ){ - goto splitnode_out; +/* +** This function constructs and returns a pointer to a nul-terminated +** string containing some SQL clause or list based on one or more of the +** column names currently stored in the pIter->azTblCol[] array. +*/ +static char *rbuObjIterGetCollist( + sqlite3rbu *p, /* RBU object */ + RbuObjIter *pIter /* Object iterator for column names */ +){ + char *zList = 0; + const char *zSep = ""; + int i; + for(i=0; inTblCol; i++){ + const char *z = pIter->azTblCol[i]; + zList = rbuMPrintf(p, "%z%s\"%w\"", zList, zSep, z); + zSep = ", "; } + return zList; +} - /* Ensure both child nodes have node numbers assigned to them by calling - ** nodeWrite(). Node pRight always needs a node number, as it was created - ** by nodeNew() above. But node pLeft sometimes already has a node number. - ** In this case avoid the all to nodeWrite(). - */ - if( SQLITE_OK!=(rc = nodeWrite(pRtree, pRight)) - || (0==pLeft->iNode && SQLITE_OK!=(rc = nodeWrite(pRtree, pLeft))) - ){ - goto splitnode_out; +/* +** This function is used to create a SELECT list (the list of SQL +** expressions that follows a SELECT keyword) for a SELECT statement +** used to read from an data_xxx or rbu_tmp_xxx table while updating the +** index object currently indicated by the iterator object passed as the +** second argument. A "PRAGMA index_xinfo = " statement is used +** to obtain the required information. +** +** If the index is of the following form: +** +** CREATE INDEX i1 ON t1(c, b COLLATE nocase); +** +** and "t1" is a table with an explicit INTEGER PRIMARY KEY column +** "ipk", the returned string is: +** +** "`c` COLLATE 'BINARY', `b` COLLATE 'NOCASE', `ipk` COLLATE 'BINARY'" +** +** As well as the returned string, three other malloc'd strings are +** returned via output parameters. As follows: +** +** pzImposterCols: ... +** pzImposterPk: ... +** pzWhere: ... +*/ +static char *rbuObjIterGetIndexCols( + sqlite3rbu *p, /* RBU object */ + RbuObjIter *pIter, /* Object iterator for column names */ + char **pzImposterCols, /* OUT: Columns for imposter table */ + char **pzImposterPk, /* OUT: Imposter PK clause */ + char **pzWhere, /* OUT: WHERE clause */ + int *pnBind /* OUT: Trbul number of columns */ +){ + int rc = p->rc; /* Error code */ + int rc2; /* sqlite3_finalize() return code */ + char *zRet = 0; /* String to return */ + char *zImpCols = 0; /* String to return via *pzImposterCols */ + char *zImpPK = 0; /* String to return via *pzImposterPK */ + char *zWhere = 0; /* String to return via *pzWhere */ + int nBind = 0; /* Value to return via *pnBind */ + const char *zCom = ""; /* Set to ", " later on */ + const char *zAnd = ""; /* Set to " AND " later on */ + sqlite3_stmt *pXInfo = 0; /* PRAGMA index_xinfo = ? */ + + if( rc==SQLITE_OK ){ + assert( p->zErrmsg==0 ); + rc = prepareFreeAndCollectError(p->dbMain, &pXInfo, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_xinfo = %Q", pIter->zIdx) + ); } - rightbbox.iRowid = pRight->iNode; - leftbbox.iRowid = pLeft->iNode; + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pXInfo) ){ + int iCid = sqlite3_column_int(pXInfo, 1); + int bDesc = sqlite3_column_int(pXInfo, 3); + const char *zCollate = (const char*)sqlite3_column_text(pXInfo, 4); + const char *zCol; + const char *zType; - if( pNode->iNode==1 ){ - rc = rtreeInsertCell(pRtree, pLeft->pParent, &leftbbox, iHeight+1); - if( rc!=SQLITE_OK ){ - goto splitnode_out; - } - }else{ - RtreeNode *pParent = pLeft->pParent; - int iCell; - rc = nodeParentIndex(pRtree, pLeft, &iCell); - if( rc==SQLITE_OK ){ - nodeOverwriteCell(pRtree, pParent, &leftbbox, iCell); - rc = AdjustTree(pRtree, pParent, &leftbbox); + if( iCid<0 ){ + /* An integer primary key. If the table has an explicit IPK, use + ** its name. Otherwise, use "rbu_rowid". */ + if( pIter->eType==RBU_PK_IPK ){ + int i; + for(i=0; pIter->abTblPk[i]==0; i++); + assert( inTblCol ); + zCol = pIter->azTblCol[i]; + }else{ + zCol = "rbu_rowid"; + } + zType = "INTEGER"; + }else{ + zCol = pIter->azTblCol[iCid]; + zType = pIter->azTblType[iCid]; } - if( rc!=SQLITE_OK ){ - goto splitnode_out; + + zRet = sqlite3_mprintf("%z%s\"%w\" COLLATE %Q", zRet, zCom, zCol, zCollate); + if( pIter->bUnique==0 || sqlite3_column_int(pXInfo, 5) ){ + const char *zOrder = (bDesc ? " DESC" : ""); + zImpPK = sqlite3_mprintf("%z%s\"rbu_imp_%d%w\"%s", + zImpPK, zCom, nBind, zCol, zOrder + ); } + zImpCols = sqlite3_mprintf("%z%s\"rbu_imp_%d%w\" %s COLLATE %Q", + zImpCols, zCom, nBind, zCol, zType, zCollate + ); + zWhere = sqlite3_mprintf( + "%z%s\"rbu_imp_%d%w\" IS ?", zWhere, zAnd, nBind, zCol + ); + if( zRet==0 || zImpPK==0 || zImpCols==0 || zWhere==0 ) rc = SQLITE_NOMEM; + zCom = ", "; + zAnd = " AND "; + nBind++; } - if( (rc = rtreeInsertCell(pRtree, pRight->pParent, &rightbbox, iHeight+1)) ){ - goto splitnode_out; + + rc2 = sqlite3_finalize(pXInfo); + if( rc==SQLITE_OK ) rc = rc2; + + if( rc!=SQLITE_OK ){ + sqlite3_free(zRet); + sqlite3_free(zImpCols); + sqlite3_free(zImpPK); + sqlite3_free(zWhere); + zRet = 0; + zImpCols = 0; + zImpPK = 0; + zWhere = 0; + p->rc = rc; } - for(i=0; iiRowid ){ - newCellIsRight = 1; + *pzImposterCols = zImpCols; + *pzImposterPk = zImpPK; + *pzWhere = zWhere; + *pnBind = nBind; + return zRet; +} + +/* +** Assuming the current table columns are "a", "b" and "c", and the zObj +** paramter is passed "old", return a string of the form: +** +** "old.a, old.b, old.b" +** +** With the column names escaped. +** +** For tables with implicit rowids - RBU_PK_EXTERNAL and RBU_PK_NONE, append +** the text ", old._rowid_" to the returned value. +*/ +static char *rbuObjIterGetOldlist( + sqlite3rbu *p, + RbuObjIter *pIter, + const char *zObj +){ + char *zList = 0; + if( p->rc==SQLITE_OK && pIter->abIndexed ){ + const char *zS = ""; + int i; + for(i=0; inTblCol; i++){ + if( pIter->abIndexed[i] ){ + const char *zCol = pIter->azTblCol[i]; + zList = sqlite3_mprintf("%z%s%s.\"%w\"", zList, zS, zObj, zCol); + }else{ + zList = sqlite3_mprintf("%z%sNULL", zList, zS); + } + zS = ", "; + if( zList==0 ){ + p->rc = SQLITE_NOMEM; + break; + } } - if( rc!=SQLITE_OK ){ - goto splitnode_out; + + /* For a table with implicit rowids, append "old._rowid_" to the list. */ + if( pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_NONE ){ + zList = rbuMPrintf(p, "%z, %s._rowid_", zList, zObj); } } - if( pNode->iNode==1 ){ - for(i=0; ieType==RBU_PK_VTAB || pIter->eType==RBU_PK_NONE ){ + zList = rbuMPrintf(p, "_rowid_ = ?%d", pIter->nTblCol+1); + }else if( pIter->eType==RBU_PK_EXTERNAL ){ + const char *zSep = ""; + int i; + for(i=0; inTblCol; i++){ + if( pIter->abTblPk[i] ){ + zList = rbuMPrintf(p, "%z%sc%d=?%d", zList, zSep, i, i+1); + zSep = " AND "; } } - }else if( newCellIsRight==0 ){ - rc = updateMapping(pRtree, pCell->iRowid, pLeft, iHeight); - } + zList = rbuMPrintf(p, + "_rowid_ = (SELECT id FROM rbu_imposter2 WHERE %z)", zList + ); - if( rc==SQLITE_OK ){ - rc = nodeRelease(pRtree, pRight); - pRight = 0; - } - if( rc==SQLITE_OK ){ - rc = nodeRelease(pRtree, pLeft); - pLeft = 0; + }else{ + const char *zSep = ""; + int i; + for(i=0; inTblCol; i++){ + if( pIter->abTblPk[i] ){ + const char *zCol = pIter->azTblCol[i]; + zList = rbuMPrintf(p, "%z%s\"%w\"=?%d", zList, zSep, zCol, i+1); + zSep = " AND "; + } + } } + return zList; +} -splitnode_out: - nodeRelease(pRtree, pRight); - nodeRelease(pRtree, pLeft); - sqlite3_free(aCell); - return rc; +/* +** The SELECT statement iterating through the keys for the current object +** (p->objiter.pSelect) currently points to a valid row. However, there +** is something wrong with the rbu_control value in the rbu_control value +** stored in the (p->nCol+1)'th column. Set the error code and error message +** of the RBU handle to something reflecting this. +*/ +static void rbuBadControlError(sqlite3rbu *p){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("invalid rbu_control value"); } + /* -** If node pLeaf is not the root of the r-tree and its pParent pointer is -** still NULL, load all ancestor nodes of pLeaf into memory and populate -** the pLeaf->pParent chain all the way up to the root node. +** Return a nul-terminated string containing the comma separated list of +** assignments that should be included following the "SET" keyword of +** an UPDATE statement used to update the table object that the iterator +** passed as the second argument currently points to if the rbu_control +** column of the data_xxx table entry is set to zMask. ** -** This operation is required when a row is deleted (or updated - an update -** is implemented as a delete followed by an insert). SQLite provides the -** rowid of the row to delete, which can be used to find the leaf on which -** the entry resides (argument pLeaf). Once the leaf is located, this -** function is called to determine its ancestry. +** The memory for the returned string is obtained from sqlite3_malloc(). +** It is the responsibility of the caller to eventually free it using +** sqlite3_free(). +** +** If an OOM error is encountered when allocating space for the new +** string, an error code is left in the rbu handle passed as the first +** argument and NULL is returned. Or, if an error has already occurred +** when this function is called, NULL is returned immediately, without +** attempting the allocation or modifying the stored error code. */ -static int fixLeafParent(Rtree *pRtree, RtreeNode *pLeaf){ - int rc = SQLITE_OK; - RtreeNode *pChild = pLeaf; - while( rc==SQLITE_OK && pChild->iNode!=1 && pChild->pParent==0 ){ - int rc2 = SQLITE_OK; /* sqlite3_reset() return code */ - sqlite3_bind_int64(pRtree->pReadParent, 1, pChild->iNode); - rc = sqlite3_step(pRtree->pReadParent); - if( rc==SQLITE_ROW ){ - RtreeNode *pTest; /* Used to test for reference loops */ - i64 iNode; /* Node number of parent node */ +static char *rbuObjIterGetSetlist( + sqlite3rbu *p, + RbuObjIter *pIter, + const char *zMask +){ + char *zList = 0; + if( p->rc==SQLITE_OK ){ + int i; - /* Before setting pChild->pParent, test that we are not creating a - ** loop of references (as we would if, say, pChild==pParent). We don't - ** want to do this as it leads to a memory leak when trying to delete - ** the referenced counted node structures. - */ - iNode = sqlite3_column_int64(pRtree->pReadParent, 0); - for(pTest=pLeaf; pTest && pTest->iNode!=iNode; pTest=pTest->pParent); - if( !pTest ){ - rc2 = nodeAcquire(pRtree, iNode, 0, &pChild->pParent); + if( strlen(zMask)!=pIter->nTblCol ){ + rbuBadControlError(p); + }else{ + const char *zSep = ""; + for(i=0; inTblCol; i++){ + char c = zMask[pIter->aiSrcOrder[i]]; + if( c=='x' ){ + zList = rbuMPrintf(p, "%z%s\"%w\"=?%d", + zList, zSep, pIter->azTblCol[i], i+1 + ); + zSep = ", "; + } + if( c=='d' ){ + zList = rbuMPrintf(p, "%z%s\"%w\"=rbu_delta(\"%w\", ?%d)", + zList, zSep, pIter->azTblCol[i], pIter->azTblCol[i], i+1 + ); + zSep = ", "; + } } } - rc = sqlite3_reset(pRtree->pReadParent); - if( rc==SQLITE_OK ) rc = rc2; - if( rc==SQLITE_OK && !pChild->pParent ) rc = SQLITE_CORRUPT_VTAB; - pChild = pChild->pParent; } - return rc; + return zList; } -static int deleteCell(Rtree *, RtreeNode *, int, int); +/* +** Return a nul-terminated string consisting of nByte comma separated +** "?" expressions. For example, if nByte is 3, return a pointer to +** a buffer containing the string "?,?,?". +** +** The memory for the returned string is obtained from sqlite3_malloc(). +** It is the responsibility of the caller to eventually free it using +** sqlite3_free(). +** +** If an OOM error is encountered when allocating space for the new +** string, an error code is left in the rbu handle passed as the first +** argument and NULL is returned. Or, if an error has already occurred +** when this function is called, NULL is returned immediately, without +** attempting the allocation or modifying the stored error code. +*/ +static char *rbuObjIterGetBindlist(sqlite3rbu *p, int nBind){ + char *zRet = 0; + int nByte = nBind*2 + 1; -static int removeNode(Rtree *pRtree, RtreeNode *pNode, int iHeight){ - int rc; - int rc2; - RtreeNode *pParent = 0; - int iCell; + zRet = (char*)rbuMalloc(p, nByte); + if( zRet ){ + int i; + for(i=0; inRef==1 ); +/* +** The iterator currently points to a table (not index) of type +** RBU_PK_WITHOUT_ROWID. This function creates the PRIMARY KEY +** declaration for the corresponding imposter table. For example, +** if the iterator points to a table created as: +** +** CREATE TABLE t1(a, b, c, PRIMARY KEY(b, a DESC)) WITHOUT ROWID +** +** this function returns: +** +** PRIMARY KEY("b", "a" DESC) +*/ +static char *rbuWithoutRowidPK(sqlite3rbu *p, RbuObjIter *pIter){ + char *z = 0; + assert( pIter->zIdx==0 ); + if( p->rc==SQLITE_OK ){ + const char *zSep = "PRIMARY KEY("; + sqlite3_stmt *pXList = 0; /* PRAGMA index_list = (pIter->zTbl) */ + sqlite3_stmt *pXInfo = 0; /* PRAGMA index_xinfo = */ + + p->rc = prepareFreeAndCollectError(p->dbMain, &pXList, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_list = %Q", pIter->zTbl) + ); + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pXList) ){ + const char *zOrig = (const char*)sqlite3_column_text(pXList,3); + if( zOrig && strcmp(zOrig, "pk")==0 ){ + const char *zIdx = (const char*)sqlite3_column_text(pXList,1); + if( zIdx ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pXInfo, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_xinfo = %Q", zIdx) + ); + } + break; + } + } + rbuFinalize(p, pXList); - /* Remove the entry in the parent cell. */ - rc = nodeParentIndex(pRtree, pNode, &iCell); - if( rc==SQLITE_OK ){ - pParent = pNode->pParent; - pNode->pParent = 0; - rc = deleteCell(pRtree, pParent, iCell, iHeight+1); - } - rc2 = nodeRelease(pRtree, pParent); - if( rc==SQLITE_OK ){ - rc = rc2; - } - if( rc!=SQLITE_OK ){ - return rc; + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pXInfo) ){ + if( sqlite3_column_int(pXInfo, 5) ){ + /* int iCid = sqlite3_column_int(pXInfo, 0); */ + const char *zCol = (const char*)sqlite3_column_text(pXInfo, 2); + const char *zDesc = sqlite3_column_int(pXInfo, 3) ? " DESC" : ""; + z = rbuMPrintf(p, "%z%s\"%w\"%s", z, zSep, zCol, zDesc); + zSep = ", "; + } + } + z = rbuMPrintf(p, "%z)", z); + rbuFinalize(p, pXInfo); } + return z; +} - /* Remove the xxx_node entry. */ - sqlite3_bind_int64(pRtree->pDeleteNode, 1, pNode->iNode); - sqlite3_step(pRtree->pDeleteNode); - if( SQLITE_OK!=(rc = sqlite3_reset(pRtree->pDeleteNode)) ){ - return rc; - } +/* +** This function creates the second imposter table used when writing to +** a table b-tree where the table has an external primary key. If the +** iterator passed as the second argument does not currently point to +** a table (not index) with an external primary key, this function is a +** no-op. +** +** Assuming the iterator does point to a table with an external PK, this +** function creates a WITHOUT ROWID imposter table named "rbu_imposter2" +** used to access that PK index. For example, if the target table is +** declared as follows: +** +** CREATE TABLE t1(a, b TEXT, c REAL, PRIMARY KEY(b, c)); +** +** then the imposter table schema is: +** +** CREATE TABLE rbu_imposter2(c1 TEXT, c2 REAL, id INTEGER) WITHOUT ROWID; +** +*/ +static void rbuCreateImposterTable2(sqlite3rbu *p, RbuObjIter *pIter){ + if( p->rc==SQLITE_OK && pIter->eType==RBU_PK_EXTERNAL ){ + int tnum = pIter->iPkTnum; /* Root page of PK index */ + sqlite3_stmt *pQuery = 0; /* SELECT name ... WHERE rootpage = $tnum */ + const char *zIdx = 0; /* Name of PK index */ + sqlite3_stmt *pXInfo = 0; /* PRAGMA main.index_xinfo = $zIdx */ + const char *zComma = ""; + char *zCols = 0; /* Used to build up list of table cols */ + char *zPk = 0; /* Used to build up table PK declaration */ - /* Remove the xxx_parent entry. */ - sqlite3_bind_int64(pRtree->pDeleteParent, 1, pNode->iNode); - sqlite3_step(pRtree->pDeleteParent); - if( SQLITE_OK!=(rc = sqlite3_reset(pRtree->pDeleteParent)) ){ - return rc; - } - - /* Remove the node from the in-memory hash table and link it into - ** the Rtree.pDeleted list. Its contents will be re-inserted later on. - */ - nodeHashDelete(pRtree, pNode); - pNode->iNode = iHeight; - pNode->pNext = pRtree->pDeleted; - pNode->nRef++; - pRtree->pDeleted = pNode; + /* Figure out the name of the primary key index for the current table. + ** This is needed for the argument to "PRAGMA index_xinfo". Set + ** zIdx to point to a nul-terminated string containing this name. */ + p->rc = prepareAndCollectError(p->dbMain, &pQuery, &p->zErrmsg, + "SELECT name FROM sqlite_master WHERE rootpage = ?" + ); + if( p->rc==SQLITE_OK ){ + sqlite3_bind_int(pQuery, 1, tnum); + if( SQLITE_ROW==sqlite3_step(pQuery) ){ + zIdx = (const char*)sqlite3_column_text(pQuery, 0); + } + } + if( zIdx ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pXInfo, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_xinfo = %Q", zIdx) + ); + } + rbuFinalize(p, pQuery); - return SQLITE_OK; + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pXInfo) ){ + int bKey = sqlite3_column_int(pXInfo, 5); + if( bKey ){ + int iCid = sqlite3_column_int(pXInfo, 1); + int bDesc = sqlite3_column_int(pXInfo, 3); + const char *zCollate = (const char*)sqlite3_column_text(pXInfo, 4); + zCols = rbuMPrintf(p, "%z%sc%d %s COLLATE %s", zCols, zComma, + iCid, pIter->azTblType[iCid], zCollate + ); + zPk = rbuMPrintf(p, "%z%sc%d%s", zPk, zComma, iCid, bDesc?" DESC":""); + zComma = ", "; + } + } + zCols = rbuMPrintf(p, "%z, id INTEGER", zCols); + rbuFinalize(p, pXInfo); + + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 1, tnum); + rbuMPrintfExec(p, p->dbMain, + "CREATE TABLE rbu_imposter2(%z, PRIMARY KEY(%z)) WITHOUT ROWID", + zCols, zPk + ); + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 0, 0); + } } -static int fixBoundingBox(Rtree *pRtree, RtreeNode *pNode){ - RtreeNode *pParent = pNode->pParent; - int rc = SQLITE_OK; - if( pParent ){ - int ii; - int nCell = NCELL(pNode); - RtreeCell box; /* Bounding box for pNode */ - nodeGetCell(pRtree, pNode, 0, &box); - for(ii=1; iirc==SQLITE_OK && pIter->eType!=RBU_PK_VTAB ){ + int tnum = pIter->iTnum; + const char *zComma = ""; + char *zSql = 0; + int iCol; + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 0, 1); + + for(iCol=0; p->rc==SQLITE_OK && iColnTblCol; iCol++){ + const char *zPk = ""; + const char *zCol = pIter->azTblCol[iCol]; + const char *zColl = 0; + + p->rc = sqlite3_table_column_metadata( + p->dbMain, "main", pIter->zTbl, zCol, 0, &zColl, 0, 0, 0 + ); + + if( pIter->eType==RBU_PK_IPK && pIter->abTblPk[iCol] ){ + /* If the target table column is an "INTEGER PRIMARY KEY", add + ** "PRIMARY KEY" to the imposter table column declaration. */ + zPk = "PRIMARY KEY "; + } + zSql = rbuMPrintf(p, "%z%s\"%w\" %s %sCOLLATE %s%s", + zSql, zComma, zCol, pIter->azTblType[iCol], zPk, zColl, + (pIter->abNotNull[iCol] ? " NOT NULL" : "") + ); + zComma = ", "; } - box.iRowid = pNode->iNode; - rc = nodeParentIndex(pRtree, pNode, &ii); - if( rc==SQLITE_OK ){ - nodeOverwriteCell(pRtree, pParent, &box, ii); - rc = fixBoundingBox(pRtree, pParent); + + if( pIter->eType==RBU_PK_WITHOUT_ROWID ){ + char *zPk = rbuWithoutRowidPK(p, pIter); + if( zPk ){ + zSql = rbuMPrintf(p, "%z, %z", zSql, zPk); + } } + + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 1, tnum); + rbuMPrintfExec(p, p->dbMain, "CREATE TABLE \"rbu_imp_%w\"(%z)%s", + pIter->zTbl, zSql, + (pIter->eType==RBU_PK_WITHOUT_ROWID ? " WITHOUT ROWID" : "") + ); + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 0, 0); } - return rc; } /* -** Delete the cell at index iCell of node pNode. After removing the -** cell, adjust the r-tree data structure if required. +** Prepare a statement used to insert rows into the "rbu_tmp_xxx" table. +** Specifically a statement of the form: +** +** INSERT INTO rbu_tmp_xxx VALUES(?, ?, ? ...); +** +** The number of bound variables is equal to the number of columns in +** the target table, plus one (for the rbu_control column), plus one more +** (for the rbu_rowid column) if the target table is an implicit IPK or +** virtual table. */ -static int deleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell, int iHeight){ - RtreeNode *pParent; - int rc; - - if( SQLITE_OK!=(rc = fixLeafParent(pRtree, pNode)) ){ - return rc; +static void rbuObjIterPrepareTmpInsert( + sqlite3rbu *p, + RbuObjIter *pIter, + const char *zCollist, + const char *zRbuRowid +){ + int bRbuRowid = (pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_NONE); + char *zBind = rbuObjIterGetBindlist(p, pIter->nTblCol + 1 + bRbuRowid); + if( zBind ){ + assert( pIter->pTmpInsert==0 ); + p->rc = prepareFreeAndCollectError( + p->dbRbu, &pIter->pTmpInsert, &p->zErrmsg, sqlite3_mprintf( + "INSERT INTO %s.'rbu_tmp_%q'(rbu_control,%s%s) VALUES(%z)", + p->zStateDb, pIter->zTbl, zCollist, zRbuRowid, zBind + )); } +} - /* Remove the cell from the node. This call just moves bytes around - ** the in-memory node image, so it cannot fail. - */ - nodeDeleteCell(pRtree, pNode, iCell); +static void rbuTmpInsertFunc( + sqlite3_context *pCtx, + int nVal, + sqlite3_value **apVal +){ + sqlite3rbu *p = sqlite3_user_data(pCtx); + int rc = SQLITE_OK; + int i; - /* If the node is not the tree root and now has less than the minimum - ** number of cells, remove it from the tree. Otherwise, update the - ** cell in the parent node so that it tightly contains the updated - ** node. - */ - pParent = pNode->pParent; - assert( pParent || pNode->iNode==1 ); - if( pParent ){ - if( NCELL(pNode)objiter.pTmpInsert, i+1, apVal[i]); + } + if( rc==SQLITE_OK ){ + sqlite3_step(p->objiter.pTmpInsert); + rc = sqlite3_reset(p->objiter.pTmpInsert); } - return rc; + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + } } -static int Reinsert( - Rtree *pRtree, - RtreeNode *pNode, - RtreeCell *pCell, - int iHeight +/* +** Ensure that the SQLite statement handles required to update the +** target database object currently indicated by the iterator passed +** as the second argument are available. +*/ +static int rbuObjIterPrepareAll( + sqlite3rbu *p, + RbuObjIter *pIter, + int nOffset /* Add "LIMIT -1 OFFSET $nOffset" to SELECT */ ){ - int *aOrder; - int *aSpare; - RtreeCell *aCell; - RtreeDValue *aDistance; - int nCell; - RtreeDValue aCenterCoord[RTREE_MAX_DIMENSIONS]; - int iDim; - int ii; - int rc = SQLITE_OK; - int n; + assert( pIter->bCleanup==0 ); + if( pIter->pSelect==0 && rbuObjIterCacheTableInfo(p, pIter)==SQLITE_OK ){ + const int tnum = pIter->iTnum; + char *zCollist = 0; /* List of indexed columns */ + char **pz = &p->zErrmsg; + const char *zIdx = pIter->zIdx; + char *zLimit = 0; - memset(aCenterCoord, 0, sizeof(RtreeDValue)*RTREE_MAX_DIMENSIONS); + if( nOffset ){ + zLimit = sqlite3_mprintf(" LIMIT -1 OFFSET %d", nOffset); + if( !zLimit ) p->rc = SQLITE_NOMEM; + } - nCell = NCELL(pNode)+1; - n = (nCell+1)&(~1); + if( zIdx ){ + const char *zTbl = pIter->zTbl; + char *zImposterCols = 0; /* Columns for imposter table */ + char *zImposterPK = 0; /* Primary key declaration for imposter */ + char *zWhere = 0; /* WHERE clause on PK columns */ + char *zBind = 0; + int nBind = 0; - /* Allocate the buffers used by this operation. The allocation is - ** relinquished before this function returns. - */ - aCell = (RtreeCell *)sqlite3_malloc(n * ( - sizeof(RtreeCell) + /* aCell array */ - sizeof(int) + /* aOrder array */ - sizeof(int) + /* aSpare array */ - sizeof(RtreeDValue) /* aDistance array */ - )); - if( !aCell ){ - return SQLITE_NOMEM; - } - aOrder = (int *)&aCell[n]; - aSpare = (int *)&aOrder[n]; - aDistance = (RtreeDValue *)&aSpare[n]; + assert( pIter->eType!=RBU_PK_VTAB ); + zCollist = rbuObjIterGetIndexCols( + p, pIter, &zImposterCols, &zImposterPK, &zWhere, &nBind + ); + zBind = rbuObjIterGetBindlist(p, nBind); + + /* Create the imposter table used to write to this index. */ + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 0, 1); + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 1,tnum); + rbuMPrintfExec(p, p->dbMain, + "CREATE TABLE \"rbu_imp_%w\"( %s, PRIMARY KEY( %s ) ) WITHOUT ROWID", + zTbl, zImposterCols, zImposterPK + ); + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 0, 0); + + /* Create the statement to insert index entries */ + pIter->nCol = nBind; + if( p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError( + p->dbMain, &pIter->pInsert, &p->zErrmsg, + sqlite3_mprintf("INSERT INTO \"rbu_imp_%w\" VALUES(%s)", zTbl, zBind) + ); + } - for(ii=0; iirc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError( + p->dbMain, &pIter->pDelete, &p->zErrmsg, + sqlite3_mprintf("DELETE FROM \"rbu_imp_%w\" WHERE %s", zTbl, zWhere) + ); + } + + /* Create the SELECT statement to read keys in sorted order */ + if( p->rc==SQLITE_OK ){ + char *zSql; + if( pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_NONE ){ + zSql = sqlite3_mprintf( + "SELECT %s, rbu_control FROM %s.'rbu_tmp_%q' ORDER BY %s%s", + zCollist, p->zStateDb, pIter->zTbl, + zCollist, zLimit + ); + }else{ + zSql = sqlite3_mprintf( + "SELECT %s, rbu_control FROM 'data_%q' " + "WHERE typeof(rbu_control)='integer' AND rbu_control!=1 " + "UNION ALL " + "SELECT %s, rbu_control FROM %s.'rbu_tmp_%q' " + "ORDER BY %s%s", + zCollist, pIter->zTbl, + zCollist, p->zStateDb, pIter->zTbl, + zCollist, zLimit + ); + } + p->rc = prepareFreeAndCollectError(p->dbRbu, &pIter->pSelect, pz, zSql); + } + + sqlite3_free(zImposterCols); + sqlite3_free(zImposterPK); + sqlite3_free(zWhere); + sqlite3_free(zBind); }else{ - nodeGetCell(pRtree, pNode, ii, &aCell[ii]); - } - aOrder[ii] = ii; - for(iDim=0; iDimnDim; iDim++){ - aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2]); - aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2+1]); - } - } - for(iDim=0; iDimnDim; iDim++){ - aCenterCoord[iDim] = (aCenterCoord[iDim]/(nCell*(RtreeDValue)2)); - } + int bRbuRowid = (pIter->eType==RBU_PK_VTAB || pIter->eType==RBU_PK_NONE); + const char *zTbl = pIter->zTbl; /* Table this step applies to */ + const char *zWrite; /* Imposter table name */ + + char *zBindings = rbuObjIterGetBindlist(p, pIter->nTblCol + bRbuRowid); + char *zWhere = rbuObjIterGetWhere(p, pIter); + char *zOldlist = rbuObjIterGetOldlist(p, pIter, "old"); + char *zNewlist = rbuObjIterGetOldlist(p, pIter, "new"); + + zCollist = rbuObjIterGetCollist(p, pIter); + pIter->nCol = pIter->nTblCol; + + /* Create the SELECT statement to read keys from data_xxx */ + if( p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError(p->dbRbu, &pIter->pSelect, pz, + sqlite3_mprintf( + "SELECT %s, rbu_control%s FROM 'data_%q'%s", + zCollist, (bRbuRowid ? ", rbu_rowid" : ""), zTbl, zLimit + ) + ); + } - for(ii=0; iinDim; iDim++){ - RtreeDValue coord = (DCOORD(aCell[ii].aCoord[iDim*2+1]) - - DCOORD(aCell[ii].aCoord[iDim*2])); - aDistance[ii] += (coord-aCenterCoord[iDim])*(coord-aCenterCoord[iDim]); - } - } + /* Create the imposter table or tables (if required). */ + rbuCreateImposterTable(p, pIter); + rbuCreateImposterTable2(p, pIter); + zWrite = (pIter->eType==RBU_PK_VTAB ? "" : "rbu_imp_"); - SortByDistance(aOrder, nCell, aDistance, aSpare); - nodeZero(pRtree, pNode); + /* Create the INSERT statement to write to the target PK b-tree */ + if( p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pIter->pInsert, pz, + sqlite3_mprintf( + "INSERT INTO \"%s%w\"(%s%s) VALUES(%s)", + zWrite, zTbl, zCollist, (bRbuRowid ? ", _rowid_" : ""), zBindings + ) + ); + } - for(ii=0; rc==SQLITE_OK && ii<(nCell-(RTREE_MINCELLS(pRtree)+1)); ii++){ - RtreeCell *p = &aCell[aOrder[ii]]; - nodeInsertCell(pRtree, pNode, p); - if( p->iRowid==pCell->iRowid ){ - if( iHeight==0 ){ - rc = rowidWrite(pRtree, p->iRowid, pNode->iNode); - }else{ - rc = parentWrite(pRtree, p->iRowid, pNode->iNode); + /* Create the DELETE statement to write to the target PK b-tree */ + if( p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pIter->pDelete, pz, + sqlite3_mprintf( + "DELETE FROM \"%s%w\" WHERE %s", zWrite, zTbl, zWhere + ) + ); } - } - } - if( rc==SQLITE_OK ){ - rc = fixBoundingBox(pRtree, pNode); - } - for(; rc==SQLITE_OK && iiiNode currently contains - ** the height of the sub-tree headed by the cell. - */ - RtreeNode *pInsert; - RtreeCell *p = &aCell[aOrder[ii]]; - rc = ChooseLeaf(pRtree, p, iHeight, &pInsert); - if( rc==SQLITE_OK ){ - int rc2; - rc = rtreeInsertCell(pRtree, pInsert, p, iHeight); - rc2 = nodeRelease(pRtree, pInsert); - if( rc==SQLITE_OK ){ - rc = rc2; + + if( pIter->abIndexed ){ + const char *zRbuRowid = ""; + if( pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_NONE ){ + zRbuRowid = ", rbu_rowid"; + } + + /* Create the rbu_tmp_xxx table and the triggers to populate it. */ + rbuMPrintfExec(p, p->dbRbu, + "CREATE TABLE IF NOT EXISTS %s.'rbu_tmp_%q' AS " + "SELECT *%s FROM 'data_%q' WHERE 0;" + , p->zStateDb + , zTbl, (pIter->eType==RBU_PK_EXTERNAL ? ", 0 AS rbu_rowid" : "") + , zTbl + ); + + rbuMPrintfExec(p, p->dbMain, + "CREATE TEMP TRIGGER rbu_delete_tr BEFORE DELETE ON \"%s%w\" " + "BEGIN " + " SELECT rbu_tmp_insert(2, %s);" + "END;" + + "CREATE TEMP TRIGGER rbu_update1_tr BEFORE UPDATE ON \"%s%w\" " + "BEGIN " + " SELECT rbu_tmp_insert(2, %s);" + "END;" + + "CREATE TEMP TRIGGER rbu_update2_tr AFTER UPDATE ON \"%s%w\" " + "BEGIN " + " SELECT rbu_tmp_insert(3, %s);" + "END;", + zWrite, zTbl, zOldlist, + zWrite, zTbl, zOldlist, + zWrite, zTbl, zNewlist + ); + + if( pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_NONE ){ + rbuMPrintfExec(p, p->dbMain, + "CREATE TEMP TRIGGER rbu_insert_tr AFTER INSERT ON \"%s%w\" " + "BEGIN " + " SELECT rbu_tmp_insert(0, %s);" + "END;", + zWrite, zTbl, zNewlist + ); + } + + rbuObjIterPrepareTmpInsert(p, pIter, zCollist, zRbuRowid); } + + sqlite3_free(zWhere); + sqlite3_free(zOldlist); + sqlite3_free(zNewlist); + sqlite3_free(zBindings); } + sqlite3_free(zCollist); + sqlite3_free(zLimit); } - - sqlite3_free(aCell); - return rc; + + return p->rc; } /* -** Insert cell pCell into node pNode. Node pNode is the head of a -** subtree iHeight high (leaf nodes have iHeight==0). +** Set output variable *ppStmt to point to an UPDATE statement that may +** be used to update the imposter table for the main table b-tree of the +** table object that pIter currently points to, assuming that the +** rbu_control column of the data_xyz table contains zMask. +** +** If the zMask string does not specify any columns to update, then this +** is not an error. Output variable *ppStmt is set to NULL in this case. */ -static int rtreeInsertCell( - Rtree *pRtree, - RtreeNode *pNode, - RtreeCell *pCell, - int iHeight +static int rbuGetUpdateStmt( + sqlite3rbu *p, /* RBU handle */ + RbuObjIter *pIter, /* Object iterator */ + const char *zMask, /* rbu_control value ('x.x.') */ + sqlite3_stmt **ppStmt /* OUT: UPDATE statement handle */ ){ - int rc = SQLITE_OK; - if( iHeight>0 ){ - RtreeNode *pChild = nodeHashLookup(pRtree, pCell->iRowid); - if( pChild ){ - nodeRelease(pRtree, pChild->pParent); - nodeReference(pNode); - pChild->pParent = pNode; + RbuUpdateStmt **pp; + RbuUpdateStmt *pUp = 0; + int nUp = 0; + + /* In case an error occurs */ + *ppStmt = 0; + + /* Search for an existing statement. If one is found, shift it to the front + ** of the LRU queue and return immediately. Otherwise, leave nUp pointing + ** to the number of statements currently in the cache and pUp to the + ** last object in the list. */ + for(pp=&pIter->pRbuUpdate; *pp; pp=&((*pp)->pNext)){ + pUp = *pp; + if( strcmp(pUp->zMask, zMask)==0 ){ + *pp = pUp->pNext; + pUp->pNext = pIter->pRbuUpdate; + pIter->pRbuUpdate = pUp; + *ppStmt = pUp->pUpdate; + return SQLITE_OK; } + nUp++; } - if( nodeInsertCell(pRtree, pNode, pCell) ){ - if( iHeight<=pRtree->iReinsertHeight || pNode->iNode==1){ - rc = SplitNode(pRtree, pNode, pCell, iHeight); - }else{ - pRtree->iReinsertHeight = iHeight; - rc = Reinsert(pRtree, pNode, pCell, iHeight); - } + assert( pUp==0 || pUp->pNext==0 ); + + if( nUp>=SQLITE_RBU_UPDATE_CACHESIZE ){ + for(pp=&pIter->pRbuUpdate; *pp!=pUp; pp=&((*pp)->pNext)); + *pp = 0; + sqlite3_finalize(pUp->pUpdate); + pUp->pUpdate = 0; }else{ - rc = AdjustTree(pRtree, pNode, pCell); - if( rc==SQLITE_OK ){ - if( iHeight==0 ){ - rc = rowidWrite(pRtree, pCell->iRowid, pNode->iNode); - }else{ - rc = parentWrite(pRtree, pCell->iRowid, pNode->iNode); - } - } + pUp = (RbuUpdateStmt*)rbuMalloc(p, sizeof(RbuUpdateStmt)+pIter->nTblCol+1); } - return rc; -} -static int reinsertNodeContent(Rtree *pRtree, RtreeNode *pNode){ - int ii; - int rc = SQLITE_OK; - int nCell = NCELL(pNode); + if( pUp ){ + char *zWhere = rbuObjIterGetWhere(p, pIter); + char *zSet = rbuObjIterGetSetlist(p, pIter, zMask); + char *zUpdate = 0; - for(ii=0; rc==SQLITE_OK && iizMask = (char*)&pUp[1]; + memcpy(pUp->zMask, zMask, pIter->nTblCol); + pUp->pNext = pIter->pRbuUpdate; + pIter->pRbuUpdate = pUp; - /* Find a node to store this cell in. pNode->iNode currently contains - ** the height of the sub-tree headed by the cell. - */ - rc = ChooseLeaf(pRtree, &cell, (int)pNode->iNode, &pInsert); - if( rc==SQLITE_OK ){ - int rc2; - rc = rtreeInsertCell(pRtree, pInsert, &cell, (int)pNode->iNode); - rc2 = nodeRelease(pRtree, pInsert); - if( rc==SQLITE_OK ){ - rc = rc2; - } + if( zSet ){ + const char *zPrefix = ""; + + if( pIter->eType!=RBU_PK_VTAB ) zPrefix = "rbu_imp_"; + zUpdate = sqlite3_mprintf("UPDATE \"%s%w\" SET %s WHERE %s", + zPrefix, pIter->zTbl, zSet, zWhere + ); + p->rc = prepareFreeAndCollectError( + p->dbMain, &pUp->pUpdate, &p->zErrmsg, zUpdate + ); + *ppStmt = pUp->pUpdate; } + sqlite3_free(zWhere); + sqlite3_free(zSet); } - return rc; + + return p->rc; } -/* -** Select a currently unused rowid for a new r-tree record. -*/ -static int newRowid(Rtree *pRtree, i64 *piRowid){ - int rc; - sqlite3_bind_null(pRtree->pWriteRowid, 1); - sqlite3_bind_null(pRtree->pWriteRowid, 2); - sqlite3_step(pRtree->pWriteRowid); - rc = sqlite3_reset(pRtree->pWriteRowid); - *piRowid = sqlite3_last_insert_rowid(pRtree->db); - return rc; +static sqlite3 *rbuOpenDbhandle(sqlite3rbu *p, const char *zName){ + sqlite3 *db = 0; + if( p->rc==SQLITE_OK ){ + const int flags = SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_URI; + p->rc = sqlite3_open_v2(zName, &db, flags, p->zVfsName); + if( p->rc ){ + p->zErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + sqlite3_close(db); + db = 0; + } + } + return db; } /* -** Remove the entry with rowid=iDelete from the r-tree structure. +** Open the database handle and attach the RBU database as "rbu". If an +** error occurs, leave an error code and message in the RBU handle. */ -static int rtreeDeleteRowid(Rtree *pRtree, sqlite3_int64 iDelete){ - int rc; /* Return code */ - RtreeNode *pLeaf = 0; /* Leaf node containing record iDelete */ - int iCell; /* Index of iDelete cell in pLeaf */ - RtreeNode *pRoot; /* Root node of rtree structure */ +static void rbuOpenDatabase(sqlite3rbu *p){ + assert( p->rc==SQLITE_OK ); + assert( p->dbMain==0 && p->dbRbu==0 ); + p->eStage = 0; + p->dbMain = rbuOpenDbhandle(p, p->zTarget); + p->dbRbu = rbuOpenDbhandle(p, p->zRbu); - /* Obtain a reference to the root node to initialize Rtree.iDepth */ - rc = nodeAcquire(pRtree, 1, 0, &pRoot); + /* If using separate RBU and state databases, attach the state database to + ** the RBU db handle now. */ + if( p->zState ){ + rbuMPrintfExec(p, p->dbRbu, "ATTACH %Q AS stat", p->zState); + memcpy(p->zStateDb, "stat", 4); + }else{ + memcpy(p->zStateDb, "main", 4); + } - /* Obtain a reference to the leaf node that contains the entry - ** about to be deleted. - */ - if( rc==SQLITE_OK ){ - rc = findLeafNode(pRtree, iDelete, &pLeaf, 0); + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_create_function(p->dbMain, + "rbu_tmp_insert", -1, SQLITE_UTF8, (void*)p, rbuTmpInsertFunc, 0, 0 + ); } - /* Delete the cell in question from the leaf node. */ - if( rc==SQLITE_OK ){ - int rc2; - rc = nodeRowidIndex(pRtree, pLeaf, iDelete, &iCell); - if( rc==SQLITE_OK ){ - rc = deleteCell(pRtree, pLeaf, iCell, 0); - } - rc2 = nodeRelease(pRtree, pLeaf); - if( rc==SQLITE_OK ){ - rc = rc2; - } + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_file_control(p->dbMain, "main", SQLITE_FCNTL_RBU, (void*)p); } + rbuMPrintfExec(p, p->dbMain, "SELECT * FROM sqlite_master"); - /* Delete the corresponding entry in the _rowid table. */ - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pRtree->pDeleteRowid, 1, iDelete); - sqlite3_step(pRtree->pDeleteRowid); - rc = sqlite3_reset(pRtree->pDeleteRowid); + /* Mark the database file just opened as an RBU target database. If + ** this call returns SQLITE_NOTFOUND, then the RBU vfs is not in use. + ** This is an error. */ + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_file_control(p->dbMain, "main", SQLITE_FCNTL_RBU, (void*)p); } - /* Check if the root node now has exactly one child. If so, remove - ** it, schedule the contents of the child for reinsertion and - ** reduce the tree height by one. - ** - ** This is equivalent to copying the contents of the child into - ** the root node (the operation that Gutman's paper says to perform - ** in this scenario). - */ - if( rc==SQLITE_OK && pRtree->iDepth>0 && NCELL(pRoot)==1 ){ - int rc2; - RtreeNode *pChild; - i64 iChild = nodeGetRowid(pRtree, pRoot, 0); - rc = nodeAcquire(pRtree, iChild, pRoot, &pChild); - if( rc==SQLITE_OK ){ - rc = removeNode(pRtree, pChild, pRtree->iDepth-1); - } - rc2 = nodeRelease(pRtree, pChild); - if( rc==SQLITE_OK ) rc = rc2; - if( rc==SQLITE_OK ){ - pRtree->iDepth--; - writeInt16(pRoot->zData, pRtree->iDepth); - pRoot->isDirty = 1; + if( p->rc==SQLITE_NOTFOUND ){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("rbu vfs not found"); + } +} + +/* +** This routine is a copy of the sqlite3FileSuffix3() routine from the core. +** It is a no-op unless SQLITE_ENABLE_8_3_NAMES is defined. +** +** If SQLITE_ENABLE_8_3_NAMES is set at compile-time and if the database +** filename in zBaseFilename is a URI with the "8_3_names=1" parameter and +** if filename in z[] has a suffix (a.k.a. "extension") that is longer than +** three characters, then shorten the suffix on z[] to be the last three +** characters of the original suffix. +** +** If SQLITE_ENABLE_8_3_NAMES is set to 2 at compile-time, then always +** do the suffix shortening regardless of URI parameter. +** +** Examples: +** +** test.db-journal => test.nal +** test.db-wal => test.wal +** test.db-shm => test.shm +** test.db-mj7f3319fa => test.9fa +*/ +static void rbuFileSuffix3(const char *zBase, char *z){ +#ifdef SQLITE_ENABLE_8_3_NAMES +#if SQLITE_ENABLE_8_3_NAMES<2 + if( sqlite3_uri_boolean(zBase, "8_3_names", 0) ) +#endif + { + int i, sz; + sz = sqlite3Strlen30(z); + for(i=sz-1; i>0 && z[i]!='/' && z[i]!='.'; i--){} + if( z[i]=='.' && ALWAYS(sz>i+4) ) memmove(&z[i+1], &z[sz-3], 4); + } +#endif +} + +/* +** Return the current wal-index header checksum for the target database +** as a 64-bit integer. +** +** The checksum is store in the first page of xShmMap memory as an 8-byte +** blob starting at byte offset 40. +*/ +static i64 rbuShmChecksum(sqlite3rbu *p){ + i64 iRet = 0; + if( p->rc==SQLITE_OK ){ + sqlite3_file *pDb = p->pTargetFd->pReal; + u32 volatile *ptr; + p->rc = pDb->pMethods->xShmMap(pDb, 0, 32*1024, 0, (void volatile**)&ptr); + if( p->rc==SQLITE_OK ){ + iRet = ((i64)ptr[10] << 32) + ptr[11]; } } + return iRet; +} - /* Re-insert the contents of any underfull nodes removed from the tree. */ - for(pLeaf=pRtree->pDeleted; pLeaf; pLeaf=pRtree->pDeleted){ - if( rc==SQLITE_OK ){ - rc = reinsertNodeContent(pRtree, pLeaf); +/* +** This function is called as part of initializing or reinitializing an +** incremental checkpoint. +** +** It populates the sqlite3rbu.aFrame[] array with the set of +** (wal frame -> db page) copy operations required to checkpoint the +** current wal file, and obtains the set of shm locks required to safely +** perform the copy operations directly on the file-system. +** +** If argument pState is not NULL, then the incremental checkpoint is +** being resumed. In this case, if the checksum of the wal-index-header +** following recovery is not the same as the checksum saved in the RbuState +** object, then the rbu handle is set to DONE state. This occurs if some +** other client appends a transaction to the wal file in the middle of +** an incremental checkpoint. +*/ +static void rbuSetupCheckpoint(sqlite3rbu *p, RbuState *pState){ + + /* If pState is NULL, then the wal file may not have been opened and + ** recovered. Running a read-statement here to ensure that doing so + ** does not interfere with the "capture" process below. */ + if( pState==0 ){ + p->eStage = 0; + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_exec(p->dbMain, "SELECT * FROM sqlite_master", 0, 0, 0); } - pRtree->pDeleted = pLeaf->pNext; - sqlite3_free(pLeaf); } - /* Release the reference to the root node. */ - if( rc==SQLITE_OK ){ - rc = nodeRelease(pRtree, pRoot); - }else{ - nodeRelease(pRtree, pRoot); + /* Assuming no error has occurred, run a "restart" checkpoint with the + ** sqlite3rbu.eStage variable set to CAPTURE. This turns on the following + ** special behaviour in the rbu VFS: + ** + ** * If the exclusive shm WRITER or READ0 lock cannot be obtained, + ** the checkpoint fails with SQLITE_BUSY (normally SQLite would + ** proceed with running a passive checkpoint instead of failing). + ** + ** * Attempts to read from the *-wal file or write to the database file + ** do not perform any IO. Instead, the frame/page combinations that + ** would be read/written are recorded in the sqlite3rbu.aFrame[] + ** array. + ** + ** * Calls to xShmLock(UNLOCK) to release the exclusive shm WRITER, + ** READ0 and CHECKPOINT locks taken as part of the checkpoint are + ** no-ops. These locks will not be released until the connection + ** is closed. + ** + ** * Attempting to xSync() the database file causes an SQLITE_INTERNAL + ** error. + ** + ** As a result, unless an error (i.e. OOM or SQLITE_BUSY) occurs, the + ** checkpoint below fails with SQLITE_INTERNAL, and leaves the aFrame[] + ** array populated with a set of (frame -> page) mappings. Because the + ** WRITER, CHECKPOINT and READ0 locks are still held, it is safe to copy + ** data from the wal file into the database file according to the + ** contents of aFrame[]. + */ + if( p->rc==SQLITE_OK ){ + int rc2; + p->eStage = RBU_STAGE_CAPTURE; + rc2 = sqlite3_exec(p->dbMain, "PRAGMA main.wal_checkpoint=restart", 0, 0,0); + if( rc2!=SQLITE_INTERNAL ) p->rc = rc2; } - return rc; + if( p->rc==SQLITE_OK ){ + p->eStage = RBU_STAGE_CKPT; + p->nStep = (pState ? pState->nRow : 0); + p->aBuf = rbuMalloc(p, p->pgsz); + p->iWalCksum = rbuShmChecksum(p); + } + + if( p->rc==SQLITE_OK && pState && pState->iWalCksum!=p->iWalCksum ){ + p->rc = SQLITE_DONE; + p->eStage = RBU_STAGE_DONE; + } } /* -** Rounding constants for float->double conversion. +** Called when iAmt bytes are read from offset iOff of the wal file while +** the rbu object is in capture mode. Record the frame number of the frame +** being read in the aFrame[] array. */ -#define RNDTOWARDS (1.0 - 1.0/8388608.0) /* Round towards zero */ -#define RNDAWAY (1.0 + 1.0/8388608.0) /* Round away from zero */ +static int rbuCaptureWalRead(sqlite3rbu *pRbu, i64 iOff, int iAmt){ + const u32 mReq = (1<mLock!=mReq ){ + pRbu->rc = SQLITE_BUSY; + return SQLITE_INTERNAL; + } + + pRbu->pgsz = iAmt; + if( pRbu->nFrame==pRbu->nFrameAlloc ){ + int nNew = (pRbu->nFrameAlloc ? pRbu->nFrameAlloc : 64) * 2; + RbuFrame *aNew; + aNew = (RbuFrame*)sqlite3_realloc(pRbu->aFrame, nNew * sizeof(RbuFrame)); + if( aNew==0 ) return SQLITE_NOMEM; + pRbu->aFrame = aNew; + pRbu->nFrameAlloc = nNew; + } + + iFrame = (u32)((iOff-32) / (i64)(iAmt+24)) + 1; + if( pRbu->iMaxFrame