Index: SQLite.Interop/src/core/sqlite3.c ================================================================== --- SQLite.Interop/src/core/sqlite3.c +++ SQLite.Interop/src/core/sqlite3.c @@ -664,11 +664,11 @@ ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. */ #define SQLITE_VERSION "3.7.13" #define SQLITE_VERSION_NUMBER 3007013 -#define SQLITE_SOURCE_ID "2012-06-07 07:24:04 506008f000ba4af0b35da023b8c52f7a3f5033bd" +#define SQLITE_SOURCE_ID "2012-06-08 14:01:53 025227be5495f950c466dfabac140cba69e498be" /* ** CAPI3REF: Run-Time Library Version Numbers ** KEYWORDS: sqlite3_version, sqlite3_sourceid ** @@ -5013,15 +5013,16 @@ ** CAPI3REF: Name Of The Folder Holding Database Files ** ** ^(If this global variable is made to point to a string which is ** the name of a folder (a.k.a. directory), then all database files ** specified with a relative pathname and created or accessed by -** SQLite when using a built-in [sqlite3_vfs | VFS] will be assumed +** SQLite when using a built-in windows [sqlite3_vfs | VFS] will be assumed ** to be relative to that directory.)^ ^If this variable is a NULL ** pointer, then SQLite assumes that all database files specified ** with a relative pathname are relative to the current directory -** for the process. +** for the process. Only the windows VFS makes use of this global +** variable; it is ignored by the unix VFS. ** ** Changing the value of this variable while a database connection is ** open can result in a corrupt database. ** ** It is not safe to read or modify this variable in more than one @@ -70017,11 +70018,10 @@ assert( u.bl.pC->isTable || pOp->opcode!=OP_RowData ); assert( u.bl.pC->isIndex || pOp->opcode==OP_RowData ); assert( u.bl.pC!=0 ); assert( u.bl.pC->nullRow==0 ); assert( u.bl.pC->pseudoTableReg==0 ); - assert( !u.bl.pC->isSorter ); assert( u.bl.pC->pCursor!=0 ); u.bl.pCrsr = u.bl.pC->pCursor; assert( sqlite3BtreeCursorIsValid(u.bl.pCrsr) ); /* The OP_RowKey and OP_RowData opcodes always follow OP_NotExists or @@ -115648,13 +115648,11 @@ if( nOpt==4 && memcmp("mode", zOpt, 4)==0 ){ static struct OpenMode aOpenMode[] = { { "ro", SQLITE_OPEN_READONLY }, { "rw", SQLITE_OPEN_READWRITE }, { "rwc", SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE }, - { "memory", - SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE - | SQLITE_OPEN_MEMORY }, + { "memory", SQLITE_OPEN_MEMORY }, { 0, 0 } }; mask = SQLITE_OPEN_READONLY | SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_MEMORY; @@ -125936,14 +125934,13 @@ /* #include */ /* ** Implementation of a special SQL scalar function for testing tokenizers ** designed to be used in concert with the Tcl testing framework. This -** function must be called with two arguments: +** function must be called with two or more arguments: ** -** SELECT (, ); -** SELECT (, ); +** SELECT (, ..., ); ** ** where is the name passed as the second argument ** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer') ** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test'). ** @@ -125976,31 +125973,31 @@ const char *zName; int nName; const char *zInput; int nInput; - const char *zArg = 0; + const char *azArg[64]; const char *zToken; int nToken; int iStart; int iEnd; int iPos; + int i; Tcl_Obj *pRet; - assert( argc==2 || argc==3 ); + if( argc<2 ){ + sqlite3_result_error(context, "insufficient arguments", -1); + return; + } nName = sqlite3_value_bytes(argv[0]); zName = (const char *)sqlite3_value_text(argv[0]); nInput = sqlite3_value_bytes(argv[argc-1]); zInput = (const char *)sqlite3_value_text(argv[argc-1]); - if( argc==3 ){ - zArg = (const char *)sqlite3_value_text(argv[1]); - } - pHash = (Fts3Hash *)sqlite3_user_data(context); p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); if( !p ){ char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); @@ -126010,11 +126007,15 @@ } pRet = Tcl_NewObj(); Tcl_IncrRefCount(pRet); - if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){ + for(i=1; ixCreate(argc-2, azArg, &pTokenizer) ){ zErr = "error in xCreate()"; goto finish; } pTokenizer->pModule = p; if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){ @@ -126194,14 +126195,11 @@ if( SQLITE_OK==rc ){ rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0); } #ifdef SQLITE_TEST if( SQLITE_OK==rc ){ - rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0); - } - if( SQLITE_OK==rc ){ - rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0); + rc = sqlite3_create_function(db, zTest, -1, any, p, testFunc, 0, 0); } if( SQLITE_OK==rc ){ rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0); } #endif @@ -126588,11 +126586,12 @@ ** fts3SegReaderFirstDocid() ** fts3SegReaderNextDocid() */ struct Fts3SegReader { int iIdx; /* Index within level, or 0x7FFFFFFF for PT */ - int bLookup; /* True for a lookup only */ + u8 bLookup; /* True for a lookup only */ + u8 rootOnly; /* True for a root-only reader */ sqlite3_int64 iStartBlock; /* Rowid of first leaf block to traverse */ sqlite3_int64 iLeafEndBlock; /* Rowid of final leaf block to traverse */ sqlite3_int64 iEndBlock; /* Rowid of final block in segment (or 0) */ sqlite3_int64 iCurrentBlock; /* Current leaf block (or 0) */ @@ -126622,11 +126621,11 @@ int nOffsetList; /* For descending pending seg-readers only */ sqlite3_int64 iDocid; }; #define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0) -#define fts3SegReaderIsRootOnly(p) ((p)->aNode==(char *)&(p)[1]) +#define fts3SegReaderIsRootOnly(p) ((p)->rootOnly!=0) /* ** An instance of this structure is used to create a segment b-tree in the ** database. The internal details of this type are only accessed by the ** following functions: @@ -128033,18 +128032,19 @@ if( !pReader ){ return SQLITE_NOMEM; } memset(pReader, 0, sizeof(Fts3SegReader)); pReader->iIdx = iAge; - pReader->bLookup = bLookup; + pReader->bLookup = bLookup!=0; pReader->iStartBlock = iStartLeaf; pReader->iLeafEndBlock = iEndLeaf; pReader->iEndBlock = iEndBlock; if( nExtra ){ /* The entire segment is stored in the root node. */ pReader->aNode = (char *)&pReader[1]; + pReader->rootOnly = 1; pReader->nNode = nRoot; memcpy(pReader->aNode, zRoot, nRoot); memset(&pReader->aNode[nRoot], 0, FTS3_NODE_PADDING); }else{ pReader->iCurrentBlock = iStartLeaf-1; @@ -133438,10 +133438,12 @@ typedef struct unicode_cursor unicode_cursor; struct unicode_tokenizer { sqlite3_tokenizer base; int bRemoveDiacritic; + int nException; + int *aiException; }; struct unicode_cursor { sqlite3_tokenizer_cursor base; const unsigned char *aInput; /* Input text being tokenized */ @@ -133450,10 +133452,125 @@ int iToken; /* Index of next token to be returned */ char *zToken; /* storage for current token */ int nAlloc; /* space allocated at zToken */ }; + +/* +** Destroy a tokenizer allocated by unicodeCreate(). +*/ +static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){ + if( pTokenizer ){ + unicode_tokenizer *p = (unicode_tokenizer *)pTokenizer; + sqlite3_free(p->aiException); + sqlite3_free(p); + } + return SQLITE_OK; +} + +/* +** As part of a tokenchars= or separators= option, the CREATE VIRTUAL TABLE +** statement has specified that the tokenizer for this table shall consider +** all characters in string zIn/nIn to be separators (if bAlnum==0) or +** token characters (if bAlnum==1). +** +** For each codepoint in the zIn/nIn string, this function checks if the +** sqlite3FtsUnicodeIsalnum() function already returns the desired result. +** If so, no action is taken. Otherwise, the codepoint is added to the +** unicode_tokenizer.aiException[] array. For the purposes of tokenization, +** the return value of sqlite3FtsUnicodeIsalnum() is inverted for all +** codepoints in the aiException[] array. +** +** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic() +** identifies as a diacritic) occurs in the zIn/nIn string it is ignored. +** It is not possible to change the behaviour of the tokenizer with respect +** to these codepoints. +*/ +static int unicodeAddExceptions( + unicode_tokenizer *p, /* Tokenizer to add exceptions to */ + int bAlnum, /* Replace Isalnum() return value with this */ + const char *zIn, /* Array of characters to make exceptions */ + int nIn /* Length of z in bytes */ +){ + const unsigned char *z = (const unsigned char *)zIn; + const unsigned char *zTerm = &z[nIn]; + int iCode; + int nEntry = 0; + + assert( bAlnum==0 || bAlnum==1 ); + + while( zaiException, (p->nException+nEntry)*sizeof(int)); + if( aNew==0 ) return SQLITE_NOMEM; + nNew = p->nException; + + z = (const unsigned char *)zIn; + while( zi; j--) aNew[j] = aNew[j-1]; + aNew[i] = iCode; + nNew++; + } + } + p->aiException = aNew; + p->nException = nNew; + } + + return SQLITE_OK; +} + +/* +** Return true if the p->aiException[] array contains the value iCode. +*/ +static int unicodeIsException(unicode_tokenizer *p, int iCode){ + if( p->nException>0 ){ + int *a = p->aiException; + int iLo = 0; + int iHi = p->nException-1; + + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + if( iCode==a[iTest] ){ + return 1; + }else if( iCode>a[iTest] ){ + iLo = iTest+1; + }else{ + iHi = iTest-1; + } + } + } + + return 0; +} + +/* +** Return true if, for the purposes of tokenization, codepoint iCode is +** considered a token character (not a separator). +*/ +static int unicodeIsAlnum(unicode_tokenizer *p, int iCode){ + assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); + return sqlite3FtsUnicodeIsalnum(iCode) ^ unicodeIsException(p, iCode); +} + /* ** Create a new tokenizer instance. */ static int unicodeCreate( int nArg, /* Size of array argv[] */ @@ -133460,43 +133577,45 @@ const char * const *azArg, /* Tokenizer creation arguments */ sqlite3_tokenizer **pp /* OUT: New tokenizer handle */ ){ unicode_tokenizer *pNew; /* New tokenizer object */ int i; + int rc = SQLITE_OK; + pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer)); - if( pNew==NULL ){ - return SQLITE_NOMEM; - } + if( pNew==NULL ) return SQLITE_NOMEM; memset(pNew, 0, sizeof(unicode_tokenizer)); pNew->bRemoveDiacritic = 1; - for(i=0; ibRemoveDiacritic = 1; } else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){ pNew->bRemoveDiacritic = 0; } + else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){ + rc = unicodeAddExceptions(pNew, 1, &z[11], n-11); + } + else if( n>=11 && memcmp("separators=", z, 11)==0 ){ + rc = unicodeAddExceptions(pNew, 0, &z[11], n-11); + } else{ /* Unrecognized argument */ - return SQLITE_ERROR; + rc = SQLITE_ERROR; } } - *pp = &pNew->base; - return SQLITE_OK; -} - -/* -** Destroy a tokenizer allocated by unicodeCreate(). -*/ -static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){ - sqlite3_free(pTokenizer); - return SQLITE_OK; + if( rc!=SQLITE_OK ){ + unicodeDestroy((sqlite3_tokenizer *)pNew); + pNew = 0; + } + *pp = (sqlite3_tokenizer *)pNew; + return rc; } /* ** Prepare to begin tokenizing a particular string. The input ** string to be tokenized is pInput[0..nBytes-1]. A cursor @@ -133545,18 +133664,19 @@ /* ** Extract the next token from a tokenization cursor. The cursor must ** have been opened by a prior call to simpleOpen(). */ static int unicodeNext( - sqlite3_tokenizer_cursor *p, /* Cursor returned by simpleOpen */ + sqlite3_tokenizer_cursor *pC, /* Cursor returned by simpleOpen */ const char **paToken, /* OUT: Token text */ int *pnToken, /* OUT: Number of bytes at *paToken */ int *piStart, /* OUT: Starting offset of token */ int *piEnd, /* OUT: Ending offset of token */ int *piPos /* OUT: Position integer of token */ ){ - unicode_cursor *pCsr = (unicode_cursor *)p; + unicode_cursor *pCsr = (unicode_cursor *)pC; + unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer); int iCode; char *zOut; const unsigned char *z = &pCsr->aInput[pCsr->iOff]; const unsigned char *zStart = z; const unsigned char *zEnd; @@ -133565,11 +133685,11 @@ /* Scan past any delimiter characters before the start of the next token. ** Return SQLITE_DONE early if this takes us all the way to the end of ** the input. */ while( z=zTerm ) return SQLITE_DONE; zOut = pCsr->zToken; @@ -133585,21 +133705,19 @@ pCsr->nAlloc += 64; } /* Write the folded case of the last character read to the output */ zEnd = z; - iOut = sqlite3FtsUnicodeFold(iCode, - ((unicode_tokenizer *)pCsr->base.pTokenizer)->bRemoveDiacritic - ); + iOut = sqlite3FtsUnicodeFold(iCode, p->bRemoveDiacritic); if( iOut ){ WRITE_UTF8(zOut, iOut); } /* If the cursor is not at EOF, read the next character */ if( z>=zTerm ) break; READ_UTF8(z, zTerm, iCode); - }while( sqlite3FtsUnicodeIsalnum(iCode) + }while( unicodeIsAlnum(p, iCode) || sqlite3FtsUnicodeIsdiacritic(iCode) ); /* Set the output variables and return. */ pCsr->iOff = (z - pCsr->aInput); Index: SQLite.Interop/src/core/sqlite3.h ================================================================== --- SQLite.Interop/src/core/sqlite3.h +++ SQLite.Interop/src/core/sqlite3.h @@ -107,11 +107,11 @@ ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. */ #define SQLITE_VERSION "3.7.13" #define SQLITE_VERSION_NUMBER 3007013 -#define SQLITE_SOURCE_ID "2012-06-07 07:24:04 506008f000ba4af0b35da023b8c52f7a3f5033bd" +#define SQLITE_SOURCE_ID "2012-06-08 14:01:53 025227be5495f950c466dfabac140cba69e498be" /* ** CAPI3REF: Run-Time Library Version Numbers ** KEYWORDS: sqlite3_version, sqlite3_sourceid ** @@ -4456,15 +4456,16 @@ ** CAPI3REF: Name Of The Folder Holding Database Files ** ** ^(If this global variable is made to point to a string which is ** the name of a folder (a.k.a. directory), then all database files ** specified with a relative pathname and created or accessed by -** SQLite when using a built-in [sqlite3_vfs | VFS] will be assumed +** SQLite when using a built-in windows [sqlite3_vfs | VFS] will be assumed ** to be relative to that directory.)^ ^If this variable is a NULL ** pointer, then SQLite assumes that all database files specified ** with a relative pathname are relative to the current directory -** for the process. +** for the process. Only the windows VFS makes use of this global +** variable; it is ignored by the unix VFS. ** ** Changing the value of this variable while a database connection is ** open can result in a corrupt database. ** ** It is not safe to read or modify this variable in more than one