aboutsummaryrefslogtreecommitdiff
path: root/ext/fts5/fts5_expr.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/fts5/fts5_expr.c')
-rw-r--r--ext/fts5/fts5_expr.c570
1 files changed, 425 insertions, 145 deletions
diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c
index a713bb7c5..559f0db82 100644
--- a/ext/fts5/fts5_expr.c
+++ b/ext/fts5/fts5_expr.c
@@ -22,6 +22,8 @@
*/
#define FTS5_EOF 0
+#define FTS5_LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32))
+
typedef struct Fts5ExprTerm Fts5ExprTerm;
/*
@@ -73,6 +75,7 @@ struct Fts5ExprTerm {
int bPrefix; /* True for a prefix term */
char *zTerm; /* nul-terminated term */
Fts5IndexIter *pIter; /* Iterator for this term */
+ Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */
};
/*
@@ -181,6 +184,10 @@ static int fts5ExprGetToken(
default: {
const char *z2;
+ if( sqlite3Fts5IsBareword(z[0])==0 ){
+ sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z);
+ return FTS5_EOF;
+ }
tok = FTS5_STRING;
for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
pToken->n = (z2 - z);
@@ -245,79 +252,6 @@ int sqlite3Fts5ExprNew(
}
/*
-** Create a new FTS5 expression by cloning phrase iPhrase of the
-** expression passed as the second argument.
-*/
-int sqlite3Fts5ExprPhraseExpr(
- Fts5Config *pConfig,
- Fts5Expr *pExpr,
- int iPhrase,
- Fts5Expr **ppNew
-){
- int rc = SQLITE_OK; /* Return code */
- Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */
- Fts5ExprPhrase *pCopy; /* Copy of pOrig */
- Fts5Expr *pNew = 0; /* Expression to return via *ppNew */
-
- pOrig = pExpr->apExprPhrase[iPhrase];
- pCopy = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(&rc,
- sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm
- );
- if( pCopy ){
- int i; /* Used to iterate through phrase terms */
- Fts5ExprPhrase **apPhrase;
- Fts5ExprNode *pNode;
- Fts5ExprNearset *pNear;
-
- pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
- apPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc,
- sizeof(Fts5ExprPhrase*)
- );
- pNode = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprNode));
- pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc,
- sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*)
- );
-
- for(i=0; i<pOrig->nTerm; i++){
- pCopy->aTerm[i].zTerm = sqlite3Fts5Strndup(&rc, pOrig->aTerm[i].zTerm,-1);
- pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
- }
-
- if( rc==SQLITE_OK ){
- /* All the allocations succeeded. Put the expression object together. */
- pNew->pIndex = pExpr->pIndex;
- pNew->pRoot = pNode;
- pNew->nPhrase = 1;
- pNew->apExprPhrase = apPhrase;
- pNew->apExprPhrase[0] = pCopy;
-
- pNode->eType = (pOrig->nTerm==1 ? FTS5_TERM : FTS5_STRING);
- pNode->pNear = pNear;
-
- pNear->nPhrase = 1;
- pNear->apPhrase[0] = pCopy;
-
- pCopy->nTerm = pOrig->nTerm;
- pCopy->pNode = pNode;
- }else{
- /* At least one allocation failed. Free them all. */
- for(i=0; i<pOrig->nTerm; i++){
- sqlite3_free(pCopy->aTerm[i].zTerm);
- }
- sqlite3_free(pCopy);
- sqlite3_free(pNear);
- sqlite3_free(pNode);
- sqlite3_free(apPhrase);
- sqlite3_free(pNew);
- pNew = 0;
- }
- }
-
- *ppNew = pNew;
- return rc;
-}
-
-/*
** Free the expression node object passed as the only argument.
*/
void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){
@@ -351,6 +285,114 @@ static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){
}
/*
+** Argument pTerm must be a synonym iterator. Return the current rowid
+** that it points to.
+*/
+static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){
+ i64 iRet = 0;
+ int bRetValid = 0;
+ Fts5ExprTerm *p;
+
+ assert( pTerm->pSynonym );
+ assert( bDesc==0 || bDesc==1 );
+ for(p=pTerm; p; p=p->pSynonym){
+ if( 0==sqlite3Fts5IterEof(p->pIter) ){
+ i64 iRowid = sqlite3Fts5IterRowid(p->pIter);
+ if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){
+ iRet = iRowid;
+ bRetValid = 1;
+ }
+ }
+ }
+
+ if( pbEof && bRetValid==0 ) *pbEof = 1;
+ return iRet;
+}
+
+/*
+** Argument pTerm must be a synonym iterator.
+*/
+static int fts5ExprSynonymPoslist(
+ Fts5ExprTerm *pTerm,
+ i64 iRowid,
+ int *pbDel, /* OUT: Caller should sqlite3_free(*pa) */
+ u8 **pa, int *pn
+){
+ Fts5PoslistReader aStatic[4];
+ Fts5PoslistReader *aIter = aStatic;
+ int nIter = 0;
+ int nAlloc = 4;
+ int rc = SQLITE_OK;
+ Fts5ExprTerm *p;
+
+ assert( pTerm->pSynonym );
+ for(p=pTerm; p; p=p->pSynonym){
+ Fts5IndexIter *pIter = p->pIter;
+ if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){
+ const u8 *a;
+ int n;
+ i64 dummy;
+ rc = sqlite3Fts5IterPoslist(pIter, &a, &n, &dummy);
+ if( rc!=SQLITE_OK ) goto synonym_poslist_out;
+ if( nIter==nAlloc ){
+ int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
+ Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte);
+ if( aNew==0 ){
+ rc = SQLITE_NOMEM;
+ goto synonym_poslist_out;
+ }
+ memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter);
+ nAlloc = nAlloc*2;
+ if( aIter!=aStatic ) sqlite3_free(aIter);
+ aIter = aNew;
+ }
+ sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[nIter]);
+ assert( aIter[nIter].bEof==0 );
+ nIter++;
+ }
+ }
+
+ assert( *pbDel==0 );
+ if( nIter==1 ){
+ *pa = (u8*)aIter[0].a;
+ *pn = aIter[0].n;
+ }else{
+ Fts5PoslistWriter writer = {0};
+ Fts5Buffer buf = {0,0,0};
+ i64 iPrev = -1;
+ while( 1 ){
+ int i;
+ i64 iMin = FTS5_LARGEST_INT64;
+ for(i=0; i<nIter; i++){
+ if( aIter[i].bEof==0 ){
+ if( aIter[i].iPos==iPrev ){
+ if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue;
+ }
+ if( aIter[i].iPos<iMin ){
+ iMin = aIter[i].iPos;
+ }
+ }
+ }
+ if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break;
+ rc = sqlite3Fts5PoslistWriterAppend(&buf, &writer, iMin);
+ iPrev = iMin;
+ }
+ if( rc ){
+ sqlite3_free(buf.p);
+ }else{
+ *pa = buf.p;
+ *pn = buf.n;
+ *pbDel = 1;
+ }
+ }
+
+ synonym_poslist_out:
+ if( aIter!=aStatic ) sqlite3_free(aIter);
+ return rc;
+}
+
+
+/*
** All individual term iterators in pPhrase are guaranteed to be valid and
** pointing to the same rowid when this function is called. This function
** checks if the current rowid really is a match, and if so populates
@@ -362,7 +404,7 @@ static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){
** not a match.
*/
static int fts5ExprPhraseIsMatch(
- Fts5Expr *pExpr, /* Expression pPhrase belongs to */
+ Fts5ExprNode *pNode, /* Node pPhrase belongs to */
Fts5ExprColset *pColset, /* Restrict matches to these columns */
Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */
int *pbMatch /* OUT: Set to true if really a match */
@@ -388,16 +430,24 @@ static int fts5ExprPhraseIsMatch(
aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
if( !aIter ) return SQLITE_NOMEM;
}
+ memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);
/* Initialize a term iterator for each term in the phrase */
for(i=0; i<pPhrase->nTerm; i++){
+ Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
i64 dummy;
- int n;
- const u8 *a;
- rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n, &dummy);
- if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){
- goto ismatch_out;
+ int n = 0;
+ int bFlag = 0;
+ const u8 *a = 0;
+ if( pTerm->pSynonym ){
+ rc = fts5ExprSynonymPoslist(pTerm, pNode->iRowid, &bFlag, (u8**)&a, &n);
+ }else{
+ rc = sqlite3Fts5IterPoslist(pTerm->pIter, &a, &n, &dummy);
}
+ if( rc!=SQLITE_OK ) goto ismatch_out;
+ sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]);
+ aIter[i].bFlag = bFlag;
+ if( aIter[i].bEof ) goto ismatch_out;
}
while( 1 ){
@@ -431,6 +481,9 @@ static int fts5ExprPhraseIsMatch(
ismatch_out:
*pbMatch = (pPhrase->poslist.n>0);
+ for(i=0; i<pPhrase->nTerm; i++){
+ if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a);
+ }
if( aIter!=aStatic ) sqlite3_free(aIter);
return rc;
}
@@ -598,17 +651,55 @@ static int fts5ExprNearAdvanceFirst(
int bFromValid,
i64 iFrom
){
- Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
- int rc;
+ Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0];
+ int rc = SQLITE_OK;
- assert( Fts5NodeIsString(pNode) );
- if( bFromValid ){
- rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
+ if( pTerm->pSynonym ){
+ int bEof = 1;
+ Fts5ExprTerm *p;
+
+ /* Find the firstest rowid any synonym points to. */
+ i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0);
+
+ /* Advance each iterator that currently points to iRowid. Or, if iFrom
+ ** is valid - each iterator that points to a rowid before iFrom. */
+ for(p=pTerm; p; p=p->pSynonym){
+ if( sqlite3Fts5IterEof(p->pIter)==0 ){
+ i64 ii = sqlite3Fts5IterRowid(p->pIter);
+ if( ii==iRowid
+ || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc)
+ ){
+ if( bFromValid ){
+ rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom);
+ }else{
+ rc = sqlite3Fts5IterNext(p->pIter);
+ }
+ if( rc!=SQLITE_OK ) break;
+ if( sqlite3Fts5IterEof(p->pIter)==0 ){
+ bEof = 0;
+ }
+ }else{
+ bEof = 0;
+ }
+ }
+ }
+
+ /* Set the EOF flag if either all synonym iterators are at EOF or an
+ ** error has occurred. */
+ pNode->bEof = (rc || bEof);
}else{
- rc = sqlite3Fts5IterNext(pIter);
+ Fts5IndexIter *pIter = pTerm->pIter;
+
+ assert( Fts5NodeIsString(pNode) );
+ if( bFromValid ){
+ rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
+ }else{
+ rc = sqlite3Fts5IterNext(pIter);
+ }
+
+ pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
}
- pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
return rc;
}
@@ -647,6 +738,35 @@ static int fts5ExprAdvanceto(
return 0;
}
+static int fts5ExprSynonymAdvanceto(
+ Fts5ExprTerm *pTerm, /* Term iterator to advance */
+ int bDesc, /* True if iterator is "rowid DESC" */
+ i64 *piLast, /* IN/OUT: Lastest rowid seen so far */
+ int *pRc /* OUT: Error code */
+){
+ int rc = SQLITE_OK;
+ i64 iLast = *piLast;
+ Fts5ExprTerm *p;
+ int bEof = 0;
+
+ for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){
+ if( sqlite3Fts5IterEof(p->pIter)==0 ){
+ i64 iRowid = sqlite3Fts5IterRowid(p->pIter);
+ if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
+ rc = sqlite3Fts5IterNextFrom(p->pIter, iLast);
+ }
+ }
+ }
+
+ if( rc!=SQLITE_OK ){
+ *pRc = rc;
+ bEof = 1;
+ }else{
+ *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof);
+ }
+ return bEof;
+}
+
/*
** IN/OUT parameter (*pa) points to a position list n bytes in size. If
** the position list contains entries for column iCol, then (*pa) is set
@@ -717,9 +837,9 @@ static int fts5ExprNearTest(
** phrase is not a match, break out of the loop early. */
for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
- if( pPhrase->nTerm>1 || pNear->pColset ){
+ if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){
int bMatch = 0;
- rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch);
+ rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch);
if( bMatch==0 ) break;
}else{
rc = sqlite3Fts5IterPoslistBuffer(
@@ -755,6 +875,7 @@ static int fts5ExprTokenTest(
assert( pNode->eType==FTS5_TERM );
assert( pNear->nPhrase==1 && pPhrase->nTerm==1 );
+ assert( pPhrase->aTerm[0].pSynonym==0 );
rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid);
@@ -801,69 +922,97 @@ static int fts5ExprNearNextMatch(
i64 iLast; /* Lastest rowid any iterator points to */
int i, j; /* Phrase and token index, respectively */
int bMatch; /* True if all terms are at the same rowid */
+ const int bDesc = pExpr->bDesc;
- assert( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 );
+ /* Check that this node should not be FTS5_TERM */
+ assert( pNear->nPhrase>1
+ || pNear->apPhrase[0]->nTerm>1
+ || pNear->apPhrase[0]->aTerm[0].pSynonym
+ );
/* Initialize iLast, the "lastest" rowid any iterator points to. If the
** iterator skips through rowids in the default ascending order, this means
** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
** means the minimum rowid. */
- iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter);
+ if( pLeft->aTerm[0].pSynonym ){
+ iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0);
+ }else{
+ iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter);
+ }
do {
bMatch = 1;
for(i=0; i<pNear->nPhrase; i++){
Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
for(j=0; j<pPhrase->nTerm; j++){
- Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
- i64 iRowid = sqlite3Fts5IterRowid(pIter);
- if( iRowid!=iLast ) bMatch = 0;
- if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast,&rc,&pNode->bEof) ){
- return rc;
+ Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
+ if( pTerm->pSynonym ){
+ i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0);
+ if( iRowid==iLast ) continue;
+ bMatch = 0;
+ if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){
+ pNode->bEof = 1;
+ return rc;
+ }
+ }else{
+ Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
+ i64 iRowid = sqlite3Fts5IterRowid(pIter);
+ if( iRowid==iLast ) continue;
+ bMatch = 0;
+ if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){
+ return rc;
+ }
}
}
}
}while( bMatch==0 );
- pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode));
pNode->iRowid = iLast;
+ pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode));
return rc;
}
/*
** Initialize all term iterators in the pNear object. If any term is found
-** to match no documents at all, set *pbEof to true and return immediately,
-** without initializing any further iterators.
+** to match no documents at all, return immediately without initializing any
+** further iterators.
*/
static int fts5ExprNearInitAll(
Fts5Expr *pExpr,
Fts5ExprNode *pNode
){
Fts5ExprNearset *pNear = pNode->pNear;
- Fts5ExprTerm *pTerm;
- Fts5ExprPhrase *pPhrase;
int i, j;
int rc = SQLITE_OK;
for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
- pPhrase = pNear->apPhrase[i];
+ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
for(j=0; j<pPhrase->nTerm; j++){
- pTerm = &pPhrase->aTerm[j];
- if( pTerm->pIter ){
- sqlite3Fts5IterClose(pTerm->pIter);
- pTerm->pIter = 0;
+ Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
+ Fts5ExprTerm *p;
+ int bEof = 1;
+
+ for(p=pTerm; p && rc==SQLITE_OK; p=p->pSynonym){
+ if( p->pIter ){
+ sqlite3Fts5IterClose(p->pIter);
+ p->pIter = 0;
+ }
+ rc = sqlite3Fts5IndexQuery(
+ pExpr->pIndex, p->zTerm, strlen(p->zTerm),
+ (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
+ (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
+ &p->pIter
+ );
+ assert( rc==SQLITE_OK || p->pIter==0 );
+ if( p->pIter && 0==sqlite3Fts5IterEof(p->pIter) ){
+ bEof = 0;
+ }
}
- rc = sqlite3Fts5IndexQuery(
- pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm),
- (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
- (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
- &pTerm->pIter
- );
- assert( rc==SQLITE_OK || pTerm->pIter==0 );
- if( pTerm->pIter==0 || sqlite3Fts5IterEof(pTerm->pIter) ){
+
+ if( bEof ){
pNode->bEof = 1;
- break;
+ return rc;
}
}
}
@@ -1029,10 +1178,17 @@ static int fts5ExprNodeNext(
};
case FTS5_TERM: {
- rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom);
- if( pNode->bEof==0 ){
+ Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
+ if( bFromValid ){
+ rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
+ }else{
+ rc = sqlite3Fts5IterNext(pIter);
+ }
+ if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){
assert( rc==SQLITE_OK );
rc = fts5ExprTokenTest(pExpr, pNode);
+ }else{
+ pNode->bEof = 1;
}
return rc;
};
@@ -1266,10 +1422,16 @@ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
if( pPhrase ){
int i;
for(i=0; i<pPhrase->nTerm; i++){
+ Fts5ExprTerm *pSyn;
+ Fts5ExprTerm *pNext;
Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
sqlite3_free(pTerm->zTerm);
- if( pTerm->pIter ){
- sqlite3Fts5IterClose(pTerm->pIter);
+ sqlite3Fts5IterClose(pTerm->pIter);
+
+ for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
+ pNext = pSyn->pSynonym;
+ sqlite3Fts5IterClose(pSyn->pIter);
+ sqlite3_free(pSyn);
}
}
if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
@@ -1331,6 +1493,7 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset(
typedef struct TokenCtx TokenCtx;
struct TokenCtx {
Fts5ExprPhrase *pPhrase;
+ int rc;
};
/*
@@ -1338,34 +1501,60 @@ struct TokenCtx {
*/
static int fts5ParseTokenize(
void *pContext, /* Pointer to Fts5InsertCtx object */
+ int tflags, /* Mask of FTS5_TOKEN_* flags */
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
- int iStart, /* Start offset of token */
- int iEnd /* End offset of token */
+ int iUnused1, /* Start offset of token */
+ int iUnused2 /* End offset of token */
){
int rc = SQLITE_OK;
const int SZALLOC = 8;
TokenCtx *pCtx = (TokenCtx*)pContext;
Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
- Fts5ExprTerm *pTerm;
- if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
- Fts5ExprPhrase *pNew;
- int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
+ /* If an error has already occurred, this is a no-op */
+ if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;
- pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase,
- sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
- );
- if( pNew==0 ) return SQLITE_NOMEM;
- if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
- pCtx->pPhrase = pPhrase = pNew;
- pNew->nTerm = nNew - SZALLOC;
- }
+ assert( pPhrase==0 || pPhrase->nTerm>0 );
+ if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){
+ Fts5ExprTerm *pSyn;
+ int nByte = sizeof(Fts5ExprTerm) + nToken+1;
+ pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte);
+ if( pSyn==0 ){
+ rc = SQLITE_NOMEM;
+ }else{
+ memset(pSyn, 0, nByte);
+ pSyn->zTerm = (char*)&pSyn[1];
+ memcpy(pSyn->zTerm, pToken, nToken);
+ pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
+ pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
+ }
+ }else{
+ Fts5ExprTerm *pTerm;
+ if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
+ Fts5ExprPhrase *pNew;
+ int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
- pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
- memset(pTerm, 0, sizeof(Fts5ExprTerm));
- pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
+ pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase,
+ sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
+ );
+ if( pNew==0 ){
+ rc = SQLITE_NOMEM;
+ }else{
+ if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
+ pCtx->pPhrase = pPhrase = pNew;
+ pNew->nTerm = nNew - SZALLOC;
+ }
+ }
+ if( rc==SQLITE_OK ){
+ pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
+ memset(pTerm, 0, sizeof(Fts5ExprTerm));
+ pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
+ }
+ }
+
+ pCtx->rc = rc;
return rc;
}
@@ -1417,11 +1606,14 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm(
rc = fts5ParseStringFromToken(pToken, &z);
if( rc==SQLITE_OK ){
+ int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_QUERY : 0);
+ int n;
sqlite3Fts5Dequote(z);
- rc = sqlite3Fts5Tokenize(pConfig, z, strlen(z), &sCtx, fts5ParseTokenize);
+ n = strlen(z);
+ rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
}
sqlite3_free(z);
- if( rc ){
+ if( rc || (rc = sCtx.rc) ){
pParse->rc = rc;
fts5ExprPhraseFree(sCtx.pPhrase);
sCtx.pPhrase = 0;
@@ -1451,6 +1643,79 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm(
}
/*
+** Create a new FTS5 expression by cloning phrase iPhrase of the
+** expression passed as the second argument.
+*/
+int sqlite3Fts5ExprClonePhrase(
+ Fts5Config *pConfig,
+ Fts5Expr *pExpr,
+ int iPhrase,
+ Fts5Expr **ppNew
+){
+ int rc = SQLITE_OK; /* Return code */
+ Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */
+ int i; /* Used to iterate through phrase terms */
+
+ Fts5Expr *pNew = 0; /* Expression to return via *ppNew */
+
+ TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */
+
+
+ pOrig = pExpr->apExprPhrase[iPhrase];
+
+ pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
+ if( rc==SQLITE_OK ){
+ pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc,
+ sizeof(Fts5ExprPhrase*));
+ }
+ if( rc==SQLITE_OK ){
+ pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc,
+ sizeof(Fts5ExprNode));
+ }
+ if( rc==SQLITE_OK ){
+ pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc,
+ sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*));
+ }
+
+ for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
+ int tflags = 0;
+ Fts5ExprTerm *p;
+ for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){
+ const char *zTerm = p->zTerm;
+ rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, strlen(zTerm), 0, 0);
+ tflags = FTS5_TOKEN_COLOCATED;
+ }
+ if( rc==SQLITE_OK ){
+ sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
+ }
+ }
+
+ if( rc==SQLITE_OK ){
+ /* All the allocations succeeded. Put the expression object together. */
+ pNew->pIndex = pExpr->pIndex;
+ pNew->nPhrase = 1;
+ pNew->apExprPhrase[0] = sCtx.pPhrase;
+ pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase;
+ pNew->pRoot->pNear->nPhrase = 1;
+ sCtx.pPhrase->pNode = pNew->pRoot;
+
+ if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 ){
+ pNew->pRoot->eType = FTS5_TERM;
+ }else{
+ pNew->pRoot->eType = FTS5_STRING;
+ }
+ }else{
+ sqlite3Fts5ExprFree(pNew);
+ fts5ExprPhraseFree(sCtx.pPhrase);
+ pNew = 0;
+ }
+
+ *ppNew = pNew;
+ return rc;
+}
+
+
+/*
** Token pTok has appeared in a MATCH expression where the NEAR operator
** is expected. If token pTok does not contain "NEAR", store an error
** in the pParse object.
@@ -1630,7 +1895,10 @@ Fts5ExprNode *sqlite3Fts5ParseNode(
for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
pNear->apPhrase[iPhrase]->pNode = pRet;
}
- if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){
+ if( pNear->nPhrase==1
+ && pNear->apPhrase[0]->nTerm==1
+ && pNear->apPhrase[0]->aTerm[0].pSynonym==0
+ ){
pRet->eType = FTS5_TERM;
}
}else{
@@ -1650,16 +1918,28 @@ Fts5ExprNode *sqlite3Fts5ParseNode(
}
static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
- char *zQuoted = sqlite3_malloc(strlen(pTerm->zTerm) * 2 + 3 + 2);
+ int nByte = 0;
+ Fts5ExprTerm *p;
+ char *zQuoted;
+
+ /* Determine the maximum amount of space required. */
+ for(p=pTerm; p; p=p->pSynonym){
+ nByte += strlen(pTerm->zTerm) * 2 + 3 + 2;
+ }
+ zQuoted = sqlite3_malloc(nByte);
+
if( zQuoted ){
int i = 0;
- char *zIn = pTerm->zTerm;
- zQuoted[i++] = '"';
- while( *zIn ){
- if( *zIn=='"' ) zQuoted[i++] = '"';
- zQuoted[i++] = *zIn++;
+ for(p=pTerm; p; p=p->pSynonym){
+ char *zIn = p->zTerm;
+ zQuoted[i++] = '"';
+ while( *zIn ){
+ if( *zIn=='"' ) zQuoted[i++] = '"';
+ zQuoted[i++] = *zIn++;
+ }
+ zQuoted[i++] = '"';
+ if( p->pSynonym ) zQuoted[i++] = '|';
}
- zQuoted[i++] = '"';
if( pTerm->bPrefix ){
zQuoted[i++] = ' ';
zQuoted[i++] = '*';