diff options
author | drh <> | 2024-10-07 12:19:23 +0000 |
---|---|---|
committer | drh <> | 2024-10-07 12:19:23 +0000 |
commit | ce527f2e971d0c50c779f00fa6a3af61cc94e52e (patch) | |
tree | 194eac6ea8594d4ba5322825c2e493941a6328ef /src | |
parent | 29f976432a87793d7b9e29fd4ed0209d634113f7 (diff) | |
download | sqlite-ce527f2e971d0c50c779f00fa6a3af61cc94e52e.tar.gz sqlite-ce527f2e971d0c50c779f00fa6a3af61cc94e52e.zip |
Fix handling of U+FFFD (the Unicode replacement character) in the LIKE optimization.
dbsqlfuzz eee57fb9eea1dfa5aa40dfa87865cf8c84d12f96.
FossilOrigin-Name: bce52ce2a6e7f3d3d1b2807d1ea95243d9b655e557c1bb6f0b8a9a6cefb1aed6
Diffstat (limited to 'src')
-rw-r--r-- | src/whereexpr.c | 25 |
1 files changed, 15 insertions, 10 deletions
diff --git a/src/whereexpr.c b/src/whereexpr.c index 7ea2956a7..24d203046 100644 --- a/src/whereexpr.c +++ b/src/whereexpr.c @@ -219,20 +219,25 @@ static int isLikeOrGlob( z = (u8*)pRight->u.zToken; } if( z ){ - - /* Count the number of prefix characters prior to the first wildcard. - ** If the underlying database has a UTF16LE encoding, then only consider - ** ASCII characters. Note that the encoding of z[] is UTF8 - we are - ** dealing with only UTF8 here in this code, but the database engine - ** itself might be processing content using a different encoding. */ + /* Count the number of prefix bytes prior to the first wildcard. + ** or U+fffd character. If the underlying database has a UTF16LE + ** encoding, then only consider ASCII characters. Note that the + ** encoding of z[] is UTF8 - we are dealing with only UTF8 here in + ** this code, but the database engine itself might be processing + ** content using a different encoding. */ cnt = 0; while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){ cnt++; if( c==wc[3] && z[cnt]!=0 ){ cnt++; - }else if( c>=0x80 && ENC(db)==SQLITE_UTF16LE ){ - cnt--; - break; + }else if( c>=0x80 ){ + const u8 *z2 = z+cnt-1; + if( sqlite3Utf8Read(&z2)==0xfffd || ENC(db)==SQLITE_UTF16LE ){ + cnt--; + break; + }else{ + cnt = (int)(z2-z); + } } } @@ -244,7 +249,7 @@ static int isLikeOrGlob( ** range search. The third is because the caller assumes that the pattern ** consists of at least one character after all escapes have been ** removed. */ - if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && 255!=(u8)z[cnt-1] ){ + if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && ALWAYS(255!=(u8)z[cnt-1]) ){ Expr *pPrefix; /* A "complete" match if the pattern ends with "*" or "%" */ |