aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: drh <> 2024-10-07 12:19:23 +0000
committer: drh <> 2024-10-07 12:19:23 +0000
commit: ce527f2e971d0c50c779f00fa6a3af61cc94e52e (patch)
tree: 194eac6ea8594d4ba5322825c2e493941a6328ef /src
parent: 29f976432a87793d7b9e29fd4ed0209d634113f7 (diff)
download: sqlite-ce527f2e971d0c50c779f00fa6a3af61cc94e52e.tar.gz
sqlite-ce527f2e971d0c50c779f00fa6a3af61cc94e52e.zip
Fix handling of U+fffd in the LIKE optimization.
dbsqlfuzz eee57fb9eea1dfa5aa40dfa87865cf8c84d12f96. FossilOrigin-Name: bce52ce2a6e7f3d3d1b2807d1ea95243d9b655e557c1bb6f0b8a9a6cefb1aed6
Diffstat (limited to 'src')
-rw-r--r-- src/whereexpr.c 25
1 files changed, 15 insertions, 10 deletions
diff --git a/src/whereexpr.c b/src/whereexpr.c
index 7ea2956a7..24d203046 100644
--- a/src/whereexpr.c
+++ b/src/whereexpr.c
@@ -219,20 +219,25 @@ static int isLikeOrGlob(
z = (u8*)pRight->u.zToken;
}
if( z ){
-
- /* Count the number of prefix characters prior to the first wildcard.
- ** If the underlying database has a UTF16LE encoding, then only consider
- ** ASCII characters. Note that the encoding of z[] is UTF8 - we are
- ** dealing with only UTF8 here in this code, but the database engine
- ** itself might be processing content using a different encoding. */
+ /* Count the number of prefix bytes prior to the first wildcard
+ ** or U+fffd character. If the underlying database has a UTF16LE
+ ** encoding, then only consider ASCII characters. Note that the
+ ** encoding of z[] is UTF8 - we are dealing with only UTF8 here in
+ ** this code, but the database engine itself might be processing
+ ** content using a different encoding. */
cnt = 0;
while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){
cnt++;
if( c==wc[3] && z[cnt]!=0 ){
cnt++;
- }else if( c>=0x80 && ENC(db)==SQLITE_UTF16LE ){
- cnt--;
- break;
+ }else if( c>=0x80 ){
+ const u8 *z2 = z+cnt-1;
+ if( sqlite3Utf8Read(&z2)==0xfffd || ENC(db)==SQLITE_UTF16LE ){
+ cnt--;
+ break;
+ }else{
+ cnt = (int)(z2-z);
+ }
}
}
@@ -244,7 +249,7 @@ static int isLikeOrGlob(
** range search. The third is because the caller assumes that the pattern
** consists of at least one character after all escapes have been
** removed. */
- if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && 255!=(u8)z[cnt-1] ){
+ if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && ALWAYS(255!=(u8)z[cnt-1]) ){
Expr *pPrefix;
/* A "complete" match if the pattern ends with "*" or "%" */