From ce527f2e971d0c50c779f00fa6a3af61cc94e52e Mon Sep 17 00:00:00 2001 From: drh <> Date: Mon, 7 Oct 2024 12:19:23 +0000 Subject: Fix handling of U+fffd in the LIKE optimization. dbsqlfuzz eee57fb9eea1dfa5aa40dfa87865cf8c84d12f96. FossilOrigin-Name: bce52ce2a6e7f3d3d1b2807d1ea95243d9b655e557c1bb6f0b8a9a6cefb1aed6 --- src/whereexpr.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/whereexpr.c b/src/whereexpr.c index 7ea2956a7..24d203046 100644 --- a/src/whereexpr.c +++ b/src/whereexpr.c @@ -219,20 +219,25 @@ static int isLikeOrGlob( z = (u8*)pRight->u.zToken; } if( z ){ - - /* Count the number of prefix characters prior to the first wildcard. - ** If the underlying database has a UTF16LE encoding, then only consider - ** ASCII characters. Note that the encoding of z[] is UTF8 - we are - ** dealing with only UTF8 here in this code, but the database engine - ** itself might be processing content using a different encoding. */ + /* Count the number of prefix bytes prior to the first wildcard. + ** or U+fffd character. If the underlying database has a UTF16LE + ** encoding, then only consider ASCII characters. Note that the + ** encoding of z[] is UTF8 - we are dealing with only UTF8 here in + ** this code, but the database engine itself might be processing + ** content using a different encoding. */ cnt = 0; while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){ cnt++; if( c==wc[3] && z[cnt]!=0 ){ cnt++; - }else if( c>=0x80 && ENC(db)==SQLITE_UTF16LE ){ - cnt--; - break; + }else if( c>=0x80 ){ + const u8 *z2 = z+cnt-1; + if( sqlite3Utf8Read(&z2)==0xfffd || ENC(db)==SQLITE_UTF16LE ){ + cnt--; + break; + }else{ + cnt = (int)(z2-z); + } } } @@ -244,7 +249,7 @@ static int isLikeOrGlob( ** range search. The third is because the caller assumes that the pattern ** consists of at least one character after all escapes have been ** removed. */ - if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && 255!=(u8)z[cnt-1] ){ + if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && ALWAYS(255!=(u8)z[cnt-1]) ){ Expr *pPrefix; /* A "complete" match if the pattern ends with "*" or "%" */ -- cgit v1.2.3