diff options
author | drh <> | 2023-12-13 14:31:15 +0000 |
---|---|---|
committer | drh <> | 2023-12-13 14:31:15 +0000 |
commit | 001d1e795cfb58c2e0f217374efe2fdef797ad22 (patch) | |
tree | 4127e50cec1e66128e26a9a76f1a29ee0068e3c6 /src | |
parent | 891f1dc0542504fe402d1879651c307ae9355825 (diff) | |
download | sqlite-001d1e795cfb58c2e0f217374efe2fdef797ad22.tar.gz sqlite-001d1e795cfb58c2e0f217374efe2fdef797ad22.zip |
Improvements to UTF8 handling, and especially the handling of invalid UTF8,
in the JSON routines.
FossilOrigin-Name: 1b229c1101d6c384a30f343c5e47b471ab084b2d8e81170eb8f642afc1c67e3b
Diffstat (limited to 'src')
-rw-r--r-- | src/json.c | 30 | ||||
-rw-r--r-- | src/sqliteInt.h | 1 | ||||
-rw-r--r-- | src/utf.c | 33 |
3 files changed, 57 insertions, 7 deletions
diff --git a/src/json.c b/src/json.c index a24a77894..f3166187b 100644 --- a/src/json.c +++ b/src/json.c @@ -2449,8 +2449,8 @@ static u32 jsonUnescapeOneChar(const char *z, u32 n, u32 *piOut){ }else if( z[nSkip]=='\\' ){ return nSkip + jsonUnescapeOneChar(&z[nSkip], n-nSkip, piOut); }else{ - *piOut = z[nSkip]; - return nSkip+1; + int sz = sqlite3Utf8ReadLimited((u8*)&z[nSkip], n-nSkip, piOut); + return nSkip + sz; } } default: { @@ -2483,8 +2483,14 @@ static SQLITE_NOINLINE int jsonLabelCompareEscaped( cLeft = 0; }else if( rawLeft || zLeft[0]!='\\' ){ cLeft = ((u8*)zLeft)[0]; - zLeft++; - nLeft--; + if( cLeft>=0xc0 ){ + int sz = sqlite3Utf8ReadLimited((u8*)zLeft, nLeft, &cLeft); + zLeft += sz; + nLeft -= sz; + }else{ + zLeft++; + nLeft--; + } }else{ u32 n = jsonUnescapeOneChar(zLeft, nLeft, &cLeft); zLeft += n; @@ -2495,8 +2501,14 @@ static SQLITE_NOINLINE int jsonLabelCompareEscaped( cRight = 0; }else if( rawRight || zRight[0]!='\\' ){ cRight = ((u8*)zRight)[0]; - zRight++; - nRight--; + if( cRight>=0xc0 ){ + int sz = sqlite3Utf8ReadLimited((u8*)zRight, nRight, &cRight); + zRight += sz; + nRight -= sz; + }else{ + zRight++; + nRight--; + } }else{ u32 n = jsonUnescapeOneChar(zRight, nRight, &cRight); zRight += n; @@ -2916,14 +2928,19 @@ static void jsonReturnFromBlob( u32 szEscape = jsonUnescapeOneChar(&z[iIn], sz-iIn, &v); if( v<=0x7f ){ zOut[iOut++] = (char)v; + }else if( v==0xfffd ){ + /* Silently ignore illegal unicode */ }else if( v<=0x7ff ){ + assert( szEscape>=2 ); zOut[iOut++] = (char)(0xc0 | (v>>6)); zOut[iOut++] = 0x80 | (v&0x3f); }else if( v<0x10000 ){ + assert( szEscape>=3 ); zOut[iOut++] = 0xe0 | (v>>12); zOut[iOut++] = 0x80 | ((v>>6)&0x3f); zOut[iOut++] = 0x80 | (v&0x3f); }else{ + assert( szEscape>=4 ); zOut[iOut++] = 0xf0 | (v>>18); zOut[iOut++] = 0x80 | ((v>>12)&0x3f); zOut[iOut++] = 0x80 | ((v>>6)&0x3f); @@ -2934,6 +2951,7 @@ static void jsonReturnFromBlob( zOut[iOut++] = c; } } /* end for() */ + assert( iOut<=nOut ); zOut[iOut] = 0; 
sqlite3_result_text(pCtx, zOut, iOut, sqlite3_free); break; diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 83226b575..7d6596909 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -5171,6 +5171,7 @@ int sqlite3Utf16ByteLen(const void *pData, int nChar); #endif int sqlite3Utf8CharLen(const char *pData, int nByte); u32 sqlite3Utf8Read(const u8**); +int sqlite3Utf8ReadLimited(const u8*, int, u32*); LogEst sqlite3LogEst(u64); LogEst sqlite3LogEstAdd(LogEst,LogEst); LogEst sqlite3LogEstFromDouble(double); diff --git a/src/utf.c b/src/utf.c --- a/src/utf.c +++ b/src/utf.c @@ -164,7 +164,38 @@ u32 sqlite3Utf8Read( return c; } - +/* +** Read a single UTF8 character out of buffer z[], but reading no +** more than n characters from the buffer. z[] is not zero-terminated. +** +** Return the number of bytes used to construct the character. +** +** Invalid UTF8 might generate a strange result. No effort is made +** to detect invalid UTF8. +** +** At most 4 bytes will be read out of z[]. The return value will always +** be between 1 and 4. +*/ +int sqlite3Utf8ReadLimited( + const u8 *z, + int n, + u32 *piOut +){ + u32 c; + int i = 1; + assert( n>0 ); + c = z[0]; + if( c>=0xc0 ){ + c = sqlite3Utf8Trans1[c-0xc0]; + if( n>4 ) n = 4; + while( i<n && (z[i] & 0xc0)==0x80 ){ + c = (c<<6) + (0x3f & z[i]); + i++; + } + } + *piOut = c; + return i; +} /* |