aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: drh <> 2023-12-13 14:31:15 +0000
committer: drh <> 2023-12-13 14:31:15 +0000
commit: 001d1e795cfb58c2e0f217374efe2fdef797ad22 (patch)
tree: 4127e50cec1e66128e26a9a76f1a29ee0068e3c6 /src
parent: 891f1dc0542504fe402d1879651c307ae9355825 (diff)
download: sqlite-001d1e795cfb58c2e0f217374efe2fdef797ad22.tar.gz
sqlite-001d1e795cfb58c2e0f217374efe2fdef797ad22.zip
Improvements to UTF8 handling, and especially the handling of invalid UTF8,
in the JSON routines. FossilOrigin-Name: 1b229c1101d6c384a30f343c5e47b471ab084b2d8e81170eb8f642afc1c67e3b
Diffstat (limited to 'src')
-rw-r--r-- src/json.c 30
-rw-r--r-- src/sqliteInt.h 1
-rw-r--r-- src/utf.c 33
3 files changed, 57 insertions, 7 deletions
diff --git a/src/json.c b/src/json.c
index a24a77894..f3166187b 100644
--- a/src/json.c
+++ b/src/json.c
@@ -2449,8 +2449,8 @@ static u32 jsonUnescapeOneChar(const char *z, u32 n, u32 *piOut){
}else if( z[nSkip]=='\\' ){
return nSkip + jsonUnescapeOneChar(&z[nSkip], n-nSkip, piOut);
}else{
- *piOut = z[nSkip];
- return nSkip+1;
+ int sz = sqlite3Utf8ReadLimited((u8*)&z[nSkip], n-nSkip, piOut);
+ return nSkip + sz;
}
}
default: {
@@ -2483,8 +2483,14 @@ static SQLITE_NOINLINE int jsonLabelCompareEscaped(
cLeft = 0;
}else if( rawLeft || zLeft[0]!='\\' ){
cLeft = ((u8*)zLeft)[0];
- zLeft++;
- nLeft--;
+ if( cLeft>=0xc0 ){
+ int sz = sqlite3Utf8ReadLimited((u8*)zLeft, nLeft, &cLeft);
+ zLeft += sz;
+ nLeft -= sz;
+ }else{
+ zLeft++;
+ nLeft--;
+ }
}else{
u32 n = jsonUnescapeOneChar(zLeft, nLeft, &cLeft);
zLeft += n;
@@ -2495,8 +2501,14 @@ static SQLITE_NOINLINE int jsonLabelCompareEscaped(
cRight = 0;
}else if( rawRight || zRight[0]!='\\' ){
cRight = ((u8*)zRight)[0];
- zRight++;
- nRight--;
+ if( cRight>=0xc0 ){
+ int sz = sqlite3Utf8ReadLimited((u8*)zRight, nRight, &cRight);
+ zRight += sz;
+ nRight -= sz;
+ }else{
+ zRight++;
+ nRight--;
+ }
}else{
u32 n = jsonUnescapeOneChar(zRight, nRight, &cRight);
zRight += n;
@@ -2916,14 +2928,19 @@ static void jsonReturnFromBlob(
u32 szEscape = jsonUnescapeOneChar(&z[iIn], sz-iIn, &v);
if( v<=0x7f ){
zOut[iOut++] = (char)v;
+ }else if( v==0xfffd ){
+ /* Silently ignore illegal unicode */
}else if( v<=0x7ff ){
+ assert( szEscape>=2 );
zOut[iOut++] = (char)(0xc0 | (v>>6));
zOut[iOut++] = 0x80 | (v&0x3f);
}else if( v<0x10000 ){
+ assert( szEscape>=3 );
zOut[iOut++] = 0xe0 | (v>>12);
zOut[iOut++] = 0x80 | ((v>>6)&0x3f);
zOut[iOut++] = 0x80 | (v&0x3f);
}else{
+ assert( szEscape>=4 );
zOut[iOut++] = 0xf0 | (v>>18);
zOut[iOut++] = 0x80 | ((v>>12)&0x3f);
zOut[iOut++] = 0x80 | ((v>>6)&0x3f);
@@ -2934,6 +2951,7 @@ static void jsonReturnFromBlob(
zOut[iOut++] = c;
}
} /* end for() */
+ assert( iOut<=nOut );
zOut[iOut] = 0;
sqlite3_result_text(pCtx, zOut, iOut, sqlite3_free);
break;
diff --git a/src/sqliteInt.h b/src/sqliteInt.h
index 83226b575..7d6596909 100644
--- a/src/sqliteInt.h
+++ b/src/sqliteInt.h
@@ -5171,6 +5171,7 @@ int sqlite3Utf16ByteLen(const void *pData, int nChar);
#endif
int sqlite3Utf8CharLen(const char *pData, int nByte);
u32 sqlite3Utf8Read(const u8**);
+int sqlite3Utf8ReadLimited(const u8*, int, u32*);
LogEst sqlite3LogEst(u64);
LogEst sqlite3LogEstAdd(LogEst,LogEst);
LogEst sqlite3LogEstFromDouble(double);
diff --git a/src/utf.c b/src/utf.c
index 5f27babdf..216864f5c 100644
--- a/src/utf.c
+++ b/src/utf.c
@@ -164,7 +164,38 @@ u32 sqlite3Utf8Read(
return c;
}
-
+/*
+** Decode a single UTF8 character from the start of buffer z[], reading
+** no more than n bytes (and never more than 4 bytes) from the buffer.
+** z[] is not zero-terminated.  n must be greater than zero.
+**
+** The decoded value is written into *piOut.  The return value is the
+** number of bytes of z[] used to construct the character, which is
+** always between 1 and 4.
+**
+** Invalid UTF8 might generate a strange result. No effort is made
+** to detect invalid UTF8.
+*/
+int sqlite3Utf8ReadLimited(
+ const u8 *z,      /* Input bytes.  Not zero-terminated */
+ int n,            /* Number of bytes available in z[].  Must be >0 */
+ u32 *piOut        /* OUT: the decoded character value */
+){
+ u32 c;            /* Character value accumulated so far */
+ int i = 1;        /* Bytes of z[] consumed; z[0] is always consumed */
+ assert( n>0 );
+ c = z[0];
+ if( c>=0xc0 ){    /* Bytes below 0xc0 are returned unchanged as 1 byte */
+ c = sqlite3Utf8Trans1[c-0xc0];  /* Table defined outside this hunk; presumably lead-byte payload bits - confirm */
+ if( n>4 ) n = 4;  /* Never examine more than 4 bytes in total */
+ while( i<n && (z[i] & 0xc0)==0x80 ){  /* Stop at the first byte not of the form 10xxxxxx */
+ c = (c<<6) + (0x3f & z[i]);  /* Append the low 6 bits of this byte */
+ i++;
+ }
+ }
+ *piOut = c;
+ return i;
+}
/*