diff options
author | drh <> | 2023-12-06 17:50:16 +0000 |
---|---|---|
committer | drh <> | 2023-12-06 17:50:16 +0000 |
commit | a9c8469d033d610c52f666e827f2bae2df47fbdb (patch) | |
tree | 4a5cfb204b75156540d6e17c71385171e77c2402 /src | |
parent | b597fea89436a57b8b4bc39f24dc7e14f5e92dcb (diff) | |
parent | 9df01b5ccf78535dd44e1f8c0b83fcee40ea5042 (diff) | |
download | sqlite-a9c8469d033d610c52f666e827f2bae2df47fbdb.tar.gz sqlite-a9c8469d033d610c52f666e827f2bae2df47fbdb.zip |
Do correct comparisons between object labels in JSON even when the two labels
contain different JSON escapes.
FossilOrigin-Name: bda2e30cc22e180b19a7a05824dd345880eb402ae5450b2d2dd954946c3ae135
Diffstat (limited to 'src')
-rw-r--r-- | src/json.c | 356 |
1 files changed, 250 insertions, 106 deletions
diff --git a/src/json.c b/src/json.c index e91c9b86d..46d7a3fa8 100644 --- a/src/json.c +++ b/src/json.c @@ -2133,6 +2133,188 @@ static void jsonBlobEdit( } /* +** Return the number of escaped newlines to be ignored. +** An escaped newline is a one of the following byte sequences: +** +** 0x5c 0x0a +** 0x5c 0x0d +** 0x5c 0x0d 0x0a +** 0x5c 0xe2 0x80 0xa8 +** 0x5c 0xe2 0x80 0xa9 +*/ +static u32 jsonBytesToBypass(const char *z, u32 n){ + u32 i = 0; + while( i+1<n ){ + if( z[i]!='\\' ) return i; + if( z[i+1]=='\n' ){ + i += 2; + continue; + } + if( z[i+1]=='\r' ){ + if( i+2<n && z[i+2]=='\n' ){ + i += 3; + }else{ + i += 2; + } + continue; + } + if( 0xe2==(u8)z[i+1] + && i+3<n + && 0x80==(u8)z[i+2] + && (0xa8==(u8)z[i+3] || 0xa9==(u8)z[i+3]) + ){ + i += 4; + continue; + } + break; + } + return i; +} + +/* +** Input z[0..n] defines JSON escape sequence including the leading '\\'. +** Decode that escape sequence into a single character. Write that +** character into *piOut. Return the number of bytes in the escape sequence. +*/ +static u32 jsonUnescapeOneChar(const char *z, u32 n, u32 *piOut){ + assert( n>0 ); + assert( z[0]=='\\' ); + if( n<2 ){ + *piOut = 0xFFFD; + return n; + } + switch( (u8)z[1] ){ + case 'u': { + u32 v, vlo; + if( n<6 ){ + *piOut = 0xFFFD; + return n; + } + v = jsonHexToInt4(&z[2]); + if( (v & 0xfc00)==0xd800 + && n>=12 + && z[6]=='\\' + && z[7]=='u' + && ((vlo = jsonHexToInt4(&z[8]))&0xfc00)==0xdc00 + ){ + *piOut = ((v&0x3ff)<<10) + (vlo&0x3ff) + 0x10000; + return 12; + }else{ + *piOut = v; + return 6; + } + } + case 'b': { *piOut = '\b'; return 2; } + case 'f': { *piOut = '\f'; return 2; } + case 'n': { *piOut = '\n'; return 2; } + case 'r': { *piOut = '\r'; return 2; } + case 't': { *piOut = '\t'; return 2; } + case 'v': { *piOut = '\v'; return 2; } + case '0': { *piOut = 0; return 2; } + case '\'': + case '"': + case '/': + case '\\':{ *piOut = z[1]; return 2; } + case 'x': { + if( n<4 ){ + *piOut = 0xFFFD; + return n; + } + *piOut = (jsonHexToInt(z[2])<<4) | jsonHexToInt(z[3]); + return 4; + } + case 0xe2: + case '\r': + case '\n': { + u32 nSkip = jsonBytesToBypass(z, n); + if( nSkip==0 ){ + *piOut = 0xFFFD; + return n; + }else if( nSkip==n ){ + *piOut = 0; + return n; + }else if( z[nSkip]=='\\' ){ + return nSkip + jsonUnescapeOneChar(&z[nSkip], n-nSkip, piOut); + }else{ + *piOut = z[nSkip]; + return nSkip+1; + } + } + default: { + *piOut = 0xFFFD; + return 2; + } + } +} + + +/* +** Compare two object labels. Return 1 if they are equal and +** 0 if they differ. +** +** In this version, we know that one or the other or both of the +** two comparands contains an escape sequence. +*/ +static SQLITE_NOINLINE int jsonLabelCompareEscaped( + const char *zLeft, /* The left label */ + u32 nLeft, /* Size of the left label in bytes */ + int rawLeft, /* True if zLeft contains no escapes */ + const char *zRight, /* The right label */ + u32 nRight, /* Size of the right label in bytes */ + int rawRight /* True if zRight is escape-free */ +){ + u32 cLeft, cRight; + assert( rawLeft==0 || rawRight==0 ); + while( nLeft>0 && nRight>0 ){ + if( rawLeft || zLeft[0]!='\\' ){ + cLeft = ((u8*)zLeft)[0]; + zLeft++; + nLeft--; + }else{ + u32 n = jsonUnescapeOneChar(zLeft, nLeft, &cLeft); + zLeft += n; + assert( n<=nLeft ); + nLeft -= n; + } + if( rawRight || zRight[0]!='\\' ){ + cRight = ((u8*)zRight)[0]; + zRight++; + nRight--; + }else{ + u32 n = jsonUnescapeOneChar(zRight, nRight, &cRight); + zRight += n; + assert( n<=nRight ); + nRight -= n; + } + if( cLeft!=cRight ) return 0; + } + return nLeft==0 && nRight==0; +} + +/* +** Compare two object labels. Return 1 if they are equal and +** 0 if they differ. Return -1 if an OOM occurs. +*/ +static int jsonLabelCompare( + const char *zLeft, /* The left label */ + u32 nLeft, /* Size of the left label in bytes */ + int rawLeft, /* True if zLeft contains no escapes */ + const char *zRight, /* The right label */ + u32 nRight, /* Size of the right label in bytes */ + int rawRight /* True if zRight is escape-free */ +){ + if( rawLeft && rawRight ){ + /* Simpliest case: Neither label contains escapes. A simple + ** memcmp() is sufficient. */ + if( nLeft!=nRight ) return 0; + return memcmp(zLeft, zRight, nLeft)==0; + }else{ + return jsonLabelCompareEscaped(zLeft, nLeft, rawLeft, + zRight, nRight, rawRight); + } +} + +/* ** Error returns from jsonLookupStep() */ #define JSON_LOOKUP_ERROR 0xffffffff @@ -2237,6 +2419,7 @@ static u32 jsonLookupStep( return iRoot; } if( zPath[0]=='.' ){ + int rawKey = 1; x = pParse->aBlob[iRoot]; zPath++; if( zPath[0]=='"' ){ @@ -2249,6 +2432,7 @@ static u32 jsonLookupStep( return JSON_LOOKUP_PATHERROR; } testcase( nKey==0 ); + rawKey = memchr(zKey, '\\', nKey)==0; }else{ zKey = zPath; for(i=0; zPath[i] && zPath[i]!='.' && zPath[i]!='['; i++){} @@ -2262,13 +2446,17 @@ static u32 jsonLookupStep( j = iRoot + n; /* j is the index of a label */ iEnd = j+sz; while( j<iEnd ){ + int rawLabel; + const char *zLabel; x = pParse->aBlob[j] & 0x0f; if( x<JSONB_TEXT || x>JSONB_TEXTRAW ) return JSON_LOOKUP_ERROR; n = jsonbPayloadSize(pParse, j, &sz); if( n==0 ) return JSON_LOOKUP_ERROR; k = j+n; /* k is the index of the label text */ if( k+sz>=iEnd ) return JSON_LOOKUP_ERROR; - if( sz==nKey && memcmp(&pParse->aBlob[k], zKey, nKey)==0 ){ + zLabel = (const char*)&pParse->aBlob[k]; + rawLabel = x==JSONB_TEXT || x==JSONB_TEXTRAW; + if( jsonLabelCompare(zKey, nKey, rawKey, zLabel, sz, rawLabel) ){ u32 v = k+sz; /* v is the index of the value */ if( ((pParse->aBlob[v])&0x0f)>JSONB_OBJECT ) return JSON_LOOKUP_ERROR; n = jsonbPayloadSize(pParse, v, &sz); @@ -2292,7 +2480,7 @@ static u32 jsonLookupStep( testcase( pParse->eEdit==JEDIT_INS ); testcase( pParse->eEdit==JEDIT_SET ); memset(&ix, 0, sizeof(ix)); - jsonBlobAppendNode(&ix,JSONB_TEXTRAW, nKey, 0); + jsonBlobAppendNode(&ix, rawKey?JSONB_TEXTRAW:JSONB_TEXT5, nKey, 0); pParse->oom |= ix.oom; rc = jsonCreateEditSubstructure(pParse, &v, &zPath[i]); if( !JSON_LOOKUP_ISERROR(rc) @@ -2496,72 +2684,27 @@ static void jsonReturnFromBlob( for(iIn=iOut=0; iIn<sz; iIn++){ char c = z[iIn]; if( c=='\\' ){ - c = z[++iIn]; - if( c=='u' ){ - u32 v = jsonHexToInt4(z+iIn+1); - iIn += 4; - if( v==0 ) break; - if( v<=0x7f ){ - zOut[iOut++] = (char)v; - }else if( v<=0x7ff ){ - zOut[iOut++] = (char)(0xc0 | (v>>6)); - zOut[iOut++] = 0x80 | (v&0x3f); - }else{ - u32 vlo; - if( (v&0xfc00)==0xd800 - && iIn<sz-6 - && z[iIn+1]=='\\' - && z[iIn+2]=='u' - && ((vlo = jsonHexToInt4(z+iIn+3))&0xfc00)==0xdc00 - ){ - /* We have a surrogate pair */ - v = ((v&0x3ff)<<10) + (vlo&0x3ff) + 0x10000; - iIn += 6; - zOut[iOut++] = 0xf0 | (v>>18); - zOut[iOut++] = 0x80 | ((v>>12)&0x3f); - zOut[iOut++] = 0x80 | ((v>>6)&0x3f); - zOut[iOut++] = 0x80 | (v&0x3f); - }else{ - zOut[iOut++] = 0xe0 | (v>>12); - zOut[iOut++] = 0x80 | ((v>>6)&0x3f); - zOut[iOut++] = 0x80 | (v&0x3f); - } - } - continue; - }else if( c=='b' ){ - c = '\b'; - }else if( c=='f' ){ - c = '\f'; - }else if( c=='n' ){ - c = '\n'; - }else if( c=='r' ){ - c = '\r'; - }else if( c=='t' ){ - c = '\t'; - }else if( c=='v' ){ - c = '\v'; - }else if( c=='\'' || c=='"' || c=='/' || c=='\\' ){ - /* pass through unchanged */ - }else if( c=='0' ){ - c = 0; - }else if( c=='x' ){ - c = (jsonHexToInt(z[iIn+1])<<4) | jsonHexToInt(z[iIn+2]); - iIn += 2; - }else if( c=='\r' && z[i+1]=='\n' ){ - iIn++; - continue; - }else if( 0xe2==(u8)c - && iIn<sz-2 - && 0x80==(u8)z[iIn+1] - && (0xa8==(u8)z[iIn+2] || 0xa9==(u8)z[iIn+2]) - ){ - iIn += 2; - continue; + u32 v; + u32 szEscape = jsonUnescapeOneChar(&z[iIn], sz-iIn, &v); + if( v<=0x7f ){ + zOut[iOut++] = (char)v; + }else if( v<=0x7ff ){ + zOut[iOut++] = (char)(0xc0 | (v>>6)); + zOut[iOut++] = 0x80 | (v&0x3f); + }else if( v<0x10000 ){ + zOut[iOut++] = 0xe0 | (v>>12); + zOut[iOut++] = 0x80 | ((v>>6)&0x3f); + zOut[iOut++] = 0x80 | (v&0x3f); }else{ - continue; + zOut[iOut++] = 0xf0 | (v>>18); + zOut[iOut++] = 0x80 | ((v>>12)&0x3f); + zOut[iOut++] = 0x80 | ((v>>6)&0x3f); + zOut[iOut++] = 0x80 | (v&0x3f); } - } /* end if( c=='\\' ) */ - zOut[iOut++] = c; + iIn += szEscape - 1; + }else{ + zOut[iOut++] = c; + } } /* end for() */ zOut[iOut] = 0; sqlite3_result_text(pCtx, zOut, iOut, sqlite3_free); @@ -3142,6 +3285,20 @@ static void jsonArrayLengthFunc( jsonParseFree(p); } +/* True if the string is all digits */ +static int jsonAllDigits(const char *z, int n){ + int i; + for(i=0; i<n && sqlite3Isdigit(z[i]); i++){} + return i==n; +} + +/* True if the string is all alphanumerics and underscores */ +static int jsonAllAlphanum(const char *z, int n){ + int i; + for(i=0; i<n && (sqlite3Isalnum(z[i]) || z[i]=='_'); i++){} + return i==n; +} + /* ** json_extract(JSON, PATH, ...) ** "->"(JSON,PATH) @@ -3199,15 +3356,19 @@ static void jsonExtractFunc( ** [NUMBER] ==> $[NUMBER] // Not PG. Purely for convenience */ jsonStringInit(&jx, ctx); - if( sqlite3Isdigit(zPath[0]) ){ + if( jsonAllDigits(zPath, nPath) ){ jsonAppendRawNZ(&jx, "[", 1); jsonAppendRaw(&jx, zPath, nPath); jsonAppendRawNZ(&jx, "]", 2); - }else if( zPath[0]!='[' ){ + }else if( jsonAllAlphanum(zPath, nPath) ){ jsonAppendRawNZ(&jx, ".", 1); jsonAppendRaw(&jx, zPath, nPath); + }else if( zPath[0]=='[' && nPath>=3 && zPath[nPath-1]==']' ){ + jsonAppendRaw(&jx, zPath, nPath); }else{ + jsonAppendRawNZ(&jx, ".\"", 2); jsonAppendRaw(&jx, zPath, nPath); + jsonAppendRawNZ(&jx, "\"", 1); } jsonStringTerminate(&jx); j = jsonLookupStep(p, 0, jx.zBuf, 0); @@ -3397,6 +3558,7 @@ static int jsonMergePatch( iTCursor = iTStart; iTEnd = iTEndBE + pTarget->delta; while( iTCursor<iTEnd ){ + int isEqual; /* true if the patch and target labels match */ iTLabel = iTCursor; eTLabel = pTarget->aBlob[iTCursor] & 0x0f; if( eTLabel<JSONB_TEXT || eTLabel>JSONB_TEXTRAW ){ @@ -3409,33 +3571,14 @@ static int jsonMergePatch( nTValue = jsonbPayloadSize(pTarget, iTValue, &szTValue); if( nTValue==0 ) return JSON_MERGE_BADTARGET; if( iTValue + nTValue + szTValue > iTEnd ) return JSON_MERGE_BADTARGET; - if( eTLabel==ePLabel ){ - /* Common case */ - if( szTLabel==szPLabel - && memcmp(&pTarget->aBlob[iTLabel+nTLabel], - &pPatch->aBlob[iPLabel+nPLabel], szTLabel)==0 - ){ - break; /* Labels match. */ - } - }else{ - /* Should rarely happen */ - JsonString s1, s2; - int isEqual, isOom; - jsonStringInit(&s1, 0); - jsonXlateBlobToText(pTarget, iTLabel, &s1); - jsonStringInit(&s2, 0); - jsonXlateBlobToText(pPatch, iPLabel, &s2); - isOom = s1.eErr || s2.eErr; - if( s1.nUsed==s2.nUsed && memcmp(s1.zBuf, s2.zBuf, s1.nUsed)==0 ){ - isEqual = 1; - }else{ - isEqual = 0; - } - jsonStringReset(&s1); - jsonStringReset(&s2); - if( isOom ) return JSON_MERGE_OOM; - if( isEqual ) break; - } + isEqual = jsonLabelCompare( + (const char*)&pPatch->aBlob[iPLabel+nPLabel], + szPLabel, + (ePLabel==JSONB_TEXT || ePLabel==JSONB_TEXTRAW), + (const char*)&pTarget->aBlob[iTLabel+nTLabel], + szTLabel, + (eTLabel==JSONB_TEXT || eTLabel==JSONB_TEXTRAW)); + if( isEqual ) break; iTCursor = iTValue + nTValue + szTValue; } x = pPatch->aBlob[iPValue] & 0x0f; @@ -4359,22 +4502,23 @@ static int jsonEachNext(sqlite3_vtab_cursor *cur){ */ static int jsonEachPathLength(JsonEachCursor *p){ u32 n = p->path.nUsed; + const char *z = p->path.zBuf; if( p->iRowid==0 && p->bRecursive && n>1 ){ - if( p->path.zBuf[n-1]==']' ){ + if( z[n-1]==']' ){ do{ - assert( n>0 ); + assert( n>1 ); n--; - }while( p->path.zBuf[n]!='[' ); + }while( z[n]!='[' ); + }else if( z[n-1]=='"' ){ + do{ + assert( n>1 ); + n--; + }while( z[n]!='.' || z[n+1]!='"' ); }else{ - u32 sz = 0; - jsonbPayloadSize(&p->sParse, p->i, &sz); - if( p->path.zBuf[n-1]=='"' ) sz += 2; - assert( sz<n ); - n -= sz; - while( p->path.zBuf[n]!='.' && ALWAYS(n>0) ){ + do{ + assert( n>1 ); n--; - assert( n>0 ); - } + }while( z[n]!='.' ); } } return n; |