diff options
-rw-r--r-- | manifest | 15 | ||||
-rw-r--r-- | manifest.uuid | 2 | ||||
-rw-r--r-- | src/json.c | 356 | ||||
-rw-r--r-- | test/json502.test | 25 |
4 files changed, 284 insertions, 114 deletions
@@ -1,5 +1,5 @@ -C Correctly\shandle\s8-byte\ssizes\sin\sthe\sJSONB\sformat.\n[forum:/forumpost/283daf08e91183fc|Forum\spost\s283daf08e91183fc]. -D 2023-12-06T17:39:31.569 +C Do\scorrect\scomparisons\sbetween\sobject\slabels\sin\sJSON\seven\swhen\sthe\stwo\slabels\ncontain\sdifferent\sJSON\sescapes. +D 2023-12-06T17:50:16.616 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -696,7 +696,7 @@ F src/hash.h 3340ab6e1d13e725571d7cee6d3e3135f0779a7d8e76a9ce0a85971fa3953c51 F src/hwtime.h f9c2dfb84dce7acf95ce6d289e46f5f9d3d1afd328e53da8f8e9008e3b3caae6 F src/in-operator.md 10cd8f4bcd225a32518407c2fb2484089112fd71 F src/insert.c 3f0a94082d978bbdd33c38fefea15346c6c6bffb70bc645a71dc0f1f87dd3276 -F src/json.c 07247c969e80e0a70241235ea4d00bb823830ce6a48f101fbcb1c72e8abf6f91 +F src/json.c c2e0fea06f40fb0319ed132fc181a25623585c943e08c690b522f216886ba316 F src/legacy.c d7874bc885906868cd51e6c2156698f2754f02d9eee1bae2d687323c3ca8e5aa F src/loadext.c 7432c944ff197046d67a1207790a1b13eec4548c85a9457eb0896bb3641dfb36 F src/main.c 1b89f3de98d1b59fec5bac1d66d6ece21f703821b8eaa0d53d9604c35309f6f9 @@ -1339,7 +1339,7 @@ F test/json103.test 53df87f83a4e5fa0c0a56eb29ff6c94055c6eb919f33316d62161a888011 F test/json104.test 1b844a70cddcfa2e4cd81a5db0657b2e61e7f00868310f24f56a9ba0114348c1 F test/json105.test e64a8d73677fbae67886642cd5076e2ef3efe89f8483b87595cf9c030216c9bd F test/json501.test ab168a12eb6eb14d479f8c1cdae3ac062fd5a4679f17f976e96f1af518408330 -F test/json502.test 98c38e3c4573841028a1381dfb81d4c3f9b105d39668167da10d055e503f6d0b +F test/json502.test 3c697e506fc38ccb455b49660b21b6e62e08ede0f2d0c869a7d171e17809093c F test/jsonb01.test cace70765b36a36aec9a85a41ea65667d3bbf647d4400ddc3ac76f8fe7d94f90 F test/keyword1.test 37ef6bba5d2ed5b07ecdd6810571de2956599dff F test/kvtest.c 6e0228409ea7ca0497dad503fbd109badb5e59545d131014b6aaac68b56f484a @@ -2153,8 +2153,9 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 63cb05a862532d2d56e9e81fe32ced09bf58f03146587a118f11c2a84e195e69 -R 3207d50f882d9851bd1114b702910d8f +P 73d390f39c0bbbc017e01544e4d43c76761f2599bd57f900131c706270dfd202 b9243ee8a37c62eb8848e765bd4af83bc1b3d3eb24fb4268a1357ad1f8b2e1fb +R fd697485adcd3130674f1a92f5c02f3d +T +closed b9243ee8a37c62eb8848e765bd4af83bc1b3d3eb24fb4268a1357ad1f8b2e1fb U drh -Z 9fdc41964fbd72df8cb3bfd54d04d4f1 +Z 9804c6a78387d2d6c50275c2413cc6ef # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index fdb030fa0..bba984a92 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -73d390f39c0bbbc017e01544e4d43c76761f2599bd57f900131c706270dfd202
\ No newline at end of file +bda2e30cc22e180b19a7a05824dd345880eb402ae5450b2d2dd954946c3ae135
\ No newline at end of file diff --git a/src/json.c b/src/json.c index e91c9b86d..46d7a3fa8 100644 --- a/src/json.c +++ b/src/json.c @@ -2133,6 +2133,188 @@ static void jsonBlobEdit( } /* +** Return the number of escaped newlines to be ignored. +** An escaped newline is a one of the following byte sequences: +** +** 0x5c 0x0a +** 0x5c 0x0d +** 0x5c 0x0d 0x0a +** 0x5c 0xe2 0x80 0xa8 +** 0x5c 0xe2 0x80 0xa9 +*/ +static u32 jsonBytesToBypass(const char *z, u32 n){ + u32 i = 0; + while( i+1<n ){ + if( z[i]!='\\' ) return i; + if( z[i+1]=='\n' ){ + i += 2; + continue; + } + if( z[i+1]=='\r' ){ + if( i+2<n && z[i+2]=='\n' ){ + i += 3; + }else{ + i += 2; + } + continue; + } + if( 0xe2==(u8)z[i+1] + && i+3<n + && 0x80==(u8)z[i+2] + && (0xa8==(u8)z[i+3] || 0xa9==(u8)z[i+3]) + ){ + i += 4; + continue; + } + break; + } + return i; +} + +/* +** Input z[0..n] defines JSON escape sequence including the leading '\\'. +** Decode that escape sequence into a single character. Write that +** character into *piOut. Return the number of bytes in the escape sequence. +*/ +static u32 jsonUnescapeOneChar(const char *z, u32 n, u32 *piOut){ + assert( n>0 ); + assert( z[0]=='\\' ); + if( n<2 ){ + *piOut = 0xFFFD; + return n; + } + switch( (u8)z[1] ){ + case 'u': { + u32 v, vlo; + if( n<6 ){ + *piOut = 0xFFFD; + return n; + } + v = jsonHexToInt4(&z[2]); + if( (v & 0xfc00)==0xd800 + && n>=12 + && z[6]=='\\' + && z[7]=='u' + && ((vlo = jsonHexToInt4(&z[8]))&0xfc00)==0xdc00 + ){ + *piOut = ((v&0x3ff)<<10) + (vlo&0x3ff) + 0x10000; + return 12; + }else{ + *piOut = v; + return 6; + } + } + case 'b': { *piOut = '\b'; return 2; } + case 'f': { *piOut = '\f'; return 2; } + case 'n': { *piOut = '\n'; return 2; } + case 'r': { *piOut = '\r'; return 2; } + case 't': { *piOut = '\t'; return 2; } + case 'v': { *piOut = '\v'; return 2; } + case '0': { *piOut = 0; return 2; } + case '\'': + case '"': + case '/': + case '\\':{ *piOut = z[1]; return 2; } + case 'x': { + if( n<4 ){ + *piOut = 0xFFFD; + return n; + } + *piOut = (jsonHexToInt(z[2])<<4) | jsonHexToInt(z[3]); + return 4; + } + case 0xe2: + case '\r': + case '\n': { + u32 nSkip = jsonBytesToBypass(z, n); + if( nSkip==0 ){ + *piOut = 0xFFFD; + return n; + }else if( nSkip==n ){ + *piOut = 0; + return n; + }else if( z[nSkip]=='\\' ){ + return nSkip + jsonUnescapeOneChar(&z[nSkip], n-nSkip, piOut); + }else{ + *piOut = z[nSkip]; + return nSkip+1; + } + } + default: { + *piOut = 0xFFFD; + return 2; + } + } +} + + +/* +** Compare two object labels. Return 1 if they are equal and +** 0 if they differ. +** +** In this version, we know that one or the other or both of the +** two comparands contains an escape sequence. +*/ +static SQLITE_NOINLINE int jsonLabelCompareEscaped( + const char *zLeft, /* The left label */ + u32 nLeft, /* Size of the left label in bytes */ + int rawLeft, /* True if zLeft contains no escapes */ + const char *zRight, /* The right label */ + u32 nRight, /* Size of the right label in bytes */ + int rawRight /* True if zRight is escape-free */ +){ + u32 cLeft, cRight; + assert( rawLeft==0 || rawRight==0 ); + while( nLeft>0 && nRight>0 ){ + if( rawLeft || zLeft[0]!='\\' ){ + cLeft = ((u8*)zLeft)[0]; + zLeft++; + nLeft--; + }else{ + u32 n = jsonUnescapeOneChar(zLeft, nLeft, &cLeft); + zLeft += n; + assert( n<=nLeft ); + nLeft -= n; + } + if( rawRight || zRight[0]!='\\' ){ + cRight = ((u8*)zRight)[0]; + zRight++; + nRight--; + }else{ + u32 n = jsonUnescapeOneChar(zRight, nRight, &cRight); + zRight += n; + assert( n<=nRight ); + nRight -= n; + } + if( cLeft!=cRight ) return 0; + } + return nLeft==0 && nRight==0; +} + +/* +** Compare two object labels. Return 1 if they are equal and +** 0 if they differ. Return -1 if an OOM occurs. +*/ +static int jsonLabelCompare( + const char *zLeft, /* The left label */ + u32 nLeft, /* Size of the left label in bytes */ + int rawLeft, /* True if zLeft contains no escapes */ + const char *zRight, /* The right label */ + u32 nRight, /* Size of the right label in bytes */ + int rawRight /* True if zRight is escape-free */ +){ + if( rawLeft && rawRight ){ + /* Simpliest case: Neither label contains escapes. A simple + ** memcmp() is sufficient. */ + if( nLeft!=nRight ) return 0; + return memcmp(zLeft, zRight, nLeft)==0; + }else{ + return jsonLabelCompareEscaped(zLeft, nLeft, rawLeft, + zRight, nRight, rawRight); + } +} + +/* ** Error returns from jsonLookupStep() */ #define JSON_LOOKUP_ERROR 0xffffffff @@ -2237,6 +2419,7 @@ static u32 jsonLookupStep( return iRoot; } if( zPath[0]=='.' ){ + int rawKey = 1; x = pParse->aBlob[iRoot]; zPath++; if( zPath[0]=='"' ){ @@ -2249,6 +2432,7 @@ static u32 jsonLookupStep( return JSON_LOOKUP_PATHERROR; } testcase( nKey==0 ); + rawKey = memchr(zKey, '\\', nKey)==0; }else{ zKey = zPath; for(i=0; zPath[i] && zPath[i]!='.' && zPath[i]!='['; i++){} @@ -2262,13 +2446,17 @@ static u32 jsonLookupStep( j = iRoot + n; /* j is the index of a label */ iEnd = j+sz; while( j<iEnd ){ + int rawLabel; + const char *zLabel; x = pParse->aBlob[j] & 0x0f; if( x<JSONB_TEXT || x>JSONB_TEXTRAW ) return JSON_LOOKUP_ERROR; n = jsonbPayloadSize(pParse, j, &sz); if( n==0 ) return JSON_LOOKUP_ERROR; k = j+n; /* k is the index of the label text */ if( k+sz>=iEnd ) return JSON_LOOKUP_ERROR; - if( sz==nKey && memcmp(&pParse->aBlob[k], zKey, nKey)==0 ){ + zLabel = (const char*)&pParse->aBlob[k]; + rawLabel = x==JSONB_TEXT || x==JSONB_TEXTRAW; + if( jsonLabelCompare(zKey, nKey, rawKey, zLabel, sz, rawLabel) ){ u32 v = k+sz; /* v is the index of the value */ if( ((pParse->aBlob[v])&0x0f)>JSONB_OBJECT ) return JSON_LOOKUP_ERROR; n = jsonbPayloadSize(pParse, v, &sz); @@ -2292,7 +2480,7 @@ static u32 jsonLookupStep( testcase( pParse->eEdit==JEDIT_INS ); testcase( pParse->eEdit==JEDIT_SET ); memset(&ix, 0, sizeof(ix)); - jsonBlobAppendNode(&ix,JSONB_TEXTRAW, nKey, 0); + jsonBlobAppendNode(&ix, rawKey?JSONB_TEXTRAW:JSONB_TEXT5, nKey, 0); pParse->oom |= ix.oom; rc = jsonCreateEditSubstructure(pParse, &v, &zPath[i]); if( !JSON_LOOKUP_ISERROR(rc) @@ -2496,72 +2684,27 @@ static void jsonReturnFromBlob( for(iIn=iOut=0; iIn<sz; iIn++){ char c = z[iIn]; if( c=='\\' ){ - c = z[++iIn]; - if( c=='u' ){ - u32 v = jsonHexToInt4(z+iIn+1); - iIn += 4; - if( v==0 ) break; - if( v<=0x7f ){ - zOut[iOut++] = (char)v; - }else if( v<=0x7ff ){ - zOut[iOut++] = (char)(0xc0 | (v>>6)); - zOut[iOut++] = 0x80 | (v&0x3f); - }else{ - u32 vlo; - if( (v&0xfc00)==0xd800 - && iIn<sz-6 - && z[iIn+1]=='\\' - && z[iIn+2]=='u' - && ((vlo = jsonHexToInt4(z+iIn+3))&0xfc00)==0xdc00 - ){ - /* We have a surrogate pair */ - v = ((v&0x3ff)<<10) + (vlo&0x3ff) + 0x10000; - iIn += 6; - zOut[iOut++] = 0xf0 | (v>>18); - zOut[iOut++] = 0x80 | ((v>>12)&0x3f); - zOut[iOut++] = 0x80 | ((v>>6)&0x3f); - zOut[iOut++] = 0x80 | (v&0x3f); - }else{ - zOut[iOut++] = 0xe0 | (v>>12); - zOut[iOut++] = 0x80 | ((v>>6)&0x3f); - zOut[iOut++] = 0x80 | (v&0x3f); - } - } - continue; - }else if( c=='b' ){ - c = '\b'; - }else if( c=='f' ){ - c = '\f'; - }else if( c=='n' ){ - c = '\n'; - }else if( c=='r' ){ - c = '\r'; - }else if( c=='t' ){ - c = '\t'; - }else if( c=='v' ){ - c = '\v'; - }else if( c=='\'' || c=='"' || c=='/' || c=='\\' ){ - /* pass through unchanged */ - }else if( c=='0' ){ - c = 0; - }else if( c=='x' ){ - c = (jsonHexToInt(z[iIn+1])<<4) | jsonHexToInt(z[iIn+2]); - iIn += 2; - }else if( c=='\r' && z[i+1]=='\n' ){ - iIn++; - continue; - }else if( 0xe2==(u8)c - && iIn<sz-2 - && 0x80==(u8)z[iIn+1] - && (0xa8==(u8)z[iIn+2] || 0xa9==(u8)z[iIn+2]) - ){ - iIn += 2; - continue; + u32 v; + u32 szEscape = jsonUnescapeOneChar(&z[iIn], sz-iIn, &v); + if( v<=0x7f ){ + zOut[iOut++] = (char)v; + }else if( v<=0x7ff ){ + zOut[iOut++] = (char)(0xc0 | (v>>6)); + zOut[iOut++] = 0x80 | (v&0x3f); + }else if( v<0x10000 ){ + zOut[iOut++] = 0xe0 | (v>>12); + zOut[iOut++] = 0x80 | ((v>>6)&0x3f); + zOut[iOut++] = 0x80 | (v&0x3f); }else{ - continue; + zOut[iOut++] = 0xf0 | (v>>18); + zOut[iOut++] = 0x80 | ((v>>12)&0x3f); + zOut[iOut++] = 0x80 | ((v>>6)&0x3f); + zOut[iOut++] = 0x80 | (v&0x3f); } - } /* end if( c=='\\' ) */ - zOut[iOut++] = c; + iIn += szEscape - 1; + }else{ + zOut[iOut++] = c; + } } /* end for() */ zOut[iOut] = 0; sqlite3_result_text(pCtx, zOut, iOut, sqlite3_free); @@ -3142,6 +3285,20 @@ static void jsonArrayLengthFunc( jsonParseFree(p); } +/* True if the string is all digits */ +static int jsonAllDigits(const char *z, int n){ + int i; + for(i=0; i<n && sqlite3Isdigit(z[i]); i++){} + return i==n; +} + +/* True if the string is all alphanumerics and underscores */ +static int jsonAllAlphanum(const char *z, int n){ + int i; + for(i=0; i<n && (sqlite3Isalnum(z[i]) || z[i]=='_'); i++){} + return i==n; +} + /* ** json_extract(JSON, PATH, ...) ** "->"(JSON,PATH) @@ -3199,15 +3356,19 @@ static void jsonExtractFunc( ** [NUMBER] ==> $[NUMBER] // Not PG. Purely for convenience */ jsonStringInit(&jx, ctx); - if( sqlite3Isdigit(zPath[0]) ){ + if( jsonAllDigits(zPath, nPath) ){ jsonAppendRawNZ(&jx, "[", 1); jsonAppendRaw(&jx, zPath, nPath); jsonAppendRawNZ(&jx, "]", 2); - }else if( zPath[0]!='[' ){ + }else if( jsonAllAlphanum(zPath, nPath) ){ jsonAppendRawNZ(&jx, ".", 1); jsonAppendRaw(&jx, zPath, nPath); + }else if( zPath[0]=='[' && nPath>=3 && zPath[nPath-1]==']' ){ + jsonAppendRaw(&jx, zPath, nPath); }else{ + jsonAppendRawNZ(&jx, ".\"", 2); jsonAppendRaw(&jx, zPath, nPath); + jsonAppendRawNZ(&jx, "\"", 1); } jsonStringTerminate(&jx); j = jsonLookupStep(p, 0, jx.zBuf, 0); @@ -3397,6 +3558,7 @@ static int jsonMergePatch( iTCursor = iTStart; iTEnd = iTEndBE + pTarget->delta; while( iTCursor<iTEnd ){ + int isEqual; /* true if the patch and target labels match */ iTLabel = iTCursor; eTLabel = pTarget->aBlob[iTCursor] & 0x0f; if( eTLabel<JSONB_TEXT || eTLabel>JSONB_TEXTRAW ){ @@ -3409,33 +3571,14 @@ static int jsonMergePatch( nTValue = jsonbPayloadSize(pTarget, iTValue, &szTValue); if( nTValue==0 ) return JSON_MERGE_BADTARGET; if( iTValue + nTValue + szTValue > iTEnd ) return JSON_MERGE_BADTARGET; - if( eTLabel==ePLabel ){ - /* Common case */ - if( szTLabel==szPLabel - && memcmp(&pTarget->aBlob[iTLabel+nTLabel], - &pPatch->aBlob[iPLabel+nPLabel], szTLabel)==0 - ){ - break; /* Labels match. */ - } - }else{ - /* Should rarely happen */ - JsonString s1, s2; - int isEqual, isOom; - jsonStringInit(&s1, 0); - jsonXlateBlobToText(pTarget, iTLabel, &s1); - jsonStringInit(&s2, 0); - jsonXlateBlobToText(pPatch, iPLabel, &s2); - isOom = s1.eErr || s2.eErr; - if( s1.nUsed==s2.nUsed && memcmp(s1.zBuf, s2.zBuf, s1.nUsed)==0 ){ - isEqual = 1; - }else{ - isEqual = 0; - } - jsonStringReset(&s1); - jsonStringReset(&s2); - if( isOom ) return JSON_MERGE_OOM; - if( isEqual ) break; - } + isEqual = jsonLabelCompare( + (const char*)&pPatch->aBlob[iPLabel+nPLabel], + szPLabel, + (ePLabel==JSONB_TEXT || ePLabel==JSONB_TEXTRAW), + (const char*)&pTarget->aBlob[iTLabel+nTLabel], + szTLabel, + (eTLabel==JSONB_TEXT || eTLabel==JSONB_TEXTRAW)); + if( isEqual ) break; iTCursor = iTValue + nTValue + szTValue; } x = pPatch->aBlob[iPValue] & 0x0f; @@ -4359,22 +4502,23 @@ static int jsonEachNext(sqlite3_vtab_cursor *cur){ */ static int jsonEachPathLength(JsonEachCursor *p){ u32 n = p->path.nUsed; + const char *z = p->path.zBuf; if( p->iRowid==0 && p->bRecursive && n>1 ){ - if( p->path.zBuf[n-1]==']' ){ + if( z[n-1]==']' ){ do{ - assert( n>0 ); + assert( n>1 ); n--; - }while( p->path.zBuf[n]!='[' ); + }while( z[n]!='[' ); + }else if( z[n-1]=='"' ){ + do{ + assert( n>1 ); + n--; + }while( z[n]!='.' || z[n+1]!='"' ); }else{ - u32 sz = 0; - jsonbPayloadSize(&p->sParse, p->i, &sz); - if( p->path.zBuf[n-1]=='"' ) sz += 2; - assert( sz<n ); - n -= sz; - while( p->path.zBuf[n]!='.' && ALWAYS(n>0) ){ + do{ + assert( n>1 ); n--; - assert( n>0 ); - } + }while( z[n]!='.' ); } } return n; diff --git a/test/json502.test b/test/json502.test index 595bf6331..48c372c4f 100644 --- a/test/json502.test +++ b/test/json502.test @@ -36,5 +36,30 @@ do_catchsql_test 2.3 { SELECT '{a:null,{"h":[1,[1,2,3]],"j":"abc"}:true}'->'$h[#-1]'; } {1 {malformed JSON}} +# Verify that escaped label names are compared correctly. +# +do_execsql_test 3.1 { + SELECT '{"a\x62c":123}' ->> 'abc'; +} 123 +do_execsql_test 3.2 { + SELECT '{"abc":123}' ->> 'a\x62c'; +} 123 + +db null null +do_execsql_test 3.3 { + DROP TABLE t1; + CREATE TABLE t1(x); + INSERT INTO t1 VALUES(json_insert('{}','$.a\',111,'$."b\\"',222)); + INSERT INTO t1 VALUES(jsonb_insert('{}','$.a\',111,'$."b\\"',222)); + SELECT x->'$.a\', x->'$.a\\', x->'$."a\\"', x->'$."b\\"' FROM t1; +} {111 null 111 222 111 null 111 222} + +do_execsql_test 3.4 { + SELECT json_patch('{"a\x62c":123}','{"ab\x63":456}') ->> 'abc'; +} 456 + +do_execsql_test 4.1 { + SELECT * FROM json_tree('{"\u0017":1}','$."\x17"'); +} {{\x17} 1 integer 1 1 null {$."\x17"} {$}} finish_test |