From d54ceb9e176152f930e60709e07c636e8e5414f5 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sun, 11 Aug 2019 22:54:53 +0300 Subject: Adjust string comparison in jsonpath We have implemented jsonpath string comparison using default database locale. However, standard requires us to compare Unicode codepoints. This commit implements that, but for performance reasons we still use per-byte comparison for "==" operator. Thus, for consistency other comparison operators do per-byte comparison if Unicode codepoints appear to be equal. In some edge cases, when same Unicode codepoints have different binary representations in database encoding, we diverge standard to achieve better performance of "==" operator. In future to implement strict standard conformance, we can do normalization of input JSON strings. Original patch was written by Nikita Glukhov, rewritten by me. Reported-by: Markus Winand Discussion: https://postgr.es/m/8B7FA3B4-328D-43D7-95A8-37B8891B8C78%40winand.at Author: Nikita Glukhov, Alexander Korotkov Backpatch-through: 12 --- src/test/regress/sql/jsonb_jsonpath.sql | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src/test/regress/sql/jsonb_jsonpath.sql') diff --git a/src/test/regress/sql/jsonb_jsonpath.sql b/src/test/regress/sql/jsonb_jsonpath.sql index 733fbd4e0d0..e7629fb7f9d 100644 --- a/src/test/regress/sql/jsonb_jsonpath.sql +++ b/src/test/regress/sql/jsonb_jsonpath.sql @@ -387,3 +387,19 @@ SELECT jsonb_path_match('[true, true]', '$[*]', silent => false); SELECT jsonb '[{"a": 1}, {"a": 2}]' @@ '$[*].a > 1'; SELECT jsonb '[{"a": 1}, {"a": 2}]' @@ '$[*].a > 2'; SELECT jsonb_path_match('[{"a": 1}, {"a": 2}]', '$[*].a > 1'); + +-- test string comparison (Unicode codepoint collation) +WITH str(j, num) AS +( + SELECT jsonb_build_object('s', s), num + FROM unnest('{"", "a", "ab", "abc", "abcd", "b", "A", "AB", "ABC", "ABc", "ABcD", "B"}'::text[]) WITH ORDINALITY AS a(s, num) +) +SELECT + s1.j, s2.j, + jsonb_path_query_first(s1.j, '$.s < $s', vars => s2.j) lt, + jsonb_path_query_first(s1.j, '$.s <= $s', vars => s2.j) le, + jsonb_path_query_first(s1.j, '$.s == $s', vars => s2.j) eq, + jsonb_path_query_first(s1.j, '$.s >= $s', vars => s2.j) ge, + jsonb_path_query_first(s1.j, '$.s > $s', vars => s2.j) gt +FROM str s1, str s2 +ORDER BY s1.num, s2.num; -- cgit v1.2.3