diff options
author | Peter Eisentraut <peter@eisentraut.org> | 2020-06-29 11:04:42 +0200 |
---|---|---|
committer | Peter Eisentraut <peter@eisentraut.org> | 2020-06-29 11:05:00 +0200 |
commit | 78c887679d7632c1211f85eb95723f3226bf1b46 (patch) | |
tree | 7c333d0f5f4c6238ea74bc44038564a179882107 /src | |
parent | aafefb4dcbf79e8cb1439e888a9cdb3dfefa7657 (diff) | |
download | postgresql-78c887679d7632c1211f85eb95723f3226bf1b46.tar.gz postgresql-78c887679d7632c1211f85eb95723f3226bf1b46.zip |
Add current substring regular expression syntax
SQL:1999 had syntax
SUBSTRING(text FROM pattern FOR escapechar)
but this was replaced in SQL:2003 by the more clear
SUBSTRING(text SIMILAR pattern ESCAPE escapechar)
but this was never implemented in PostgreSQL. This patch adds that
new syntax as an alternative in the parser, and updates documentation
and tests to indicate that this is the preferred alternative now.
Reviewed-by: Pavel Stehule <pavel.stehule@gmail.com>
Reviewed-by: Vik Fearing <vik@postgresfriends.org>
Reviewed-by: Fabien COELHO <coelho@cri.ensmp.fr>
Discussion: https://www.postgresql.org/message-id/flat/a15db31c-d0f8-8ce0-9039-578a31758adb%402ndquadrant.com
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/catalog/information_schema.sql | 2 | ||||
-rw-r--r-- | src/backend/parser/gram.y | 26 | ||||
-rw-r--r-- | src/test/regress/expected/strings.out | 31 | ||||
-rw-r--r-- | src/test/regress/sql/strings.sql | 26 |
4 files changed, 59 insertions, 26 deletions
diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql index 3e07fb107eb..5ab47e77431 100644 --- a/src/backend/catalog/information_schema.sql +++ b/src/backend/catalog/information_schema.sql @@ -182,7 +182,7 @@ CREATE FUNCTION _pg_interval_type(typid oid, mod int4) RETURNS text AS $$SELECT CASE WHEN $1 IN (1186) /* interval */ - THEN pg_catalog.upper(substring(pg_catalog.format_type($1, $2) from 'interval[()0-9]* #"%#"' for '#')) + THEN pg_catalog.upper(substring(pg_catalog.format_type($1, $2) similar 'interval[()0-9]* #"%#"' escape '#')) ELSE null END$$; diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 1a843049f05..4ff35095b85 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -14451,7 +14451,27 @@ position_list: | /*EMPTY*/ { $$ = NIL; } ; -/* SUBSTRING() arguments */ +/* + * SUBSTRING() arguments + * + * Note that SQL:1999 has both + * + * text FROM int FOR int + * + * and + * + * text FROM pattern FOR escape + * + * In the parser we map them both to a call to the substring() function and + * rely on type resolution to pick the right one. + * + * In SQL:2003, the second variant was changed to + * + * text SIMILAR pattern ESCAPE escape + * + * We could in theory map that to a different function internally, but + * since we still support the SQL:1999 version, we don't. + */ substr_list: a_expr FROM a_expr FOR a_expr { @@ -14483,6 +14503,10 @@ substr_list: makeTypeCast($3, SystemTypeName("int4"), -1)); } + | a_expr SIMILAR a_expr ESCAPE a_expr + { + $$ = list_make3($1, $3, $5); + } /* * We also want to support generic substring functions that * accept the usual generic list of arguments. diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 6e98d183f61..8c034c9599f 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -397,6 +397,13 @@ SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456"; (1 row) -- T581 regular expression substring (with SQL's bizarre regexp syntax) +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"(b_d)#"%' ESCAPE '#') AS "bcd"; + bcd +----- + bcd +(1 row) + +-- obsolete SQL99 syntax SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd"; bcd ----- @@ -404,75 +411,75 @@ SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd"; (1 row) -- No match should return NULL -SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True"; +SELECT SUBSTRING('abcdefg' SIMILAR '#"(b_d)#"%' ESCAPE '#') IS NULL AS "True"; True ------ t (1 row) -- Null inputs should return NULL -SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True"; +SELECT SUBSTRING('abcdefg' SIMILAR '%' ESCAPE NULL) IS NULL AS "True"; True ------ t (1 row) -SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True"; +SELECT SUBSTRING(NULL SIMILAR '%' ESCAPE '#') IS NULL AS "True"; True ------ t (1 row) -SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True"; +SELECT SUBSTRING('abcdefg' SIMILAR NULL ESCAPE '#') IS NULL AS "True"; True ------ t (1 row) -- The first and last parts should act non-greedy -SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"g' ESCAPE '#') AS "bcdef"; bcdef ------- bcdef (1 row) -SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*' ESCAPE '#') AS "abcdefg"; abcdefg --------- abcdefg (1 row) -- Vertical bar in any part affects only that part -SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a|b#"%#"g' ESCAPE '#') AS "bcdef"; bcdef ------- bcdef (1 row) -SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"x|g' ESCAPE '#') AS "bcdef"; bcdef ------- bcdef (1 row) -SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%|ab#"g' ESCAPE '#') AS "bcdef"; bcdef ------- bcdef (1 row) -- Can't have more than two part separators -SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*#"x' ESCAPE '#') AS "error"; ERROR: SQL regular expression may not contain more than two escape-double-quote separators CONTEXT: SQL function "substring" statement 1 -- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty -SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%g' ESCAPE '#') AS "bcdefg"; bcdefg -------- bcdefg (1 row) -SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a%g' ESCAPE '#') AS "abcdefg"; abcdefg --------- abcdefg diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 3e89159a4fd..14901a26923 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -132,31 +132,33 @@ SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890"; SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456"; -- T581 regular expression substring (with SQL's bizarre regexp syntax) +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"(b_d)#"%' ESCAPE '#') AS "bcd"; +-- obsolete SQL99 syntax SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd"; -- No match should return NULL -SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True"; +SELECT SUBSTRING('abcdefg' SIMILAR '#"(b_d)#"%' ESCAPE '#') IS NULL AS "True"; -- Null inputs should return NULL -SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True"; -SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True"; -SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True"; +SELECT SUBSTRING('abcdefg' SIMILAR '%' ESCAPE NULL) IS NULL AS "True"; +SELECT SUBSTRING(NULL SIMILAR '%' ESCAPE '#') IS NULL AS "True"; +SELECT SUBSTRING('abcdefg' SIMILAR NULL ESCAPE '#') IS NULL AS "True"; -- The first and last parts should act non-greedy -SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef"; -SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"g' ESCAPE '#') AS "bcdef"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*' ESCAPE '#') AS "abcdefg"; -- Vertical bar in any part affects only that part -SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef"; -SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef"; -SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a|b#"%#"g' ESCAPE '#') AS "bcdef"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"x|g' ESCAPE '#') AS "bcdef"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%|ab#"g' ESCAPE '#') AS "bcdef"; -- Can't have more than two part separators -SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*#"x' ESCAPE '#') AS "error"; -- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty -SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg"; -SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%g' ESCAPE '#') AS "bcdefg"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a%g' ESCAPE '#') AS "abcdefg"; -- substring() with just two arguments is not allowed by SQL spec; -- we accept it, but we interpret the pattern as a POSIX regexp not SQL |