From 2a0af7fe460eb46f9af996075972bf7c2e3f211d Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Thu, 25 Feb 2021 13:00:40 -0500
Subject: Allow complemented character class escapes within regex brackets.

The complement-class escapes \D, \S, \W are now allowed within
bracket expressions. There is no semantic difficulty with doing
that, but the rather hokey macro-expansion-based implementation
previously used here couldn't cope.

Also, invent "word" as an allowed character class name, thus "\w"
is now equivalent to "[[:word:]]" outside brackets, or "[:word:]"
within brackets. POSIX allows such implementation-specific
extensions, and the same name is used in e.g. bash.

One surprising compatibility issue this raises is that constructs
such as "[\w-_]" are now disallowed, as our documentation has always
said they should be: character classes can't be endpoints of a range.
Previously, because \w was just a macro for "[:alnum:]_", such a
construct was read as "[[:alnum:]_-_]", so it was accepted so long as
the character after "-" was numerically greater than or equal to "_".

Some implementation cleanup along the way:

* Remove the lexnest() hack, and in consequence clean up wordchrs()
to not interact with the lexer.

* Fix colorcomplement() to not be O(N^2) in the number of colors
involved.

* Get rid of useless-as-far-as-I-can-see calls of element()
on single-character character element names in brackpart().
element() always maps these to the character itself, and things
would be quite broken if it didn't --- should "[a]" match something
different than "a" does? Besides, the shortcut path in brackpart()
wasn't doing this anyway, making it even more inconsistent.

Discussion: https://postgr.es/m/2845172.1613674385@sss.pgh.pa.us
Discussion: https://postgr.es/m/3220564.1613859619@sss.pgh.pa.us
---
 src/test/modules/test_regex/sql/test_regex.sql | 44 ++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'src/test/modules/test_regex/sql')

diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql
index 31e947ee9c6..b99329391e8 100644
--- a/src/test/modules/test_regex/sql/test_regex.sql
+++ b/src/test/modules/test_regex/sql/test_regex.sql
@@ -597,6 +597,50 @@ select * from test_regex('a[\s]b', 'a b', 'LPE');
 -- expectMatch 12.18 LPE {a[\w]b} axb axb
 select * from test_regex('a[\w]b', 'axb', 'LPE');
 
+-- these should be invalid
+select * from test_regex('[\w-~]*', 'ab01_~-`**', 'LNPSE');
+select * from test_regex('[~-\w]*', 'ab01_~-`**', 'LNPSE');
+select * from test_regex('[[:alnum:]-~]*', 'ab01~-`**', 'LNS');
+select * from test_regex('[~-[:alnum:]]*', 'ab01~-`**', 'LNS');
+
+-- test complemented char classes within brackets
+select * from test_regex('[\D]', '0123456789abc*', 'LPE');
+select * from test_regex('[^\D]', 'abc0123456789*', 'LPE');
+select * from test_regex('[1\D7]', '0123456789abc*', 'LPE');
+select * from test_regex('[7\D1]', '0123456789abc*', 'LPE');
+select * from test_regex('[^0\D1]', 'abc0123456789*', 'LPE');
+select * from test_regex('[^1\D0]', 'abc0123456789*', 'LPE');
+select * from test_regex('\W', '0123456789abc_*', 'LP');
+select * from test_regex('[\W]', '0123456789abc_*', 'LPE');
+select * from test_regex('[\s\S]*', '012 3456789abc_*', 'LNPE');
+
+-- check char classes' handling of newlines
+select * from test_regex('\s+', E'abc \n def', 'LP');
+select * from test_regex('\s+', E'abc \n def', 'nLP');
+select * from test_regex('[\s]+', E'abc \n def', 'LPE');
+select * from test_regex('[\s]+', E'abc \n def', 'nLPE');
+select * from test_regex('\S+', E'abc\ndef', 'LP');
+select * from test_regex('\S+', E'abc\ndef', 'nLP');
+select * from test_regex('[\S]+', E'abc\ndef', 'LPE');
+select * from test_regex('[\S]+', E'abc\ndef', 'nLPE');
+select * from test_regex('\d+', E'012\n345', 'LP');
+select * from test_regex('\d+', E'012\n345', 'nLP');
+select * from test_regex('[\d]+', E'012\n345', 'LPE');
+select * from test_regex('[\d]+', E'012\n345', 'nLPE');
+select * from test_regex('\D+', E'abc\ndef345', 'LP');
+select * from test_regex('\D+', E'abc\ndef345', 'nLP');
+select * from test_regex('[\D]+', E'abc\ndef345', 'LPE');
+select * from test_regex('[\D]+', E'abc\ndef345', 'nLPE');
+select * from test_regex('\w+', E'abc_012\ndef', 'LP');
+select * from test_regex('\w+', E'abc_012\ndef', 'nLP');
+select * from test_regex('[\w]+', E'abc_012\ndef', 'LPE');
+select * from test_regex('[\w]+', E'abc_012\ndef', 'nLPE');
+select * from test_regex('\W+', E'***\n@@@___', 'LP');
+select * from test_regex('\W+', E'***\n@@@___', 'nLP');
+select * from test_regex('[\W]+', E'***\n@@@___', 'LPE');
+select * from test_regex('[\W]+', E'***\n@@@___', 'nLPE');
+
+
 -- doing 13 "escapes"
 
 -- expectError 13.1 & "a\\" EESCAPE
-- 
cgit v1.2.3