diff options
author | Fabrice Bellard <fabrice@bellard.org> | 2024-01-08 18:42:29 +0100 |
---|---|---|
committer | Fabrice Bellard <fabrice@bellard.org> | 2024-01-08 18:42:29 +0100 |
commit | af308614a8b89e80a6e6a3f270292ef724931d1e (patch) | |
tree | f9f0bf6510bdccc486f1010543386d10abe93874 /libregexp.c | |
parent | aac24640b13825b550d3e6dbce0a3f9ac8baf577 (diff) | |
download | quickjs-af308614a8b89e80a6e6a3f270292ef724931d1e.tar.gz quickjs-af308614a8b89e80a6e6a3f270292ef724931d1e.zip |
fixed regexp case insensitive flag
Diffstat (limited to 'libregexp.c')
-rw-r--r-- | libregexp.c | 57 |
1 file changed, 1 insertion, 56 deletions
diff --git a/libregexp.c b/libregexp.c index dab8fa1..eeafd0b 100644 --- a/libregexp.c +++ b/libregexp.c @@ -34,9 +34,6 @@ /* TODO: - - Add full unicode canonicalize rules for character ranges (not - really useful but needed for exact "ignorecase" compatibility). - - Add a lock step execution mode (=linear time execution guaranteed) when the regular expression is "simple" i.e. no backreference nor complicated lookahead. The opcodes are designed for this execution @@ -120,33 +117,6 @@ static int dbuf_insert(DynBuf *s, int pos, int len) return 0; } -/* canonicalize with the specific JS regexp rules */ -static uint32_t lre_canonicalize(uint32_t c, BOOL is_utf16) -{ - uint32_t res[LRE_CC_RES_LEN_MAX]; - int len; - if (is_utf16) { - if (likely(c < 128)) { - if (c >= 'A' && c <= 'Z') - c = c - 'A' + 'a'; - } else { - lre_case_conv(res, c, 2); - c = res[0]; - } - } else { - if (likely(c < 128)) { - if (c >= 'a' && c <= 'z') - c = c - 'a' + 'A'; - } else { - /* legacy regexp: to upper case if single char >= 128 */ - len = lre_case_conv(res, c, FALSE); - if (len == 1 && res[0] >= 128) - c = res[0]; - } - } - return c; -} - static const uint16_t char_range_d[] = { 1, 0x0030, 0x0039 + 1, @@ -245,31 +215,6 @@ static int cr_init_char_range(REParseState *s, CharRange *cr, uint32_t c) return -1; } -static int cr_canonicalize(CharRange *cr) -{ - CharRange a; - uint32_t pt[2]; - int i, ret; - - cr_init(&a, cr->mem_opaque, lre_realloc); - pt[0] = 'a'; - pt[1] = 'z' + 1; - ret = cr_op(&a, cr->points, cr->len, pt, 2, CR_OP_INTER); - if (ret) - goto fail; - /* convert to upper case */ - /* XXX: the generic unicode case would be much more complicated - and not really useful */ - for(i = 0; i < a.len; i++) { - a.points[i] += 'A' - 'a'; - } - /* Note: for simplicity we keep the lower case ranges */ - ret = cr_union1(cr, a.points, a.len); - fail: - cr_free(&a); - return ret; -} - #ifdef DUMP_REOP static __maybe_unused void lre_dump_bytecode(const uint8_t *buf, int buf_len) @@ -922,7 +867,7 
@@ static int re_parse_char_class(REParseState *s, const uint8_t **pp) } } if (s->ignore_case) { - if (cr_canonicalize(cr)) + if (cr_regexp_canonicalize(cr, s->is_utf16)) goto memory_error; } if (invert) { |