From: Dmitry Volyntsev Date: Thu, 2 Jul 2020 13:59:33 +0000 (+0000) Subject: Introduced RegExpBuiltinExec(). X-Git-Url: http://git.kaiwu.me/postgresql/log/contrib/postgres_fdw/static/gitweb.js?a=commitdiff_plain;h=cdf66fc521a4f98541acd448c8e6b85510279ee4;p=njs.git Introduced RegExpBuiltinExec(). --- diff --git a/src/njs_pcre.c b/src/njs_pcre.c index f3537c92..c8fa5ba2 100644 --- a/src/njs_pcre.c +++ b/src/njs_pcre.c @@ -280,13 +280,13 @@ njs_pcre_default_free(void *p, void *memory_data) njs_int_t -njs_regex_match(njs_regex_t *regex, const u_char *subject, size_t len, - njs_regex_match_data_t *match_data, njs_regex_context_t *ctx) +njs_regex_match(njs_regex_t *regex, const u_char *subject, size_t off, + size_t len, njs_regex_match_data_t *match_data, njs_regex_context_t *ctx) { int ret; ret = pcre_exec(regex->code, regex->extra, (const char *) subject, len, - 0, 0, match_data->captures, match_data->ncaptures); + off, 0, match_data->captures, match_data->ncaptures); /* PCRE_ERROR_NOMATCH is -1. 
*/ diff --git a/src/njs_regex.h b/src/njs_regex.h index 99e1587c..bd4f5d3a 100644 --- a/src/njs_regex.h +++ b/src/njs_regex.h @@ -39,7 +39,8 @@ NJS_EXPORT njs_regex_match_data_t *njs_regex_match_data(njs_regex_t *regex, NJS_EXPORT void njs_regex_match_data_free(njs_regex_match_data_t *match_data, njs_regex_context_t *ctx); NJS_EXPORT njs_int_t njs_regex_match(njs_regex_t *regex, const u_char *subject, - size_t len, njs_regex_match_data_t *match_data, njs_regex_context_t *ctx); + size_t off, size_t len, njs_regex_match_data_t *match_data, + njs_regex_context_t *ctx); NJS_EXPORT int *njs_regex_captures(njs_regex_match_data_t *match_data); diff --git a/src/njs_regexp.c b/src/njs_regexp.c index 05a02031..fbed0fd5 100644 --- a/src/njs_regexp.c +++ b/src/njs_regexp.c @@ -26,9 +26,9 @@ static u_char *njs_regexp_compile_trace_handler(njs_trace_t *trace, njs_trace_data_t *td, u_char *start); static u_char *njs_regexp_match_trace_handler(njs_trace_t *trace, njs_trace_data_t *td, u_char *start); -static njs_int_t njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, - njs_utf8_t utf8, u_char *string, njs_regex_match_data_t *match_data, - uint32_t last_index); +static njs_array_t *njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, + njs_regexp_utf8_t type, njs_string_prop_t *string, + njs_regex_match_data_t *data); static njs_int_t njs_regexp_string_create(njs_vm_t *vm, njs_value_t *value, u_char *start, uint32_t size, int32_t length); @@ -550,7 +550,7 @@ njs_regexp_compile_trace_handler(njs_trace_t *trace, njs_trace_data_t *td, njs_int_t njs_regexp_match(njs_vm_t *vm, njs_regex_t *regex, const u_char *subject, - size_t len, njs_regex_match_data_t *match_data) + size_t off, size_t len, njs_regex_match_data_t *match_data) { njs_int_t ret; njs_trace_handler_t handler; @@ -558,7 +558,8 @@ njs_regexp_match(njs_vm_t *vm, njs_regex_t *regex, const u_char *subject, handler = vm->trace.handler; vm->trace.handler = njs_regexp_match_trace_handler; - ret = 
njs_regex_match(regex, subject, len, match_data, vm->regex_context); + ret = njs_regex_match(regex, subject, off, len, match_data, + vm->regex_context); vm->trace.handler = handler; @@ -617,9 +618,7 @@ static njs_int_t njs_regexp_prototype_last_index(njs_vm_t *vm, njs_object_prop_t *unused, njs_value_t *value, njs_value_t *setval, njs_value_t *retval) { - uint32_t index, last_index; - njs_regexp_t *regexp; - njs_string_prop_t string; + njs_regexp_t *regexp; regexp = njs_object_proto_lookup(njs_object(value), NJS_REGEXP, njs_regexp_t); @@ -635,23 +634,7 @@ njs_regexp_prototype_last_index(njs_vm_t *vm, njs_object_prop_t *unused, return NJS_OK; } - if (njs_slow_path(!njs_is_number(&regexp->last_index))) { - *retval = regexp->last_index; - return NJS_OK; - } - - (void) njs_string_prop(&string, &regexp->string); - - last_index = njs_number(&regexp->last_index); - - if (njs_slow_path(string.size < last_index)) { - *retval = regexp->last_index; - return NJS_OK; - } - - index = njs_string_index(&string, last_index); - njs_set_number(retval, index); - + *retval = regexp->last_index; return NJS_OK; } @@ -802,8 +785,8 @@ njs_regexp_prototype_test(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, } } - match = njs_regexp_match(vm, regex, string.start, string.size, - match_data); + match = njs_regexp_match(vm, regex, string.start, 0, string.size, + match_data); if (match >= 0) { retval = &njs_value_true; @@ -844,36 +827,25 @@ done: } -njs_int_t -njs_regexp_prototype_exec(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, - njs_index_t unused) +/** + * TODO: sticky, unicode flags. 
+ */ +static njs_int_t +njs_regexp_builtin_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s, + njs_value_t *retval) { + size_t length, offset; int64_t last_index; njs_int_t ret; - njs_utf8_t utf8; - njs_value_t *value, lvalue; + njs_array_t *result; njs_regexp_t *regexp; njs_string_prop_t string; njs_regexp_utf8_t type; njs_regexp_pattern_t *pattern; njs_regex_match_data_t *match_data; - if (!njs_is_regexp(njs_arg(args, nargs, 0))) { - njs_type_error(vm, "\"this\" argument is not a regexp"); - return NJS_ERROR; - } - - value = njs_lvalue_arg(&lvalue, args, nargs, 1); - - if (!njs_is_string(value)) { - ret = njs_value_to_string(vm, value, value); - if (njs_slow_path(ret != NJS_OK)) { - return ret; - } - } - - regexp = njs_regexp(&args[0]); - regexp->string = *value; + regexp = njs_regexp(r); + regexp->string = *s; pattern = regexp->pattern; ret = njs_value_to_length(vm, &regexp->last_index, &last_index); @@ -885,94 +857,113 @@ njs_regexp_prototype_exec(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, last_index = 0; } - (void) njs_string_prop(&string, value); + length = njs_string_prop(&string, s); - if (string.size >= (size_t) last_index) { - utf8 = NJS_STRING_BYTE; - type = NJS_REGEXP_BYTE; + if (njs_slow_path((size_t) last_index > length)) { + goto not_found; + } - if (string.length != 0) { - utf8 = NJS_STRING_ASCII; - type = NJS_REGEXP_UTF8; + type = NJS_REGEXP_BYTE; - if (string.length != string.size) { - utf8 = NJS_STRING_UTF8; - } - } + if (length != string.size) { + /* UTF-8 string. 
*/ + type = NJS_REGEXP_UTF8; + } - pattern = regexp->pattern; + pattern = regexp->pattern; - if (njs_regex_is_valid(&pattern->regex[type])) { - string.start += last_index; - string.size -= last_index; + if (njs_slow_path(!njs_regex_is_valid(&pattern->regex[type]))) { + goto not_found; + } - match_data = njs_regex_match_data(&pattern->regex[type], - vm->regex_context); - if (njs_slow_path(match_data == NULL)) { - njs_memory_error(vm); - return NJS_ERROR; - } + match_data = njs_regex_match_data(&pattern->regex[type], vm->regex_context); + if (njs_slow_path(match_data == NULL)) { + njs_memory_error(vm); + return NJS_ERROR; + } - ret = njs_regexp_match(vm, &pattern->regex[type], string.start, - string.size, match_data); - if (ret >= 0) { - return njs_regexp_exec_result(vm, regexp, utf8, string.start, - match_data, last_index); - } + if (type != NJS_REGEXP_UTF8) { + offset = last_index; - if (njs_slow_path(ret != NJS_REGEX_NOMATCH)) { - njs_regex_match_data_free(match_data, vm->regex_context); + } else { + /* UTF-8 string. 
*/ + offset = njs_string_offset(string.start, string.start + string.size, + last_index) - string.start; + } - return NJS_ERROR; - } + ret = njs_regexp_match(vm, &pattern->regex[type], string.start, offset, + string.size, match_data); + if (ret >= 0) { + result = njs_regexp_exec_result(vm, regexp, type, &string, match_data); + if (njs_slow_path(result == NULL)) { + return NJS_ERROR; } + + njs_set_array(retval, result); + return NJS_OK; } + if (njs_slow_path(ret != NJS_REGEX_NOMATCH)) { + njs_regex_match_data_free(match_data, vm->regex_context); + + return NJS_ERROR; + } + +not_found: + if (pattern->global) { njs_set_number(&regexp->last_index, 0); } - vm->retval = njs_value_null; + njs_set_null(retval); return NJS_OK; } -static njs_int_t -njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, njs_utf8_t utf8, - u_char *string, njs_regex_match_data_t *match_data, uint32_t last_index) +static njs_array_t * +njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, + njs_regexp_utf8_t type, njs_string_prop_t *string, + njs_regex_match_data_t *match_data) { - int *captures; - u_char *start; - int32_t size, length; - njs_int_t ret; - njs_uint_t i, n; - njs_array_t *array; - njs_value_t name; - njs_object_t *groups; - njs_object_prop_t *prop; - njs_regexp_group_t *group; - njs_lvlhsh_query_t lhq; + int *captures; + u_char *start; + int32_t size, length; + njs_int_t ret; + njs_uint_t i, n; + njs_array_t *array; + njs_value_t name; + njs_object_t *groups; + njs_object_prop_t *prop; + njs_regexp_group_t *group; + njs_lvlhsh_query_t lhq; + njs_regexp_pattern_t *pattern; static const njs_value_t string_index = njs_string("index"); static const njs_value_t string_input = njs_string("input"); static const njs_value_t string_groups = njs_string("groups"); - array = njs_array_alloc(vm, 0, regexp->pattern->ncaptures, 0); + pattern = regexp->pattern; + array = njs_array_alloc(vm, 0, pattern->ncaptures, 0); if (njs_slow_path(array == NULL)) { goto fail; } captures = 
njs_regex_captures(match_data); - for (i = 0; i < regexp->pattern->ncaptures; i++) { + for (i = 0; i < pattern->ncaptures; i++) { n = 2 * i; if (captures[n] != -1) { - start = &string[captures[n]]; + start = &string->start[captures[n]]; size = captures[n + 1] - captures[n]; - length = njs_string_calc_length(utf8, start, size); + if (type == NJS_REGEXP_UTF8) { + length = njs_max(njs_utf8_length(start, size), 0); + + } else { + length = size; + } ret = njs_regexp_string_create(vm, &array->start[i], start, size, length); @@ -985,17 +976,17 @@ njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, njs_utf8_t utf8, } } + /* FIXME: implement fast CreateDataPropertyOrThrow(). */ prop = njs_object_prop_alloc(vm, &string_index, &njs_value_undefined, 1); if (njs_slow_path(prop == NULL)) { goto fail; } - /* TODO: Non UTF-8 position */ + njs_set_number(&prop->value, njs_string_index(string, captures[0])); - njs_set_number(&prop->value, last_index + captures[0]); - - if (regexp->pattern->global) { - njs_set_number(&regexp->last_index, last_index + captures[1]); + if (pattern->global) { + njs_set_number(&regexp->last_index, + njs_string_index(string, captures[1])); } lhq.key_hash = NJS_INDEX_HASH; @@ -1038,7 +1029,7 @@ njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, njs_utf8_t utf8, goto insert_fail; } - if (regexp->pattern->ngroups != 0) { + if (pattern->ngroups != 0) { groups = njs_object_alloc(vm); if (njs_slow_path(groups == NULL)) { goto fail; @@ -1049,7 +1040,7 @@ njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, njs_utf8_t utf8, i = 0; do { - group = &regexp->pattern->groups[i]; + group = &pattern->groups[i]; ret = njs_string_set(vm, &name, group->name.start, group->name.length); @@ -1074,11 +1065,9 @@ njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, njs_utf8_t utf8, i++; - } while (i < regexp->pattern->ngroups); + } while (i < pattern->ngroups); } - njs_set_array(&vm->retval, array); - ret = NJS_OK; goto done; @@ -1094,7 +1083,71 @@ done: 
njs_regex_match_data_free(match_data, vm->regex_context); - return ret; + return (ret == NJS_OK) ? array : NULL; +} + + +njs_int_t +njs_regexp_prototype_exec(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, + njs_index_t unused) +{ + njs_int_t ret; + njs_value_t *r, *s; + njs_value_t string_lvalue; + + r = njs_argument(args, 0); + + if (njs_slow_path(!njs_is_regexp(r))) { + njs_type_error(vm, "\"this\" argument is not a regexp"); + return NJS_ERROR; + } + + s = njs_lvalue_arg(&string_lvalue, args, nargs, 1); + + ret = njs_value_to_string(vm, s, s); + if (njs_slow_path(ret != NJS_OK)) { + return ret; + } + + return njs_regexp_builtin_exec(vm, r, s, &vm->retval); +} + + +njs_int_t +njs_regexp_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s, + njs_value_t *retval) +{ + njs_int_t ret; + njs_value_t exec; + + static const njs_value_t string_exec = njs_string("exec"); + + ret = njs_value_property(vm, r, njs_value_arg(&string_exec), &exec); + if (njs_slow_path(ret == NJS_ERROR)) { + return NJS_ERROR; + } + + if (njs_is_function(&exec)) { + ret = njs_function_call(vm, njs_function(&exec), r, s, 1, retval); + if (njs_slow_path(ret == NJS_ERROR)) { + return NJS_ERROR; + } + + if (njs_slow_path(!njs_is_object(retval) && !njs_is_null(retval))) { + njs_type_error(vm, "unexpected \"%s\" retval in njs_regexp_exec()", + njs_type_string(retval->type)); + return NJS_ERROR; + } + + return NJS_OK; + } + + if (njs_slow_path(!njs_is_regexp(r))) { + njs_type_error(vm, "receiver argument is not a regexp"); + return NJS_ERROR; + } + + return njs_regexp_builtin_exec(vm, r, s, retval); } diff --git a/src/njs_regexp.h b/src/njs_regexp.h index 500c6fa7..b9f4a922 100644 --- a/src/njs_regexp.h +++ b/src/njs_regexp.h @@ -24,8 +24,10 @@ njs_regexp_flags_t njs_regexp_flags(u_char **start, u_char *end); njs_regexp_pattern_t *njs_regexp_pattern_create(njs_vm_t *vm, u_char *string, size_t length, njs_regexp_flags_t flags); njs_int_t njs_regexp_match(njs_vm_t *vm, njs_regex_t *regex, - const u_char 
*subject, size_t len, njs_regex_match_data_t *match_data); + const u_char *subject, size_t off, size_t len, njs_regex_match_data_t *d); njs_regexp_t *njs_regexp_alloc(njs_vm_t *vm, njs_regexp_pattern_t *pattern); +njs_int_t njs_regexp_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s, + njs_value_t *retval); njs_int_t njs_regexp_prototype_exec(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, njs_index_t unused); diff --git a/src/njs_string.c b/src/njs_string.c index 371605c7..ba7e5869 100644 --- a/src/njs_string.c +++ b/src/njs_string.c @@ -3022,7 +3022,7 @@ njs_string_prototype_search(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, if (njs_regex_is_valid(&pattern->regex[n])) { ret = njs_regexp_match(vm, &pattern->regex[n], string.start, - string.size, vm->single_match_data); + 0, string.size, vm->single_match_data); if (ret >= 0) { captures = njs_regex_captures(vm->single_match_data); index = njs_string_index(&string, captures[0]); @@ -3147,7 +3147,7 @@ njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args, end = p + string.size; do { - ret = njs_regexp_match(vm, &pattern->regex[type], p, string.size, + ret = njs_regexp_match(vm, &pattern->regex[type], p, 0, string.size, vm->single_match_data); if (ret < 0) { if (njs_fast_path(ret == NJS_REGEX_NOMATCH)) { @@ -3286,7 +3286,7 @@ njs_string_prototype_split(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, end = string.start + string.size; do { - ret = njs_regexp_match(vm, &pattern->regex[type], start, + ret = njs_regexp_match(vm, &pattern->regex[type], start, 0, end - start, vm->single_match_data); if (ret >= 0) { captures = njs_regex_captures(vm->single_match_data); @@ -3564,7 +3564,7 @@ njs_string_replace_regexp(njs_vm_t *vm, njs_value_t *this, njs_value_t *regex, do { ret = njs_regexp_match(vm, &pattern->regex[r->type], - r->part[0].start, r->part[0].size, + r->part[0].start, 0, r->part[0].size, r->match_data); if (ret < 0) { diff --git a/src/test/njs_unit_test.c b/src/test/njs_unit_test.c index 
2f087b8d..82079ed2 100644 --- a/src/test/njs_unit_test.c +++ b/src/test/njs_unit_test.c @@ -9711,6 +9711,9 @@ static njs_unit_test_t njs_test[] = { njs_str("var s; var r = /./g; while (s = r.exec('abc')); s"), njs_str("null") }, + { njs_str("(/α/).exec('γαβγ').index"), + njs_str("1") }, + { njs_str("var r = /LS/i.exec(false); r[0]"), njs_str("ls") },