From: Alexander Borisov Date: Tue, 28 May 2019 17:49:58 +0000 (+0300) Subject: Improved processing of invalid surrogate pairs in strings. X-Git-Tag: 0.3.3~25 X-Git-Url: http://git.kaiwu.me/postgresql/log/contrib/postgres_fdw/static/gitweb.js?a=commitdiff_plain;h=721af4cc5ec2470a69f0c680b0027b4d1339aff9;p=njs.git Improved processing of invalid surrogate pairs in strings. Previously, an exception was thrown on invalid surrogate pairs. Now, all such pairs are converted to the replacement character. This closes issue #170 on GitHub. --- diff --git a/njs/njs_parser_terminal.c b/njs/njs_parser_terminal.c index 7f1122b7..3c85ccda 100644 --- a/njs/njs_parser_terminal.c +++ b/njs/njs_parser_terminal.c @@ -1049,12 +1049,27 @@ njs_parser_escape_string_create(njs_vm_t *vm, njs_parser_t *parser, } if (cp_pair != 0) { - cp = njs_string_surrogate_pair(cp_pair, cp); + if (nxt_fast_path(cp >= 0xdc00 && cp <= 0xdfff)) { + cp = njs_string_surrogate_pair(cp_pair, cp); + + } else if (nxt_slow_path(cp >= 0xd800 && cp <= 0xdbff)) { + cp = NXT_UTF8_REPLACEMENT; + + dst = nxt_utf8_encode(dst, (uint32_t) cp); + + } else { + dst = nxt_utf8_encode(dst, NXT_UTF8_REPLACEMENT); + } + cp_pair = 0; } else if (cp >= 0xd800 && cp <= 0xdfff) { - cp_pair = cp; - continue; + if (cp <= 0xdbff && src[0] == '\\' && src[1] == 'u') { + cp_pair = cp; + continue; + } + + cp = NXT_UTF8_REPLACEMENT; } dst = nxt_utf8_encode(dst, (uint32_t) cp); @@ -1183,20 +1198,29 @@ njs_parser_escape_string_calc_length(njs_vm_t *vm, njs_parser_t *parser, } if (cp_pair != 0) { - if (nxt_slow_path(cp < 0xdc00 || cp > 0xdfff)) { - goto invalid_pair; + if (nxt_fast_path(cp >= 0xdc00 && cp <= 0xdfff)) { + cp = njs_string_surrogate_pair(cp_pair, cp); + + } else if (nxt_slow_path(cp >= 0xd800 && cp <= 0xdbff)) { + cp = NXT_UTF8_REPLACEMENT; + + size += nxt_utf8_size(cp); + length++; + + } else { + size += nxt_utf8_size(NXT_UTF8_REPLACEMENT); + length++; } - cp = njs_string_surrogate_pair(cp_pair, cp); cp_pair = 0; } else if (cp >= 0xd800 
&& cp <= 0xdfff) { - if (nxt_slow_path(cp > 0xdbff || src[0] != '\\' || src[1] != 'u')) { - goto invalid_pair; + if (cp <= 0xdbff && src[0] == '\\' && src[1] == 'u') { + cp_pair = cp; + continue; } - cp_pair = cp; - continue; + cp = NXT_UTF8_REPLACEMENT; } size += nxt_utf8_size(cp); @@ -1214,11 +1238,4 @@ invalid: njs_parser_text(parser)); return NJS_ERROR; - -invalid_pair: - - njs_parser_syntax_error(vm, parser, "Invalid surrogate pair \"%V\"", - njs_parser_text(parser)); - - return NJS_ERROR; } diff --git a/njs/test/njs_unit_test.c b/njs/test/njs_unit_test.c index 458dc8ff..921e98a3 100644 --- a/njs/test/njs_unit_test.c +++ b/njs/test/njs_unit_test.c @@ -4448,15 +4448,25 @@ static njs_unit_test_t njs_test[] = nxt_string("1") }, { nxt_string("'\\ud83d abc \\udc4d'"), - nxt_string("SyntaxError: Invalid surrogate pair " - "\"\\ud83d abc \\udc4d\" in 1") }, + nxt_string("� abc �") }, { nxt_string("'\\ud83d'"), - nxt_string("SyntaxError: Invalid surrogate pair \"\\ud83d\" in 1") }, + nxt_string("�") }, { nxt_string("'\\ud83d\\uabcd'"), - nxt_string("SyntaxError: Invalid surrogate pair " - "\"\\ud83d\\uabcd\" in 1") }, + nxt_string("�ꯍ") }, + + { nxt_string("'\\u{d800}\\u{dB00}'"), + nxt_string("��") }, + + { nxt_string("'\\u{d800}\\u{d7ff}'"), + nxt_string("�퟿") }, + + { nxt_string("'\\u{d800}['"), + nxt_string("�[") }, + + { nxt_string("'\\u{D800}\\u{'"), + nxt_string("SyntaxError: Invalid Unicode code point \"\\u{D800}\\u{\" in 1") }, { nxt_string("''.hasOwnProperty('length')"), nxt_string("true") }, diff --git a/nxt/nxt_utf8.h b/nxt/nxt_utf8.h index 8362b40a..dc77f02e 100644 --- a/nxt/nxt_utf8.h +++ b/nxt/nxt_utf8.h @@ -15,6 +15,8 @@ */ #define NXT_UTF8_SORT_INVALID 0x0EEE0EEE +#define NXT_UTF8_REPLACEMENT 0xFFFD + NXT_EXPORT u_char *nxt_utf8_encode(u_char *p, uint32_t u); NXT_EXPORT uint32_t nxt_utf8_decode(const u_char **start, const u_char *end);