From: Dmitry Volyntsev Date: Mon, 27 Jul 2020 14:34:35 +0000 (+0000) Subject: Improved readability of surrogate pairs handling. X-Git-Url: http://git.kaiwu.me/postgresql/log/contrib/postgres_fdw/static/gitweb.js?a=commitdiff_plain;h=87a4c7963121fe02112898cf3ff2a904ffff2169;p=njs.git Improved readability of surrogate pairs handling. --- diff --git a/src/njs_json.c b/src/njs_json.c index 975693b2..07767f91 100644 --- a/src/njs_json.c +++ b/src/njs_json.c @@ -738,7 +738,7 @@ njs_json_parse_string(njs_json_parse_ctx_t *ctx, njs_value_t *value, p += 4; if (njs_fast_path(njs_surrogate_trailing(utf_low))) { - utf = njs_string_surrogate_pair(utf, utf_low); + utf = njs_surrogate_pair(utf, utf_low); } else if (njs_surrogate_leading(utf_low)) { utf = NJS_UNICODE_REPLACEMENT; diff --git a/src/njs_parser.c b/src/njs_parser.c index 81443db2..f93f3043 100644 --- a/src/njs_parser.c +++ b/src/njs_parser.c @@ -8088,7 +8088,7 @@ njs_parser_escape_string_create(njs_parser_t *parser, njs_lexer_token_t *token, if (cp_pair != 0) { if (njs_fast_path(njs_surrogate_trailing(cp))) { - cp = njs_string_surrogate_pair(cp_pair, cp); + cp = njs_surrogate_pair(cp_pair, cp); } else if (njs_slow_path(njs_surrogate_leading(cp))) { cp = NJS_UNICODE_REPLACEMENT; @@ -8238,7 +8238,7 @@ njs_parser_escape_string_calc_length(njs_parser_t *parser, if (cp_pair != 0) { if (njs_fast_path(njs_surrogate_trailing(cp))) { - cp = njs_string_surrogate_pair(cp_pair, cp); + cp = njs_surrogate_pair(cp_pair, cp); } else if (njs_slow_path(njs_surrogate_leading(cp))) { cp = NJS_UNICODE_REPLACEMENT; diff --git a/src/njs_string.c b/src/njs_string.c index 6bd5d7f0..206d4884 100644 --- a/src/njs_string.c +++ b/src/njs_string.c @@ -4272,7 +4272,7 @@ njs_string_encode_uri(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, goto uri_error; } - cp = njs_string_surrogate_pair(cp, cp_low); + cp = njs_surrogate_pair(cp, cp_low); size += njs_utf8_size(cp) * 3; continue; } @@ -4312,7 +4312,7 @@ njs_string_encode_uri(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, if (njs_slow_path(njs_surrogate_leading(cp))) { cp_low = njs_utf8_decode(&ctx, &src, end); - cp = njs_string_surrogate_pair(cp, cp_low); + cp = njs_surrogate_pair(cp, cp_low); } njs_utf8_encode(encode, cp); diff --git a/src/njs_string.h b/src/njs_string.h index fbd07d15..3ec22c62 100644 --- a/src/njs_string.h +++ b/src/njs_string.h @@ -26,16 +26,6 @@ /* The maximum signed int32_t. */ #define NJS_STRING_MAX_LENGTH 0x7fffffff -#define njs_surrogate_leading(cp) ((cp) >= 0xd800 && (cp) <= 0xdbff) - -#define njs_surrogate_trailing(cp) ((cp) >= 0xdc00 && (cp) <= 0xdfff) - -#define njs_surrogate_any(cp) ((cp) >= 0xd800 && (cp) <= 0xdfff) - -/* Converting surrogate pair to code point. */ -#define njs_string_surrogate_pair(high, low) \ - (0x10000 + ((high - 0xd800) << 10) + (low - 0xdc00)) - /* * NJS_STRING_MAP_STRIDE should be power of two to use shift and binary * AND operations instead of division and remainder operations but no diff --git a/src/njs_unicode.h b/src/njs_unicode.h index a45ce682..2e0bcba7 100644 --- a/src/njs_unicode.h +++ b/src/njs_unicode.h @@ -23,5 +23,17 @@ typedef struct { u_char upper; } njs_unicode_decode_t; +#define njs_surrogate_leading(cp) \ + (((unsigned) (cp) - 0xd800) <= 0xdbff - 0xd800) + +#define njs_surrogate_trailing(cp) \ + (((unsigned) (cp) - 0xdc00) <= 0xdfff - 0xdc00) + +#define njs_surrogate_any(cp) \ + (((unsigned) (cp) - 0xd800) <= 0xdfff - 0xd800) + +#define njs_surrogate_pair(high, low) \ + (0x10000 + (((high) - 0xd800) << 10) + ((low) - 0xdc00)) + #endif /* _NJS_UNICODE_H_INCLUDED_ */ diff --git a/src/njs_utf16.c b/src/njs_utf16.c index 6626286a..b47fbf50 100644 --- a/src/njs_utf16.c +++ b/src/njs_utf16.c @@ -79,9 +79,8 @@ lead_state: #endif if (ctx->codepoint != 0x00) { - if ((unsigned) (unit - 0xDC00) <= (0xDFFF - 0xDC00)) { - unit = 0x10000 + ((ctx->codepoint - 0xD800) << 10) - + (unit - 0xDC00); + if (njs_surrogate_trailing(unit)) { + unit = njs_surrogate_pair(ctx->codepoint, unit); ctx->codepoint = 0x00; @@ -96,10 +95,8 @@ lead_state: return NJS_UNICODE_ERROR; } - /* Surrogate pair. */ - - if ((unsigned) (unit - 0xD800) <= (0xDFFF - 0xD800)) { - if ((unsigned) (unit - 0xDC00) <= (0xDFFF - 0xDC00)) { + if (njs_surrogate_any(unit)) { + if (njs_surrogate_trailing(unit)) { return NJS_UNICODE_ERROR; }