From: Dmitry Volyntsev
Date: Thu, 25 Jun 2026 00:28:02 +0000 (-0700)
Subject: QuickJS: add missing btoa() and atob() and improve njs version
X-Git-Url: http://git.kaiwu.me/postgresql/log/contrib/postgres_fdw/NGINX-js-1660x332.png%20%22NGINX%20JavaScript%20Banner%22?a=commitdiff_plain;p=njs.git

QuickJS: add missing btoa() and atob() and improve njs version

The functions implement the WHATWG btoa()/atob() global functions for
the QuickJS engine, mirroring the built-in njs engine.

Both engines decode forgiving-base64 in a single pass using a bit
accumulator, without allocating a scratch buffer for the
whitespace-stripped copy.

The shared behaviour is corrected and aligned with WHATWG:

- atob() strips all ASCII whitespace (TAB, LF, FF, CR, SPACE), not only
  SPACE; VT and NBSP are not ASCII whitespace and are rejected.
- btoa() error text says "(> U+00FF)" to match the cp > 0xff check.

The narrower njs-only btoa()/atob() unit tests are dropped in favor of
test/btoa.t.js, which exercises both engines.
--- diff --git a/src/njs_string.c b/src/njs_string.c index 09b515ba..125ec967 100644 --- a/src/njs_string.c +++ b/src/njs_string.c @@ -4016,7 +4016,7 @@ njs_string_btoa(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, error: - njs_type_error(vm, "invalid character (>= U+00FF)"); + njs_type_error(vm, "invalid character (> U+00FF)"); return NJS_ERROR; } @@ -4034,14 +4034,14 @@ njs_int_t njs_string_atob(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, njs_index_t unused, njs_value_t *retval) { - size_t i, n, len, pad; - u_char *dst, *tmp, *p; - ssize_t size; + u_char c, v; + uint32_t acc, bits; + size_t i, total, pad; njs_str_t str; njs_int_t ret; njs_chb_t chain; njs_value_t *value, lvalue; - const u_char *b64, *s; + const u_char *b64; value = njs_lvalue_arg(&lvalue, args, nargs, 1); @@ -4055,114 +4055,86 @@ njs_string_atob(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, b64 = njs_basis64; njs_string_get(vm, value, &str); - tmp = njs_mp_alloc(vm->mem_pool, str.length); - if (tmp == NULL) { + /* + * Each significant character contributes six bits; even when every + * decoded byte expands to two UTF-8 bytes, the output is bounded by the + * input length, so the chain needs no byte cap. 
+ */ + NJS_CHB_MP_INIT(&chain, njs_vm_memory_pool(vm)); + + if (njs_slow_path(njs_chb_reserve(&chain, str.length * 2) == NULL)) { njs_memory_error(vm); return NJS_ERROR; } - p = tmp; + acc = 0; + bits = 0; + total = 0; + pad = 0; for (i = 0; i < str.length; i++) { - if (njs_slow_path(str.start[i] == ' ')) { - continue; - } - - *p++ = str.start[i]; - } - - pad = 0; - str.start = tmp; - str.length = p - tmp; + c = str.start[i]; - if (str.length % 4 == 0) { - if (str.length > 0) { - if (str.start[str.length - 1] == '=') { - pad += 1; - } + switch (c) { + case ' ': + case '\t': + case '\n': + case '\f': + case '\r': + continue; - if (str.start[str.length - 2] == '=') { - pad += 1; - } + case '=': + pad++; + continue; } - } else if (str.length % 4 == 1) { - goto error; - } + if (njs_slow_path(pad > 0)) { + /* A significant character following the padding. */ + goto error; + } - for (i = 0; i < str.length - pad; i++) { - if (njs_slow_path(b64[str.start[i]] == 77)) { + v = b64[c]; + if (njs_slow_path(v == 77)) { goto error; } - } - len = str.length; + acc = (acc << 6) | v; + bits += 6; + total++; - if (len % 4 != 0) { - pad = 4 - (len % 4); - len += pad; + if (bits >= 8) { + bits -= 8; + njs_chb_write_byte_as_utf8(&chain, (acc >> bits) & 0xff); + } } - len = njs_base64_decoded_length(len, pad); - /* - * The chain holds a single reservation of at most twice the decoded - * length, which is bounded by the input string size, so it cannot - * grow unbounded and needs no byte cap. + * Padding may only complete the final quad: a remainder of one + * character is malformed and padding is allowed only when the cleaned + * input length is a multiple of four. 
*/ - NJS_CHB_MP_INIT(&chain, njs_vm_memory_pool(vm)); - - dst = njs_chb_reserve(&chain, len * 2); - if (njs_slow_path(dst == NULL)) { - njs_memory_error(vm); - return NJS_ERROR; - } - - n = len; - s = str.start; - - while (n >= 3) { - njs_chb_write_byte_as_utf8(&chain, b64[s[0]] << 2 | b64[s[1]] >> 4); - njs_chb_write_byte_as_utf8(&chain, b64[s[1]] << 4 | b64[s[2]] >> 2); - njs_chb_write_byte_as_utf8(&chain, b64[s[2]] << 6 | b64[s[3]]); - - s += 4; - n -= 3; - } - - if (n >= 1) { - njs_chb_write_byte_as_utf8(&chain, b64[s[0]] << 2 | b64[s[1]] >> 4); - } - - if (n >= 2) { - njs_chb_write_byte_as_utf8(&chain, b64[s[1]] << 4 | b64[s[2]] >> 2); - } - - size = njs_chb_size(&chain); - if (njs_slow_path(size < 0)) { - njs_memory_error(vm); - return NJS_ERROR; + if (njs_slow_path(pad > 2 + || (total + pad) % 4 == 1 + || (pad > 0 && (total + pad) % 4 != 0))) + { + goto error; } - if (size == 0) { + if (total == 0) { + njs_chb_destroy(&chain); njs_set_empty_string(vm, retval); return NJS_OK; } - dst = njs_string_alloc(vm, retval, size, len); - if (njs_slow_path(dst == NULL)) { - return NJS_ERROR; - } - - njs_chb_join_to(&chain, dst); + ret = njs_string_create_chb(vm, retval, &chain); njs_chb_destroy(&chain); - njs_mp_free(vm->mem_pool, tmp); - - return NJS_OK; + return ret; error: + njs_chb_destroy(&chain); + njs_type_error(vm, "the string to be decoded is not correctly encoded"); return NJS_ERROR; diff --git a/src/qjs.c b/src/qjs.c index 26cdabb4..4731e38c 100644 --- a/src/qjs.c +++ b/src/qjs.c @@ -48,6 +48,12 @@ typedef struct { extern char **environ; +static int qjs_add_intrinsic_btoa_atob(JSContext *cx, JSValueConst global); +static JSValue qjs_global_btoa(JSContext *ctx, JSValueConst this_val, int argc, + JSValueConst *argv); +static JSValue qjs_global_atob(JSContext *ctx, JSValueConst this_val, int argc, + JSValueConst *argv); + static int qjs_add_intrinsic_njs(JSContext *cx, JSValueConst global); static JSValue qjs_njs_on(JSContext *ctx, JSValueConst this_val, int 
argc, JSValueConst *argv); @@ -159,6 +165,69 @@ static JSClassDef qjs_text_decoder_class = { }; +static int +qjs_add_intrinsic_btoa_atob(JSContext *cx, JSValueConst global) +{ + JSValue func; + + func = JS_NewCFunction(cx, qjs_global_btoa, "btoa", 1); + if (JS_IsException(func)) { + return -1; + } + + if (JS_SetPropertyStr(cx, global, "btoa", func) < 0) { + return -1; + } + + func = JS_NewCFunction(cx, qjs_global_atob, "atob", 1); + if (JS_IsException(func)) { + return -1; + } + + return JS_SetPropertyStr(cx, global, "atob", func); +} + + +static JSValue +qjs_global_btoa(JSContext *cx, JSValueConst this_val, int argc, + JSValueConst *argv) +{ + JSValue ret; + njs_str_t str; + + str.start = (u_char *) JS_ToCStringLen(cx, &str.length, argv[0]); + if (str.start == NULL) { + return JS_EXCEPTION; + } + + ret = qjs_string_btoa(cx, &str); + + JS_FreeCString(cx, (char *) str.start); + + return ret; +} + + +static JSValue +qjs_global_atob(JSContext *cx, JSValueConst this_val, int argc, + JSValueConst *argv) +{ + JSValue ret; + njs_str_t str; + + str.start = (u_char *) JS_ToCStringLen(cx, &str.length, argv[0]); + if (str.start == NULL) { + return JS_EXCEPTION; + } + + ret = qjs_string_atob(cx, &str); + + JS_FreeCString(cx, (char *) str.start); + + return ret; +} + + JSContext * qjs_new_context(JSRuntime *rt, qjs_module_t **addons) { @@ -214,6 +283,10 @@ qjs_new_context(JSRuntime *rt, qjs_module_t **addons) return NULL; } + if (qjs_add_intrinsic_btoa_atob(ctx, global_obj) < 0) { + return NULL; + } + prop = JS_NewAtom(ctx, "eval"); if (prop == JS_ATOM_NULL) { return NULL; diff --git a/src/qjs.h b/src/qjs.h index 0d911a86..36536152 100644 --- a/src/qjs.h +++ b/src/qjs.h @@ -131,6 +131,8 @@ JSValue qjs_promise_result(JSContext *cx, JSValue result); JSValue qjs_string_hex(JSContext *cx, const njs_str_t *src); JSValue qjs_string_base64(JSContext *cx, const njs_str_t *src); JSValue qjs_string_base64url(JSContext *cx, const njs_str_t *src); +JSValue qjs_string_btoa(JSContext *cx, 
const njs_str_t *src); +JSValue qjs_string_atob(JSContext *cx, const njs_str_t *src); static inline JS_BOOL JS_IsNullOrUndefined(JSValueConst v) { diff --git a/src/qjs_buffer.c b/src/qjs_buffer.c index d7c8204c..b081d1e7 100644 --- a/src/qjs_buffer.c +++ b/src/qjs_buffer.c @@ -2313,6 +2313,155 @@ qjs_base64url_decode_length(JSContext *ctx, const njs_str_t *src) } +JSValue +qjs_string_btoa(JSContext *cx, const njs_str_t *src) +{ + u_char *bytes, *b; + uint32_t cp; + JSValue ret; + njs_str_t bin; + const u_char *p, *end; + njs_unicode_decode_t ctx; + + if (src->length == 0) { + return JS_NewStringLen(cx, "", 0); + } + + bytes = js_malloc(cx, src->length); + if (bytes == NULL) { + return JS_ThrowOutOfMemory(cx); + } + + p = src->start; + end = src->start + src->length; + b = bytes; + + njs_utf8_decode_init(&ctx); + + while (p < end) { + cp = njs_utf8_decode(&ctx, &p, end); + + if (cp > 0xff) { + js_free(cx, bytes); + return JS_ThrowTypeError(cx, "invalid character (> U+00FF)"); + } + + *b++ = (u_char) cp; + } + + bin.start = bytes; + bin.length = b - bytes; + + ret = qjs_string_base64(cx, &bin); + + js_free(cx, bytes); + + return ret; +} + + +njs_inline void +qjs_chb_write_byte_as_utf8(njs_chb_t *chain, u_char byte) +{ + njs_utf8_encode(njs_chb_current(chain), byte); + njs_chb_written(chain, njs_utf8_size(byte)); +} + + +JSValue +qjs_string_atob(JSContext *cx, const njs_str_t *src) +{ + u_char c, v; + uint32_t acc, bits; + size_t i, total, pad; + njs_chb_t chain; + const u_char *b64; + + /* Forgiving-base64 decode. */ + + b64 = qjs_basis64; + + /* + * Each significant character contributes six bits; even when every + * decoded byte expands to two UTF-8 bytes, the output is bounded by the + * input length, so the chain needs no byte cap. 
+ */ + NJS_CHB_CTX_INIT(&chain, cx); + + if (njs_chb_reserve(&chain, src->length * 2) == NULL) { + njs_chb_destroy(&chain); + return JS_ThrowOutOfMemory(cx); + } + + acc = 0; + bits = 0; + total = 0; + pad = 0; + + for (i = 0; i < src->length; i++) { + c = src->start[i]; + + switch (c) { + case ' ': + case '\t': + case '\n': + case '\f': + case '\r': + continue; + + case '=': + pad++; + continue; + } + + if (pad > 0) { + /* A significant character following the padding. */ + goto error; + } + + v = b64[c]; + if (v == 77) { + goto error; + } + + acc = (acc << 6) | v; + bits += 6; + total++; + + if (bits >= 8) { + bits -= 8; + qjs_chb_write_byte_as_utf8(&chain, (acc >> bits) & 0xff); + } + } + + /* + * Padding may only complete the final quad: a remainder of one + * character is malformed and padding is allowed only when the cleaned + * input length is a multiple of four. + */ + if (pad > 2 + || (total + pad) % 4 == 1 + || (pad > 0 && (total + pad) % 4 != 0)) + { + goto error; + } + + if (total == 0) { + njs_chb_destroy(&chain); + return JS_NewStringLen(cx, "", 0); + } + + return qjs_string_create_chb(cx, &chain); + +error: + + njs_chb_destroy(&chain); + + return JS_ThrowTypeError(cx, + "the string to be decoded is not correctly encoded"); +} + + njs_inline njs_int_t qjs_char_to_hex(u_char c) { diff --git a/src/test/njs_unit_test.c b/src/test/njs_unit_test.c index 0645e8b0..547eeef9 100644 --- a/src/test/njs_unit_test.c +++ b/src/test/njs_unit_test.c @@ -10715,79 +10715,6 @@ static njs_unit_test_t njs_test[] = ".every(v=>{var r = v(); return (typeof r === 'string') && r === 'undefined';})"), njs_str("true")}, - /* btoa() */ - - { njs_str("[" - " undefined," - " ''," - " '\\x00'," - " '\\x00\\x01'," - " '\\x00\\x01\\x02'," - " '\\x00\\xfe\\xff'," - " String.fromCodePoint(0x100)," - " String.fromCodePoint(0x00, 0x100)," - " String.fromCodePoint(0x00, 0x01, 0x100)," - "].map(v => { try { return btoa(v); } catch (e) { return '#'} })"), - 
njs_str("dW5kZWZpbmVk,,AA==,AAE=,AAEC,AP7/,#,#,#")}, - - /* atob() */ - - { njs_str("function c(s) {" - " let cp = [];" - " for (var i = 0; i < s.length; i++) {" - " cp.push(s.codePointAt(i));" - " }" - " return cp;" - "};" - "" - "[" - " undefined," - " ''," - " '='," - " '=='," - " '==='," - " '===='," - " 'AA@'," - " '@'," - " 'A==A'," - " btoa(String.fromCharCode.apply(null, [1]))," - " btoa(String.fromCharCode.apply(null, [1, 2]))," - " btoa(String.fromCharCode.apply(null, [1, 2, 255]))," - " btoa(String.fromCharCode.apply(null, [255, 1, 2, 3]))," - "].map(v => { try { return njs.dump(c(atob(v))); } catch (e) { return '#'} })"), - njs_str("#,[],#,#,#,#,#,#,#,[1],[1,2],[1,2,255],[255,1,2,3]")}, - - { njs_str("function c(s) {" - " let cp = [];" - " for (var i = 0; i < s.length; i++) {" - " cp.push(s.codePointAt(i));" - " }" - " return cp;" - "};" - "" - "[" - " 'CDRW'," - " ' CDRW'," - " 'C DRW'," - " 'CD RW'," - " 'CDR W'," - " 'CDRW '," - " ' C D R W '," - "].every(v => c(atob(v)).toString() == '8,52,86')"), - njs_str("true")}, - - { njs_str("atob('aGVsbG8=')"), - njs_str("hello") }, - - { njs_str("atob('aGVsbG8')"), - njs_str("hello") }, - - { njs_str("atob('TQ==')"), - njs_str("M") }, - - { njs_str("atob('TQ')"), - njs_str("M") }, - /* Functions. 
*/ { njs_str("return"), diff --git a/test/btoa.t.js b/test/btoa.t.js new file mode 100644 index 00000000..e19ded5a --- /dev/null +++ b/test/btoa.t.js @@ -0,0 +1,122 @@ + +/*--- +includes: [runTsuite.js, compareArray.js] +flags: [async] +---*/ + +function codePoints(s) { + let cp = []; + for (var i = 0; i < s.length; i++) { + cp.push(s.codePointAt(i)); + } + + return cp; +} + +let btoa_tsuite = { + name: "btoa() tests", + T: async (params) => { + let res = btoa(params.value); + + if (res !== params.expected) { + throw Error(`unexpected output "${res}" != "${params.expected}"`); + } + + return 'SUCCESS'; + }, + + tests: [ + { value: undefined, expected: "dW5kZWZpbmVk" }, + { value: "", expected: "" }, + { value: "hello", expected: "aGVsbG8=" }, + { value: "\x00", expected: "AA==" }, + { value: "\x00\x01", expected: "AAE=" }, + { value: "\x00\x01\x02", expected: "AAEC" }, + { value: "\x00\xfe\xff", expected: "AP7/" }, + { value: String.fromCodePoint(0x100), + exception: 'TypeError: invalid character (> U+00FF)' }, + { value: String.fromCodePoint(0x00, 0x100), + exception: 'TypeError: invalid character (> U+00FF)' }, + { value: String.fromCodePoint(0x00, 0x01, 0x100), + exception: 'TypeError: invalid character (> U+00FF)' }, + ], +}; + +let atob_tsuite = { + name: "atob() tests", + T: async (params) => { + let res = codePoints(atob(params.value)); + + if (!compareArray(res, params.expected)) { + throw Error(`unexpected output "${res}" != "${params.expected}"`); + } + + return 'SUCCESS'; + }, + + tests: [ + { value: "", expected: [] }, + { value: "AAE=", expected: [0, 1] }, + { value: "AAEC", expected: [0, 1, 2] }, + { value: "AP7/", expected: [0, 254, 255] }, + { value: "dW5kZWZpbmVk", expected: codePoints("undefined") }, + + /* Forgiving-base64 ignores missing padding. 
*/ + + { value: "aGVsbG8=", expected: codePoints("hello") }, + { value: "aGVsbG8", expected: codePoints("hello") }, + { value: "TQ==", expected: codePoints("M") }, + { value: "TQ", expected: codePoints("M") }, + + /* Forgiving-base64 ignores ASCII whitespace. */ + + { value: "CDRW", expected: [8, 52, 86] }, + { value: " CDRW", expected: [8, 52, 86] }, + { value: "C DRW", expected: [8, 52, 86] }, + { value: "CD RW", expected: [8, 52, 86] }, + { value: "CDR W", expected: [8, 52, 86] }, + { value: "CDRW ", expected: [8, 52, 86] }, + { value: " C D R W ", expected: [8, 52, 86] }, + { value: "\tCDRW", expected: [8, 52, 86] }, + { value: "CD\nRW", expected: [8, 52, 86] }, + { value: "CDRW\r", expected: [8, 52, 86] }, + { value: "CD\fRW", expected: [8, 52, 86] }, + { value: "\t\n\f\r CDRW \r\f\n\t", expected: [8, 52, 86] }, + { value: " ", expected: [] }, + { value: "\t\n\f\r ", expected: [] }, + + /* Invalid input. */ + + { value: undefined, + exception: 'TypeError: the string to be decoded is not correctly encoded' }, + { value: "=", + exception: 'TypeError: the string to be decoded is not correctly encoded' }, + { value: "==", + exception: 'TypeError: the string to be decoded is not correctly encoded' }, + { value: "===", + exception: 'TypeError: the string to be decoded is not correctly encoded' }, + { value: "====", + exception: 'TypeError: the string to be decoded is not correctly encoded' }, + { value: "AA@", + exception: 'TypeError: the string to be decoded is not correctly encoded' }, + { value: "@", + exception: 'TypeError: the string to be decoded is not correctly encoded' }, + { value: "A==A", + exception: 'TypeError: the string to be decoded is not correctly encoded' }, + + /* Only ASCII whitespace is stripped: VT and NBSP are not. 
*/ + + { value: "\vCDRW", + exception: 'TypeError: the string to be decoded is not correctly encoded' }, + { value: "CD\vRW", + exception: 'TypeError: the string to be decoded is not correctly encoded' }, + { value: "\xa0CDRW", + exception: 'TypeError: the string to be decoded is not correctly encoded' }, + ], +}; + +run([ + btoa_tsuite, + atob_tsuite, +]) +.then($DONE, $DONE);