summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFabrice Bellard <fabrice@bellard.org>2025-03-13 18:13:25 +0100
committerFabrice Bellard <fabrice@bellard.org>2025-03-13 18:13:25 +0100
commit1be68b3345953c94e8072d12ad116e14403b66a8 (patch)
tree66ff0c2171ebd7a530f25181b9e9134d361aa79d
parent9bd10d8eb9d02f8235de6ce89ca1d86d601e2675 (diff)
downloadquickjs-1be68b3345953c94e8072d12ad116e14403b66a8.tar.gz
quickjs-1be68b3345953c94e8072d12ad116e14403b66a8.zip
fixed CONFIG_ALL_UNICODE compilation
-rw-r--r--libunicode.c402
1 files changed, 201 insertions, 201 deletions
diff --git a/libunicode.c b/libunicode.c
index c80d2f3..d1bf1e9 100644
--- a/libunicode.c
+++ b/libunicode.c
@@ -537,6 +537,207 @@ int cr_invert(CharRange *cr)
return 0;
}
+#define CASE_U (1 << 0)
+#define CASE_L (1 << 1)
+#define CASE_F (1 << 2)
+
+/* use the case conversion table to generate range of characters.
+ CASE_U: set char if modified by uppercasing,
+ CASE_L: set char if modified by lowercasing,
+ CASE_F: set char if modified by case folding,
+ */
+static int unicode_case1(CharRange *cr, int case_mask)
+{
+#define MR(x) (1 << RUN_TYPE_ ## x)
+ const uint32_t tab_run_mask[3] = {
+ MR(U) | MR(UF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(UF_D20) |
+ MR(UF_D1_EXT) | MR(U_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
+
+ MR(L) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2),
+
+ MR(UF) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2) | MR(UF_D20) | MR(UF_D1_EXT) | MR(LF_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
+ };
+#undef MR
+ uint32_t mask, v, code, type, len, i, idx;
+
+ if (case_mask == 0)
+ return 0;
+ mask = 0;
+ for(i = 0; i < 3; i++) {
+ if ((case_mask >> i) & 1)
+ mask |= tab_run_mask[i];
+ }
+ for(idx = 0; idx < countof(case_conv_table1); idx++) {
+ v = case_conv_table1[idx];
+ type = (v >> (32 - 17 - 7 - 4)) & 0xf;
+ code = v >> (32 - 17);
+ len = (v >> (32 - 17 - 7)) & 0x7f;
+ if ((mask >> type) & 1) {
+ // printf("%d: type=%d %04x %04x\n", idx, type, code, code + len - 1);
+ switch(type) {
+ case RUN_TYPE_UL:
+ if ((case_mask & CASE_U) && (case_mask & (CASE_L | CASE_F)))
+ goto def_case;
+ code += ((case_mask & CASE_U) != 0);
+ for(i = 0; i < len; i += 2) {
+ if (cr_add_interval(cr, code + i, code + i + 1))
+ return -1;
+ }
+ break;
+ case RUN_TYPE_LSU:
+ if ((case_mask & CASE_U) && (case_mask & (CASE_L | CASE_F)))
+ goto def_case;
+ if (!(case_mask & CASE_U)) {
+ if (cr_add_interval(cr, code, code + 1))
+ return -1;
+ }
+ if (cr_add_interval(cr, code + 1, code + 2))
+ return -1;
+ if (case_mask & CASE_U) {
+ if (cr_add_interval(cr, code + 2, code + 3))
+ return -1;
+ }
+ break;
+ default:
+ def_case:
+ if (cr_add_interval(cr, code, code + len))
+ return -1;
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+static int point_cmp(const void *p1, const void *p2, void *arg)
+{
+ uint32_t v1 = *(uint32_t *)p1;
+ uint32_t v2 = *(uint32_t *)p2;
+ return (v1 > v2) - (v1 < v2);
+}
+
+static void cr_sort_and_remove_overlap(CharRange *cr)
+{
+ uint32_t start, end, start1, end1, i, j;
+
+ /* the resulting ranges are not necessarily sorted and may overlap */
+ rqsort(cr->points, cr->len / 2, sizeof(cr->points[0]) * 2, point_cmp, NULL);
+ j = 0;
+ for(i = 0; i < cr->len; ) {
+ start = cr->points[i];
+ end = cr->points[i + 1];
+ i += 2;
+ while (i < cr->len) {
+ start1 = cr->points[i];
+ end1 = cr->points[i + 1];
+ if (start1 > end) {
+ /* |------|
+ * |-------| */
+ break;
+ } else if (end1 <= end) {
+ /* |------|
+ * |--| */
+ i += 2;
+ } else {
+ /* |------|
+ * |-------| */
+ end = end1;
+ i += 2;
+ }
+ }
+ cr->points[j] = start;
+ cr->points[j + 1] = end;
+ j += 2;
+ }
+ cr->len = j;
+}
+
+/* canonicalize a character set using the JS regex case folding rules
+ (see lre_canonicalize()) */
+int cr_regexp_canonicalize(CharRange *cr, BOOL is_unicode)
+{
+ CharRange cr_inter, cr_mask, cr_result, cr_sub;
+ uint32_t v, code, len, i, idx, start, end, c, d_start, d_end, d;
+
+ cr_init(&cr_mask, cr->mem_opaque, cr->realloc_func);
+ cr_init(&cr_inter, cr->mem_opaque, cr->realloc_func);
+ cr_init(&cr_result, cr->mem_opaque, cr->realloc_func);
+ cr_init(&cr_sub, cr->mem_opaque, cr->realloc_func);
+
+ if (unicode_case1(&cr_mask, is_unicode ? CASE_F : CASE_U))
+ goto fail;
+ if (cr_op(&cr_inter, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
+ goto fail;
+
+ if (cr_invert(&cr_mask))
+ goto fail;
+ if (cr_op(&cr_sub, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
+ goto fail;
+
+ /* cr_inter = cr & cr_mask */
+ /* cr_sub = cr & ~cr_mask */
+
+ /* use the case conversion table to compute the result */
+ d_start = -1;
+ d_end = -1;
+ idx = 0;
+ v = case_conv_table1[idx];
+ code = v >> (32 - 17);
+ len = (v >> (32 - 17 - 7)) & 0x7f;
+ for(i = 0; i < cr_inter.len; i += 2) {
+ start = cr_inter.points[i];
+ end = cr_inter.points[i + 1];
+
+ for(c = start; c < end; c++) {
+ for(;;) {
+ if (c >= code && c < code + len)
+ break;
+ idx++;
+ assert(idx < countof(case_conv_table1));
+ v = case_conv_table1[idx];
+ code = v >> (32 - 17);
+ len = (v >> (32 - 17 - 7)) & 0x7f;
+ }
+ d = lre_case_folding_entry(c, idx, v, is_unicode);
+ /* try to merge with the current interval */
+ if (d_start == -1) {
+ d_start = d;
+ d_end = d + 1;
+ } else if (d_end == d) {
+ d_end++;
+ } else {
+ cr_add_interval(&cr_result, d_start, d_end);
+ d_start = d;
+ d_end = d + 1;
+ }
+ }
+ }
+ if (d_start != -1) {
+ if (cr_add_interval(&cr_result, d_start, d_end))
+ goto fail;
+ }
+
+ /* the resulting ranges are not necessarily sorted and may overlap */
+ cr_sort_and_remove_overlap(&cr_result);
+
+ /* or with the character not affected by the case folding */
+ cr->len = 0;
+ if (cr_op(cr, cr_result.points, cr_result.len, cr_sub.points, cr_sub.len, CR_OP_UNION))
+ goto fail;
+
+ cr_free(&cr_inter);
+ cr_free(&cr_mask);
+ cr_free(&cr_result);
+ cr_free(&cr_sub);
+ return 0;
+ fail:
+ cr_free(&cr_inter);
+ cr_free(&cr_mask);
+ cr_free(&cr_result);
+ cr_free(&cr_sub);
+ return -1;
+}
+
#ifdef CONFIG_ALL_UNICODE
BOOL lre_is_id_start(uint32_t c)
@@ -1296,207 +1497,6 @@ static int unicode_prop1(CharRange *cr, int prop_idx)
return 0;
}
-#define CASE_U (1 << 0)
-#define CASE_L (1 << 1)
-#define CASE_F (1 << 2)
-
-/* use the case conversion table to generate range of characters.
- CASE_U: set char if modified by uppercasing,
- CASE_L: set char if modified by lowercasing,
- CASE_F: set char if modified by case folding,
- */
-static int unicode_case1(CharRange *cr, int case_mask)
-{
-#define MR(x) (1 << RUN_TYPE_ ## x)
- const uint32_t tab_run_mask[3] = {
- MR(U) | MR(UF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(UF_D20) |
- MR(UF_D1_EXT) | MR(U_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
-
- MR(L) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2),
-
- MR(UF) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2) | MR(UF_D20) | MR(UF_D1_EXT) | MR(LF_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
- };
-#undef MR
- uint32_t mask, v, code, type, len, i, idx;
-
- if (case_mask == 0)
- return 0;
- mask = 0;
- for(i = 0; i < 3; i++) {
- if ((case_mask >> i) & 1)
- mask |= tab_run_mask[i];
- }
- for(idx = 0; idx < countof(case_conv_table1); idx++) {
- v = case_conv_table1[idx];
- type = (v >> (32 - 17 - 7 - 4)) & 0xf;
- code = v >> (32 - 17);
- len = (v >> (32 - 17 - 7)) & 0x7f;
- if ((mask >> type) & 1) {
- // printf("%d: type=%d %04x %04x\n", idx, type, code, code + len - 1);
- switch(type) {
- case RUN_TYPE_UL:
- if ((case_mask & CASE_U) && (case_mask & (CASE_L | CASE_F)))
- goto def_case;
- code += ((case_mask & CASE_U) != 0);
- for(i = 0; i < len; i += 2) {
- if (cr_add_interval(cr, code + i, code + i + 1))
- return -1;
- }
- break;
- case RUN_TYPE_LSU:
- if ((case_mask & CASE_U) && (case_mask & (CASE_L | CASE_F)))
- goto def_case;
- if (!(case_mask & CASE_U)) {
- if (cr_add_interval(cr, code, code + 1))
- return -1;
- }
- if (cr_add_interval(cr, code + 1, code + 2))
- return -1;
- if (case_mask & CASE_U) {
- if (cr_add_interval(cr, code + 2, code + 3))
- return -1;
- }
- break;
- default:
- def_case:
- if (cr_add_interval(cr, code, code + len))
- return -1;
- break;
- }
- }
- }
- return 0;
-}
-
-static int point_cmp(const void *p1, const void *p2, void *arg)
-{
- uint32_t v1 = *(uint32_t *)p1;
- uint32_t v2 = *(uint32_t *)p2;
- return (v1 > v2) - (v1 < v2);
-}
-
-static void cr_sort_and_remove_overlap(CharRange *cr)
-{
- uint32_t start, end, start1, end1, i, j;
-
- /* the resulting ranges are not necessarily sorted and may overlap */
- rqsort(cr->points, cr->len / 2, sizeof(cr->points[0]) * 2, point_cmp, NULL);
- j = 0;
- for(i = 0; i < cr->len; ) {
- start = cr->points[i];
- end = cr->points[i + 1];
- i += 2;
- while (i < cr->len) {
- start1 = cr->points[i];
- end1 = cr->points[i + 1];
- if (start1 > end) {
- /* |------|
- * |-------| */
- break;
- } else if (end1 <= end) {
- /* |------|
- * |--| */
- i += 2;
- } else {
- /* |------|
- * |-------| */
- end = end1;
- i += 2;
- }
- }
- cr->points[j] = start;
- cr->points[j + 1] = end;
- j += 2;
- }
- cr->len = j;
-}
-
-/* canonicalize a character set using the JS regex case folding rules
- (see lre_canonicalize()) */
-int cr_regexp_canonicalize(CharRange *cr, BOOL is_unicode)
-{
- CharRange cr_inter, cr_mask, cr_result, cr_sub;
- uint32_t v, code, len, i, idx, start, end, c, d_start, d_end, d;
-
- cr_init(&cr_mask, cr->mem_opaque, cr->realloc_func);
- cr_init(&cr_inter, cr->mem_opaque, cr->realloc_func);
- cr_init(&cr_result, cr->mem_opaque, cr->realloc_func);
- cr_init(&cr_sub, cr->mem_opaque, cr->realloc_func);
-
- if (unicode_case1(&cr_mask, is_unicode ? CASE_F : CASE_U))
- goto fail;
- if (cr_op(&cr_inter, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
- goto fail;
-
- if (cr_invert(&cr_mask))
- goto fail;
- if (cr_op(&cr_sub, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
- goto fail;
-
- /* cr_inter = cr & cr_mask */
- /* cr_sub = cr & ~cr_mask */
-
- /* use the case conversion table to compute the result */
- d_start = -1;
- d_end = -1;
- idx = 0;
- v = case_conv_table1[idx];
- code = v >> (32 - 17);
- len = (v >> (32 - 17 - 7)) & 0x7f;
- for(i = 0; i < cr_inter.len; i += 2) {
- start = cr_inter.points[i];
- end = cr_inter.points[i + 1];
-
- for(c = start; c < end; c++) {
- for(;;) {
- if (c >= code && c < code + len)
- break;
- idx++;
- assert(idx < countof(case_conv_table1));
- v = case_conv_table1[idx];
- code = v >> (32 - 17);
- len = (v >> (32 - 17 - 7)) & 0x7f;
- }
- d = lre_case_folding_entry(c, idx, v, is_unicode);
- /* try to merge with the current interval */
- if (d_start == -1) {
- d_start = d;
- d_end = d + 1;
- } else if (d_end == d) {
- d_end++;
- } else {
- cr_add_interval(&cr_result, d_start, d_end);
- d_start = d;
- d_end = d + 1;
- }
- }
- }
- if (d_start != -1) {
- if (cr_add_interval(&cr_result, d_start, d_end))
- goto fail;
- }
-
- /* the resulting ranges are not necessarily sorted and may overlap */
- cr_sort_and_remove_overlap(&cr_result);
-
- /* or with the character not affected by the case folding */
- cr->len = 0;
- if (cr_op(cr, cr_result.points, cr_result.len, cr_sub.points, cr_sub.len, CR_OP_UNION))
- goto fail;
-
- cr_free(&cr_inter);
- cr_free(&cr_mask);
- cr_free(&cr_result);
- cr_free(&cr_sub);
- return 0;
- fail:
- cr_free(&cr_inter);
- cr_free(&cr_mask);
- cr_free(&cr_result);
- cr_free(&cr_sub);
- return -1;
-}
-
typedef enum {
POP_GC,
POP_PROP,