git.kaiwu.me - quickjs.git/commitdiff
added missing 'Unknown' unicode Script
author Fabrice Bellard <fabrice@bellard.org>
Sat, 24 May 2025 13:36:08 +0000 (15:36 +0200)
committer Fabrice Bellard <fabrice@bellard.org>
Sat, 24 May 2025 13:36:08 +0000 (15:36 +0200)
libunicode-table.h
libunicode.c
unicode_gen.c

index 0120ea9f60c2005eab166e1fb337aa39e789f6b2..67df6b3a3ccded3ea9e111cbf9e760d94dbd6668 100644 (file)
@@ -3130,6 +3130,7 @@ typedef enum {
 } UnicodeScriptEnum;
 
 static const char unicode_script_name_table[] =
+    "Unknown,Zzzz"                "\0"
     "Adlam,Adlm"                  "\0"
     "Ahom,Ahom"                   "\0"
     "Anatolian_Hieroglyphs,Hluw"  "\0"
index b4a02062c0abf9099a352839aebbfad6d6216586..3791523d6a388bb10f90f844e0c389518275a01f 100644 (file)
@@ -1285,8 +1285,6 @@ int unicode_script(CharRange *cr,
     script_idx = unicode_find_name(unicode_script_name_table, script_name);
     if (script_idx < 0)
         return -2;
-    /* Note: we remove the "Unknown" Script */
-    script_idx += UNICODE_SCRIPT_Unknown + 1;
 
     is_common = (script_idx == UNICODE_SCRIPT_Common ||
                  script_idx == UNICODE_SCRIPT_Inherited);
@@ -1316,17 +1314,21 @@ int unicode_script(CharRange *cr,
             n |= *p++;
             n += 96 + (1 << 12);
         }
-        if (type == 0)
-            v = 0;
-        else
-            v = *p++;
         c1 = c + n + 1;
-        if (v == script_idx) {
-            if (cr_add_interval(cr1, c, c1))
-                goto fail;
+        if (type != 0) {
+            v = *p++;
+            if (v == script_idx || script_idx == UNICODE_SCRIPT_Unknown) {
+                if (cr_add_interval(cr1, c, c1))
+                    goto fail;
+            }
         }
         c = c1;
     }
+    if (script_idx == UNICODE_SCRIPT_Unknown) {
+        /* Unknown is all the characters outside scripts */
+        if (cr_invert(cr1))
+            goto fail;
+    }
 
     if (is_ext) {
         /* add the script extensions */
index 1b43538d28a169ace3c36c7bd91ec94825305dae..c793ba1e6935837fe9c282491eb4b1ad22a4bfda 100644 (file)
@@ -2087,10 +2087,9 @@ void build_script_table(FILE *f)
     fprintf(f, "    UNICODE_SCRIPT_COUNT,\n");
     fprintf(f, "} UnicodeScriptEnum;\n\n");
 
-    i = 1;
     dump_name_table(f, "unicode_script_name_table",
-                    unicode_script_name + i, SCRIPT_COUNT - i,
-                    unicode_script_short_name + i);
+                    unicode_script_name, SCRIPT_COUNT,
+                    unicode_script_short_name);
 
     dbuf_init(dbuf);
 #ifdef DUMP_TABLE_SIZE