Simplify code for getting a unicode codepoint's canonical class.

Three places in unicode_norm.c use similar logic to get the combining class of a codepoint. Commit 2991ac5 added the function get_canonical_class() for this purpose, but it was only called by the backend. This commit refactors the code to use this function in all the places where the combining class is retrieved from a given codepoint.

Author: John Naylor
Discussion: https://postgr.es/m/CAFBsxsHUV7s7YrOm6hFz-Jq8Sc7K_yxTkfNZxsDV-DuM-k-gwg@mail.gmail.com
author: Michael Paquier <michael@paquier.xyz> 2020-12-09 13:24:38 +0900
committer: Michael Paquier <michael@paquier.xyz> 2020-12-09 13:24:38 +0900
commit: 16c302f51235eaec05a1f85a11c1df04ef3a6785 (patch)
tree: 9da21fcb5fcff1363fb1d3d58838d6ce9c075e87
parent: df99ddc70b971a991c5111a33f2f08bd7945d5c2 (diff)
download: postgresql-16c302f51235eaec05a1f85a11c1df04ef3a6785.tar.gz
postgresql-16c302f51235eaec05a1f85a11c1df04ef3a6785.zip
1 files changed, 22 insertions, 25 deletions
diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c
index d46e33d322c..fd7bdef2928 100644
--- a/src/common/unicode_norm.c
+++ b/src/common/unicode_norm.c
@@ -105,6 +105,23 @@ get_code_entry(pg_wchar code)
 #endif
 }
 
+/*
+ * Get the combining class of the given codepoint.
+ */
+static uint8
+get_canonical_class(pg_wchar code)
+{
+	const pg_unicode_decomposition *entry = get_code_entry(code);
+
+	/*
+	 * If no entries are found, the character used is either an Hangul
+	 * character or a character with a class of 0 and no decompositions.
+	 */
+	if (!entry)
+		return 0;
+	else
+		return entry->comb_class;
+}
 
 /*
  * Given a decomposition entry looked up earlier, get the decomposed
@@ -430,16 +447,8 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
 		pg_wchar	prev = decomp_chars[count - 1];
 		pg_wchar	next = decomp_chars[count];
 		pg_wchar	tmp;
-		const pg_unicode_decomposition *prevEntry = get_code_entry(prev);
-		const pg_unicode_decomposition *nextEntry = get_code_entry(next);
-
-		/*
-		 * If no entries are found, the character used is either an Hangul
-		 * character or a character with a class of 0 and no decompositions,
-		 * so move to next result.
-		 */
-		if (prevEntry == NULL || nextEntry == NULL)
-			continue;
+		const uint8 prevClass = get_canonical_class(prev);
+		const uint8 nextClass = get_canonical_class(next);
 
 		/*
 		 * Per Unicode (https://www.unicode.org/reports/tr15/tr15-18.html)
@@ -449,10 +458,10 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
 		 * combining class for the second, and the second is not a starter.  A
 		 * character is a starter if its combining class is 0.
 		 */
-		if (nextEntry->comb_class == 0x0 || prevEntry->comb_class == 0x0)
+		if (prevClass == 0 || nextClass == 0)
 			continue;
 
-		if (prevEntry->comb_class <= nextEntry->comb_class)
+		if (prevClass <= nextClass)
 			continue;
 
 		/* exchange can happen */
@@ -489,8 +498,7 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
 	for (count = 1; count < decomp_size; count++)
 	{
 		pg_wchar	ch = decomp_chars[count];
-		const pg_unicode_decomposition *ch_entry = get_code_entry(ch);
-		int			ch_class = (ch_entry == NULL) ? 0 : ch_entry->comb_class;
+		int			ch_class = get_canonical_class(ch);
 		pg_wchar	composite;
 
 		if (last_class < ch_class &&
@@ -527,17 +535,6 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
 /* We only need this in the backend. */
 #ifndef FRONTEND
 
-static uint8
-get_canonical_class(pg_wchar ch)
-{
-	const pg_unicode_decomposition *entry = get_code_entry(ch);
-
-	if (!entry)
-		return 0;
-	else
-		return entry->comb_class;
-}
-
 static const pg_unicode_normprops *
 qc_hash_lookup(pg_wchar ch, const pg_unicode_norminfo *norminfo)
 {
author	Michael Paquier <michael@paquier.xyz>	2020-12-09 13:24:38 +0900
committer	Michael Paquier <michael@paquier.xyz>	2020-12-09 13:24:38 +0900
commit	16c302f51235eaec05a1f85a11c1df04ef3a6785 (patch)
tree	9da21fcb5fcff1363fb1d3d58838d6ce9c075e87
parent	df99ddc70b971a991c5111a33f2f08bd7945d5c2 (diff)
download	postgresql-16c302f51235eaec05a1f85a11c1df04ef3a6785.tar.gz postgresql-16c302f51235eaec05a1f85a11c1df04ef3a6785.zip