src/common/unicode/case_test.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195

/*-------------------------------------------------------------------------
 * case_test.c
 *		Program to test Unicode case mapping functions.
 *
 * Portions Copyright (c) 2017-2024, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/common/unicode/case_test.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres_fe.h"

#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wctype.h>

#ifdef USE_ICU
#include <unicode/uchar.h>
#endif
#include "common/unicode_case.h"
#include "common/unicode_category.h"
#include "common/unicode_version.h"

#ifdef USE_ICU

/*
 * Compare the simple lower/title/upper case mappings of one codepoint
 * between the Postgres functions and ICU.
 *
 * Returns normally if all three mappings agree.  On any disagreement, prints
 * both sets of results and terminates the program with exit status 1.
 */
static void
icu_test_simple(pg_wchar code)
{
	/* Postgres results first, then ICU results, in lower/title/upper order */
	pg_wchar	pg_lower = unicode_lowercase_simple(code);
	pg_wchar	pg_title = unicode_titlecase_simple(code);
	pg_wchar	pg_upper = unicode_uppercase_simple(code);
	pg_wchar	icu_lower = u_tolower(code);
	pg_wchar	icu_title = u_totitle(code);
	pg_wchar	icu_upper = u_toupper(code);

	/* Nothing to report when every mapping matches */
	if (pg_lower == icu_lower &&
		pg_title == icu_title &&
		pg_upper == icu_upper)
		return;

	printf("case_test: FAILURE for codepoint 0x%06x\n", code);
	printf("case_test: Postgres lower/title/upper:	0x%06x/0x%06x/0x%06x\n",
		   pg_lower, pg_title, pg_upper);
	printf("case_test: ICU lower/title/upper:		0x%06x/0x%06x/0x%06x\n",
		   icu_lower, icu_title, icu_upper);
	printf("\n");
	exit(1);
}

/*
 * Walk every codepoint from 0 through 0x10ffff and check its simple case
 * mappings against ICU via icu_test_simple().
 *
 * Codepoints that Postgres reports as unassigned are ignored.  Codepoints
 * that Postgres considers assigned but ICU reports as unassigned are counted
 * and skipped rather than compared.  A summary of the skipped and successful
 * counts is printed at the end.
 */
static void
test_icu(void)
{
	int			num_checked = 0;
	int			num_skipped = 0;
	pg_wchar	code;

	for (code = 0; code <= 0x10ffff; code++)
	{
		uint8_t		icu_category;

		/* Only codepoints assigned on the Postgres side are of interest */
		if (unicode_category(code) == PG_U_UNASSIGNED)
			continue;

		/* Assigned in Postgres but not in ICU: count it and move on */
		icu_category = u_charType(code);
		if (icu_category == PG_U_UNASSIGNED)
		{
			num_skipped++;
			continue;
		}

		/* Exits the program on a mismatch, so returning means success */
		icu_test_simple(code);
		num_checked++;
	}

	if (num_skipped > 0)
		printf("case_test: skipped %d codepoints unassigned in ICU due to Unicode version mismatch\n",
			   num_skipped);

	printf("case_test: ICU simple mapping test: %d codepoints successful\n",
		   num_checked);
}
#endif

/*
 * Check unicode_strlower() against a known-good result.
 *
 * test_string is the NUL-terminated input; expected is the NUL-terminated
 * output the conversion should produce.  The conversion is run four times,
 * covering each combination of:
 *
 *	- source given with an explicit byte length (buffer has no terminating
 *	  NUL) versus source given as a NUL-terminated string with length -1;
 *	- destination sized to exactly strlen(expected) bytes (no room for a
 *	  NUL) versus destination with one extra byte for the terminator.
 *
 * Every run must return strlen(expected).  The exact-size destination is
 * compared with memcmp(); the larger destination is compared with strcmp(),
 * so it must come back NUL-terminated.  Destinations are pre-filled with
 * 0x7F before each run so stale bytes cannot satisfy a comparison.
 *
 * On any mismatch, or if memory cannot be allocated, a message is printed
 * and the program exits with status 1.
 */
static void
test_strlower(const char *test_string, const char *expected)
{
	size_t		expected_len = strlen(expected);
	size_t		src1len = strlen(test_string);
	size_t		src2len = -1;	/* NUL-terminated */
	size_t		dst1len = expected_len;
	size_t		dst2len = expected_len + 1; /* NUL-terminated */
	char	   *src1 = malloc(src1len);
	char	   *dst1 = malloc(dst1len);
	char	   *src2 = strdup(test_string);
	char	   *dst2 = malloc(dst2len);
	size_t		needed;

	/*
	 * Fail cleanly instead of dereferencing NULL below.  A zero-byte malloc()
	 * is permitted to return NULL, so that is only treated as an error for
	 * the buffers whose requested size is nonzero.
	 */
	if ((src1 == NULL && src1len > 0) ||
		(dst1 == NULL && dst1len > 0) ||
		src2 == NULL ||
		dst2 == NULL)
	{
		printf("case_test: out of memory\n");
		exit(1);
	}

	memcpy(src1, test_string, src1len); /* not NUL-terminated */

	/* neither source nor destination are NUL-terminated */
	memset(dst1, 0x7F, dst1len);
	needed = unicode_strlower(dst1, dst1len, src1, src1len);
	if (needed != expected_len)
	{
		printf("case_test: convert_case test1 FAILURE: needed %zu\n", needed);
		exit(1);
	}
	if (memcmp(dst1, expected, dst1len) != 0)
	{
		printf("case_test: convert_case test1 FAILURE: test: '%s' result: '%.*s' expected: '%s'\n",
			   test_string, (int) dst1len, dst1, expected);
		exit(1);
	}

	/* destination is NUL-terminated and source is not */
	memset(dst2, 0x7F, dst2len);
	needed = unicode_strlower(dst2, dst2len, src1, src1len);
	if (needed != expected_len)
	{
		printf("case_test: convert_case test2 FAILURE: needed %zu\n", needed);
		exit(1);
	}
	if (strcmp(dst2, expected) != 0)
	{
		printf("case_test: convert_case test2 FAILURE: test: '%s' result: '%s' expected: '%s'\n",
			   test_string, dst2, expected);
		exit(1);
	}

	/* source is NUL-terminated and destination is not */
	memset(dst1, 0x7F, dst1len);
	needed = unicode_strlower(dst1, dst1len, src2, src2len);
	if (needed != expected_len)
	{
		printf("case_test: convert_case test3 FAILURE: needed %zu\n", needed);
		exit(1);
	}
	if (memcmp(dst1, expected, dst1len) != 0)
	{
		printf("case_test: convert_case test3 FAILURE: test: '%s' result: '%.*s' expected: '%s'\n",
			   test_string, (int) dst1len, dst1, expected);
		exit(1);
	}

	/* both source and destination are NUL-terminated */
	memset(dst2, 0x7F, dst2len);
	needed = unicode_strlower(dst2, dst2len, src2, src2len);
	if (needed != expected_len)
	{
		printf("case_test: convert_case test4 FAILURE: needed %zu\n", needed);
		exit(1);
	}
	if (strcmp(dst2, expected) != 0)
	{
		printf("case_test: convert_case test4 FAILURE: test: '%s' result: '%s' expected: '%s'\n",
			   test_string, dst2, expected);
		exit(1);
	}

	free(src1);
	free(dst1);
	free(src2);
	free(dst2);
}

/*
 * Run the string lowercasing checks over a small set of inputs.
 *
 * Each case is handed to test_strlower(), which exits the program on
 * failure, so reaching the final printf means every case passed.
 */
static void
test_convert_case(void)
{
	/* test string with no case changes */
	test_strlower("√∞", "√∞");
	/* test string with case changes */
	test_strlower("ABC", "abc");
	/* test string with case changes and byte length changes */
	test_strlower("ȺȺȺ", "ⱥⱥⱥ");

	printf("case_test: convert_case: success\n");
}

/*
 * Test driver: print the Unicode versions in use, run the exhaustive ICU
 * comparison when built with USE_ICU, then run the string conversion tests.
 *
 * The individual tests exit with status 1 on failure, so a normal return
 * from here (status 0) means everything passed.  Command-line arguments are
 * not used.
 */
int
main(int argc, char **argv)
{
	printf("case_test: Postgres Unicode version:\t%s\n", PG_UNICODE_VERSION);

#ifdef USE_ICU
	/* Compare every codepoint's simple mappings with ICU's */
	printf("case_test: ICU Unicode version:\t\t%s\n", U_UNICODE_VERSION);
	test_icu();
#else
	/* No ICU in this build, so there is no reference to compare against */
	printf("case_test: ICU not available; skipping\n");
#endif

	/* These checks do not depend on ICU */
	test_convert_case();

	return 0;
}