aboutsummaryrefslogtreecommitdiff
path: root/src/backend/snowball/libstemmer/stem_UTF_8_turkish.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/snowball/libstemmer/stem_UTF_8_turkish.c')
-rw-r--r--src/backend/snowball/libstemmer/stem_UTF_8_turkish.c243
1 files changed, 123 insertions, 120 deletions
diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_turkish.c b/src/backend/snowball/libstemmer/stem_UTF_8_turkish.c
index 3d040327877..efb1b30604b 100644
--- a/src/backend/snowball/libstemmer/stem_UTF_8_turkish.c
+++ b/src/backend/snowball/libstemmer/stem_UTF_8_turkish.c
@@ -12,6 +12,7 @@ extern int turkish_UTF_8_stem(struct SN_env * z);
static int r_stem_suffix_chain_before_ki(struct SN_env * z);
static int r_stem_noun_suffixes(struct SN_env * z);
static int r_stem_nominal_verb_suffixes(struct SN_env * z);
+static int r_remove_proper_noun_suffix(struct SN_env * z);
static int r_postlude(struct SN_env * z);
static int r_post_process_last_consonants(struct SN_env * z);
static int r_more_than_one_syllable_word(struct SN_env * z);
@@ -458,49 +459,58 @@ static const symbol s_17[] = { 's', 'o', 'y' };
static int r_check_vowel_harmony(struct SN_env * z) {
{ int m_test1 = z->l - z->c;
+
if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) return 0;
{ int m2 = z->l - z->c; (void)m2;
if (z->c <= z->lb || z->p[z->c - 1] != 'a') goto lab1;
z->c--;
+
if (out_grouping_b_U(z, g_vowel1, 97, 305, 1) < 0) goto lab1;
goto lab0;
lab1:
z->c = z->l - m2;
if (z->c <= z->lb || z->p[z->c - 1] != 'e') goto lab2;
z->c--;
+
if (out_grouping_b_U(z, g_vowel2, 101, 252, 1) < 0) goto lab2;
goto lab0;
lab2:
z->c = z->l - m2;
if (!(eq_s_b(z, 2, s_0))) goto lab3;
+
if (out_grouping_b_U(z, g_vowel3, 97, 305, 1) < 0) goto lab3;
goto lab0;
lab3:
z->c = z->l - m2;
if (z->c <= z->lb || z->p[z->c - 1] != 'i') goto lab4;
z->c--;
+
if (out_grouping_b_U(z, g_vowel4, 101, 105, 1) < 0) goto lab4;
goto lab0;
lab4:
z->c = z->l - m2;
if (z->c <= z->lb || z->p[z->c - 1] != 'o') goto lab5;
z->c--;
+
if (out_grouping_b_U(z, g_vowel5, 111, 117, 1) < 0) goto lab5;
goto lab0;
lab5:
z->c = z->l - m2;
if (!(eq_s_b(z, 2, s_1))) goto lab6;
+
if (out_grouping_b_U(z, g_vowel6, 246, 252, 1) < 0) goto lab6;
goto lab0;
lab6:
z->c = z->l - m2;
if (z->c <= z->lb || z->p[z->c - 1] != 'u') goto lab7;
z->c--;
+
if (out_grouping_b_U(z, g_vowel5, 111, 117, 1) < 0) goto lab7;
goto lab0;
lab7:
z->c = z->l - m2;
if (!(eq_s_b(z, 2, s_2))) return 0;
+
if (out_grouping_b_U(z, g_vowel6, 246, 252, 1) < 0) return 0;
}
lab0:
@@ -645,7 +655,7 @@ lab0:
static int r_mark_possessives(struct SN_env * z) {
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((67133440 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0;
- if (!(find_among_b(z, a_0, 10))) return 0;
+ if (!find_among_b(z, a_0, 10)) return 0;
{ int ret = r_mark_suffix_with_optional_U_vowel(z);
if (ret <= 0) return ret;
}
@@ -665,7 +675,7 @@ static int r_mark_sU(struct SN_env * z) {
static int r_mark_lArI(struct SN_env * z) {
if (z->c - 3 <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 177)) return 0;
- if (!(find_among_b(z, a_1, 2))) return 0;
+ if (!find_among_b(z, a_1, 2)) return 0;
return 1;
}
@@ -684,7 +694,7 @@ static int r_mark_nU(struct SN_env * z) {
{ int ret = r_check_vowel_harmony(z);
if (ret <= 0) return ret;
}
- if (!(find_among_b(z, a_2, 4))) return 0;
+ if (!find_among_b(z, a_2, 4)) return 0;
return 1;
}
@@ -693,7 +703,7 @@ static int r_mark_nUn(struct SN_env * z) {
if (ret <= 0) return ret;
}
if (z->c - 1 <= z->lb || z->p[z->c - 1] != 110) return 0;
- if (!(find_among_b(z, a_3, 4))) return 0;
+ if (!find_among_b(z, a_3, 4)) return 0;
{ int ret = r_mark_suffix_with_optional_n_consonant(z);
if (ret <= 0) return ret;
}
@@ -705,7 +715,7 @@ static int r_mark_yA(struct SN_env * z) {
if (ret <= 0) return ret;
}
if (z->c <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0;
- if (!(find_among_b(z, a_4, 2))) return 0;
+ if (!find_among_b(z, a_4, 2)) return 0;
{ int ret = r_mark_suffix_with_optional_y_consonant(z);
if (ret <= 0) return ret;
}
@@ -717,7 +727,7 @@ static int r_mark_nA(struct SN_env * z) {
if (ret <= 0) return ret;
}
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0;
- if (!(find_among_b(z, a_5, 2))) return 0;
+ if (!find_among_b(z, a_5, 2)) return 0;
return 1;
}
@@ -726,7 +736,7 @@ static int r_mark_DA(struct SN_env * z) {
if (ret <= 0) return ret;
}
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0;
- if (!(find_among_b(z, a_6, 4))) return 0;
+ if (!find_among_b(z, a_6, 4)) return 0;
return 1;
}
@@ -735,7 +745,7 @@ static int r_mark_ndA(struct SN_env * z) {
if (ret <= 0) return ret;
}
if (z->c - 2 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0;
- if (!(find_among_b(z, a_7, 2))) return 0;
+ if (!find_among_b(z, a_7, 2)) return 0;
return 1;
}
@@ -744,7 +754,7 @@ static int r_mark_DAn(struct SN_env * z) {
if (ret <= 0) return ret;
}
if (z->c - 2 <= z->lb || z->p[z->c - 1] != 110) return 0;
- if (!(find_among_b(z, a_8, 4))) return 0;
+ if (!find_among_b(z, a_8, 4)) return 0;
return 1;
}
@@ -753,7 +763,7 @@ static int r_mark_ndAn(struct SN_env * z) {
if (ret <= 0) return ret;
}
if (z->c - 3 <= z->lb || z->p[z->c - 1] != 110) return 0;
- if (!(find_among_b(z, a_9, 2))) return 0;
+ if (!find_among_b(z, a_9, 2)) return 0;
return 1;
}
@@ -762,7 +772,7 @@ static int r_mark_ylA(struct SN_env * z) {
if (ret <= 0) return ret;
}
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0;
- if (!(find_among_b(z, a_10, 2))) return 0;
+ if (!find_among_b(z, a_10, 2)) return 0;
{ int ret = r_mark_suffix_with_optional_y_consonant(z);
if (ret <= 0) return ret;
}
@@ -779,7 +789,7 @@ static int r_mark_ncA(struct SN_env * z) {
if (ret <= 0) return ret;
}
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0;
- if (!(find_among_b(z, a_11, 2))) return 0;
+ if (!find_among_b(z, a_11, 2)) return 0;
{ int ret = r_mark_suffix_with_optional_n_consonant(z);
if (ret <= 0) return ret;
}
@@ -791,7 +801,7 @@ static int r_mark_yUm(struct SN_env * z) {
if (ret <= 0) return ret;
}
if (z->c - 1 <= z->lb || z->p[z->c - 1] != 109) return 0;
- if (!(find_among_b(z, a_12, 4))) return 0;
+ if (!find_among_b(z, a_12, 4)) return 0;
{ int ret = r_mark_suffix_with_optional_y_consonant(z);
if (ret <= 0) return ret;
}
@@ -803,7 +813,7 @@ static int r_mark_sUn(struct SN_env * z) {
if (ret <= 0) return ret;
}
if (z->c - 2 <= z->lb || z->p[z->c - 1] != 110) return 0;
- if (!(find_among_b(z, a_13, 4))) return 0;
+ if (!find_among_b(z, a_13, 4)) return 0;
return 1;
}
@@ -812,7 +822,7 @@ static int r_mark_yUz(struct SN_env * z) {
if (ret <= 0) return ret;
}
if (z->c - 1 <= z->lb || z->p[z->c - 1] != 122) return 0;
- if (!(find_among_b(z, a_14, 4))) return 0;
+ if (!find_among_b(z, a_14, 4)) return 0;
{ int ret = r_mark_suffix_with_optional_y_consonant(z);
if (ret <= 0) return ret;
}
@@ -821,7 +831,7 @@ static int r_mark_yUz(struct SN_env * z) {
static int r_mark_sUnUz(struct SN_env * z) {
if (z->c - 4 <= z->lb || z->p[z->c - 1] != 122) return 0;
- if (!(find_among_b(z, a_15, 4))) return 0;
+ if (!find_among_b(z, a_15, 4)) return 0;
return 1;
}
@@ -830,7 +840,7 @@ static int r_mark_lAr(struct SN_env * z) {
if (ret <= 0) return ret;
}
if (z->c - 2 <= z->lb || z->p[z->c - 1] != 114) return 0;
- if (!(find_among_b(z, a_16, 2))) return 0;
+ if (!find_among_b(z, a_16, 2)) return 0;
return 1;
}
@@ -839,7 +849,7 @@ static int r_mark_nUz(struct SN_env * z) {
if (ret <= 0) return ret;
}
if (z->c - 2 <= z->lb || z->p[z->c - 1] != 122) return 0;
- if (!(find_among_b(z, a_17, 4))) return 0;
+ if (!find_among_b(z, a_17, 4)) return 0;
return 1;
}
@@ -848,13 +858,13 @@ static int r_mark_DUr(struct SN_env * z) {
if (ret <= 0) return ret;
}
if (z->c - 2 <= z->lb || z->p[z->c - 1] != 114) return 0;
- if (!(find_among_b(z, a_18, 8))) return 0;
+ if (!find_among_b(z, a_18, 8)) return 0;
return 1;
}
static int r_mark_cAsInA(struct SN_env * z) {
if (z->c - 5 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0;
- if (!(find_among_b(z, a_19, 2))) return 0;
+ if (!find_among_b(z, a_19, 2)) return 0;
return 1;
}
@@ -862,7 +872,7 @@ static int r_mark_yDU(struct SN_env * z) {
{ int ret = r_check_vowel_harmony(z);
if (ret <= 0) return ret;
}
- if (!(find_among_b(z, a_20, 32))) return 0;
+ if (!find_among_b(z, a_20, 32)) return 0;
{ int ret = r_mark_suffix_with_optional_y_consonant(z);
if (ret <= 0) return ret;
}
@@ -871,7 +881,7 @@ static int r_mark_yDU(struct SN_env * z) {
static int r_mark_ysA(struct SN_env * z) {
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((26658 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0;
- if (!(find_among_b(z, a_21, 8))) return 0;
+ if (!find_among_b(z, a_21, 8)) return 0;
{ int ret = r_mark_suffix_with_optional_y_consonant(z);
if (ret <= 0) return ret;
}
@@ -883,7 +893,7 @@ static int r_mark_ymUs_(struct SN_env * z) {
if (ret <= 0) return ret;
}
if (z->c - 3 <= z->lb || z->p[z->c - 1] != 159) return 0;
- if (!(find_among_b(z, a_22, 4))) return 0;
+ if (!find_among_b(z, a_22, 4)) return 0;
{ int ret = r_mark_suffix_with_optional_y_consonant(z);
if (ret <= 0) return ret;
}
@@ -1865,7 +1875,7 @@ static int r_post_process_last_consonants(struct SN_env * z) {
int among_var;
z->ket = z->c;
among_var = find_among_b(z, a_23, 4);
- if (!(among_var)) return 0;
+ if (!among_var) return 0;
z->bra = z->c;
switch (among_var) {
case 1:
@@ -1893,108 +1903,77 @@ static int r_post_process_last_consonants(struct SN_env * z) {
}
static int r_append_U_to_stems_ending_with_d_or_g(struct SN_env * z) {
- { int m_test1 = z->l - z->c;
- { int m2 = z->l - z->c; (void)m2;
- if (z->c <= z->lb || z->p[z->c - 1] != 'd') goto lab1;
- z->c--;
- goto lab0;
- lab1:
- z->c = z->l - m2;
- if (z->c <= z->lb || z->p[z->c - 1] != 'g') return 0;
- z->c--;
- }
- lab0:
- z->c = z->l - m_test1;
+ z->ket = z->c;
+ z->bra = z->c;
+ { int m1 = z->l - z->c; (void)m1;
+ if (z->c <= z->lb || z->p[z->c - 1] != 'd') goto lab1;
+ z->c--;
+ goto lab0;
+ lab1:
+ z->c = z->l - m1;
+ if (z->c <= z->lb || z->p[z->c - 1] != 'g') return 0;
+ z->c--;
}
- { int m3 = z->l - z->c; (void)m3;
- { int m_test4 = z->l - z->c;
- if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) goto lab3;
- { int m5 = z->l - z->c; (void)m5;
- if (z->c <= z->lb || z->p[z->c - 1] != 'a') goto lab5;
- z->c--;
- goto lab4;
- lab5:
- z->c = z->l - m5;
- if (!(eq_s_b(z, 2, s_9))) goto lab3;
- }
- lab4:
- z->c = z->l - m_test4;
+lab0:
+
+ if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) return 0;
+ { int m2 = z->l - z->c; (void)m2;
+ { int m3 = z->l - z->c; (void)m3;
+ if (z->c <= z->lb || z->p[z->c - 1] != 'a') goto lab5;
+ z->c--;
+ goto lab4;
+ lab5:
+ z->c = z->l - m3;
+ if (!(eq_s_b(z, 2, s_9))) goto lab3;
}
- { int ret;
- { int saved_c = z->c;
- ret = insert_s(z, z->c, z->c, 2, s_10);
- z->c = saved_c;
- }
+ lab4:
+ { int ret = slice_from_s(z, 2, s_10);
if (ret < 0) return ret;
}
goto lab2;
lab3:
- z->c = z->l - m3;
- { int m_test6 = z->l - z->c;
- if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) goto lab6;
- { int m7 = z->l - z->c; (void)m7;
- if (z->c <= z->lb || z->p[z->c - 1] != 'e') goto lab8;
- z->c--;
- goto lab7;
- lab8:
- z->c = z->l - m7;
- if (z->c <= z->lb || z->p[z->c - 1] != 'i') goto lab6;
- z->c--;
- }
- lab7:
- z->c = z->l - m_test6;
+ z->c = z->l - m2;
+ { int m4 = z->l - z->c; (void)m4;
+ if (z->c <= z->lb || z->p[z->c - 1] != 'e') goto lab8;
+ z->c--;
+ goto lab7;
+ lab8:
+ z->c = z->l - m4;
+ if (z->c <= z->lb || z->p[z->c - 1] != 'i') goto lab6;
+ z->c--;
}
- { int ret;
- { int saved_c = z->c;
- ret = insert_s(z, z->c, z->c, 1, s_11);
- z->c = saved_c;
- }
+ lab7:
+ { int ret = slice_from_s(z, 1, s_11);
if (ret < 0) return ret;
}
goto lab2;
lab6:
- z->c = z->l - m3;
- { int m_test8 = z->l - z->c;
- if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) goto lab9;
- { int m9 = z->l - z->c; (void)m9;
- if (z->c <= z->lb || z->p[z->c - 1] != 'o') goto lab11;
- z->c--;
- goto lab10;
- lab11:
- z->c = z->l - m9;
- if (z->c <= z->lb || z->p[z->c - 1] != 'u') goto lab9;
- z->c--;
- }
- lab10:
- z->c = z->l - m_test8;
+ z->c = z->l - m2;
+ { int m5 = z->l - z->c; (void)m5;
+ if (z->c <= z->lb || z->p[z->c - 1] != 'o') goto lab11;
+ z->c--;
+ goto lab10;
+ lab11:
+ z->c = z->l - m5;
+ if (z->c <= z->lb || z->p[z->c - 1] != 'u') goto lab9;
+ z->c--;
}
- { int ret;
- { int saved_c = z->c;
- ret = insert_s(z, z->c, z->c, 1, s_12);
- z->c = saved_c;
- }
+ lab10:
+ { int ret = slice_from_s(z, 1, s_12);
if (ret < 0) return ret;
}
goto lab2;
lab9:
- z->c = z->l - m3;
- { int m_test10 = z->l - z->c;
- if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) return 0;
- { int m11 = z->l - z->c; (void)m11;
- if (!(eq_s_b(z, 2, s_13))) goto lab13;
- goto lab12;
- lab13:
- z->c = z->l - m11;
- if (!(eq_s_b(z, 2, s_14))) return 0;
- }
- lab12:
- z->c = z->l - m_test10;
+ z->c = z->l - m2;
+ { int m6 = z->l - z->c; (void)m6;
+ if (!(eq_s_b(z, 2, s_13))) goto lab13;
+ goto lab12;
+ lab13:
+ z->c = z->l - m6;
+ if (!(eq_s_b(z, 2, s_14))) return 0;
}
- { int ret;
- { int saved_c = z->c;
- ret = insert_s(z, z->c, z->c, 2, s_15);
- z->c = saved_c;
- }
+ lab12:
+ { int ret = slice_from_s(z, 2, s_15);
if (ret < 0) return ret;
}
}
@@ -2013,23 +1992,43 @@ static int r_is_reserved_word(struct SN_env * z) {
return 1;
}
+static int r_remove_proper_noun_suffix(struct SN_env * z) {
+ { int c1 = z->c;
+ while(1) {
+ int c2 = z->c;
+ if (z->c == z->l || z->p[z->c] != '\'') goto lab1;
+ z->c++;
+ z->c = c2;
+ break;
+ lab1:
+ z->c = c2;
+ { int ret = skip_utf8(z->p, z->c, z->l, 1);
+ if (ret < 0) goto lab0;
+ z->c = ret;
+ }
+ }
+ z->bra = z->c;
+ z->c = z->l;
+ z->ket = z->c;
+ { int ret = slice_del(z);
+ if (ret < 0) return ret;
+ }
+ lab0:
+ z->c = c1;
+ }
+ return 1;
+}
+
static int r_more_than_one_syllable_word(struct SN_env * z) {
{ int c_test1 = z->c;
- { int i = 2;
- while(1) {
- int c2 = z->c;
- {
+ { int i; for (i = 2; i > 0; i--)
+ {
+ {
int ret = out_grouping_U(z, g_vowel, 97, 305, 1);
- if (ret < 0) goto lab0;
+ if (ret < 0) return 0;
z->c += ret;
}
- i--;
- continue;
- lab0:
- z->c = c2;
- break;
}
- if (i > 0) return 0;
}
z->c = c_test1;
}
@@ -2065,6 +2064,10 @@ static int r_postlude(struct SN_env * z) {
}
extern int turkish_UTF_8_stem(struct SN_env * z) {
+
+ { int ret = r_remove_proper_noun_suffix(z);
+ if (ret < 0) return ret;
+ }
{ int ret = r_more_than_one_syllable_word(z);
if (ret <= 0) return ret;
}