diff options
Diffstat (limited to 'src/backend/snowball/libstemmer/utilities.c')
-rw-r--r-- | src/backend/snowball/libstemmer/utilities.c | 43 |
1 file changed, 23 insertions, 20 deletions
diff --git a/src/backend/snowball/libstemmer/utilities.c b/src/backend/snowball/libstemmer/utilities.c index 8c89af1cd5c..920b8dbe6dd 100644 --- a/src/backend/snowball/libstemmer/utilities.c +++ b/src/backend/snowball/libstemmer/utilities.c @@ -1,7 +1,5 @@ #include "header.h" -#define unless(C) if(!(C)) - #define CREATE_SIZE 1 extern symbol * create_s(void) { @@ -10,7 +8,7 @@ extern symbol * create_s(void) { if (mem == NULL) return NULL; p = (symbol *) (HEAD + (char *) mem); CAPACITY(p) = CREATE_SIZE; - SET_SIZE(p, CREATE_SIZE); + SET_SIZE(p, 0); return p; } @@ -22,7 +20,7 @@ extern void lose_s(symbol * p) { /* new_p = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c if n +ve, or n characters backwards from p + c - 1 if n -ve. new_p is the new - position, or 0 on failure. + position, or -1 on failure. -- used to implement hop and next in the utf8 case. */ @@ -85,14 +83,14 @@ static int get_b_utf8(const symbol * p, int c, int lb, int * slot) { if (b1 >= 0xC0 || c == lb) { /* 1100 0000 */ * slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2; } - * slot = (p[c] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3; + * slot = (p[--c] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3; } extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; int w = get_utf8(z->p, z->c, z->l, & ch); - unless (w) return -1; + if (!w) return -1; if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return w; z->c += w; @@ -104,7 +102,7 @@ extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, do { int ch; int w = get_b_utf8(z->p, z->c, z->lb, & ch); - unless (w) return -1; + if (!w) return -1; if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return w; z->c -= w; @@ -116,8 +114,8 @@ extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, i do { int ch; int w = get_utf8(z->p, z->c, z->l, & ch); - unless (w) return 
-1; - unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + if (!w) return -1; + if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)) return w; z->c += w; } while (repeat); @@ -128,8 +126,8 @@ extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, do { int ch; int w = get_b_utf8(z->p, z->c, z->lb, & ch); - unless (w) return -1; - unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + if (!w) return -1; + if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)) return w; z->c -= w; } while (repeat); @@ -167,7 +165,7 @@ extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int int ch; if (z->c >= z->l) return -1; ch = z->p[z->c]; - unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)) return 1; z->c++; } while (repeat); @@ -179,7 +177,7 @@ extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, i int ch; if (z->c <= z->lb) return -1; ch = z->p[z->c - 1]; - unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)) return 1; z->c--; } while (repeat); @@ -366,7 +364,7 @@ extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const if (z->c > c_bra) z->c = c_bra; } - unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol)); + if (s_size) memmove(z->p + c_bra, s, s_size * sizeof(symbol)); if (adjptr != NULL) *adjptr = adjustment; return 0; @@ -412,12 +410,7 @@ extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbo } extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) { - int adjustment; - if (replace_s(z, bra, ket, SIZE(p), p, &adjustment)) - return -1; - if (bra <= z->bra) z->bra += adjustment; - if (bra <= 
z->ket) z->ket += adjustment; - return 0; + return insert_s(z, bra, ket, SIZE(p), p); } extern symbol * slice_to(struct SN_env * z, symbol * p) { @@ -450,6 +443,16 @@ extern symbol * assign_to(struct SN_env * z, symbol * p) { return p; } +extern int len_utf8(const symbol * p) { + int size = SIZE(p); + int len = 0; + while (size--) { + symbol b = *p++; + if (b >= 0xC0 || b < 0x80) ++len; + } + return len; +} + #if 0 extern void debug(struct SN_env * z, int number, int line_count) { int i; |