aboutsummaryrefslogtreecommitdiff
path: root/src/backend/snowball/libstemmer/utilities.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/snowball/libstemmer/utilities.c')
-rw-r--r--src/backend/snowball/libstemmer/utilities.c43
1 files changed, 23 insertions, 20 deletions
diff --git a/src/backend/snowball/libstemmer/utilities.c b/src/backend/snowball/libstemmer/utilities.c
index 8c89af1cd5c..920b8dbe6dd 100644
--- a/src/backend/snowball/libstemmer/utilities.c
+++ b/src/backend/snowball/libstemmer/utilities.c
@@ -1,7 +1,5 @@
#include "header.h"
-#define unless(C) if(!(C))
-
#define CREATE_SIZE 1
extern symbol * create_s(void) {
@@ -10,7 +8,7 @@ extern symbol * create_s(void) {
if (mem == NULL) return NULL;
p = (symbol *) (HEAD + (char *) mem);
CAPACITY(p) = CREATE_SIZE;
- SET_SIZE(p, CREATE_SIZE);
+ SET_SIZE(p, 0);
return p;
}
@@ -22,7 +20,7 @@ extern void lose_s(symbol * p) {
/*
new_p = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c
if n +ve, or n characters backwards from p + c - 1 if n -ve. new_p is the new
- position, or 0 on failure.
+ position, or -1 on failure.
-- used to implement hop and next in the utf8 case.
*/
@@ -85,14 +83,14 @@ static int get_b_utf8(const symbol * p, int c, int lb, int * slot) {
if (b1 >= 0xC0 || c == lb) { /* 1100 0000 */
* slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2;
}
- * slot = (p[c] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3;
+ * slot = (p[--c] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3;
}
extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
do {
int ch;
int w = get_utf8(z->p, z->c, z->l, & ch);
- unless (w) return -1;
+ if (!w) return -1;
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
return w;
z->c += w;
@@ -104,7 +102,7 @@ extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min,
do {
int ch;
int w = get_b_utf8(z->p, z->c, z->lb, & ch);
- unless (w) return -1;
+ if (!w) return -1;
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
return w;
z->c -= w;
@@ -116,8 +114,8 @@ extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, i
do {
int ch;
int w = get_utf8(z->p, z->c, z->l, & ch);
- unless (w) return -1;
- unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
+ if (!w) return -1;
+ if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
return w;
z->c += w;
} while (repeat);
@@ -128,8 +126,8 @@ extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min,
do {
int ch;
int w = get_b_utf8(z->p, z->c, z->lb, & ch);
- unless (w) return -1;
- unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
+ if (!w) return -1;
+ if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
return w;
z->c -= w;
} while (repeat);
@@ -167,7 +165,7 @@ extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int
int ch;
if (z->c >= z->l) return -1;
ch = z->p[z->c];
- unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
+ if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
return 1;
z->c++;
} while (repeat);
@@ -179,7 +177,7 @@ extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, i
int ch;
if (z->c <= z->lb) return -1;
ch = z->p[z->c - 1];
- unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
+ if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
return 1;
z->c--;
} while (repeat);
@@ -366,7 +364,7 @@ extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const
if (z->c > c_bra)
z->c = c_bra;
}
- unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+ if (s_size) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
if (adjptr != NULL)
*adjptr = adjustment;
return 0;
@@ -412,12 +410,7 @@ extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbo
}
extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) {
- int adjustment;
- if (replace_s(z, bra, ket, SIZE(p), p, &adjustment))
- return -1;
- if (bra <= z->bra) z->bra += adjustment;
- if (bra <= z->ket) z->ket += adjustment;
- return 0;
+ return insert_s(z, bra, ket, SIZE(p), p);
}
extern symbol * slice_to(struct SN_env * z, symbol * p) {
@@ -450,6 +443,16 @@ extern symbol * assign_to(struct SN_env * z, symbol * p) {
return p;
}
+extern int len_utf8(const symbol * p) {
+ int size = SIZE(p);
+ int len = 0;
+ while (size--) {
+ symbol b = *p++;
+ if (b >= 0xC0 || b < 0x80) ++len;
+ }
+ return len;
+}
+
#if 0
extern void debug(struct SN_env * z, int number, int line_count) {
int i;