aboutsummaryrefslogtreecommitdiff
path: root/contrib/tsearch/dict/porter_english.dct
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/tsearch/dict/porter_english.dct')
-rw-r--r--contrib/tsearch/dict/porter_english.dct1289
1 files changed, 0 insertions, 1289 deletions
diff --git a/contrib/tsearch/dict/porter_english.dct b/contrib/tsearch/dict/porter_english.dct
deleted file mode 100644
index 6c472298284..00000000000
--- a/contrib/tsearch/dict/porter_english.dct
+++ /dev/null
@@ -1,1289 +0,0 @@
-/*
- * ----START-LICENCE----
- * Copyright 1999,2000 BrightStation PLC
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- * -----END-LICENCE-----
- */
-/* Version 1: see http://open.muscat.com/ for further information */
-
-
-#ifdef DICT_BODY
-#include <ctype.h> /* tolower */
-
-static void * setup_english_stemmer(void);
-
-static const char * english_stem(void * z, const char * q, int i0, int i1);
-
-static void closedown_english_stemmer(void * z);
-
-
-/* To set up the english stemming process:
-
- void * z = setup_stemmer();
-
- to use it:
-
- char * p = stem(z, q, i0, i1);
-
- The word to be stemmed is in byte address q offsets i0 to i1
- inclusive (i.e. from q[i0] to q[i1]). The stemmed result is the
- C string at address p.
-
- To close down the stemming process:
-
- closedown_stemmer(z);
-
-*/
-
-/* The English stemming algorithm is essentially the Porter stemming
- * algorithm, and has been coded up by its author. It follows the algorithm
- * presented in
- *
- * Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
- * no. 3, pp 130-137,
- *
- * only differing from it at the points marked -DEPARTURE- and -NEW-
- * below.
- *
- * For a more faithful version of the Porter algorithm, see
- *
- * http://www.muscat.com/~martin/stem.html
- *
- */
-
-/* Later additions:
-
- June 2000
-
- The 'l' of the 'logi' -> 'log' rule is put with the stem, so that
- short stems like 'geo' 'theo' etc work like 'archaeo' 'philo' etc.
-
- This follows a suggestion of Barry Wilkins, reasearch student at
- Birmingham.
-
-
- February 2000
-
- the cvc test for not dropping final -e now looks after vc at the
- beginning of a word, so are, eve, ice, ore, use keep final -e. In this
- test c is any consonant, including w, x and y. This extension was
- suggested by Chris Emerson.
-
- -fully -> -ful treated like -fulness -> -ful, and
- -tionally -> -tion treated like -tional -> -tion
-
- both in Step 2. These were suggested by Hiranmay Ghosh, of New Delhi.
-
- Invariants proceed, succeed, exceed. Also suggested by Hiranmay Ghosh.
-
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-struct pool {
-
- int size;
- struct pool_entry * entries;
-
-};
-
-/* This is used as a library to resolve exceptions in the various
- stemming algorithms. Typical use is,
-
- struct pool * p = create_pool(t);
- char * s_translated = search_pool(p, strlen(s), s);
- ...
- free_pool(p);
-
- t is an array of strings, e.g.
-
- static char * t[] = {
-
- "sky", "sky/skies/",
- "die", "dying/",
- "lie", "lying/",
- "tie", "tying/",
- ....
- 0, 0
-
- };
-
- if s is "sky", "skies", "dying" etc., translated_s is becomes "sky",
- "sky", "die" etc.
-
- The code includes a sort/merge capability which may be turned into
- (or replaced by) something more general later on.
-
-*/
-
-/* merge(n, p, q, r, l, k, f) repeatedly merges n-byte sequences of items of
- size k from addresses p and q into r. f is the comparison routine and
- l is the limit point for q.
-*/
-
-static void merge(int n, char * p, char * q, char * r, char * l, int k,
- int (*f)(char *, char *))
-{ char * q0 = q;
- if (q0 > l) { memmove(r, p, l-p); return; }
- while (p < q0)
- { char * pl = n+p;
- char * ql = n+q;
- if (ql > l) ql = l;
- while(true)
- { if (p >= pl) { memmove(r, q, ql-q); r += ql-q; q = ql; break; }
- if (q >= ql) { memmove(r, p, pl-p); r += pl-p; p = pl; break; }
- if (f(p, q)) { memmove(r, p, k); p += k; }
- else { memmove(r, q, k); q += k; }
- r += k;
- }
- }
- memmove(r, q, l-q);
-}
-
-/* In sort(p, c, k, f), p+c is a byte address at which begin a sequence of
- items of size k to be sorted. p+l is the address of the byte after the
- last of these items, so l - c is divisible by k. f is a comparison function
- for a pair of these items: f(p+i, q+j) is true if the item at p+i is before
- the item at q+j, false if it is equal to or after it.
-*/
-
-static void sort(char * p, int c, int l, int k,
- int (*f)(char *, char *))
-{
- char * q = malloc(l-c); /* temporary work space */
- int j = k;
- int w = l-c;
- while (j < w)
- { int cycle;
- for (cycle = 1; cycle <= 2; cycle++)
- { int h = (w+j-1) / j / 2 * j; /* half way */
- if (cycle == 1) merge(j, p+c, p+c+h, q, p+l, k, f);
- else merge(j, q, q+h, p+c, q+w, k, f);
- j *= 2;
- }
- }
- free(q);
-}
-
-struct pool_entry {
-
- const char * translation;
- const char * pointer;
- int length;
-
-};
-
-static void print_entry(struct pool_entry * p)
- {
- { int j; for (j=0;j<p->length;j++) fprintf(stderr, "%c", (p->pointer)[j]); }
- fprintf(stderr, " --> %s\n", p->translation);
- }
-
-/* - debugging aid
- static void print_pool(struct pool * p)
- { int i;
- int size = p->size;
- struct pool_entry * q = p->entries;
- fprintf(stderr, "\nPool:\n");
- for (i = 0; i < size; i++) print_entry(q+i);
- }
-*/
-
-/* compare(p, q) is our comparison function, used for f above
-*/
-
-static int compare(char * char_p, char * char_q)
-{ struct pool_entry * p = (struct pool_entry *) char_p;
- struct pool_entry * q = (struct pool_entry *) char_q;
- if (p->length == q->length) return memcmp(p->pointer, q->pointer, p->length) < 0;
- return p->length < q->length;
-}
-
-static int count_slashes(const char * s[])
-{ int slash_count = 0;
- int i;
- for (i = 1; s[i] != 0; i += 2)
- { const char * p = s[i];
- int j = 0;
- while (p[j] != 0) if (p[j++] == '/') slash_count++;
- }
- return slash_count;
-}
-
-static struct pool * create_pool(const char * s[])
-{ int size = count_slashes(s);
- struct pool_entry * z = (struct pool_entry *) malloc(size * sizeof(struct pool_entry));
- struct pool_entry * q = z;
- int i;
- for (i = 1; s[i] != 0; i += 2)
- { const char * p = s[i];
- int j = 0;
- int j0 = 0;
- while(true)
- { if (p[j] == 0)
- { if (j0 != j) { fprintf(stderr, "%s lacks final '/'\n", p); exit(1); }
- break;
- }
- if (p[j] == '/')
- {
- q->translation = s[i-1];
- q->pointer = p+j0; q->length = j-j0;
- q++;
- j0 = j+1;
- }
- j++;
- }
- }
- sort((char *) z, 0, size * sizeof(struct pool_entry), sizeof(struct pool_entry), compare);
-
- /* now validate the contents */
-
- for (i = 1; i < size; i++)
- { struct pool_entry * p = z+i;
- struct pool_entry * q = z+i-1;
- if (p->length == q->length && memcmp(p->pointer, q->pointer, p->length) == 0)
- { fprintf(stderr, "warning: "); print_entry(p);
- fprintf(stderr, " and "); print_entry(q);
- }
- }
-
- { struct pool * p = (struct pool *) malloc(sizeof(struct pool));
- p->entries = z;
- p->size = size;
- return p;
- }
-}
-
-static int compare_to_pool(int length, const char * s, int length_p, const char * s_p)
-{ if (length != length_p) return length-length_p;
- return memcmp(s, s_p, length);
-}
-
-static const char * search_pool(struct pool * p, int length, char * s)
-{ int i = 0;
- int j = p->size;
- struct pool_entry * q = p->entries;
- if (j == 0) return 0; /* empty pool */
- if (compare_to_pool(length, s, q->length, q->pointer) < 0) return 0;
- while(true)
- {
- int h = (i+j)/2;
- int diff = compare_to_pool(length, s, (q+h)->length, (q+h)->pointer);
- if (diff == 0) return (q+h)->translation;
- if (j-i <= 1) return 0;
- if (diff < 0) j = h; else i = h;
- }
-}
-
-static void free_pool(struct pool * p)
-{ free(p->entries);
- free(p);
-}
-
-struct english_stemmer
-{
- char * p;
- int p_size;
- int k;
- int j;
- struct pool * irregulars;
-};
-
-/* The main part of the stemming algorithm starts here. z->p is a buffer
- holding a word to be stemmed. The letters are in z->p[0], z->p[1] ...
- ending at z->p[z->k]. z->k is readjusted downwards as the stemming
- progresses. Zero termination is not in fact used in the algorithm.
-
- Note that only lower case sequences are stemmed. Forcing to lower case
- should be done before english_stem(...) is called.
-
- We will write p, k etc in place of z->p, z->k in the comments.
-*/
-
-/* cons(z, i) is true <=> p[i] is a consonant.
-*/
-
-static int cons(struct english_stemmer * z, int i)
-{ switch (z->p[i])
- { case 'a': case 'e': case 'i': case 'o': case 'u':
- return false;
- case 'y':
- return (i==0) ? true : !cons(z, i - 1);
- default: return true;
- }
-}
-
-/* m(z) measures the number of consonant sequences between 0 and j. if c is
- a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
- presence,
-
- <c><v> gives 0
- <c>vc<v> gives 1
- <c>vcvc<v> gives 2
- <c>vcvcvc<v> gives 3
- ....
-*/
-
-static int m(struct english_stemmer * z)
-{ int n = 0;
- int i = 0;
- while(true)
- { if (i > z->j) return n;
- if (! cons(z, i)) break; i++;
- }
- i++;
- while(true)
- { while(true)
- { if (i > z->j) return n;
- if (cons(z, i)) break;
- i++;
- }
- i++;
- n++;
- while(true)
- { if (i > z->j) return n;
- if (! cons(z, i)) break;
- i++;
- }
- i++;
- }
-}
-
-/* vowelinstem(z) is true p[0], ... p[j] contains a vowel
-*/
-
-static int vowelinstem(struct english_stemmer * z)
-{ int i;
- for (i = 0; i <= z->j; i++) if (! cons(z, i)) return true;
- return false;
-}
-
-/* doublec(z, i) is true <=> p[i], p[i - 1] contain a double consonant.
-*/
-
-static int doublec(struct english_stemmer * z, int i)
-{ if (i < 1) return false;
- if (z->p[i] != z->p[i - 1]) return false;
- return cons(z, i);
-}
-
-/* cvc(z, i) is true <=>
-
- a) ( -NEW- ) i == 1, and p[0] p[1] is vowel consonant, or
-
- b) p[i - 2], p[i - 1], p[i] has the form consonant -
- vowel - consonant and also if the second c is not w, x or y. this is used
- when trying to restore an e at the end of a short word. e.g.
-
- cav(e), lov(e), hop(e), crim(e), but
- snow, box, tray.
-
-*/
-
-static int cvc(struct english_stemmer * z, int i)
-{
- if (i == 0) return false; /* i == 0 never happens perhaps */
-
- if (i == 1) return !cons(z, 0) && cons(z, 1);
-
- if (!cons(z, i) || cons(z, i - 1) || !cons(z, i - 2)) return false;
- { int ch = z->p[i];
- if (ch == 'w' || ch == 'x' || ch == 'y') return false;
- }
- return true;
-}
-
-/* ends(z, s, length) is true <=> p[0], ... p[k] ends with the string s.
-*/
-
-static int ends(struct english_stemmer * z, const char * s, int length)
-{
- if (length > z->k + 1) return false;
- if (memcmp(z->p + z->k - length + 1, s, length) != 0) return false;
- z->j = z->k - length;
- return true;
-}
-
-/* setto(z, s, length) sets p[j + 1] ... to the characters in the string s,
- readjusting k.
-*/
-
-static void setto(struct english_stemmer * z, const char * s, int length)
-{
- memmove(z->p + z->j + 1, s, length);
- z->k = z->j + length;
-}
-
-/* r(z, s, length) is used further down. */
-
-static void r(struct english_stemmer * z, const char * s, int length)
-{
- if (m(z) > 0) setto(z, s, length);
-}
-
-/* step_1ab(z) gets rid of plurals and -ed or -ing. e.g.
-
- caresses -> caress
- ponies -> poni
- sties -> sti
- tie -> tie (-NEW-: see below)
- caress -> caress
- cats -> cat
-
- feed -> feed
- agreed -> agree
- disabled -> disable
-
- matting -> mat
- mating -> mate
- meeting -> meet
- milling -> mill
- messing -> mess
-
- meetings -> meet
-
-*/
-
-static void step_1ab(struct english_stemmer * z)
-{ if (z->p[z->k] == 's')
- { if (ends(z, "sses", 4)) z->k -= 2; else
- if (ends(z, "ies", 3))
- if (z->j == 0) z->k--; else z->k -= 2;
-
- /* this line extends the original algorithm, so that 'flies'->'fli' but
- 'dies'->'die' etc */
-
- else
- if (z->p[z->k - 1] != 's') z->k--;
- }
-
- if (ends(z, "ied", 3)) { if (z->j == 0) z->k--; else z->k -= 2; } else
-
- /* this line extends the original algorithm, so that 'spied'->'spi' but
- 'died'->'die' etc */
-
- if (ends(z, "eed", 3)) { if (m(z) > 0) z->k--; } else
- if ((ends(z, "ed", 2) || ends(z, "ing", 3)) && vowelinstem(z))
- { z->k = z->j;
- if (ends(z, "at", 2)) setto(z, "ate", 3); else
- if (ends(z, "bl", 2)) setto(z, "ble", 3); else
- if (ends(z, "iz", 2)) setto(z, "ize", 3); else
- if (doublec(z, z->k))
- { z->k--;
- { int ch = z->p[z->k];
- if (ch == 'l' || ch == 's' || ch == 'z') z->k++;
- }
- }
- else if (m(z) == 1 && cvc(z, z->k)) setto(z, "e", 1);
- }
-}
-
-/* step_1c(z) turns terminal y to i when there is another vowel in the stem.
-
- -NEW-: This has been modified from the original Porter algorithm so that y->i
- is only done when y is preceded by a consonant, but not if the stem
- is only a single consonant, i.e.
-
- (*c and not c) Y -> I
-
- So 'happy' -> 'happi', but
- 'enjoy' -> 'enjoy' etc
-
- This is a much better rule. Formerly 'enjoy'->'enjoi' and 'enjoyment'->
- 'enjoy'. Step 1c is perhaps done too soon; but with this modification that
- no longer really matters.
-
- Also, the removal of the vowelinstem(z) condition means that 'spy', 'fly',
- 'try' ... stem to 'spi', 'fli', 'tri' and conflate with 'spied', 'tried',
- 'flies' ...
-
-*/
-
-static void step_1c(struct english_stemmer * z)
-{
- if (ends(z, "y", 1) && z->j > 0 && cons(z, z->k - 1)) z->p[z->k] = 'i';
-}
-
-
-/* step_2(z) maps double suffices to single ones. so -ization ( = -ize plus
- -ation) maps to -ize etc. Note that the string before the suffix must give
- m(z) > 0.
-*/
-
-static void step_2(struct english_stemmer * z)
-{ switch (z->p[z->k - 1])
- {
- case 'a':
- if (ends(z, "ational", 7)) { r(z, "ate", 3); break; }
- if (ends(z, "tional", 6)) { r(z, "tion", 4); break; }
- break;
- case 'c':
- if (ends(z, "enci", 4)) { r(z, "ence", 4); break; }
- if (ends(z, "anci", 4)) { r(z, "ance", 4); break; }
- break;
- case 'e':
- if (ends(z, "izer", 4)) { r(z, "ize", 3); break; }
- break;
- case 'l':
- if (ends(z, "bli", 3)) { r(z, "ble", 3); break; } /*-DEPARTURE-*/
-
- /* To match the published algorithm, replace this line with
- case 'l':
- if (ends(z, "abli", 4)) { r(z, "able", 4); break; }
- */
- if (ends(z, "alli", 4))
- {
- if (m(z) > 0) { setto(z, "al", 2); step_2(z); } /*-NEW-*/
- break;
- }
-
- if (ends(z, "fulli", 5)) { r(z, "ful", 3); break; } /*-NEW-*/
- if (ends(z, "entli", 5)) { r(z, "ent", 3); break; }
- if (ends(z, "eli", 3)) { r(z, "e", 1); break; }
- if (ends(z, "ousli", 5)) { r(z, "ous", 3); break; }
- break;
- case 'o':
- if (ends(z, "ization", 7)) { r(z, "ize", 3); break; }
- if (ends(z, "ation", 5)) { r(z, "ate", 3); break; }
- if (ends(z, "ator", 4)) { r(z, "ate", 3); break; }
- break;
- case 's':
- if (ends(z, "alism", 5)) { r(z, "al", 2); break; }
- if (ends(z, "iveness", 7)) { r(z, "ive", 3); break; }
- if (ends(z, "fulness", 7)) { r(z, "ful", 3); break; }
- if (ends(z, "ousness", 7)) { r(z, "ous", 3); break; }
- break;
- case 't':
- if (ends(z, "aliti", 5)) { r(z, "al", 2); break; }
- if (ends(z, "iviti", 5)) { r(z, "ive", 3); break; }
- if (ends(z, "biliti", 6)) { r(z, "ble", 3); break; }
- break;
- case 'g':
- if (ends(z, "logi", 4))
- { z->j++; /*-NEW-*/ /*(Barry Wilkins)*/
- r(z, "og", 2); break;
- } /*-DEPARTURE-*/
-
- /* To match the published algorithm, delete this line */
-
- }
-}
-
-/* step_3(z) deals with -ic-, -full, -ness etc. Similar strategy to step_2.
-*/
-
-static void step_3(struct english_stemmer * z)
-{ switch (z->p[z->k])
- {
- case 'e':
- if (ends(z, "icate", 5)) { r(z, "ic", 2); break; }
- if (ends(z, "ative", 5)) { r(z, "", 0); break; }
- if (ends(z, "alize", 5)) { r(z, "al", 2); break; }
- break;
- case 'i':
- if (ends(z, "iciti", 5)) { r(z, "ic", 2); break; }
- break;
- case 'l':
- if (ends(z, "ical", 4)) { r(z, "ic", 2); break; }
- if (ends(z, "ful", 3)) { r(z, "", 0); break; }
- break;
- case 's':
- if (ends(z, "ness", 4)) { r(z, "", 0); break; }
- break;
- }
-}
-
-/* step_4() takes off -ant, -ence etc., in context <c>vcvc<v>.
-*/
-
-static void step_4(struct english_stemmer * z)
-{ switch (z->p[z->k - 1])
- { case 'a':
- if (ends(z, "al", 2)) break; return;
- case 'c':
- if (ends(z, "ance", 4)) break;
- if (ends(z, "ence", 4)) break; return;
- case 'e':
- if (ends(z, "er", 2)) break; return;
- case 'i':
- if (ends(z, "ic", 2)) break; return;
- case 'l':
- if (ends(z, "able", 4)) break;
- if (ends(z, "ible", 4)) break; return;
- case 'n':
- if (ends(z, "ant", 3)) break;
- if (ends(z, "ement", 5)) break;
- if (ends(z, "ment", 4)) break;
- if (ends(z, "ent", 3)) break; return;
- case 'o':
- if (ends(z, "ion", 3) && (z->p[z->j] == 's' ||
- z->p[z->j] == 't')) break;
- if (ends(z, "ou", 2)) break; return;
- /* takes care of -ous */
- case 's':
- if (ends(z, "ism", 3)) break; return;
- case 't':
- if (ends(z, "ate", 3)) break;
- if (ends(z, "iti", 3)) break; return;
- case 'u':
- if (ends(z, "ous", 3)) break; return;
- case 'v':
- if (ends(z, "ive", 3)) break; return;
- case 'z':
- if (ends(z, "ize", 3)) break; return;
- default:
- return;
- }
- if (m(z) > 1) z->k = z->j;
-}
-
-/* step_5(z) removes a final -e if m(z) > 1, and changes -ll to -l if
- m(z) > 1.
-*/
-
-static void step_5(struct english_stemmer * z)
-{ z->j = z->k;
- if (z->p[z->k] == 'e')
- { int a = m(z);
- if (a > 1 || (a == 1 && !cvc(z, z->k - 1))) z->k--;
- }
- if (z->p[z->k] == 'l' && doublec(z, z->k) && m(z) > 1) z->k--;
-}
-
-static const char * english_stem(void * z_, const char * q, int i0, int i1)
-{
- struct english_stemmer * z = (struct english_stemmer *) z_;
- int p_size = z->p_size;
-
- if (i1 - i0 + 50 > p_size)
- { free(z->p);
- p_size = i1 - i0 + 75; /* ample */ z->p_size = p_size;
- z->p = (char *) malloc(p_size);
- }
-
- memmove(z->p, q + i0, i1 - i0 + 1);
-
- z->k = i1 - i0;
-
-
- { const char * t = search_pool(z->irregulars, z->k + 1, z->p);
- if (t != 0) {
- z->k = strlen(t) - 1;
- return t;
- }
- }
-
- if (z->k > 1) /*-DEPARTURE-*/
-
- /* With this line, strings of length 1 or 2 don't go through the
- stemming process, although no mention is made of this in the
- published algorithm. Remove the line to match the published
- algorithm. */
-
- { step_1ab(z); step_1c(z);
- step_2(z);
- step_3(z);
- step_4(z);
- step_5(z);
- }
-
- z->p[z->k + 1] = 0; /* C string form for now */
- return z->p;
-}
-
-/* -NEW-
- This is a table of irregular forms. It is quite short, but still
- reflects the errors actually drawn to Martin Porter's attention over
- a 20 year period!
-
- Extend it as necessary.
-
- The form of the table is:
-
- "p1" "s11/s12/s13/ ... /"
- "p2" "s21/s22/s23/ ... /"
- ...
- "pn" "sn1/sn2/sn3/ ... /"
- 0, 0
-
- String sij is mapped to paradigm form pi, and the main stemming
- process is then bypassed.
-*/
-
-static const char * irregular_forms[] = {
-
- "sky", "sky/skies/",
- "die", "dying/",
- "lie", "lying/",
- "tie", "tying/",
- "news", "news/",
- "inning", "innings/inning/",
- "outing", "outings/outing/",
- "canning", "cannings/canning/",
- "howe", "howe/",
-
- /*-NEW-*/
- "proceed", "proceed/",
- "exceed", "exceed/",
- "succeed", "succeed/", /* Hiranmay Ghosh */
-
- 0, 0 /* terminator */
-
-};
-
-
-/*
- * is_stopword part
- */
-typedef struct {
- unsigned char val;
- unsigned char flag;
- unsigned char right;
-
- unsigned char child;
-} ESWNODE;
-
-/* is exists left tree ? */
-#define L 0x01
-/* finish word flag */
-#define F 0x02
-#define ISLEFT(x) (((ESWNODE*)x)->flag & L)
-#define ISFINISH(x) (((ESWNODE*)x)->flag & F)
-
-
-static ESWNODE engstoptree[] = {
- {'m',L,9,126},
- {'d',L,4,71},
- {'b',L,2,40},
- {'a',F,0,14},
- {'c',0,0,62},
- {'f',L,2,79},
- {'e',0,0,75},
- {'h',0,1,90},
- {'i',F,0,108},
- {'t',L,4,177},
- {'o',L,2,135},
- {'n',0,0,131},
- {'s',0,0,156},
- {'v',L,2,210},
- {'u',0,0,201},
- {'w',0,1,211},
- {'y',0,0,237},
-
- {'m',L|F,5,0},
- {'f',L,2,12},
- {'b',0,0,7},
- {'g',0,1,13},
- {'l',0,0,17},
- {'r',L,2,19},
- {'n',F,0,16},
- {'s',F,1,0},
- {'t',F,0,0},
-
- {'o',0,0,1},
-
- {'u',0,1,2},
- {'v',F,0,0},
-
- {'t',F,0,0},
-
- {'t',0,0,1},
-
- {'e',0,0,1},
-
- {'r',F,0,0},
-
- {'a',0,0,1},
-
- {'i',0,0,1},
-
- {'n',F,0,1},
-
- {'s',0,0,1},
-
- {'t',F,0,0},
-
- {'l',F,0,0},
-
- {'d',F,1,0},
- {'i',F,0,0},
-
- {'e',F,0,0},
-
- {'o',L,2,21},
- {'e',F,0,3},
- {'u',0,1,21},
- {'y',F,0,0},
-
- {'f',L,3,9},
- {'c',0,1,4},
- {'e',0,0,6},
- {'l',0,1,8},
- {'t',0,0,9},
-
- {'a',0,0,1},
-
- {'u',0,0,1},
-
- {'s',F,0,0},
-
- {'n',F,0,0},
-
- {'o',0,0,1},
-
- {'r',F,0,0},
-
- {'o',0,0,1},
-
- {'w',F,0,0},
-
- {'w',0,0,1},
-
- {'e',0,0,1},
-
- {'e',0,0,1},
-
- {'n',F,0,0},
-
- {'t',0,0,1},
-
- {'h',F,0,0},
-
- {'t',F,0,0},
-
- {'a',0,1,2},
- {'o',0,0,2},
-
- {'n',F,0,0},
-
- {'u',0,0,1},
-
- {'l',0,0,1},
-
- {'d',F,0,0},
-
- {'o',L|F,2,4},
- {'i',0,0,2},
- {'u',0,0,5},
-
- {'d',F,0,0},
-
- {'e',F,1,0},
- {'w',0,0,1},
-
- {'n',F,0,0},
-
- {'r',0,0,1},
-
- {'e',F,0,0},
-
- {'a',0,0,1},
-
- {'c',0,0,1},
-
- {'h',F,0,0},
-
- {'o',L,2,5},
- {'e',0,0,3},
- {'r',0,1,4},
- {'u',0,0,5},
-
- {'w',F,0,0},
-
- {'r',F,0,0},
-
- {'o',0,0,1},
-
- {'m',F,0,0},
-
- {'r',0,0,1},
-
- {'t',0,0,1},
-
- {'h',0,0,1},
-
- {'e',0,0,1},
-
- {'r',F,0,0},
-
- {'e',L|F,2,7},
- {'a',F,0,3},
- {'i',F,1,11},
- {'o',0,0,15},
-
- {'d',F,1,0},
- {'v',0,0,1},
-
- {'e',F,0,0},
-
- {'r',F,0,1},
-
- {'e',F,1,0},
- {'s',0,0,1},
-
- {'e',0,0,1},
-
- {'l',0,0,1},
-
- {'f',F,0,0},
-
- {'m',F,0,1},
-
- {'s',0,0,1},
-
- {'e',0,0,1},
-
- {'l',0,0,1},
-
- {'f',F,0,0},
-
- {'w',F,0,0},
-
- {'n',L|F,2,4},
- {'f',F,0,0},
- {'s',F,1,0},
- {'t',F,0,3},
-
- {'t',0,0,1},
-
- {'o',F,0,0},
-
- {'s',0,0,1},
-
- {'e',0,0,1},
-
- {'l',0,0,1},
-
- {'f',F,0,0},
-
- {'o',L,3,6},
- {'a',0,1,4},
- {'e',F,0,0},
- {'u',0,1,7},
- {'y',F,0,8},
-
- {'y',F,0,0},
-
- {'r',0,1,2},
- {'s',0,0,2},
-
- {'e',F,0,0},
-
- {'t',F,0,0},
-
- {'s',0,0,1},
-
- {'t',F,0,0},
-
- {'s',0,0,1},
-
- {'e',0,0,1},
-
- {'l',0,0,1},
-
- {'f',F,0,0},
-
- {'o',F,0,1},
-
- {'r',F,1,0},
- {'t',F,0,0},
-
- {'t',L,4,11},
- {'n',L|F,2,7},
- {'f',F,0,5},
- {'r',F,0,0},
- {'v',L,2,16},
- {'u',0,0,9},
- {'w',0,0,16},
-
- {'f',F,0,0},
-
- {'c',F,1,0},
- {'l',0,0,1},
-
- {'i',F,0,0},
-
- {'h',0,0,1},
-
- {'e',0,0,1},
-
- {'r',F,0,0},
-
- {'r',F,1,2},
- {'t',F,0,0},
-
- {'s',0,0,1},
-
- {'e',0,0,1},
-
- {'l',0,0,1},
-
- {'v',F,0,0},
-
- {'e',0,0,1},
-
- {'r',F,0,0},
-
- {'n',F,0,0},
-
- {'h',L,2,6},
- {'a',0,0,3},
- {'o',F,1,12},
- {'u',0,0,13},
-
- {'m',0,0,1},
-
- {'e',F,0,0},
-
- {'e',L|F,2,0},
- {'a',0,0,2},
- {'o',0,0,3},
-
- {'l',0,0,1},
-
- {'l',F,0,0},
-
- {'u',0,0,1},
-
- {'l',0,0,1},
-
- {'d',F,0,0},
-
- {'m',0,0,1},
-
- {'e',F,0,0},
-
- {'c',0,0,1},
-
- {'h',F,0,0},
-
- {'h',0,1,2},
- {'o',F,0,27},
-
- {'i',L|F,3,0},
- {'a',0,1,4},
- {'e',F,0,5},
- {'o',0,1,17},
- {'r',0,0,18},
-
- {'n',F,1,0},
- {'t',F,0,0},
-
- {'n',L|F,3,0},
- {'i',0,1,5},
- {'m',F,0,5},
- {'s',L,2,9},
- {'r',0,0,7},
- {'y',F,0,0},
-
- {'r',F,0,0},
-
- {'s',0,0,1},
-
- {'e',0,0,1},
-
- {'l',0,0,1},
-
- {'v',F,0,0},
-
- {'e',F,0,0},
-
- {'e',F,0,0},
-
- {'s',0,0,1},
-
- {'e',F,0,0},
-
- {'o',0,0,1},
-
- {'u',0,0,1},
-
- {'g',0,0,1},
-
- {'h',F,0,0},
-
- {'o',F,0,0},
-
- {'n',0,1,2},
- {'p',F,0,0},
-
- {'d',0,1,2},
- {'t',0,0,3},
-
- {'e',0,0,1},
-
- {'r',F,0,0},
-
- {'i',0,0,1},
-
- {'l',F,0,0},
-
- {'e',0,0,1},
-
- {'r',0,0,1},
-
- {'i',F,0,0},
-
- {'h',L,3,7},
- {'a',F,1,0},
- {'e',F,0,3},
- {'i',0,1,17},
- {'o',0,0,20},
-
- {'r',0,0,1},
-
- {'e',F,0,0},
-
- {'e',L,2,5},
- {'a',0,0,3},
- {'i',F,1,6},
- {'o',F,0,9},
-
- {'t',F,0,0},
-
- {'n',F,1,0},
- {'r',0,0,1},
-
- {'e',F,0,0},
-
- {'c',0,1,2},
- {'l',0,0,2},
-
- {'h',F,0,0},
-
- {'e',F,0,0},
-
- {'m',F,0,0},
-
- {'l',0,1,2},
- {'t',0,0,2},
-
- {'l',F,0,0},
-
- {'h',F,0,0},
-
- {'u',0,0,1},
-
- {'l',0,0,1},
-
- {'d',F,0,0},
-
- {'o',0,0,1},
-
- {'u',F,0,1},
-
- {'r',F,0,1},
-
- {'s',0,0,1},
-
- {'e',0,0,1},
-
- {'l',0,0,1},
-
- {'f',F,1,0},
- {'v',F,0,0}
-};
-
-static unsigned int
-find_english_stopword( unsigned char *buf, int len ) {
- ESWNODE *ptr = engstoptree;
- int result = 0;
- unsigned char *cur = buf;
-
- while( cur - buf < len ) {
- if ( ptr->val == *cur ) {
- cur++;
- if ( ISFINISH(ptr) ) result = cur - buf;
- if ( ! ptr->child ) break;
- ptr += ptr->child;
- } else if ( ptr->val > *cur ) {
- if ( ISLEFT(ptr) )
- ptr++;
- else
- break;
- } else {
- if ( ptr->right )
- ptr += ptr->right;
- else
- break;
- }
- }
- return result;
-}
-
-#undef L
-#undef F
-#undef ISLEFT
-#undef ISFINISH
-
-static int
-is_stopengword(void* obj,char* word,int len)
-{
- return ( len == find_english_stopword((unsigned char*)word, len) ) ? 1 : 0;
-}
-
-static void *
-setup_english_stemmer(void)
-{
- struct english_stemmer * z = (struct english_stemmer *) malloc(sizeof(struct english_stemmer));
- z->p = 0; z->p_size = 0;
- z->irregulars = create_pool(irregular_forms);
- return (void *) z;
-}
-
-static void
-closedown_english_stemmer(void * z_)
-{
- struct english_stemmer * z = (struct english_stemmer *) z_;
- free_pool(z->irregulars);
- free(z->p);
- free(z);
-}
-
-static char*
-engstemming(void* obj, char *word, int *len)
-{
- struct english_stemmer * z = (struct english_stemmer *) obj;
- const char* stemmed_word;
- char *result = word;
-
- while(result-word < *len) {
- *result = tolower((unsigned char) *result);
- result++;
- }
- stemmed_word = english_stem(obj, word, 0, *len-1);
- *len = z->k + 1;
-
- result = (char*)palloc( *len );
- memcpy((void*)result, (void*)stemmed_word, *len);
- return result;
-}
-#endif /* DICT_BODY */
-
-#ifdef DICT_TABLE
-TABLE_DICT_START
- "C",
- setup_english_stemmer,
- closedown_english_stemmer,
- engstemming,
- NULL,
- is_stopengword
-TABLE_DICT_END
-#endif
-