aboutsummaryrefslogtreecommitdiff
path: root/contrib/tsearch2/dict_snowball.c
blob: 0fbcc521bd507d58d6141dc222c63d9457c804db (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
/*
 * Example of a Snowball-based dictionary (English and Russian stemmers).
 * Snowball home page: http://snowball.tartarus.org/
 * Author: Teodor Sigaev <teodor@sigaev.ru>
 */
#include <stdlib.h>
#include <string.h>

#include "postgres.h"

#include "dict.h"
#include "common.h"
#include "snowball/header.h"
#include "snowball/english_stem.h"
#include "snowball/russian_stem.h"

/*
 * Per-dictionary state shared by the init functions and snb_lexize().
 */
typedef struct {
	/* Snowball stemmer environment, created by <language>_create_env() */
	struct SN_env *z;
	/* stop words loaded by readstoplist(); consulted before stemming */
	StopList	stoplist;
	/* language-specific stemming routine, invoked on z by snb_lexize() */
	int	(*stem)(struct SN_env * z);
} DictSnowball;


/*
 * Entry points defined in this file: one init function per supported
 * language (English, Russian) and a single lexize routine shared by both.
 * Each is announced with PG_FUNCTION_INFO_V1 and then prototyped.
 */
PG_FUNCTION_INFO_V1(snb_en_init);
Datum snb_en_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(snb_ru_init);
Datum snb_ru_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(snb_lexize);
Datum snb_lexize(PG_FUNCTION_ARGS);

/*
 * snb_en_init
 *
 * Create a DictSnowball configured with the English Snowball stemmer.
 *
 * Argument 0 (text, may be NULL): handed to readstoplist() to fill the
 * dictionary's stop list, which is then sorted.  When the argument is
 * NULL the stop list stays empty.
 *
 * Returns a pointer to a heap-allocated (calloc) DictSnowball.  Raises
 * ERROR "No memory" if either the dictionary struct or the stemmer
 * environment cannot be allocated.
 */
Datum
snb_en_init(PG_FUNCTION_ARGS) {
	/* calloc gives us the zero-filled struct the rest of the code expects */
	DictSnowball	*d = calloc(1, sizeof(*d));

	if ( !d )
		elog(ERROR, "No memory");
	/*
	 * NOTE(review): wordop is presumably applied to each stop word as it is
	 * loaded/looked up (lower-casing) -- confirm against the StopList code.
	 */
	d->stoplist.wordop=lowerstr;

	if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
		text       *in = PG_GETARG_TEXT_P(0);
		readstoplist(in, &(d->stoplist));
		sortstoplist(&(d->stoplist));
		PG_FREE_IF_COPY(in, 0);
	}

	d->z = english_create_env();
	if (!d->z) {
		/*
		 * elog(ERROR) does not return, so everything allocated outside
		 * PostgreSQL's memory contexts must be released here: the stop
		 * list contents and the dictionary struct itself.
		 */
		freestoplist(&(d->stoplist));
		free(d);
		elog(ERROR,"No memory");
	}
	d->stem=english_stem;

	PG_RETURN_POINTER(d);
}

/*
 * snb_ru_init
 *
 * Create a DictSnowball configured with the Russian Snowball stemmer.
 *
 * Argument 0 (text, may be NULL): handed to readstoplist() to fill the
 * dictionary's stop list, which is then sorted.  When the argument is
 * NULL the stop list stays empty.
 *
 * Returns a pointer to a heap-allocated (calloc) DictSnowball.  Raises
 * ERROR "No memory" if either the dictionary struct or the stemmer
 * environment cannot be allocated.
 */
Datum
snb_ru_init(PG_FUNCTION_ARGS) {
	/* calloc gives us the zero-filled struct the rest of the code expects */
	DictSnowball	*d = calloc(1, sizeof(*d));

	if ( !d )
		elog(ERROR, "No memory");
	/*
	 * NOTE(review): wordop is presumably applied to each stop word as it is
	 * loaded/looked up (lower-casing) -- confirm against the StopList code.
	 */
	d->stoplist.wordop=lowerstr;

	if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
		text       *in = PG_GETARG_TEXT_P(0);
		readstoplist(in, &(d->stoplist));
		sortstoplist(&(d->stoplist));
		PG_FREE_IF_COPY(in, 0);
	}

	d->z = russian_create_env();
	if (!d->z) {
		/*
		 * elog(ERROR) does not return, so everything allocated outside
		 * PostgreSQL's memory contexts must be released here: the stop
		 * list contents and the dictionary struct itself.
		 */
		freestoplist(&(d->stoplist));
		free(d);
		elog(ERROR,"No memory");
	}
	d->stem=russian_stem;

	PG_RETURN_POINTER(d);
}

Datum
snb_lexize(PG_FUNCTION_ARGS) {
	DictSnowball *d = (DictSnowball*)PG_GETARG_POINTER(0);
	char       *in = (char*)PG_GETARG_POINTER(1);
	char *txt = pnstrdup(in, PG_GETARG_INT32(2));
	char	**res=palloc(sizeof(char*)*2);

	if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
		pfree(txt);
		res[0]=NULL;
	} else {
		SN_set_current(d->z, strlen(txt), txt);
		(d->stem)(d->z);
		if ( d->z->p && d->z->l ) {
			txt=repalloc(txt, d->z->l+1);
			memcpy( txt, d->z->p, d->z->l);
			txt[d->z->l]='\0';
		}	
		res[0]=txt;
	}
	res[1]=NULL;


	PG_RETURN_POINTER(res);
}