aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTeodor Sigaev <teodor@sigaev.ru>2005-10-28 13:05:06 +0000
committerTeodor Sigaev <teodor@sigaev.ru>2005-10-28 13:05:06 +0000
commit21b748e76acc54ad0c3715e340fdff0865e201eb (patch)
tree703ab12dcff06b99109613fa74af4d22f33bdd8d
parentfbff2e96079cbcfe76bff09f379d0fc60bc79778 (diff)
downloadpostgresql-21b748e76acc54ad0c3715e340fdff0865e201eb.tar.gz
postgresql-21b748e76acc54ad0c3715e340fdff0865e201eb.zip
1 Fix problem with lost precision in rank with OR-ed lexemes
2 Allow tsquery_in to input void tsquery: resolve dump/restore problem with tsquery
-rw-r--r--contrib/tsearch2/expected/tsearch2.out18
-rw-r--r--contrib/tsearch2/query.c23
-rw-r--r--contrib/tsearch2/rank.c31
3 files changed, 49 insertions, 23 deletions
diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out
index 6c266a29ac6..a7ac240ef9e 100644
--- a/contrib/tsearch2/expected/tsearch2.out
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -746,21 +746,21 @@ select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
(1 row)
select rank(' a:1 s:2C d g'::tsvector, 'a | s');
- rank
-------
- 0.28
+ rank
+-----------
+ 0.0911891
(1 row)
select rank(' a:1 s:2B d g'::tsvector, 'a | s');
- rank
-------
- 0.46
+ rank
+----------
+ 0.151982
(1 row)
select rank(' a:1 s:2 d g'::tsvector, 'a | s');
- rank
-------
- 0.19
+ rank
+-----------
+ 0.0607927
(1 row)
select rank(' a:1 s:2C d g'::tsvector, 'a & s');
diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c
index d8b8d4c80d5..013f0031965 100644
--- a/contrib/tsearch2/query.c
+++ b/contrib/tsearch2/query.c
@@ -55,6 +55,7 @@ Datum to_tsquery_current(PG_FUNCTION_ARGS);
/* parser's states */
#define WAITOPERAND 1
#define WAITOPERATOR 2
+#define WAITFIRSTOPERAND 3
/*
* node of query tree, also used
@@ -137,6 +138,7 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
{
switch (state->state)
{
+ case WAITFIRSTOPERAND:
case WAITOPERAND:
if (*(state->buf) == '!')
{
@@ -159,14 +161,16 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
else if (*(state->buf) != ' ')
{
state->valstate.prsbuf = state->buf;
- state->state = WAITOPERATOR;
if (gettoken_tsvector(&(state->valstate)))
{
*strval = state->valstate.word;
*lenval = state->valstate.curpos - state->valstate.word;
state->buf = get_weight(state->valstate.prsbuf, weight);
+ state->state = WAITOPERATOR;
return VAL;
}
+ else if ( state->state == WAITFIRSTOPERAND )
+ return END;
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -596,7 +600,7 @@ static QUERYTYPE *
/* init state */
state.buf = buf;
- state.state = WAITOPERAND;
+ state.state = WAITFIRSTOPERAND;
state.count = 0;
state.num = 0;
state.str = NULL;
@@ -616,10 +620,13 @@ static QUERYTYPE *
/* parse query & make polish notation (postfix, but in reverse order) */
makepol(&state, pushval);
pfree(state.valstate.word);
- if (!state.num)
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("empty query")));
+ if (!state.num) {
+ elog(NOTICE, "Query doesn't contain lexem(s)");
+ query = (QUERYTYPE*)palloc( HDRSIZEQT );
+ query->len = HDRSIZEQT;
+ query->size = 0;
+ return query;
+ }
/* make finish struct */
commonlen = COMPUTESIZE(state.num, state.sumlen);
@@ -905,6 +912,10 @@ to_tsquery(PG_FUNCTION_ARGS)
PG_FREE_IF_COPY(in, 1);
query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+
+ if ( query->size == 0 )
+ PG_RETURN_POINTER(query);
+
res = clean_fakeval_v2(GETQUERY(query), &len);
if (!res)
{
diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c
index 081b0840875..40bec1f4840 100644
--- a/contrib/tsearch2/rank.c
+++ b/contrib/tsearch2/rank.c
@@ -257,7 +257,7 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
int4 dimt,
j,
i;
- float res = -1.0;
+ float res = 0.0;
ITEM **item;
int size = q->size;
@@ -266,6 +266,8 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
for (i = 0; i < size; i++)
{
+ float resj,wjm;
+ int4 jm;
entry = find_wordentry(t, q, item[i]);
if (!entry)
continue;
@@ -281,14 +283,27 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
post = POSNULL + 1;
}
- for (j = 0; j < dimt; j++)
- {
- if (res < 0)
- res = wpos(post[j]);
- else
- res = 1.0 - (1.0 - res) * (1.0 - wpos(post[j]));
- }
+ resj = 0.0;
+ wjm = -1.0;
+ jm = 0;
+ for (j = 0; j < dimt; j++)
+ {
+ resj = resj + wpos(post[j])/((j+1)*(j+1));
+ if ( wpos(post[j]) > wjm ) {
+ wjm = wpos(post[j]);
+ jm = j;
+ }
+ }
+/*
+ limit (sum(1/i^2),i->inf) = pi^2/6
+ resj = sum(wi/i^2),i=1,noccurence,
+ wi - should be sorted desc,
+ don't sort for now, just choose maximum weight. This should be corrected
+ Oleg Bartunov
+*/
+ res = res + ( wjm + resj - wjm/((jm+1)*(jm+1)))/1.64493406685;
}
+ res = res /size;
pfree(item);
return res;
}