diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2007-11-14 23:43:27 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2007-11-14 23:43:27 +0000 |
commit | 866bad9543897291319d0a309dbddeb9ea8808ac (patch) | |
tree | a21e5743d19f3cc3104ff39085a0cdd8a9b840d1 /src | |
parent | 5858990f8793881144f0c113f49493861c6c3004 (diff) | |
download | postgresql-866bad9543897291319d0a309dbddeb9ea8808ac.tar.gz postgresql-866bad9543897291319d0a309dbddeb9ea8808ac.zip |
Add a rank/(rank+1) normalization option to ts_rank(). While the usefulness
of this seems a bit marginal, if it's useful enough to be shown in the manual
then we probably ought to support doing it without double evaluation of the
ts_rank function. Per my proposal earlier today.
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/utils/adt/tsrank.c | 25 |
1 file changed, 17 insertions, 8 deletions
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c index bf0016d76bb..297724710ff 100644 --- a/src/backend/utils/adt/tsrank.c +++ b/src/backend/utils/adt/tsrank.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.8 2007/09/20 18:10:57 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.9 2007/11/14 23:43:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,13 +25,14 @@ static float weights[] = {0.1f, 0.2f, 0.4f, 1.0f}; #define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] ) -#define RANK_NO_NORM 0x00 +#define RANK_NO_NORM 0x00 #define RANK_NORM_LOGLENGTH 0x01 -#define RANK_NORM_LENGTH 0x02 -#define RANK_NORM_EXTDIST 0x04 -#define RANK_NORM_UNIQ 0x08 -#define RANK_NORM_LOGUNIQ 0x10 -#define DEF_NORM_METHOD RANK_NO_NORM +#define RANK_NORM_LENGTH 0x02 +#define RANK_NORM_EXTDIST 0x04 +#define RANK_NORM_UNIQ 0x08 +#define RANK_NORM_LOGUNIQ 0x10 +#define RANK_NORM_RDIVRPLUS1 0x20 +#define DEF_NORM_METHOD RANK_NO_NORM static float calc_rank_or(float *w, TSVector t, TSQuery q); static float calc_rank_and(float *w, TSVector t, TSQuery q); @@ -348,12 +349,17 @@ calc_rank(float *w, TSVector t, TSQuery q, int4 method) res /= (float) len; } + /* RANK_NORM_EXTDIST not applicable */ + if ((method & RANK_NORM_UNIQ) && t->size > 0) res /= (float) (t->size); if ((method & RANK_NORM_LOGUNIQ) && t->size > 0) res /= log((double) (t->size + 1)) / log(2.0); + if (method & RANK_NORM_RDIVRPLUS1) + res /= (res + 1); + return res; } @@ -762,7 +768,7 @@ calc_rank_cd(float4 *arrdata, TSVector txt, TSQuery query, int method) Wdoc /= (double) len; } - if ((method & RANK_NORM_EXTDIST) && SumDist > 0) + if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0) Wdoc /= ((double) NExtent) / SumDist; if ((method & RANK_NORM_UNIQ) && txt->size > 0) @@ -771,6 +777,9 @@ calc_rank_cd(float4 *arrdata, TSVector txt, TSQuery query, int method) if ((method & RANK_NORM_LOGUNIQ) 
&& txt->size > 0) Wdoc /= log((double) (txt->size + 1)) / log(2.0); + if (method & RANK_NORM_RDIVRPLUS1) + Wdoc /= (Wdoc + 1); + pfree(doc); pfree( qr.operandexist ); |