aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt')
-rw-r--r--src/backend/utils/adt/Makefile9
-rw-r--r--src/backend/utils/adt/regproc.c229
-rw-r--r--src/backend/utils/adt/selfuncs.c6
-rw-r--r--src/backend/utils/adt/tsginidx.c157
-rw-r--r--src/backend/utils/adt/tsgistidx.c784
-rw-r--r--src/backend/utils/adt/tsquery.c767
-rw-r--r--src/backend/utils/adt/tsquery_cleanup.c261
-rw-r--r--src/backend/utils/adt/tsquery_gist.c259
-rw-r--r--src/backend/utils/adt/tsquery_op.c289
-rw-r--r--src/backend/utils/adt/tsquery_rewrite.c524
-rw-r--r--src/backend/utils/adt/tsquery_util.c317
-rw-r--r--src/backend/utils/adt/tsrank.c804
-rw-r--r--src/backend/utils/adt/tsvector.c683
-rw-r--r--src/backend/utils/adt/tsvector_op.c1334
14 files changed, 6418 insertions, 5 deletions
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index 12d158a49bd..73fcebb496d 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -1,7 +1,7 @@
#
# Makefile for utils/adt
#
-# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.64 2007/04/02 03:49:39 tgl Exp $
+# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.65 2007/08/21 01:11:18 tgl Exp $
#
subdir = src/backend/utils/adt
@@ -25,8 +25,11 @@ OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \
tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \
network.o mac.o inet_net_ntop.o inet_net_pton.o \
ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \
- ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o xml.o \
- uuid.o
+ ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o \
+ tsginidx.o tsgistidx.o tsquery.o tsquery_cleanup.o tsquery_gist.o \
+ tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \
+ tsvector.o tsvector_op.o \
+ uuid.o xml.o
like.o: like.c like_match.c
diff --git a/src/backend/utils/adt/regproc.c b/src/backend/utils/adt/regproc.c
index 7ec9db3ec9b..e49f323daa8 100644
--- a/src/backend/utils/adt/regproc.c
+++ b/src/backend/utils/adt/regproc.c
@@ -13,7 +13,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/regproc.c,v 1.102 2007/06/26 16:48:09 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/regproc.c,v 1.103 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -27,6 +27,8 @@
#include "catalog/namespace.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
+#include "catalog/pg_ts_config.h"
+#include "catalog/pg_ts_dict.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "parser/parse_type.h"
@@ -1066,6 +1068,231 @@ regtypesend(PG_FUNCTION_ARGS)
/*
+ * regconfigin - converts "tsconfigname" to tsconfig OID
+ *
+ * We also accept a numeric OID, for symmetry with the output routine.
+ *
+ * '-' signifies unknown (OID 0). In all other cases, the input must
+ * match an existing pg_ts_config entry.
+ *
+ * This function is not needed in bootstrap mode, so we don't worry about
+ * making it work then.
+ */
+Datum
+regconfigin(PG_FUNCTION_ARGS)
+{
+ char *cfg_name_or_oid = PG_GETARG_CSTRING(0);
+ Oid result;
+ List *names;
+
+ /* '-' ? */
+ if (strcmp(cfg_name_or_oid, "-") == 0)
+ PG_RETURN_OID(InvalidOid);
+
+ /* Numeric OID? */
+ if (cfg_name_or_oid[0] >= '0' &&
+ cfg_name_or_oid[0] <= '9' &&
+ strspn(cfg_name_or_oid, "0123456789") == strlen(cfg_name_or_oid))
+ {
+ result = DatumGetObjectId(DirectFunctionCall1(oidin,
+ CStringGetDatum(cfg_name_or_oid)));
+ PG_RETURN_OID(result);
+ }
+
+ /*
+ * Normal case: parse the name into components and see if it matches any
+ * pg_ts_config entries in the current search path.
+ */
+ names = stringToQualifiedNameList(cfg_name_or_oid);
+
+ result = TSConfigGetCfgid(names, false);
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * regconfigout - converts tsconfig OID to "tsconfigname"
+ */
+Datum
+regconfigout(PG_FUNCTION_ARGS)
+{
+ Oid cfgid = PG_GETARG_OID(0);
+ char *result;
+ HeapTuple cfgtup;
+
+ if (cfgid == InvalidOid)
+ {
+ result = pstrdup("-");
+ PG_RETURN_CSTRING(result);
+ }
+
+ cfgtup = SearchSysCache(TSCONFIGOID,
+ ObjectIdGetDatum(cfgid),
+ 0, 0, 0);
+
+ if (HeapTupleIsValid(cfgtup))
+ {
+ Form_pg_ts_config cfgform = (Form_pg_ts_config) GETSTRUCT(cfgtup);
+ char *cfgname = NameStr(cfgform->cfgname);
+ char *nspname;
+
+ /*
+ * Would this config be found by regconfigin? If not, qualify it.
+ */
+ if (TSConfigIsVisible(cfgid))
+ nspname = NULL;
+ else
+ nspname = get_namespace_name(cfgform->cfgnamespace);
+
+ result = quote_qualified_identifier(nspname, cfgname);
+
+ ReleaseSysCache(cfgtup);
+ }
+ else
+ {
+ /* If OID doesn't match any pg_ts_config row, return it numerically */
+ result = (char *) palloc(NAMEDATALEN);
+ snprintf(result, NAMEDATALEN, "%u", cfgid);
+ }
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * regconfigrecv - converts external binary format to regconfig
+ */
+Datum
+regconfigrecv(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidrecv, so share code */
+ return oidrecv(fcinfo);
+}
+
+/*
+ * regconfigsend - converts regconfig to binary format
+ */
+Datum
+regconfigsend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidsend, so share code */
+ return oidsend(fcinfo);
+}
+
+
+/*
+ * regdictionaryin - converts "tsdictionaryname" to tsdictionary OID
+ *
+ * We also accept a numeric OID, for symmetry with the output routine.
+ *
+ * '-' signifies unknown (OID 0). In all other cases, the input must
+ * match an existing pg_ts_dict entry.
+ *
+ * This function is not needed in bootstrap mode, so we don't worry about
+ * making it work then.
+ */
+Datum
+regdictionaryin(PG_FUNCTION_ARGS)
+{
+ char *dict_name_or_oid = PG_GETARG_CSTRING(0);
+ Oid result;
+ List *names;
+
+ /* '-' ? */
+ if (strcmp(dict_name_or_oid, "-") == 0)
+ PG_RETURN_OID(InvalidOid);
+
+ /* Numeric OID? */
+ if (dict_name_or_oid[0] >= '0' &&
+ dict_name_or_oid[0] <= '9' &&
+ strspn(dict_name_or_oid, "0123456789") == strlen(dict_name_or_oid))
+ {
+ result = DatumGetObjectId(DirectFunctionCall1(oidin,
+ CStringGetDatum(dict_name_or_oid)));
+ PG_RETURN_OID(result);
+ }
+
+ /*
+ * Normal case: parse the name into components and see if it matches any
+ * pg_ts_dict entries in the current search path.
+ */
+ names = stringToQualifiedNameList(dict_name_or_oid);
+
+ result = TSDictionaryGetDictid(names, false);
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * regdictionaryout - converts tsdictionary OID to "tsdictionaryname"
+ */
+Datum
+regdictionaryout(PG_FUNCTION_ARGS)
+{
+ Oid dictid = PG_GETARG_OID(0);
+ char *result;
+ HeapTuple dicttup;
+
+ if (dictid == InvalidOid)
+ {
+ result = pstrdup("-");
+ PG_RETURN_CSTRING(result);
+ }
+
+ dicttup = SearchSysCache(TSDICTOID,
+ ObjectIdGetDatum(dictid),
+ 0, 0, 0);
+
+ if (HeapTupleIsValid(dicttup))
+ {
+ Form_pg_ts_dict dictform = (Form_pg_ts_dict) GETSTRUCT(dicttup);
+ char *dictname = NameStr(dictform->dictname);
+ char *nspname;
+
+ /*
+ * Would this dictionary be found by regdictionaryin?
+ * If not, qualify it.
+ */
+ if (TSDictionaryIsVisible(dictid))
+ nspname = NULL;
+ else
+ nspname = get_namespace_name(dictform->dictnamespace);
+
+ result = quote_qualified_identifier(nspname, dictname);
+
+ ReleaseSysCache(dicttup);
+ }
+ else
+ {
+ /* If OID doesn't match any pg_ts_dict row, return it numerically */
+ result = (char *) palloc(NAMEDATALEN);
+ snprintf(result, NAMEDATALEN, "%u", dictid);
+ }
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * regdictionaryrecv - converts external binary format to regdictionary
+ */
+Datum
+regdictionaryrecv(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidrecv, so share code */
+ return oidrecv(fcinfo);
+}
+
+/*
+ * regdictionarysend - converts regdictionary to binary format
+ */
+Datum
+regdictionarysend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidsend, so share code */
+ return oidsend(fcinfo);
+}
+
+
+/*
* text_regclass: convert text to regclass
*
* This could be replaced by CoerceViaIO, except that we need to treat
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 1ba0b4eac48..60da08b1ec4 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.234 2007/05/05 17:05:48 mha Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.235 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -2822,6 +2822,8 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
case REGOPERATOROID:
case REGCLASSOID:
case REGTYPEOID:
+ case REGCONFIGOID:
+ case REGDICTIONARYOID:
*scaledvalue = convert_numeric_to_scalar(value, valuetypid);
*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
@@ -2925,6 +2927,8 @@ convert_numeric_to_scalar(Datum value, Oid typid)
case REGOPERATOROID:
case REGCLASSOID:
case REGTYPEOID:
+ case REGCONFIGOID:
+ case REGDICTIONARYOID:
/* we can treat OIDs as integers... */
return (double) DatumGetObjectId(value);
}
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
new file mode 100644
index 00000000000..491dd21aa81
--- /dev/null
+++ b/src/backend/utils/adt/tsginidx.c
@@ -0,0 +1,157 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsginidx.c
+ * GIN support functions for tsvector_ops
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/skey.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_utils.h"
+
+
+Datum
+gin_extract_tsvector(PG_FUNCTION_ARGS)
+{
+ TSVector vector = PG_GETARG_TSVECTOR(0);
+ uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1);
+ Datum *entries = NULL;
+
+ *nentries = 0;
+ if (vector->size > 0)
+ {
+ int i;
+ WordEntry *we = ARRPTR(vector);
+
+ *nentries = (uint32) vector->size;
+ entries = (Datum *) palloc(sizeof(Datum) * vector->size);
+
+ for (i = 0; i < vector->size; i++)
+ {
+ text *txt = (text *) palloc(VARHDRSZ + we->len);
+
+ SET_VARSIZE(txt, VARHDRSZ + we->len);
+ memcpy(VARDATA(txt), STRPTR(vector) + we->pos, we->len);
+
+ entries[i] = PointerGetDatum(txt);
+
+ we++;
+ }
+ }
+
+ PG_FREE_IF_COPY(vector, 0);
+ PG_RETURN_POINTER(entries);
+}
+
+Datum
+gin_extract_query(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1);
+ StrategyNumber strategy = PG_GETARG_UINT16(2);
+ Datum *entries = NULL;
+
+ *nentries = 0;
+
+ if (query->size > 0)
+ {
+ int4 i,
+ j = 0,
+ len;
+ QueryItem *item;
+
+ item = clean_NOT(GETQUERY(query), &len);
+ if (!item)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("query requires full scan, which is not supported by GIN indexes")));
+
+ item = GETQUERY(query);
+
+ for (i = 0; i < query->size; i++)
+ if (item[i].type == VAL)
+ (*nentries)++;
+
+ entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
+
+ for (i = 0; i < query->size; i++)
+ if (item[i].type == VAL)
+ {
+ text *txt;
+
+ txt = (text *) palloc(VARHDRSZ + item[i].length);
+
+ SET_VARSIZE(txt, VARHDRSZ + item[i].length);
+ memcpy(VARDATA(txt), GETOPERAND(query) + item[i].distance, item[i].length);
+
+ entries[j++] = PointerGetDatum(txt);
+
+ if (strategy != TSearchWithClassStrategyNumber && item[i].weight != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("@@ operator does not support lexeme class restrictions"),
+ errhint("Use the @@@ operator instead.")));
+ }
+ }
+ else
+ *nentries = -1; /* nothing can be found */
+
+ PG_FREE_IF_COPY(query, 0);
+
+ PG_RETURN_POINTER(entries);
+}
+
+typedef struct
+{
+ QueryItem *frst;
+ bool *mapped_check;
+} GinChkVal;
+
+static bool
+checkcondition_gin(void *checkval, QueryItem * val)
+{
+ GinChkVal *gcv = (GinChkVal *) checkval;
+
+ return gcv->mapped_check[val - gcv->frst];
+}
+
+Datum
+gin_ts_consistent(PG_FUNCTION_ARGS)
+{
+ bool *check = (bool *) PG_GETARG_POINTER(0);
+ /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
+ TSQuery query = PG_GETARG_TSQUERY(2);
+ bool res = FALSE;
+
+ if (query->size > 0)
+ {
+ int4 i,
+ j = 0;
+ QueryItem *item;
+ GinChkVal gcv;
+
+ gcv.frst = item = GETQUERY(query);
+ gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);
+
+ for (i = 0; i < query->size; i++)
+ if (item[i].type == VAL)
+ gcv.mapped_check[i] = check[j++];
+
+ res = TS_execute(
+ GETQUERY(query),
+ &gcv,
+ true,
+ checkcondition_gin
+ );
+ }
+
+ PG_RETURN_BOOL(res);
+}
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
new file mode 100644
index 00000000000..04c2b4c442f
--- /dev/null
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -0,0 +1,784 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsgistidx.c
+ * GiST support functions for tsvector_ops
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/tuptoaster.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_utils.h"
+#include "utils/pg_crc.h"
+
+
+#define SIGLENINT 31 /* >121 => key will toast, so it will not work
+ * !!! */
+
+#define SIGLEN ( sizeof(int4) * SIGLENINT )
+#define SIGLENBIT (SIGLEN * BITS_PER_BYTE)
+
+typedef char BITVEC[SIGLEN];
+typedef char *BITVECP;
+
+#define LOOPBYTE(a) \
+ for(i=0;i<SIGLEN;i++) {\
+ a;\
+}
+
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITS_PER_BYTE ) ) )
+#define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 )
+#define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITS_PER_BYTE ) )
+#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITS_PER_BYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITS_PER_BYTE )) & 0x01 )
+
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+#define GETENTRY(vec,pos) ((SignTSVector *) DatumGetPointer((vec)->vector[(pos)].key))
+
+/*
+ * type of GiST index key
+ */
+
+typedef struct
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */ ;
+ int4 flag;
+ char data[1];
+} SignTSVector;
+
+#define ARRKEY 0x01
+#define SIGNKEY 0x02
+#define ALLISTRUE 0x04
+
+#define ISARRKEY(x) ( ((SignTSVector*)(x))->flag & ARRKEY )
+#define ISSIGNKEY(x) ( ((SignTSVector*)(x))->flag & SIGNKEY )
+#define ISALLTRUE(x) ( ((SignTSVector*)(x))->flag & ALLISTRUE )
+
+#define GTHDRSIZE ( VARHDRSZ + sizeof(int4) )
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+#define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
+#define GETARR(x) ( (int4*)( (char*)(x)+GTHDRSIZE ) )
+#define ARRNELEM(x) ( ( VARSIZE(x) - GTHDRSIZE )/sizeof(int4) )
+
+/* Number of one-bits in an unsigned byte */
+static const uint8 number_of_ones[256] = {
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+};
+
+static int4 sizebitvec(BITVECP sign);
+
+Datum
+gtsvectorin(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("gtsvector_in not implemented")));
+ PG_RETURN_DATUM(0);
+}
+
+#define SINGOUTSTR "%d true bits, %d false bits"
+#define ARROUTSTR "%d unique words"
+#define EXTRALEN ( 2*13 )
+
+static int outbuf_maxlen = 0;
+
+Datum
+gtsvectorout(PG_FUNCTION_ARGS)
+{
+ SignTSVector *key = (SignTSVector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_POINTER(0)));
+ char *outbuf;
+
+ if (outbuf_maxlen == 0)
+ outbuf_maxlen = 2 * EXTRALEN + Max(strlen(SINGOUTSTR), strlen(ARROUTSTR)) + 1;
+ outbuf = palloc(outbuf_maxlen);
+
+ if (ISARRKEY(key))
+ sprintf(outbuf, ARROUTSTR, (int) ARRNELEM(key));
+ else
+ {
+ int cnttrue = (ISALLTRUE(key)) ? SIGLENBIT : sizebitvec(GETSIGN(key));
+
+ sprintf(outbuf, SINGOUTSTR, cnttrue, (int) SIGLENBIT - cnttrue);
+ }
+
+ PG_FREE_IF_COPY(key, 0);
+ PG_RETURN_POINTER(outbuf);
+}
+
+static int
+compareint(const void *a, const void *b)
+{
+ if (*((int4 *) a) == *((int4 *) b))
+ return 0;
+ return (*((int4 *) a) > *((int4 *) b)) ? 1 : -1;
+}
+
+static int
+uniqueint(int4 *a, int4 l)
+{
+ int4 *ptr,
+ *res;
+
+ if (l == 1)
+ return l;
+
+ ptr = res = a;
+
+ qsort((void *) a, l, sizeof(int4), compareint);
+
+ while (ptr - a < l)
+ if (*ptr != *res)
+ *(++res) = *ptr++;
+ else
+ ptr++;
+ return res + 1 - a;
+}
+
+static void
+makesign(BITVECP sign, SignTSVector * a)
+{
+ int4 k,
+ len = ARRNELEM(a);
+ int4 *ptr = GETARR(a);
+
+ MemSet((void *) sign, 0, sizeof(BITVEC));
+ for (k = 0; k < len; k++)
+ HASH(sign, ptr[k]);
+}
+
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ GISTENTRY *retval = entry;
+
+ if (entry->leafkey)
+ { /* tsvector */
+ SignTSVector *res;
+ TSVector val = DatumGetTSVector(entry->key);
+ int4 len;
+ int4 *arr;
+ WordEntry *ptr = ARRPTR(val);
+ char *words = STRPTR(val);
+
+ len = CALCGTSIZE(ARRKEY, val->size);
+ res = (SignTSVector *) palloc(len);
+ SET_VARSIZE(res, len);
+ res->flag = ARRKEY;
+ arr = GETARR(res);
+ len = val->size;
+ while (len--)
+ {
+ pg_crc32 c;
+
+ INIT_CRC32(c);
+ COMP_CRC32(c, words + ptr->pos, ptr->len);
+ FIN_CRC32(c);
+
+ *arr = *(int4 *) &c;
+ arr++;
+ ptr++;
+ }
+
+ len = uniqueint(GETARR(res), val->size);
+ if (len != val->size)
+ {
+ /*
+ * there is a collision of hash-function; len is always less than
+ * val->size
+ */
+ len = CALCGTSIZE(ARRKEY, len);
+ res = (SignTSVector *) repalloc((void *) res, len);
+ SET_VARSIZE(res, len);
+ }
+
+ /* make signature, if array is too long */
+ if (VARSIZE(res) > TOAST_INDEX_TARGET)
+ {
+ SignTSVector *ressign;
+
+ len = CALCGTSIZE(SIGNKEY, 0);
+ ressign = (SignTSVector *) palloc(len);
+ SET_VARSIZE(ressign, len);
+ ressign->flag = SIGNKEY;
+ makesign(GETSIGN(ressign), res);
+ res = ressign;
+ }
+
+ retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+ gistentryinit(*retval, PointerGetDatum(res),
+ entry->rel, entry->page,
+ entry->offset, FALSE);
+ }
+ else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+ !ISALLTRUE(DatumGetPointer(entry->key)))
+ {
+ int4 i,
+ len;
+ SignTSVector *res;
+ BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
+
+ LOOPBYTE(
+ if ((sign[i] & 0xff) != 0xff)
+ PG_RETURN_POINTER(retval);
+ );
+
+ len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+ res = (SignTSVector *) palloc(len);
+ SET_VARSIZE(res, len);
+ res->flag = SIGNKEY | ALLISTRUE;
+
+ retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+ gistentryinit(*retval, PointerGetDatum(res),
+ entry->rel, entry->page,
+ entry->offset, FALSE);
+ }
+ PG_RETURN_POINTER(retval);
+}
+
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ SignTSVector *key = (SignTSVector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+
+ if (key != (SignTSVector *) DatumGetPointer(entry->key))
+ {
+ GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+
+ gistentryinit(*retval, PointerGetDatum(key),
+ entry->rel, entry->page,
+ entry->offset, FALSE);
+
+ PG_RETURN_POINTER(retval);
+ }
+
+ PG_RETURN_POINTER(entry);
+}
+
+typedef struct
+{
+ int4 *arrb;
+ int4 *arre;
+} CHKVAL;
+
+/*
+ * is there value 'val' in array or not ?
+ */
+static bool
+checkcondition_arr(void *checkval, QueryItem * val)
+{
+ int4 *StopLow = ((CHKVAL *) checkval)->arrb;
+ int4 *StopHigh = ((CHKVAL *) checkval)->arre;
+ int4 *StopMiddle;
+
+ /* Loop invariant: StopLow <= val < StopHigh */
+
+ while (StopLow < StopHigh)
+ {
+ StopMiddle = StopLow + (StopHigh - StopLow) / 2;
+ if (*StopMiddle == val->val)
+ return (true);
+ else if (*StopMiddle < val->val)
+ StopLow = StopMiddle + 1;
+ else
+ StopHigh = StopMiddle;
+ }
+
+ return (false);
+}
+
+static bool
+checkcondition_bit(void *checkval, QueryItem * val)
+{
+ return GETBIT(checkval, HASHVAL(val->val));
+}
+
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(1);
+ SignTSVector *key = (SignTSVector *) DatumGetPointer(
+ ((GISTENTRY *) PG_GETARG_POINTER(0))->key
+ );
+
+ if (!query->size)
+ PG_RETURN_BOOL(false);
+
+ if (ISSIGNKEY(key))
+ {
+ if (ISALLTRUE(key))
+ PG_RETURN_BOOL(true);
+
+ PG_RETURN_BOOL(TS_execute(
+ GETQUERY(query),
+ (void *) GETSIGN(key), false,
+ checkcondition_bit
+ ));
+ }
+ else
+ { /* only leaf pages */
+ CHKVAL chkval;
+
+ chkval.arrb = GETARR(key);
+ chkval.arre = chkval.arrb + ARRNELEM(key);
+ PG_RETURN_BOOL(TS_execute(
+ GETQUERY(query),
+ (void *) &chkval, true,
+ checkcondition_arr
+ ));
+ }
+}
+
+static int4
+unionkey(BITVECP sbase, SignTSVector * add)
+{
+ int4 i;
+
+ if (ISSIGNKEY(add))
+ {
+ BITVECP sadd = GETSIGN(add);
+
+ if (ISALLTRUE(add))
+ return 1;
+
+ LOOPBYTE(
+ sbase[i] |= sadd[i];
+ );
+ }
+ else
+ {
+ int4 *ptr = GETARR(add);
+
+ for (i = 0; i < ARRNELEM(add); i++)
+ HASH(sbase, ptr[i]);
+ }
+ return 0;
+}
+
+
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ int *size = (int *) PG_GETARG_POINTER(1);
+ BITVEC base;
+ int4 i,
+ len;
+ int4 flag = 0;
+ SignTSVector *result;
+
+ MemSet((void *) base, 0, sizeof(BITVEC));
+ for (i = 0; i < entryvec->n; i++)
+ {
+ if (unionkey(base, GETENTRY(entryvec, i)))
+ {
+ flag = ALLISTRUE;
+ break;
+ }
+ }
+
+ flag |= SIGNKEY;
+ len = CALCGTSIZE(flag, 0);
+ result = (SignTSVector *) palloc(len);
+ *size = len;
+ SET_VARSIZE(result, len);
+ result->flag = flag;
+ if (!ISALLTRUE(result))
+ memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
+
+ PG_RETURN_POINTER(result);
+}
+
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+ SignTSVector *a = (SignTSVector *) PG_GETARG_POINTER(0);
+ SignTSVector *b = (SignTSVector *) PG_GETARG_POINTER(1);
+ bool *result = (bool *) PG_GETARG_POINTER(2);
+
+ if (ISSIGNKEY(a))
+ { /* then b also ISSIGNKEY */
+ if (ISALLTRUE(a) && ISALLTRUE(b))
+ *result = true;
+ else if (ISALLTRUE(a))
+ *result = false;
+ else if (ISALLTRUE(b))
+ *result = false;
+ else
+ {
+ int4 i;
+ BITVECP sa = GETSIGN(a),
+ sb = GETSIGN(b);
+
+ *result = true;
+ LOOPBYTE(
+ if (sa[i] != sb[i])
+ {
+ *result = false;
+ break;
+ }
+ );
+ }
+ }
+ else
+ { /* a and b ISARRKEY */
+ int4 lena = ARRNELEM(a),
+ lenb = ARRNELEM(b);
+
+ if (lena != lenb)
+ *result = false;
+ else
+ {
+ int4 *ptra = GETARR(a),
+ *ptrb = GETARR(b);
+ int4 i;
+
+ *result = true;
+ for (i = 0; i < lena; i++)
+ if (ptra[i] != ptrb[i])
+ {
+ *result = false;
+ break;
+ }
+ }
+ }
+
+ PG_RETURN_POINTER(result);
+}
+
+static int4
+sizebitvec(BITVECP sign)
+{
+ int4 size = 0,
+ i;
+
+ LOOPBYTE(
+ size += number_of_ones[(unsigned char) sign[i]];
+ );
+ return size;
+}
+
+static int
+hemdistsign(BITVECP a, BITVECP b)
+{
+ int i,
+ diff,
+ dist = 0;
+
+ LOOPBYTE(
+ diff = (unsigned char) (a[i] ^ b[i]);
+ dist += number_of_ones[diff];
+ );
+ return dist;
+}
+
+static int
+hemdist(SignTSVector * a, SignTSVector * b)
+{
+ if (ISALLTRUE(a))
+ {
+ if (ISALLTRUE(b))
+ return 0;
+ else
+ return SIGLENBIT - sizebitvec(GETSIGN(b));
+ }
+ else if (ISALLTRUE(b))
+ return SIGLENBIT - sizebitvec(GETSIGN(a));
+
+ return hemdistsign(GETSIGN(a), GETSIGN(b));
+}
+
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+ GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+ float *penalty = (float *) PG_GETARG_POINTER(2);
+ SignTSVector *origval = (SignTSVector *) DatumGetPointer(origentry->key);
+ SignTSVector *newval = (SignTSVector *) DatumGetPointer(newentry->key);
+ BITVECP orig = GETSIGN(origval);
+
+ *penalty = 0.0;
+
+ if (ISARRKEY(newval))
+ {
+ BITVEC sign;
+
+ makesign(sign, newval);
+
+ if (ISALLTRUE(origval))
+ *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
+ else
+ *penalty = hemdistsign(sign, orig);
+ }
+ else
+ *penalty = hemdist(origval, newval);
+ PG_RETURN_POINTER(penalty);
+}
+
+typedef struct
+{
+ bool allistrue;
+ BITVEC sign;
+} CACHESIGN;
+
+static void
+fillcache(CACHESIGN * item, SignTSVector * key)
+{
+ item->allistrue = false;
+ if (ISARRKEY(key))
+ makesign(item->sign, key);
+ else if (ISALLTRUE(key))
+ item->allistrue = true;
+ else
+ memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+}
+
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+typedef struct
+{
+ OffsetNumber pos;
+ int4 cost;
+} SPLITCOST;
+
+static int
+comparecost(const void *a, const void *b)
+{
+ if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
+ return 0;
+ else
+ return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
+}
+
+
+static int
+hemdistcache(CACHESIGN * a, CACHESIGN * b)
+{
+ if (a->allistrue)
+ {
+ if (b->allistrue)
+ return 0;
+ else
+ return SIGLENBIT - sizebitvec(b->sign);
+ }
+ else if (b->allistrue)
+ return SIGLENBIT - sizebitvec(a->sign);
+
+ return hemdistsign(a->sign, b->sign);
+}
+
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+ OffsetNumber k,
+ j;
+ SignTSVector *datum_l,
+ *datum_r;
+ BITVECP union_l,
+ union_r;
+ int4 size_alpha,
+ size_beta;
+ int4 size_waste,
+ waste = -1;
+ int4 nbytes;
+ OffsetNumber seed_1 = 0,
+ seed_2 = 0;
+ OffsetNumber *left,
+ *right;
+ OffsetNumber maxoff;
+ BITVECP ptr;
+ int i;
+ CACHESIGN *cache;
+ SPLITCOST *costvector;
+
+ maxoff = entryvec->n - 2;
+ nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+ v->spl_left = (OffsetNumber *) palloc(nbytes);
+ v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+ cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+ fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+
+ for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
+ {
+ for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
+ {
+ if (k == FirstOffsetNumber)
+ fillcache(&cache[j], GETENTRY(entryvec, j));
+
+ size_waste = hemdistcache(&(cache[j]), &(cache[k]));
+ if (size_waste > waste)
+ {
+ waste = size_waste;
+ seed_1 = k;
+ seed_2 = j;
+ }
+ }
+ }
+
+ left = v->spl_left;
+ v->spl_nleft = 0;
+ right = v->spl_right;
+ v->spl_nright = 0;
+
+ if (seed_1 == 0 || seed_2 == 0)
+ {
+ seed_1 = 1;
+ seed_2 = 2;
+ }
+
+ /* form initial .. */
+ if (cache[seed_1].allistrue)
+ {
+ datum_l = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+ SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+ datum_l->flag = SIGNKEY | ALLISTRUE;
+ }
+ else
+ {
+ datum_l = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY, 0));
+ SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
+ datum_l->flag = SIGNKEY;
+ memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
+ }
+ if (cache[seed_2].allistrue)
+ {
+ datum_r = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+ SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+ datum_r->flag = SIGNKEY | ALLISTRUE;
+ }
+ else
+ {
+ datum_r = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY, 0));
+ SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
+ datum_r->flag = SIGNKEY;
+ memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
+ }
+
+ union_l = GETSIGN(datum_l);
+ union_r = GETSIGN(datum_r);
+ maxoff = OffsetNumberNext(maxoff);
+ fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+ /* sort before ... */
+ costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+ for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
+ {
+ costvector[j - 1].pos = j;
+ size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
+ size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
+ costvector[j - 1].cost = Abs(size_alpha - size_beta);
+ }
+ qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+ for (k = 0; k < maxoff; k++)
+ {
+ j = costvector[k].pos;
+ if (j == seed_1)
+ {
+ *left++ = j;
+ v->spl_nleft++;
+ continue;
+ }
+ else if (j == seed_2)
+ {
+ *right++ = j;
+ v->spl_nright++;
+ continue;
+ }
+
+ if (ISALLTRUE(datum_l) || cache[j].allistrue)
+ {
+ if (ISALLTRUE(datum_l) && cache[j].allistrue)
+ size_alpha = 0;
+ else
+ size_alpha = SIGLENBIT - sizebitvec(
+ (cache[j].allistrue) ? GETSIGN(datum_l) : GETSIGN(cache[j].sign)
+ );
+ }
+ else
+ size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
+
+ if (ISALLTRUE(datum_r) || cache[j].allistrue)
+ {
+ if (ISALLTRUE(datum_r) && cache[j].allistrue)
+ size_beta = 0;
+ else
+ size_beta = SIGLENBIT - sizebitvec(
+ (cache[j].allistrue) ? GETSIGN(datum_r) : GETSIGN(cache[j].sign)
+ );
+ }
+ else
+ size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
+
+ if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
+ {
+ if (ISALLTRUE(datum_l) || cache[j].allistrue)
+ {
+ if (!ISALLTRUE(datum_l))
+ MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+ }
+ else
+ {
+ ptr = cache[j].sign;
+ LOOPBYTE(
+ union_l[i] |= ptr[i];
+ );
+ }
+ *left++ = j;
+ v->spl_nleft++;
+ }
+ else
+ {
+ if (ISALLTRUE(datum_r) || cache[j].allistrue)
+ {
+ if (!ISALLTRUE(datum_r))
+ MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+ }
+ else
+ {
+ ptr = cache[j].sign;
+ LOOPBYTE(
+ union_r[i] |= ptr[i];
+ );
+ }
+ *right++ = j;
+ v->spl_nright++;
+ }
+ }
+
+ *right = *left = FirstOffsetNumber;
+ v->spl_ldatum = PointerGetDatum(datum_l);
+ v->spl_rdatum = PointerGetDatum(datum_r);
+
+ PG_RETURN_POINTER(v);
+}
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
new file mode 100644
index 00000000000..1f8abb3298e
--- /dev/null
+++ b/src/backend/utils/adt/tsquery.c
@@ -0,0 +1,767 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsquery.c
+ * I/O functions for tsquery
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "libpq/pqformat.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_utils.h"
+#include "utils/memutils.h"
+#include "utils/pg_crc.h"
+
+/* parser's states */
+#define WAITOPERAND 1
+#define WAITOPERATOR 2
+#define WAITFIRSTOPERAND 3
+#define WAITSINGLEOPERAND 4
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser
+ */
+typedef struct ParseQueryNode
+{
+ int2 weight;
+ int2 type;
+ int4 val;
+ int2 distance;
+ int2 length;
+ struct ParseQueryNode *next;
+} ParseQueryNode;
+
+static char *
+get_weight(char *buf, int2 *weight)
+{
+ *weight = 0;
+
+ if (!t_iseq(buf, ':'))
+ return buf;
+
+ buf++;
+ while (*buf && pg_mblen(buf) == 1)
+ {
+ switch (*buf)
+ {
+ case 'a':
+ case 'A':
+ *weight |= 1 << 3;
+ break;
+ case 'b':
+ case 'B':
+ *weight |= 1 << 2;
+ break;
+ case 'c':
+ case 'C':
+ *weight |= 1 << 1;
+ break;
+ case 'd':
+ case 'D':
+ *weight |= 1;
+ break;
+ default:
+ return buf;
+ }
+ buf++;
+ }
+
+ return buf;
+}
+
+/*
+ * get token from query string
+ */
+static int4
+gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+ while (1)
+ {
+ switch (state->state)
+ {
+ case WAITFIRSTOPERAND:
+ case WAITOPERAND:
+ if (t_iseq(state->buf, '!'))
+ {
+ (state->buf)++; /* can safely ++, t_iseq guarantee
+ * that pg_mblen()==1 */
+ *val = (int4) '!';
+ state->state = WAITOPERAND;
+ return OPR;
+ }
+ else if (t_iseq(state->buf, '('))
+ {
+ state->count++;
+ (state->buf)++;
+ state->state = WAITOPERAND;
+ return OPEN;
+ }
+ else if (t_iseq(state->buf, ':'))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error at start of operand in tsearch query: \"%s\"",
+ state->buffer)));
+ }
+ else if (!t_isspace(state->buf))
+ {
+ state->valstate.prsbuf = state->buf;
+ if (gettoken_tsvector(&(state->valstate)))
+ {
+ *strval = state->valstate.word;
+ *lenval = state->valstate.curpos - state->valstate.word;
+ state->buf = get_weight(state->valstate.prsbuf, weight);
+ state->state = WAITOPERATOR;
+ return VAL;
+ }
+ else if (state->state == WAITFIRSTOPERAND)
+ return END;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("no operand in tsearch query: \"%s\"",
+ state->buffer)));
+ }
+ break;
+ case WAITOPERATOR:
+ if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))
+ {
+ state->state = WAITOPERAND;
+ *val = (int4) *(state->buf);
+ (state->buf)++;
+ return OPR;
+ }
+ else if (t_iseq(state->buf, ')'))
+ {
+ (state->buf)++;
+ state->count--;
+ return (state->count < 0) ? ERR : CLOSE;
+ }
+ else if (*(state->buf) == '\0')
+ return (state->count) ? ERR : END;
+ else if (!t_isspace(state->buf))
+ return ERR;
+ break;
+ case WAITSINGLEOPERAND:
+ if (*(state->buf) == '\0')
+ return END;
+ *strval = state->buf;
+ *lenval = strlen(state->buf);
+ state->buf += strlen(state->buf);
+ state->count++;
+ return VAL;
+ default:
+ return ERR;
+ break;
+ }
+ state->buf += pg_mblen(state->buf);
+ }
+ return END;
+}
+
+/*
+ * push new one in polish notation reverse view
+ */
+void
+pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+ ParseQueryNode *tmp = (ParseQueryNode *) palloc(sizeof(ParseQueryNode));
+
+ tmp->weight = weight;
+ tmp->type = type;
+ tmp->val = val;
+ if (distance >= MAXSTRPOS)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("value is too big in tsearch query: \"%s\"",
+ state->buffer)));
+ if (lenval >= MAXSTRLEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("operand is too long in tsearch query: \"%s\"",
+ state->buffer)));
+ tmp->distance = distance;
+ tmp->length = lenval;
+ tmp->next = state->str;
+ state->str = tmp;
+ state->num++;
+}
+
+/*
+ * This function is used for tsquery parsing
+ */
+void
+pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int2 weight)
+{
+ pg_crc32 c;
+
+ if (lenval >= MAXSTRLEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("word is too long in tsearch query: \"%s\"",
+ state->buffer)));
+
+ INIT_CRC32(c);
+ COMP_CRC32(c, strval, lenval);
+ FIN_CRC32(c);
+ pushquery(state, type, *(int4 *) &c,
+ state->curop - state->op, lenval, weight);
+
+ while (state->curop - state->op + lenval + 1 >= state->lenop)
+ {
+ int4 tmp = state->curop - state->op;
+
+ state->lenop *= 2;
+ state->op = (char *) repalloc((void *) state->op, state->lenop);
+ state->curop = state->op + tmp;
+ }
+ memcpy((void *) state->curop, (void *) strval, lenval);
+ state->curop += lenval;
+ *(state->curop) = '\0';
+ state->curop++;
+ state->sumlen += lenval + 1 /* \0 */ ;
+ return;
+}
+
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ */
+static int4
+makepol(TSQueryParserState * state, void (*pushval) (TSQueryParserState *, int, char *, int, int2))
+{
+ int4 val = 0,
+ type;
+ int4 lenval = 0;
+ char *strval = NULL;
+ int4 stack[STACKDEPTH];
+ int4 lenstack = 0;
+ int2 weight = 0;
+
+ while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+ {
+ switch (type)
+ {
+ case VAL:
+ pushval(state, VAL, strval, lenval, weight);
+ while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+ stack[lenstack - 1] == (int4) '!'))
+ {
+ lenstack--;
+ pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ }
+ break;
+ case OPR:
+ if (lenstack && val == (int4) '|')
+ pushquery(state, OPR, val, 0, 0, 0);
+ else
+ {
+ if (lenstack == STACKDEPTH) /* internal error */
+ elog(ERROR, "tsquery stack too small");
+ stack[lenstack] = val;
+ lenstack++;
+ }
+ break;
+ case OPEN:
+ if (makepol(state, pushval) == ERR)
+ return ERR;
+ if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+ stack[lenstack - 1] == (int4) '!'))
+ {
+ lenstack--;
+ pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ }
+ break;
+ case CLOSE:
+ while (lenstack)
+ {
+ lenstack--;
+ pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ };
+ return END;
+ break;
+ case ERR:
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsearch query: \"%s\"",
+ state->buffer)));
+ return ERR;
+
+ }
+ }
+ while (lenstack)
+ {
+ lenstack--;
+ pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ };
+ return END;
+}
+
+static void
+findoprnd(QueryItem * ptr, int4 *pos)
+{
+ if (ptr[*pos].type == VAL || ptr[*pos].type == VALSTOP)
+ {
+ ptr[*pos].left = 0;
+ (*pos)++;
+ }
+ else if (ptr[*pos].val == (int4) '!')
+ {
+ ptr[*pos].left = 1;
+ (*pos)++;
+ findoprnd(ptr, pos);
+ }
+ else
+ {
+ QueryItem *curitem = &ptr[*pos];
+ int4 tmp = *pos;
+
+ (*pos)++;
+ findoprnd(ptr, pos);
+ curitem->left = *pos - tmp;
+ findoprnd(ptr, pos);
+ }
+}
+
+
+/*
+ * input
+ */
+TSQuery
+parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int, int2), Oid cfg_id, bool isplain)
+{
+ TSQueryParserState state;
+ int4 i;
+ TSQuery query;
+ int4 commonlen;
+ QueryItem *ptr;
+ ParseQueryNode *tmp;
+ int4 pos = 0;
+
+ /* init state */
+ state.buffer = buf;
+ state.buf = buf;
+ state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
+ state.count = 0;
+ state.num = 0;
+ state.str = NULL;
+ state.cfg_id = cfg_id;
+
+ /* init value parser's state */
+ state.valstate.oprisdelim = true;
+ state.valstate.len = 32;
+ state.valstate.word = (char *) palloc(state.valstate.len);
+
+ /* init list of operand */
+ state.sumlen = 0;
+ state.lenop = 64;
+ state.curop = state.op = (char *) palloc(state.lenop);
+ *(state.curop) = '\0';
+
+ /* parse query & make polish notation (postfix, but in reverse order) */
+ makepol(&state, pushval);
+ pfree(state.valstate.word);
+ if (!state.num)
+ {
+ ereport(NOTICE,
+ (errmsg("tsearch query doesn't contain lexeme(s): \"%s\"",
+ state.buffer)));
+ query = (TSQuery) palloc(HDRSIZETQ);
+ SET_VARSIZE(query, HDRSIZETQ);
+ query->size = 0;
+ return query;
+ }
+
+ /* make finish struct */
+ commonlen = COMPUTESIZE(state.num, state.sumlen);
+ query = (TSQuery) palloc(commonlen);
+ SET_VARSIZE(query, commonlen);
+ query->size = state.num;
+ ptr = GETQUERY(query);
+
+ /* set item in polish notation */
+ for (i = 0; i < state.num; i++)
+ {
+ ptr[i].weight = state.str->weight;
+ ptr[i].type = state.str->type;
+ ptr[i].val = state.str->val;
+ ptr[i].distance = state.str->distance;
+ ptr[i].length = state.str->length;
+ tmp = state.str->next;
+ pfree(state.str);
+ state.str = tmp;
+ }
+
+ /* set user friendly-operand view */
+ memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+ pfree(state.op);
+
+ /* set left operand's position for every operator */
+ pos = 0;
+ findoprnd(ptr, &pos);
+
+ return query;
+}
+
+/*
+ * in without morphology
+ */
+Datum
+tsqueryin(PG_FUNCTION_ARGS)
+{
+ char *in = PG_GETARG_CSTRING(0);
+
+ pg_verifymbstr(in, strlen(in), false);
+
+ PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, InvalidOid, false));
+}
+
+/*
+ * out function
+ */
+typedef struct
+{
+ QueryItem *curpol;
+ char *buf;
+ char *cur;
+ char *op;
+ int4 buflen;
+} INFIX;
+
+#define RESIZEBUF(inf,addsize) \
+while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
+{ \
+ int4 len = (inf)->cur - (inf)->buf; \
+ (inf)->buflen *= 2; \
+ (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
+ (inf)->cur = (inf)->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ */
+static void
+infix(INFIX * in, bool first)
+{
+ if (in->curpol->type == VAL)
+ {
+ char *op = in->op + in->curpol->distance;
+ int clen;
+
+ RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
+ *(in->cur) = '\'';
+ in->cur++;
+ while (*op)
+ {
+ if (t_iseq(op, '\''))
+ {
+ *(in->cur) = '\'';
+ in->cur++;
+ }
+ COPYCHAR(in->cur, op);
+
+ clen = pg_mblen(op);
+ op += clen;
+ in->cur += clen;
+ }
+ *(in->cur) = '\'';
+ in->cur++;
+ if (in->curpol->weight)
+ {
+ *(in->cur) = ':';
+ in->cur++;
+ if (in->curpol->weight & (1 << 3))
+ {
+ *(in->cur) = 'A';
+ in->cur++;
+ }
+ if (in->curpol->weight & (1 << 2))
+ {
+ *(in->cur) = 'B';
+ in->cur++;
+ }
+ if (in->curpol->weight & (1 << 1))
+ {
+ *(in->cur) = 'C';
+ in->cur++;
+ }
+ if (in->curpol->weight & 1)
+ {
+ *(in->cur) = 'D';
+ in->cur++;
+ }
+ }
+ *(in->cur) = '\0';
+ in->curpol++;
+ }
+ else if (in->curpol->val == (int4) '!')
+ {
+ bool isopr = false;
+
+ RESIZEBUF(in, 1);
+ *(in->cur) = '!';
+ in->cur++;
+ *(in->cur) = '\0';
+ in->curpol++;
+ if (in->curpol->type == OPR)
+ {
+ isopr = true;
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, "( ");
+ in->cur = strchr(in->cur, '\0');
+ }
+ infix(in, isopr);
+ if (isopr)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, " )");
+ in->cur = strchr(in->cur, '\0');
+ }
+ }
+ else
+ {
+ int4 op = in->curpol->val;
+ INFIX nrm;
+
+ in->curpol++;
+ if (op == (int4) '|' && !first)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, "( ");
+ in->cur = strchr(in->cur, '\0');
+ }
+
+ nrm.curpol = in->curpol;
+ nrm.op = in->op;
+ nrm.buflen = 16;
+ nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+ /* get right operand */
+ infix(&nrm, false);
+
+ /* get & print left operand */
+ in->curpol = nrm.curpol;
+ infix(in, false);
+
+ /* print operator & right operand */
+ RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+ sprintf(in->cur, " %c %s", op, nrm.buf);
+ in->cur = strchr(in->cur, '\0');
+ pfree(nrm.buf);
+
+ if (op == (int4) '|' && !first)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, " )");
+ in->cur = strchr(in->cur, '\0');
+ }
+ }
+}
+
+
+Datum
+tsqueryout(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ INFIX nrm;
+
+ if (query->size == 0)
+ {
+ char *b = palloc(1);
+
+ *b = '\0';
+ PG_RETURN_POINTER(b);
+ }
+ nrm.curpol = GETQUERY(query);
+ nrm.buflen = 32;
+ nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+ *(nrm.cur) = '\0';
+ nrm.op = GETOPERAND(query);
+ infix(&nrm, true);
+
+ PG_FREE_IF_COPY(query, 0);
+ PG_RETURN_CSTRING(nrm.buf);
+}
+
+Datum
+tsquerysend(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ StringInfoData buf;
+ int i;
+ QueryItem *item = GETQUERY(query);
+
+ pq_begintypsend(&buf);
+
+ pq_sendint(&buf, query->size, sizeof(int32));
+ for (i = 0; i < query->size; i++)
+ {
+ int tmp;
+
+ pq_sendint(&buf, item->type, sizeof(item->type));
+ pq_sendint(&buf, item->weight, sizeof(item->weight));
+ pq_sendint(&buf, item->left, sizeof(item->left));
+ pq_sendint(&buf, item->val, sizeof(item->val));
+
+ /*
+ * We are sure that sizeof(WordEntry) == sizeof(int32), and about
+ * layout of QueryItem
+ */
+ tmp = *(int32 *) (((char *) item) + HDRSIZEQI);
+ pq_sendint(&buf, tmp, sizeof(tmp));
+
+ item++;
+ }
+
+ item = GETQUERY(query);
+ for (i = 0; i < query->size; i++)
+ {
+ if (item->type == VAL)
+ pq_sendbytes(&buf, GETOPERAND(query) + item->distance, item->length);
+ item++;
+ }
+
+ PG_FREE_IF_COPY(query, 0);
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+Datum
+tsqueryrecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ TSQuery query;
+ int i,
+ size,
+ tmp,
+ len = HDRSIZETQ;
+ QueryItem *item;
+ int datalen = 0;
+ char *ptr;
+
+ size = pq_getmsgint(buf, sizeof(uint32));
+ if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
+ elog(ERROR, "invalid size of tsquery");
+ len += sizeof(QueryItem) * size;
+
+ query = (TSQuery) palloc(len);
+ query->size = size;
+ item = GETQUERY(query);
+
+ for (i = 0; i < size; i++)
+ {
+ item->type = (int8) pq_getmsgint(buf, sizeof(int8));
+ item->weight = (int8) pq_getmsgint(buf, sizeof(int8));
+ item->left = (int16) pq_getmsgint(buf, sizeof(int16));
+ item->val = (int32) pq_getmsgint(buf, sizeof(int32));
+ tmp = pq_getmsgint(buf, sizeof(int32));
+ memcpy((((char *) item) + HDRSIZEQI), &tmp, sizeof(int32));
+
+ /*
+ * Sanity checks
+ */
+ if (item->type == VAL)
+ {
+ datalen += item->length + 1; /* \0 */
+ }
+ else if (item->type == OPR)
+ {
+ if (item->val == '|' || item->val == '&')
+ {
+ if (item->left <= 0 || i + item->left >= size)
+ elog(ERROR, "invalid pointer to left operand");
+ }
+
+ if (i == size - 1)
+ elog(ERROR, "invalid pointer to right operand");
+ }
+ else
+ elog(ERROR, "unknown tsquery node type");
+
+ item++;
+ }
+
+ query = (TSQuery) repalloc(query, len + datalen);
+
+ item = GETQUERY(query);
+ ptr = GETOPERAND(query);
+ for (i = 0; i < size; i++)
+ {
+ if (item->type == VAL)
+ {
+ item->distance = ptr - GETOPERAND(query);
+ memcpy(ptr,
+ pq_getmsgbytes(buf, item->length),
+ item->length);
+ ptr += item->length;
+ *ptr++ = '\0';
+ }
+ item++;
+ }
+
+ Assert(ptr - GETOPERAND(query) == datalen);
+
+ SET_VARSIZE(query, len + datalen);
+
+ PG_RETURN_TSVECTOR(query);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ INFIX nrm;
+ text *res;
+ QueryItem *q;
+ int4 len;
+
+ if (query->size == 0)
+ {
+ res = (text *) palloc(VARHDRSZ);
+ SET_VARSIZE(res, VARHDRSZ);
+ PG_RETURN_POINTER(res);
+ }
+
+ q = clean_NOT(GETQUERY(query), &len);
+
+ if (!q)
+ {
+ res = (text *) palloc(1 + VARHDRSZ);
+ SET_VARSIZE(res, 1 + VARHDRSZ);
+ *((char *) VARDATA(res)) = 'T';
+ }
+ else
+ {
+ nrm.curpol = q;
+ nrm.buflen = 32;
+ nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+ *(nrm.cur) = '\0';
+ nrm.op = GETOPERAND(query);
+ infix(&nrm, true);
+
+ res = (text *) palloc(nrm.cur - nrm.buf + VARHDRSZ);
+ SET_VARSIZE(res, nrm.cur - nrm.buf + VARHDRSZ);
+ strncpy(VARDATA(res), nrm.buf, nrm.cur - nrm.buf);
+ pfree(q);
+ }
+
+ PG_FREE_IF_COPY(query, 0);
+
+ PG_RETURN_POINTER(res);
+}
diff --git a/src/backend/utils/adt/tsquery_cleanup.c b/src/backend/utils/adt/tsquery_cleanup.c
new file mode 100644
index 00000000000..7991a4ad198
--- /dev/null
+++ b/src/backend/utils/adt/tsquery_cleanup.c
@@ -0,0 +1,261 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsquery_cleanup.c
+ * Cleanup query from NOT values and/or stopword
+ * Utility functions to correct work.
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_utils.h"
+
+typedef struct NODE
+{
+ struct NODE *left;
+ struct NODE *right;
+ QueryItem *valnode;
+} NODE;
+
+/*
+ * make query tree from plain view of query
+ */
+static NODE *
+maketree(QueryItem * in)
+{
+ NODE *node = (NODE *) palloc(sizeof(NODE));
+
+ node->valnode = in;
+ node->right = node->left = NULL;
+ if (in->type == OPR)
+ {
+ node->right = maketree(in + 1);
+ if (in->val != (int4) '!')
+ node->left = maketree(in + in->left);
+ }
+ return node;
+}
+
+typedef struct
+{
+ QueryItem *ptr;
+ int4 len;
+ int4 cur;
+} PLAINTREE;
+
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+ if (state->cur == state->len)
+ {
+ state->len *= 2;
+ state->ptr = (QueryItem *) repalloc((void *) state->ptr, state->len * sizeof(QueryItem));
+ }
+ memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(QueryItem));
+ if (node->valnode->type == VAL)
+ state->cur++;
+ else if (node->valnode->val == (int4) '!')
+ {
+ state->ptr[state->cur].left = 1;
+ state->cur++;
+ plainnode(state, node->right);
+ }
+ else
+ {
+ int4 cur = state->cur;
+
+ state->cur++;
+ plainnode(state, node->right);
+ state->ptr[cur].left = state->cur - cur;
+ plainnode(state, node->left);
+ }
+ pfree(node);
+}
+
+/*
+ * make plain view of tree from 'normal' view of tree
+ */
+static QueryItem *
+plaintree(NODE * root, int4 *len)
+{
+ PLAINTREE pl;
+
+ pl.cur = 0;
+ pl.len = 16;
+ if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+ {
+ pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem));
+ plainnode(&pl, root);
+ }
+ else
+ pl.ptr = NULL;
+ *len = pl.cur;
+ return pl.ptr;
+}
+
+static void
+freetree(NODE * node)
+{
+ if (!node)
+ return;
+ if (node->left)
+ freetree(node->left);
+ if (node->right)
+ freetree(node->right);
+ pfree(node);
+}
+
+/*
+ * clean tree for ! operator.
+ * It's usefull for debug, but in
+ * other case, such view is used with search in index.
+ * Operator ! always return TRUE
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+ if (node->valnode->type == VAL)
+ return node;
+
+ if (node->valnode->val == (int4) '!')
+ {
+ freetree(node);
+ return NULL;
+ }
+
+ /* operator & or | */
+ if (node->valnode->val == (int4) '|')
+ {
+ if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+ (node->right = clean_NOT_intree(node->right)) == NULL)
+ {
+ freetree(node);
+ return NULL;
+ }
+ }
+ else
+ {
+ NODE *res = node;
+
+ node->left = clean_NOT_intree(node->left);
+ node->right = clean_NOT_intree(node->right);
+ if (node->left == NULL && node->right == NULL)
+ {
+ pfree(node);
+ res = NULL;
+ }
+ else if (node->left == NULL)
+ {
+ res = node->right;
+ pfree(node);
+ }
+ else if (node->right == NULL)
+ {
+ res = node->left;
+ pfree(node);
+ }
+ return res;
+ }
+ return node;
+}
+
+QueryItem *
+clean_NOT(QueryItem * ptr, int4 *len)
+{
+ NODE *root = maketree(ptr);
+
+ return plaintree(clean_NOT_intree(root), len);
+}
+
+
+#ifdef V_UNKNOWN /* exists in Windows headers */
+#undef V_UNKNOWN
+#endif
+
+#define V_UNKNOWN 0
+#define V_TRUE 1
+#define V_FALSE 2
+#define V_STOP 3
+
+/*
+ * Clean query tree from values which is always in
+ * text (stopword)
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+ char lresult = V_UNKNOWN,
+ rresult = V_UNKNOWN;
+
+ if (node->valnode->type == VAL)
+ return node;
+ else if (node->valnode->type == VALSTOP)
+ {
+ pfree(node);
+ *result = V_STOP;
+ return NULL;
+ }
+
+
+ if (node->valnode->val == (int4) '!')
+ {
+ node->right = clean_fakeval_intree(node->right, &rresult);
+ if (!node->right)
+ {
+ *result = V_STOP;
+ freetree(node);
+ return NULL;
+ }
+ }
+ else
+ {
+ NODE *res = node;
+
+ node->left = clean_fakeval_intree(node->left, &lresult);
+ node->right = clean_fakeval_intree(node->right, &rresult);
+ if (lresult == V_STOP && rresult == V_STOP)
+ {
+ freetree(node);
+ *result = V_STOP;
+ return NULL;
+ }
+ else if (lresult == V_STOP)
+ {
+ res = node->right;
+ pfree(node);
+ }
+ else if (rresult == V_STOP)
+ {
+ res = node->left;
+ pfree(node);
+ }
+ return res;
+ }
+ return node;
+}
+
+QueryItem *
+clean_fakeval(QueryItem * ptr, int4 *len)
+{
+ NODE *root = maketree(ptr);
+ char result = V_UNKNOWN;
+ NODE *resroot;
+
+ resroot = clean_fakeval_intree(root, &result);
+ if (result != V_UNKNOWN)
+ {
+ elog(NOTICE, "query contains only stopword(s) or doesn't contain lexeme(s), ignored");
+ *len = 0;
+ return NULL;
+ }
+
+ return plaintree(resroot, len);
+}
diff --git a/src/backend/utils/adt/tsquery_gist.c b/src/backend/utils/adt/tsquery_gist.c
new file mode 100644
index 00000000000..0deca10075c
--- /dev/null
+++ b/src/backend/utils/adt/tsquery_gist.c
@@ -0,0 +1,259 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsquery_gist.c
+ * GiST index support for tsquery
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_gist.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/skey.h"
+#include "access/gist.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_utils.h"
+
+#define GETENTRY(vec,pos) ((TSQuerySign *) DatumGetPointer((vec)->vector[(pos)].key))
+
+Datum
+gtsquery_compress(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ GISTENTRY *retval = entry;
+
+ if (entry->leafkey)
+ {
+ TSQuerySign *sign = (TSQuerySign *) palloc(sizeof(TSQuerySign));
+
+ retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+ *sign = makeTSQuerySign(DatumGetTSQuery(entry->key));
+
+ gistentryinit(*retval, PointerGetDatum(sign),
+ entry->rel, entry->page,
+ entry->offset, FALSE);
+ }
+
+ PG_RETURN_POINTER(retval);
+}
+
+Datum
+gtsquery_decompress(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(PG_GETARG_DATUM(0));
+}
+
+Datum
+gtsquery_consistent(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ TSQuerySign *key = (TSQuerySign *) DatumGetPointer(entry->key);
+ TSQuery query = PG_GETARG_TSQUERY(1);
+ StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
+ TSQuerySign sq = makeTSQuerySign(query);
+ bool retval;
+
+ switch (strategy)
+ {
+ case RTContainsStrategyNumber:
+ if (GIST_LEAF(entry))
+ retval = (*key & sq) == sq;
+ else
+ retval = (*key & sq) != 0;
+ break;
+ case RTContainedByStrategyNumber:
+ if (GIST_LEAF(entry))
+ retval = (*key & sq) == *key;
+ else
+ retval = (*key & sq) != 0;
+ break;
+ default:
+ retval = FALSE;
+ }
+ PG_RETURN_BOOL(retval);
+}
+
+Datum
+gtsquery_union(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ int *size = (int *) PG_GETARG_POINTER(1);
+ TSQuerySign *sign = (TSQuerySign *) palloc(sizeof(TSQuerySign));
+ int i;
+
+ memset(sign, 0, sizeof(TSQuerySign));
+
+ for (i = 0; i < entryvec->n; i++)
+ *sign |= *GETENTRY(entryvec, i);
+
+ *size = sizeof(TSQuerySign);
+
+ PG_RETURN_POINTER(sign);
+}
+
+Datum
+gtsquery_same(PG_FUNCTION_ARGS)
+{
+ TSQuerySign *a = (TSQuerySign *) PG_GETARG_POINTER(0);
+ TSQuerySign *b = (TSQuerySign *) PG_GETARG_POINTER(1);
+
+ PG_RETURN_POINTER(*a == *b);
+}
+
+static int
+sizebitvec(TSQuerySign sign)
+{
+ int size = 0,
+ i;
+
+ for (i = 0; i < TSQS_SIGLEN; i++)
+ size += 0x01 & (sign >> i);
+
+ return size;
+}
+
+static int
+hemdist(TSQuerySign a, TSQuerySign b)
+{
+ TSQuerySign res = a ^ b;
+
+ return sizebitvec(res);
+}
+
+Datum
+gtsquery_penalty(PG_FUNCTION_ARGS)
+{
+ TSQuerySign *origval = (TSQuerySign *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
+ TSQuerySign *newval = (TSQuerySign *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
+ float *penalty = (float *) PG_GETARG_POINTER(2);
+
+ *penalty = hemdist(*origval, *newval);
+
+ PG_RETURN_POINTER(penalty);
+}
+
+
+typedef struct
+{
+ OffsetNumber pos;
+ int4 cost;
+} SPLITCOST;
+
+static int
+comparecost(const void *a, const void *b)
+{
+ if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
+ return 0;
+ else
+ return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
+}
+
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+
+Datum
+gtsquery_picksplit(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+ OffsetNumber maxoff = entryvec->n - 2;
+ OffsetNumber k,
+ j;
+
+ TSQuerySign *datum_l,
+ *datum_r;
+ int4 size_alpha,
+ size_beta;
+ int4 size_waste,
+ waste = -1;
+ int4 nbytes;
+ OffsetNumber seed_1 = 0,
+ seed_2 = 0;
+ OffsetNumber *left,
+ *right;
+
+ SPLITCOST *costvector;
+
+ nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+ left = v->spl_left = (OffsetNumber *) palloc(nbytes);
+ right = v->spl_right = (OffsetNumber *) palloc(nbytes);
+ v->spl_nleft = v->spl_nright = 0;
+
+ for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
+ for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
+ {
+ size_waste = hemdist(*GETENTRY(entryvec, j), *GETENTRY(entryvec, k));
+ if (size_waste > waste)
+ {
+ waste = size_waste;
+ seed_1 = k;
+ seed_2 = j;
+ }
+ }
+
+
+ if (seed_1 == 0 || seed_2 == 0)
+ {
+ seed_1 = 1;
+ seed_2 = 2;
+ }
+
+ datum_l = (TSQuerySign *) palloc(sizeof(TSQuerySign));
+ *datum_l = *GETENTRY(entryvec, seed_1);
+ datum_r = (TSQuerySign *) palloc(sizeof(TSQuerySign));
+ *datum_r = *GETENTRY(entryvec, seed_2);
+
+
+ maxoff = OffsetNumberNext(maxoff);
+ costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+ for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
+ {
+ costvector[j - 1].pos = j;
+ size_alpha = hemdist(*GETENTRY(entryvec, seed_1), *GETENTRY(entryvec, j));
+ size_beta = hemdist(*GETENTRY(entryvec, seed_2), *GETENTRY(entryvec, j));
+ costvector[j - 1].cost = abs(size_alpha - size_beta);
+ }
+ qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+ for (k = 0; k < maxoff; k++)
+ {
+ j = costvector[k].pos;
+ if (j == seed_1)
+ {
+ *left++ = j;
+ v->spl_nleft++;
+ continue;
+ }
+ else if (j == seed_2)
+ {
+ *right++ = j;
+ v->spl_nright++;
+ continue;
+ }
+ size_alpha = hemdist(*datum_l, *GETENTRY(entryvec, j));
+ size_beta = hemdist(*datum_r, *GETENTRY(entryvec, j));
+
+ if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05))
+ {
+ *datum_l |= *GETENTRY(entryvec, j);
+ *left++ = j;
+ v->spl_nleft++;
+ }
+ else
+ {
+ *datum_r |= *GETENTRY(entryvec, j);
+ *right++ = j;
+ v->spl_nright++;
+ }
+ }
+
+ *right = *left = FirstOffsetNumber;
+ v->spl_ldatum = PointerGetDatum(datum_l);
+ v->spl_rdatum = PointerGetDatum(datum_r);
+
+ PG_RETURN_POINTER(v);
+}
diff --git a/src/backend/utils/adt/tsquery_op.c b/src/backend/utils/adt/tsquery_op.c
new file mode 100644
index 00000000000..fd97c2796df
--- /dev/null
+++ b/src/backend/utils/adt/tsquery_op.c
@@ -0,0 +1,289 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsquery_op.c
+ * Various operations with tsquery
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+#include "utils/pg_crc.h"
+
+Datum
+tsquery_numnode(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ int nnode = query->size;
+
+ PG_FREE_IF_COPY(query, 0);
+ PG_RETURN_INT32(nnode);
+}
+
+static QTNode *
+join_tsqueries(TSQuery a, TSQuery b)
+{
+ QTNode *res = (QTNode *) palloc0(sizeof(QTNode));
+
+ res->flags |= QTN_NEEDFREE;
+
+ res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
+ res->valnode->type = OPR;
+
+ res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
+ res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
+ res->child[1] = QT2QTN(GETQUERY(a), GETOPERAND(a));
+ res->nchild = 2;
+
+ return res;
+}
+
+Datum
+tsquery_and(PG_FUNCTION_ARGS)
+{
+ TSQuery a = PG_GETARG_TSQUERY_COPY(0);
+ TSQuery b = PG_GETARG_TSQUERY_COPY(1);
+ QTNode *res;
+ TSQuery query;
+
+ if (a->size == 0)
+ {
+ PG_FREE_IF_COPY(a, 1);
+ PG_RETURN_POINTER(b);
+ }
+ else if (b->size == 0)
+ {
+ PG_FREE_IF_COPY(b, 1);
+ PG_RETURN_POINTER(a);
+ }
+
+ res = join_tsqueries(a, b);
+
+ res->valnode->val = '&';
+
+ query = QTN2QT(res);
+
+ QTNFree(res);
+ PG_FREE_IF_COPY(a, 0);
+ PG_FREE_IF_COPY(b, 1);
+
+ PG_RETURN_TSQUERY(query);
+}
+
+Datum
+tsquery_or(PG_FUNCTION_ARGS)
+{
+ TSQuery a = PG_GETARG_TSQUERY_COPY(0);
+ TSQuery b = PG_GETARG_TSQUERY_COPY(1);
+ QTNode *res;
+ TSQuery query;
+
+ if (a->size == 0)
+ {
+ PG_FREE_IF_COPY(a, 1);
+ PG_RETURN_POINTER(b);
+ }
+ else if (b->size == 0)
+ {
+ PG_FREE_IF_COPY(b, 1);
+ PG_RETURN_POINTER(a);
+ }
+
+ res = join_tsqueries(a, b);
+
+ res->valnode->val = '|';
+
+ query = QTN2QT(res);
+
+ QTNFree(res);
+ PG_FREE_IF_COPY(a, 0);
+ PG_FREE_IF_COPY(b, 1);
+
+ PG_RETURN_POINTER(query);
+}
+
+Datum
+tsquery_not(PG_FUNCTION_ARGS)
+{
+ TSQuery a = PG_GETARG_TSQUERY_COPY(0);
+ QTNode *res;
+ TSQuery query;
+
+ if (a->size == 0)
+ PG_RETURN_POINTER(a);
+
+ res = (QTNode *) palloc0(sizeof(QTNode));
+
+ res->flags |= QTN_NEEDFREE;
+
+ res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
+ res->valnode->type = OPR;
+ res->valnode->val = '!';
+
+ res->child = (QTNode **) palloc0(sizeof(QTNode *));
+ res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a));
+ res->nchild = 1;
+
+ query = QTN2QT(res);
+
+ QTNFree(res);
+ PG_FREE_IF_COPY(a, 0);
+
+ PG_RETURN_POINTER(query);
+}
+
+static int
+CompareTSQ(TSQuery a, TSQuery b)
+{
+ if (a->size != b->size)
+ {
+ return (a->size < b->size) ? -1 : 1;
+ }
+ else if (VARSIZE(a) != VARSIZE(b))
+ {
+ return (VARSIZE(a) < VARSIZE(b)) ? -1 : 1;
+ }
+ else
+ {
+ QTNode *an = QT2QTN(GETQUERY(a), GETOPERAND(a));
+ QTNode *bn = QT2QTN(GETQUERY(b), GETOPERAND(b));
+ int res = QTNodeCompare(an, bn);
+
+ QTNFree(an);
+ QTNFree(bn);
+
+ return res;
+ }
+
+ return 0;
+}
+
+Datum
+tsquery_cmp(PG_FUNCTION_ARGS)
+{
+ TSQuery a = PG_GETARG_TSQUERY_COPY(0);
+ TSQuery b = PG_GETARG_TSQUERY_COPY(1);
+ int res = CompareTSQ(a, b);
+
+ PG_FREE_IF_COPY(a, 0);
+ PG_FREE_IF_COPY(b, 1);
+
+ PG_RETURN_INT32(res);
+}
+
+#define CMPFUNC( NAME, CONDITION ) \
+Datum \
+NAME(PG_FUNCTION_ARGS) { \
+ TSQuery a = PG_GETARG_TSQUERY_COPY(0); \
+ TSQuery b = PG_GETARG_TSQUERY_COPY(1); \
+ int res = CompareTSQ(a,b); \
+ \
+ PG_FREE_IF_COPY(a,0); \
+ PG_FREE_IF_COPY(b,1); \
+ \
+ PG_RETURN_BOOL( CONDITION ); \
+}
+
+CMPFUNC(tsquery_lt, res < 0);
+CMPFUNC(tsquery_le, res <= 0);
+CMPFUNC(tsquery_eq, res == 0);
+CMPFUNC(tsquery_ge, res >= 0);
+CMPFUNC(tsquery_gt, res > 0);
+CMPFUNC(tsquery_ne, res != 0);
+
+TSQuerySign
+makeTSQuerySign(TSQuery a)
+{
+ int i;
+ QueryItem *ptr = GETQUERY(a);
+ TSQuerySign sign = 0;
+
+ for (i = 0; i < a->size; i++)
+ {
+ if (ptr->type == VAL)
+ sign |= ((TSQuerySign) 1) << (ptr->val % TSQS_SIGLEN);
+ ptr++;
+ }
+
+ return sign;
+}
+
+Datum
+tsq_mcontains(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ TSQuery ex = PG_GETARG_TSQUERY(1);
+ TSQuerySign sq,
+ se;
+ int i,
+ j;
+ QueryItem *iq,
+ *ie;
+
+ if (query->size < ex->size)
+ {
+ PG_FREE_IF_COPY(query, 0);
+ PG_FREE_IF_COPY(ex, 1);
+
+ PG_RETURN_BOOL(false);
+ }
+
+ sq = makeTSQuerySign(query);
+ se = makeTSQuerySign(ex);
+
+ if ((sq & se) != se)
+ {
+ PG_FREE_IF_COPY(query, 0);
+ PG_FREE_IF_COPY(ex, 1);
+
+ PG_RETURN_BOOL(false);
+ }
+
+ ie = GETQUERY(ex);
+
+ for (i = 0; i < ex->size; i++)
+ {
+ iq = GETQUERY(query);
+ if (ie[i].type != VAL)
+ continue;
+ for (j = 0; j < query->size; j++)
+ if (iq[j].type == VAL && ie[i].val == iq[j].val)
+ {
+ j = query->size + 1;
+ break;
+ }
+ if (j == query->size)
+ {
+ PG_FREE_IF_COPY(query, 0);
+ PG_FREE_IF_COPY(ex, 1);
+
+ PG_RETURN_BOOL(false);
+ }
+ }
+
+ PG_FREE_IF_COPY(query, 0);
+ PG_FREE_IF_COPY(ex, 1);
+
+ PG_RETURN_BOOL(true);
+}
+
+Datum
+tsq_mcontained(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(
+ DirectFunctionCall2(
+ tsq_mcontains,
+ PG_GETARG_DATUM(1),
+ PG_GETARG_DATUM(0)
+ )
+ );
+}
diff --git a/src/backend/utils/adt/tsquery_rewrite.c b/src/backend/utils/adt/tsquery_rewrite.c
new file mode 100644
index 00000000000..f0d22c644ae
--- /dev/null
+++ b/src/backend/utils/adt/tsquery_rewrite.c
@@ -0,0 +1,524 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsquery_rewrite.c
+ * Utilities for reconstructing tsquery
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "executor/spi.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_utils.h"
+
+
+static int
+addone(int *counters, int last, int total)
+{
+ counters[last]++;
+ if (counters[last] >= total)
+ {
+ if (last == 0)
+ return 0;
+ if (addone(counters, last - 1, total - 1) == 0)
+ return 0;
+ counters[last] = counters[last - 1] + 1;
+ }
+ return 1;
+}
+
+static QTNode *
+findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
+{
+
+ if ((node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val)
+ return node;
+
+ if (node->flags & QTN_NOCHANGE)
+ return node;
+
+ if (node->valnode->type == OPR)
+ {
+ if (node->nchild == ex->nchild)
+ {
+ if (QTNEq(node, ex))
+ {
+ QTNFree(node);
+ if (subs)
+ {
+ node = QTNCopy(subs);
+ node->flags |= QTN_NOCHANGE;
+ }
+ else
+ node = NULL;
+ *isfind = true;
+ }
+ }
+ else if (node->nchild > ex->nchild)
+ {
+ int *counters = (int *) palloc(sizeof(int) * node->nchild);
+ int i;
+ QTNode *tnode = (QTNode *) palloc(sizeof(QTNode));
+
+ memset(tnode, 0, sizeof(QTNode));
+ tnode->child = (QTNode **) palloc(sizeof(QTNode *) * ex->nchild);
+ tnode->nchild = ex->nchild;
+ tnode->valnode = (QueryItem *) palloc(sizeof(QueryItem));
+ *(tnode->valnode) = *(ex->valnode);
+
+ for (i = 0; i < ex->nchild; i++)
+ counters[i] = i;
+
+ do
+ {
+ tnode->sign = 0;
+ for (i = 0; i < ex->nchild; i++)
+ {
+ tnode->child[i] = node->child[counters[i]];
+ tnode->sign |= tnode->child[i]->sign;
+ }
+
+ if (QTNEq(tnode, ex))
+ {
+ int j = 0;
+
+ pfree(tnode->valnode);
+ pfree(tnode->child);
+ pfree(tnode);
+ if (subs)
+ {
+ tnode = QTNCopy(subs);
+ tnode->flags = QTN_NOCHANGE | QTN_NEEDFREE;
+ }
+ else
+ tnode = NULL;
+
+ node->child[counters[0]] = tnode;
+
+ for (i = 1; i < ex->nchild; i++)
+ node->child[counters[i]] = NULL;
+ for (i = 0; i < node->nchild; i++)
+ {
+ if (node->child[i])
+ {
+ node->child[j] = node->child[i];
+ j++;
+ }
+ }
+
+ node->nchild = j;
+
+ *isfind = true;
+
+ break;
+ }
+ } while (addone(counters, ex->nchild - 1, node->nchild));
+ if (tnode && (tnode->flags & QTN_NOCHANGE) == 0)
+ {
+ pfree(tnode->valnode);
+ pfree(tnode->child);
+ pfree(tnode);
+ }
+ else
+ QTNSort(node);
+ pfree(counters);
+ }
+ }
+ else if (QTNEq(node, ex))
+ {
+ QTNFree(node);
+ if (subs)
+ {
+ node = QTNCopy(subs);
+ node->flags |= QTN_NOCHANGE;
+ }
+ else
+ {
+ node = NULL;
+ }
+ *isfind = true;
+ }
+
+ return node;
+}
+
+static QTNode *
+dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
+{
+ root = findeq(root, ex, subs, isfind);
+
+ if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == OPR)
+ {
+ int i;
+
+ for (i = 0; i < root->nchild; i++)
+ root->child[i] = dofindsubquery(root->child[i], ex, subs, isfind);
+ }
+
+ return root;
+}
+
+static QTNode *
+dropvoidsubtree(QTNode * root)
+{
+
+ if (!root)
+ return NULL;
+
+ if (root->valnode->type == OPR)
+ {
+ int i,
+ j = 0;
+
+ for (i = 0; i < root->nchild; i++)
+ {
+ if (root->child[i])
+ {
+ root->child[j] = root->child[i];
+ j++;
+ }
+ }
+
+ root->nchild = j;
+
+ if (root->valnode->val == (int4) '!' && root->nchild == 0)
+ {
+ QTNFree(root);
+ root = NULL;
+ }
+ else if (root->nchild == 1)
+ {
+ QTNode *nroot = root->child[0];
+
+ pfree(root);
+ root = nroot;
+ }
+ }
+
+ return root;
+}
+
+static QTNode *
+findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
+{
+ bool DidFind = false;
+
+ root = dofindsubquery(root, ex, subs, &DidFind);
+
+ if (!subs && DidFind)
+ root = dropvoidsubtree(root);
+
+ if (isfind)
+ *isfind = DidFind;
+
+ return root;
+}
+
+Datum
+ts_rewrite_accum(PG_FUNCTION_ARGS)
+{
+ TSQuery acc;
+ ArrayType *qa;
+ TSQuery q;
+ QTNode *qex = NULL,
+ *subs = NULL,
+ *acctree = NULL;
+ bool isfind = false;
+ Datum *elemsp;
+ int nelemsp;
+ MemoryContext aggcontext;
+ MemoryContext oldcontext;
+
+ aggcontext = ((AggState *) fcinfo->context)->aggcontext;
+
+ if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
+ {
+ acc = (TSQuery) MemoryContextAlloc(aggcontext, HDRSIZETQ);
+ SET_VARSIZE(acc, HDRSIZETQ);
+ acc->size = 0;
+ }
+ else
+ acc = PG_GETARG_TSQUERY(0);
+
+ if (PG_ARGISNULL(1) || PG_GETARG_POINTER(1) == NULL)
+ PG_RETURN_TSQUERY(acc);
+ else
+ qa = PG_GETARG_ARRAYTYPE_P_COPY(1);
+
+ if (ARR_NDIM(qa) != 1)
+ elog(ERROR, "array must be one-dimensional, not %d dimensions",
+ ARR_NDIM(qa));
+ if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3)
+ elog(ERROR, "array should have only three elements");
+ if (ARR_ELEMTYPE(qa) != TSQUERYOID)
+ elog(ERROR, "array should contain tsquery type");
+
+ deconstruct_array(qa, TSQUERYOID, -1, false, 'i', &elemsp, NULL, &nelemsp);
+
+ q = DatumGetTSQuery(elemsp[0]);
+ if (q->size == 0)
+ {
+ pfree(elemsp);
+ PG_RETURN_POINTER(acc);
+ }
+
+ if (!acc->size)
+ {
+ if (VARSIZE(acc) > HDRSIZETQ)
+ {
+ pfree(elemsp);
+ PG_RETURN_POINTER(acc);
+ }
+ else
+ acctree = QT2QTN(GETQUERY(q), GETOPERAND(q));
+ }
+ else
+ acctree = QT2QTN(GETQUERY(acc), GETOPERAND(acc));
+
+ QTNTernary(acctree);
+ QTNSort(acctree);
+
+ q = DatumGetTSQuery(elemsp[1]);
+ if (q->size == 0)
+ {
+ pfree(elemsp);
+ PG_RETURN_POINTER(acc);
+ }
+ qex = QT2QTN(GETQUERY(q), GETOPERAND(q));
+ QTNTernary(qex);
+ QTNSort(qex);
+
+ q = DatumGetTSQuery(elemsp[2]);
+ if (q->size)
+ subs = QT2QTN(GETQUERY(q), GETOPERAND(q));
+
+ acctree = findsubquery(acctree, qex, subs, &isfind);
+
+ if (isfind || !acc->size)
+ {
+ /* pfree( acc ); do not pfree(p), because nodeAgg.c will */
+ if (acctree)
+ {
+ QTNBinary(acctree);
+ oldcontext = MemoryContextSwitchTo(aggcontext);
+ acc = QTN2QT(acctree);
+ MemoryContextSwitchTo(oldcontext);
+ }
+ else
+ {
+ acc = (TSQuery) MemoryContextAlloc(aggcontext, HDRSIZETQ);
+ SET_VARSIZE(acc, HDRSIZETQ);
+ acc->size = 0;
+ }
+ }
+
+ pfree(elemsp);
+ QTNFree(qex);
+ QTNFree(subs);
+ QTNFree(acctree);
+
+ PG_RETURN_TSQUERY(acc);
+}
+
+Datum
+ts_rewrite_finish(PG_FUNCTION_ARGS)
+{
+ TSQuery acc = PG_GETARG_TSQUERY(0);
+ TSQuery rewrited;
+
+ if (acc == NULL || PG_ARGISNULL(0) || acc->size == 0)
+ {
+ rewrited = (TSQuery) palloc(HDRSIZETQ);
+ SET_VARSIZE(rewrited, HDRSIZETQ);
+ rewrited->size = 0;
+ }
+ else
+ {
+ rewrited = (TSQuery) palloc(VARSIZE(acc));
+ memcpy(rewrited, acc, VARSIZE(acc));
+ pfree(acc);
+ }
+
+ PG_RETURN_POINTER(rewrited);
+}
+
+Datum
+tsquery_rewrite(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY_COPY(0);
+ text *in = PG_GETARG_TEXT_P(1);
+ TSQuery rewrited = query;
+ MemoryContext outercontext = CurrentMemoryContext;
+ MemoryContext oldcontext;
+ QTNode *tree;
+ char *buf;
+ void *plan;
+ Portal portal;
+ bool isnull;
+ int i;
+
+ if (query->size == 0)
+ {
+ PG_FREE_IF_COPY(in, 1);
+ PG_RETURN_POINTER(rewrited);
+ }
+
+ tree = QT2QTN(GETQUERY(query), GETOPERAND(query));
+ QTNTernary(tree);
+ QTNSort(tree);
+
+ buf = TextPGetCString(in);
+
+ SPI_connect();
+
+ if ((plan = SPI_prepare(buf, 0, NULL)) == NULL)
+ elog(ERROR, "SPI_prepare(\"%s\") failed", buf);
+
+ if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, false)) == NULL)
+ elog(ERROR, "SPI_cursor_open(\"%s\") failed", buf);
+
+ SPI_cursor_fetch(portal, true, 100);
+
+ if (SPI_tuptable->tupdesc->natts != 2 ||
+ SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSQUERYOID ||
+ SPI_gettypeid(SPI_tuptable->tupdesc, 2) != TSQUERYOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("ts_rewrite query must return two tsquery columns")));
+
+ while (SPI_processed > 0 && tree)
+ {
+ for (i = 0; i < SPI_processed && tree; i++)
+ {
+ Datum qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+ Datum sdata;
+
+ if (isnull)
+ continue;
+
+ sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull);
+
+ if (!isnull)
+ {
+ TSQuery qtex = DatumGetTSQuery(qdata);
+ TSQuery qtsubs = DatumGetTSQuery(sdata);
+ QTNode *qex,
+ *qsubs = NULL;
+
+ if (qtex->size == 0)
+ {
+ if (qtex != (TSQuery) DatumGetPointer(qdata))
+ pfree(qtex);
+ if (qtsubs != (TSQuery) DatumGetPointer(sdata))
+ pfree(qtsubs);
+ continue;
+ }
+
+ qex = QT2QTN(GETQUERY(qtex), GETOPERAND(qtex));
+
+ QTNTernary(qex);
+ QTNSort(qex);
+
+ if (qtsubs->size)
+ qsubs = QT2QTN(GETQUERY(qtsubs), GETOPERAND(qtsubs));
+
+ oldcontext = MemoryContextSwitchTo(outercontext);
+ tree = findsubquery(tree, qex, qsubs, NULL);
+ MemoryContextSwitchTo(oldcontext);
+
+ QTNFree(qex);
+ if (qtex != (TSQuery) DatumGetPointer(qdata))
+ pfree(qtex);
+ QTNFree(qsubs);
+ if (qtsubs != (TSQuery) DatumGetPointer(sdata))
+ pfree(qtsubs);
+ }
+ }
+
+ SPI_freetuptable(SPI_tuptable);
+ SPI_cursor_fetch(portal, true, 100);
+ }
+
+ SPI_freetuptable(SPI_tuptable);
+ SPI_cursor_close(portal);
+ SPI_freeplan(plan);
+ SPI_finish();
+
+ if (tree)
+ {
+ QTNBinary(tree);
+ rewrited = QTN2QT(tree);
+ QTNFree(tree);
+ PG_FREE_IF_COPY(query, 0);
+ }
+ else
+ {
+ SET_VARSIZE(rewrited, HDRSIZETQ);
+ rewrited->size = 0;
+ }
+
+ pfree(buf);
+ PG_FREE_IF_COPY(in, 1);
+ PG_RETURN_POINTER(rewrited);
+}
+
+Datum
+tsquery_rewrite_query(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY_COPY(0);
+ TSQuery ex = PG_GETARG_TSQUERY(1);
+ TSQuery subst = PG_GETARG_TSQUERY(2);
+ TSQuery rewrited = query;
+ QTNode *tree,
+ *qex,
+ *subs = NULL;
+
+ if (query->size == 0 || ex->size == 0)
+ {
+ PG_FREE_IF_COPY(ex, 1);
+ PG_FREE_IF_COPY(subst, 2);
+ PG_RETURN_POINTER(rewrited);
+ }
+
+ tree = QT2QTN(GETQUERY(query), GETOPERAND(query));
+ QTNTernary(tree);
+ QTNSort(tree);
+
+ qex = QT2QTN(GETQUERY(ex), GETOPERAND(ex));
+ QTNTernary(qex);
+ QTNSort(qex);
+
+ if (subst->size)
+ subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst));
+
+ tree = findsubquery(tree, qex, subs, NULL);
+ QTNFree(qex);
+ QTNFree(subs);
+
+ if (!tree)
+ {
+ SET_VARSIZE(rewrited, HDRSIZETQ);
+ rewrited->size = 0;
+ PG_FREE_IF_COPY(ex, 1);
+ PG_FREE_IF_COPY(subst, 2);
+ PG_RETURN_POINTER(rewrited);
+ }
+ else
+ {
+ QTNBinary(tree);
+ rewrited = QTN2QT(tree);
+ QTNFree(tree);
+ }
+
+ PG_FREE_IF_COPY(query, 0);
+ PG_FREE_IF_COPY(ex, 1);
+ PG_FREE_IF_COPY(subst, 2);
+ PG_RETURN_POINTER(rewrited);
+}
diff --git a/src/backend/utils/adt/tsquery_util.c b/src/backend/utils/adt/tsquery_util.c
new file mode 100644
index 00000000000..ae8cc318da9
--- /dev/null
+++ b/src/backend/utils/adt/tsquery_util.c
@@ -0,0 +1,317 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsquery_util.c
+ * Utilities for tsquery datatype
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_utils.h"
+
+
+QTNode *
+QT2QTN(QueryItem * in, char *operand)
+{
+ QTNode *node = (QTNode *) palloc0(sizeof(QTNode));
+
+ node->valnode = in;
+
+ if (in->type == OPR)
+ {
+ node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
+ node->child[0] = QT2QTN(in + 1, operand);
+ node->sign = node->child[0]->sign;
+ if (in->val == (int4) '!')
+ node->nchild = 1;
+ else
+ {
+ node->nchild = 2;
+ node->child[1] = QT2QTN(in + in->left, operand);
+ node->sign |= node->child[1]->sign;
+ }
+ }
+ else if (operand)
+ {
+ node->word = operand + in->distance;
+ node->sign = 1 << (in->val % 32);
+ }
+
+ return node;
+}
+
+void
+QTNFree(QTNode * in)
+{
+ if (!in)
+ return;
+
+ if (in->valnode->type == VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
+ pfree(in->word);
+
+ if (in->child)
+ {
+ if (in->valnode)
+ {
+ if (in->valnode->type == OPR && in->nchild > 0)
+ {
+ int i;
+
+ for (i = 0; i < in->nchild; i++)
+ QTNFree(in->child[i]);
+ }
+ if (in->flags & QTN_NEEDFREE)
+ pfree(in->valnode);
+ }
+ pfree(in->child);
+ }
+
+ pfree(in);
+}
+
+int
+QTNodeCompare(QTNode * an, QTNode * bn)
+{
+ if (an->valnode->type != bn->valnode->type)
+ return (an->valnode->type > bn->valnode->type) ? -1 : 1;
+ else if (an->valnode->val != bn->valnode->val)
+ return (an->valnode->val > bn->valnode->val) ? -1 : 1;
+ else if (an->valnode->type == VAL)
+ {
+ if (an->valnode->length == bn->valnode->length)
+ return strncmp(an->word, bn->word, an->valnode->length);
+ else
+ return (an->valnode->length > bn->valnode->length) ? -1 : 1;
+ }
+ else if (an->nchild != bn->nchild)
+ {
+ return (an->nchild > bn->nchild) ? -1 : 1;
+ }
+ else
+ {
+ int i,
+ res;
+
+ for (i = 0; i < an->nchild; i++)
+ if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
+ return res;
+ }
+
+ return 0;
+}
+
+static int
+cmpQTN(const void *a, const void *b)
+{
+ return QTNodeCompare(*(QTNode **) a, *(QTNode **) b);
+}
+
+void
+QTNSort(QTNode * in)
+{
+ int i;
+
+ if (in->valnode->type != OPR)
+ return;
+
+ for (i = 0; i < in->nchild; i++)
+ QTNSort(in->child[i]);
+ if (in->nchild > 1)
+ qsort((void *) in->child, in->nchild, sizeof(QTNode *), cmpQTN);
+}
+
+bool
+QTNEq(QTNode * a, QTNode * b)
+{
+ uint32 sign = a->sign & b->sign;
+
+ if (!(sign == a->sign && sign == b->sign))
+ return 0;
+
+ return (QTNodeCompare(a, b) == 0) ? true : false;
+}
+
+void
+QTNTernary(QTNode * in)
+{
+ int i;
+
+ if (in->valnode->type != OPR)
+ return;
+
+ for (i = 0; i < in->nchild; i++)
+ QTNTernary(in->child[i]);
+
+ for (i = 0; i < in->nchild; i++)
+ {
+ if (in->valnode->type == in->child[i]->valnode->type && in->valnode->val == in->child[i]->valnode->val)
+ {
+ QTNode *cc = in->child[i];
+ int oldnchild = in->nchild;
+
+ in->nchild += cc->nchild - 1;
+ in->child = (QTNode **) repalloc(in->child, in->nchild * sizeof(QTNode *));
+
+ if (i + 1 != oldnchild)
+ memmove(in->child + i + cc->nchild, in->child + i + 1,
+ (oldnchild - i - 1) * sizeof(QTNode *));
+
+ memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));
+ i += cc->nchild - 1;
+
+ pfree(cc);
+ }
+ }
+}
+
+void
+QTNBinary(QTNode * in)
+{
+ int i;
+
+ if (in->valnode->type != OPR)
+ return;
+
+ for (i = 0; i < in->nchild; i++)
+ QTNBinary(in->child[i]);
+
+ if (in->nchild <= 2)
+ return;
+
+ while (in->nchild > 2)
+ {
+ QTNode *nn = (QTNode *) palloc0(sizeof(QTNode));
+
+ nn->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
+ nn->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
+
+ nn->nchild = 2;
+ nn->flags = QTN_NEEDFREE;
+
+ nn->child[0] = in->child[0];
+ nn->child[1] = in->child[1];
+ nn->sign = nn->child[0]->sign | nn->child[1]->sign;
+
+ nn->valnode->type = in->valnode->type;
+ nn->valnode->val = in->valnode->val;
+
+ in->child[0] = nn;
+ in->child[1] = in->child[in->nchild - 1];
+ in->nchild--;
+ }
+}
+
+static void
+cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
+{
+ *nnode += 1;
+ if (in->valnode->type == OPR)
+ {
+ int i;
+
+ for (i = 0; i < in->nchild; i++)
+ cntsize(in->child[i], sumlen, nnode);
+ }
+ else
+ {
+ *sumlen += in->valnode->length + 1;
+ }
+}
+
+typedef struct
+{
+ QueryItem *curitem;
+ char *operand;
+ char *curoperand;
+} QTN2QTState;
+
+static void
+fillQT(QTN2QTState * state, QTNode * in)
+{
+ *(state->curitem) = *(in->valnode);
+
+ if (in->valnode->type == VAL)
+ {
+ memcpy(state->curoperand, in->word, in->valnode->length);
+ state->curitem->distance = state->curoperand - state->operand;
+ state->curoperand[in->valnode->length] = '\0';
+ state->curoperand += in->valnode->length + 1;
+ state->curitem++;
+ }
+ else
+ {
+ QueryItem *curitem = state->curitem;
+
+ Assert(in->nchild <= 2);
+ state->curitem++;
+
+ fillQT(state, in->child[0]);
+
+ if (in->nchild == 2)
+ {
+ curitem->left = state->curitem - curitem;
+ fillQT(state, in->child[1]);
+ }
+ }
+}
+
+TSQuery
+QTN2QT(QTNode *in)
+{
+ TSQuery out;
+ int len;
+ int sumlen = 0,
+ nnode = 0;
+ QTN2QTState state;
+
+ cntsize(in, &sumlen, &nnode);
+ len = COMPUTESIZE(nnode, sumlen);
+
+ out = (TSQuery) palloc(len);
+ SET_VARSIZE(out, len);
+ out->size = nnode;
+
+ state.curitem = GETQUERY(out);
+ state.operand = state.curoperand = GETOPERAND(out);
+
+ fillQT(&state, in);
+ return out;
+}
+
+QTNode *
+QTNCopy(QTNode *in)
+{
+ QTNode *out = (QTNode *) palloc(sizeof(QTNode));
+
+ *out = *in;
+ out->valnode = (QueryItem *) palloc(sizeof(QueryItem));
+ *(out->valnode) = *(in->valnode);
+ out->flags |= QTN_NEEDFREE;
+
+ if (in->valnode->type == VAL)
+ {
+ out->word = palloc(in->valnode->length + 1);
+ memcpy(out->word, in->word, in->valnode->length);
+ out->word[in->valnode->length] = '\0';
+ out->flags |= QTN_WORDFREE;
+ }
+ else
+ {
+ int i;
+
+ out->child = (QTNode **) palloc(sizeof(QTNode *) * in->nchild);
+
+ for (i = 0; i < in->nchild; i++)
+ out->child[i] = QTNCopy(in->child[i]);
+ }
+
+ return out;
+}
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
new file mode 100644
index 00000000000..8b2ab884c8c
--- /dev/null
+++ b/src/backend/utils/adt/tsrank.c
@@ -0,0 +1,804 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsrank.c
+ * rank tsvector by tsquery
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_utils.h"
+#include "utils/array.h"
+
+
+static float weights[] = {0.1, 0.2, 0.4, 1.0};
+
+#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] )
+
+#define RANK_NO_NORM 0x00
+#define RANK_NORM_LOGLENGTH 0x01
+#define RANK_NORM_LENGTH 0x02
+#define RANK_NORM_EXTDIST 0x04
+#define RANK_NORM_UNIQ 0x08
+#define RANK_NORM_LOGUNIQ 0x10
+#define DEF_NORM_METHOD RANK_NO_NORM
+
+static float calc_rank_or(float *w, TSVector t, TSQuery q);
+static float calc_rank_and(float *w, TSVector t, TSQuery q);
+
+/*
+ * Returns a weight of a word collocation
+ */
+static float4
+word_distance(int4 w)
+{
+ if (w > 100)
+ return 1e-30;
+
+ return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
+}
+
+static int
+cnt_length(TSVector t)
+{
+ WordEntry *ptr = ARRPTR(t),
+ *end = (WordEntry *) STRPTR(t);
+ int len = 0,
+ clen;
+
+ while (ptr < end)
+ {
+ if ((clen = POSDATALEN(t, ptr)) == 0)
+ len += 1;
+ else
+ len += clen;
+ ptr++;
+ }
+
+ return len;
+}
+
+static int4
+WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
+{
+ if (ptr->len == item->length)
+ return strncmp(
+ eval + ptr->pos,
+ qval + item->distance,
+ item->length);
+
+ return (ptr->len > item->length) ? 1 : -1;
+}
+
+static WordEntry *
+find_wordentry(TSVector t, TSQuery q, QueryItem * item)
+{
+ WordEntry *StopLow = ARRPTR(t);
+ WordEntry *StopHigh = (WordEntry *) STRPTR(t);
+ WordEntry *StopMiddle;
+ int difference;
+
+ /* Loop invariant: StopLow <= item < StopHigh */
+
+ while (StopLow < StopHigh)
+ {
+ StopMiddle = StopLow + (StopHigh - StopLow) / 2;
+ difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item);
+ if (difference == 0)
+ return StopMiddle;
+ else if (difference < 0)
+ StopLow = StopMiddle + 1;
+ else
+ StopHigh = StopMiddle;
+ }
+
+ return NULL;
+}
+
+
+static int
+compareQueryItem(const void *a, const void *b, void *arg)
+{
+ char *operand = (char *) arg;
+
+ if ((*(QueryItem **) a)->length == (*(QueryItem **) b)->length)
+ return strncmp(operand + (*(QueryItem **) a)->distance,
+ operand + (*(QueryItem **) b)->distance,
+ (*(QueryItem **) b)->length);
+
+ return ((*(QueryItem **) a)->length > (*(QueryItem **) b)->length) ? 1 : -1;
+}
+
+static QueryItem **
+SortAndUniqItems(char *operand, QueryItem * item, int *size)
+{
+ QueryItem **res,
+ **ptr,
+ **prevptr;
+
+ ptr = res = (QueryItem **) palloc(sizeof(QueryItem *) * *size);
+
+ while ((*size)--)
+ {
+ if (item->type == VAL)
+ {
+ *ptr = item;
+ ptr++;
+ }
+ item++;
+ }
+
+ *size = ptr - res;
+ if (*size < 2)
+ return res;
+
+ qsort_arg(res, *size, sizeof(QueryItem **), compareQueryItem, (void *) operand);
+
+ ptr = res + 1;
+ prevptr = res;
+
+ while (ptr - res < *size)
+ {
+ if (compareQueryItem((void *) ptr, (void *) prevptr, (void *) operand) != 0)
+ {
+ prevptr++;
+ *prevptr = *ptr;
+ }
+ ptr++;
+ }
+
+ *size = prevptr + 1 - res;
+ return res;
+}
+
+static WordEntryPos POSNULL[] = {
+ 0,
+ 0
+};
+
+static float
+calc_rank_and(float *w, TSVector t, TSQuery q)
+{
+ uint16 **pos;
+ int i,
+ k,
+ l,
+ p;
+ WordEntry *entry;
+ WordEntryPos *post,
+ *ct;
+ int4 dimt,
+ lenct,
+ dist;
+ float res = -1.0;
+ QueryItem **item;
+ int size = q->size;
+
+ item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
+ if (size < 2)
+ {
+ pfree(item);
+ return calc_rank_or(w, t, q);
+ }
+ pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
+ memset(pos, 0, sizeof(uint16 *) * q->size);
+ *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
+ WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
+
+ for (i = 0; i < size; i++)
+ {
+ entry = find_wordentry(t, q, item[i]);
+ if (!entry)
+ continue;
+
+ if (entry->haspos)
+ pos[i] = (uint16 *) _POSDATAPTR(t, entry);
+ else
+ pos[i] = (uint16 *) POSNULL;
+
+
+ dimt = *(uint16 *) (pos[i]);
+ post = (WordEntryPos *) (pos[i] + 1);
+ for (k = 0; k < i; k++)
+ {
+ if (!pos[k])
+ continue;
+ lenct = *(uint16 *) (pos[k]);
+ ct = (WordEntryPos *) (pos[k] + 1);
+ for (l = 0; l < dimt; l++)
+ {
+ for (p = 0; p < lenct; p++)
+ {
+ dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
+ if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
+ {
+ float curw;
+
+ if (!dist)
+ dist = MAXENTRYPOS;
+ curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
+ res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
+ }
+ }
+ }
+ }
+ }
+ pfree(pos);
+ pfree(item);
+ return res;
+}
+
+static float
+calc_rank_or(float *w, TSVector t, TSQuery q)
+{
+ WordEntry *entry;
+ WordEntryPos *post;
+ int4 dimt,
+ j,
+ i;
+ float res = 0.0;
+ QueryItem **item;
+ int size = q->size;
+
+ *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
+ item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
+
+ for (i = 0; i < size; i++)
+ {
+ float resj,
+ wjm;
+ int4 jm;
+
+ entry = find_wordentry(t, q, item[i]);
+ if (!entry)
+ continue;
+
+ if (entry->haspos)
+ {
+ dimt = POSDATALEN(t, entry);
+ post = POSDATAPTR(t, entry);
+ }
+ else
+ {
+ dimt = *(uint16 *) POSNULL;
+ post = POSNULL + 1;
+ }
+
+ resj = 0.0;
+ wjm = -1.0;
+ jm = 0;
+ for (j = 0; j < dimt; j++)
+ {
+ resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
+ if (wpos(post[j]) > wjm)
+ {
+ wjm = wpos(post[j]);
+ jm = j;
+ }
+ }
+/*
+ limit (sum(i/i^2),i->inf) = pi^2/6
+ resj = sum(wi/i^2),i=1,noccurence,
+ wi - should be sorted desc,
+ don't sort for now, just choose maximum weight. This should be corrected
+ Oleg Bartunov
+*/
+ res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
+ }
+ if (size > 0)
+ res = res / size;
+ pfree(item);
+ return res;
+}
+
+static float
+calc_rank(float *w, TSVector t, TSQuery q, int4 method)
+{
+ QueryItem *item = GETQUERY(q);
+ float res = 0.0;
+ int len;
+
+ if (!t->size || !q->size)
+ return 0.0;
+
+ res = (item->type != VAL && item->val == (int4) '&') ?
+ calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
+
+ if (res < 0)
+ res = 1e-20;
+
+ if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
+ res /= log((double) (cnt_length(t) + 1)) / log(2.0);
+
+ if (method & RANK_NORM_LENGTH)
+ {
+ len = cnt_length(t);
+ if (len > 0)
+ res /= (float) len;
+ }
+
+ if ((method & RANK_NORM_UNIQ) && t->size > 0)
+ res /= (float) (t->size);
+
+ if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
+ res /= log((double) (t->size + 1)) / log(2.0);
+
+ return res;
+}
+
+static float *
+getWeights(ArrayType *win)
+{
+ static float ws[lengthof(weights)];
+ int i;
+ float4 *arrdata;
+
+ if (win == 0)
+ return weights;
+
+ if (ARR_NDIM(win) != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("array of weight must be one-dimensional")));
+
+ if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < lengthof(weights))
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("array of weight is too short")));
+
+ if (ARR_HASNULL(win))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("array of weight must not contain nulls")));
+
+ arrdata = (float4 *) ARR_DATA_PTR(win);
+ for (i = 0; i < lengthof(weights); i++)
+ {
+ ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
+ if (ws[i] > 1.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("weight out of range")));
+ }
+
+ return ws;
+}
+
+Datum
+ts_rank_wttf(PG_FUNCTION_ARGS)
+{
+ ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+ TSVector txt = PG_GETARG_TSVECTOR(1);
+ TSQuery query = PG_GETARG_TSQUERY(2);
+ int method = PG_GETARG_INT32(3);
+ float res;
+
+ res = calc_rank(getWeights(win), txt, query, method);
+
+ PG_FREE_IF_COPY(win, 0);
+ PG_FREE_IF_COPY(txt, 1);
+ PG_FREE_IF_COPY(query, 2);
+ PG_RETURN_FLOAT4(res);
+}
+
+Datum
+ts_rank_wtt(PG_FUNCTION_ARGS)
+{
+ ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+ TSVector txt = PG_GETARG_TSVECTOR(1);
+ TSQuery query = PG_GETARG_TSQUERY(2);
+ float res;
+
+ res = calc_rank(getWeights(win), txt, query, DEF_NORM_METHOD);
+
+ PG_FREE_IF_COPY(win, 0);
+ PG_FREE_IF_COPY(txt, 1);
+ PG_FREE_IF_COPY(query, 2);
+ PG_RETURN_FLOAT4(res);
+}
+
+Datum
+ts_rank_ttf(PG_FUNCTION_ARGS)
+{
+ TSVector txt = PG_GETARG_TSVECTOR(0);
+ TSQuery query = PG_GETARG_TSQUERY(1);
+ int method = PG_GETARG_INT32(2);
+ float res;
+
+ res = calc_rank(getWeights(NULL), txt, query, method);
+
+ PG_FREE_IF_COPY(txt, 0);
+ PG_FREE_IF_COPY(query, 1);
+ PG_RETURN_FLOAT4(res);
+}
+
+Datum
+ts_rank_tt(PG_FUNCTION_ARGS)
+{
+ TSVector txt = PG_GETARG_TSVECTOR(0);
+ TSQuery query = PG_GETARG_TSQUERY(1);
+ float res;
+
+ res = calc_rank(getWeights(NULL), txt, query, DEF_NORM_METHOD);
+
+ PG_FREE_IF_COPY(txt, 0);
+ PG_FREE_IF_COPY(query, 1);
+ PG_RETURN_FLOAT4(res);
+}
+
+typedef struct
+{
+ QueryItem **item;
+ int16 nitem;
+ bool needfree;
+ uint8 wclass;
+ int32 pos;
+} DocRepresentation;
+
+static int
+compareDocR(const void *a, const void *b)
+{
+ if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
+ return 0;
+ return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
+}
+
+static bool
+checkcondition_QueryItem(void *checkval, QueryItem * val)
+{
+ return (bool) (val->istrue);
+}
+
+static void
+reset_istrue_flag(TSQuery query)
+{
+ QueryItem *item = GETQUERY(query);
+ int i;
+
+ /* reset istrue flag */
+ for (i = 0; i < query->size; i++)
+ {
+ if (item->type == VAL)
+ item->istrue = 0;
+ item++;
+ }
+}
+
+typedef struct
+{
+ int pos;
+ int p;
+ int q;
+ DocRepresentation *begin;
+ DocRepresentation *end;
+} Extention;
+
+
+static bool
+Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
+{
+ DocRepresentation *ptr;
+ int lastpos = ext->pos;
+ int i;
+ bool found = false;
+
+ reset_istrue_flag(query);
+
+ ext->p = 0x7fffffff;
+ ext->q = 0;
+ ptr = doc + ext->pos;
+
+ /* find upper bound of cover from current position, move up */
+ while (ptr - doc < len)
+ {
+ for (i = 0; i < ptr->nitem; i++)
+ ptr->item[i]->istrue = 1;
+ if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryItem))
+ {
+ if (ptr->pos > ext->q)
+ {
+ ext->q = ptr->pos;
+ ext->end = ptr;
+ lastpos = ptr - doc;
+ found = true;
+ }
+ break;
+ }
+ ptr++;
+ }
+
+ if (!found)
+ return false;
+
+ reset_istrue_flag(query);
+
+ ptr = doc + lastpos;
+
+ /* find lower bound of cover from founded upper bound, move down */
+ while (ptr >= doc + ext->pos)
+ {
+ for (i = 0; i < ptr->nitem; i++)
+ ptr->item[i]->istrue = 1;
+ if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryItem))
+ {
+ if (ptr->pos < ext->p)
+ {
+ ext->begin = ptr;
+ ext->p = ptr->pos;
+ }
+ break;
+ }
+ ptr--;
+ }
+
+ if (ext->p <= ext->q)
+ {
+ /*
+ * set position for next try to next lexeme after begining of founded
+ * cover
+ */
+ ext->pos = (ptr - doc) + 1;
+ return true;
+ }
+
+ ext->pos++;
+ return Cover(doc, len, query, ext);
+}
+
+static DocRepresentation *
+get_docrep(TSVector txt, TSQuery query, int *doclen)
+{
+ QueryItem *item = GETQUERY(query);
+ WordEntry *entry;
+ WordEntryPos *post;
+ int4 dimt,
+ j,
+ i;
+ int len = query->size * 4,
+ cur = 0;
+ DocRepresentation *doc;
+ char *operand;
+
+ *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
+ doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
+ operand = GETOPERAND(query);
+ reset_istrue_flag(query);
+
+ for (i = 0; i < query->size; i++)
+ {
+ if (item[i].type != VAL || item[i].istrue)
+ continue;
+
+ entry = find_wordentry(txt, query, &(item[i]));
+ if (!entry)
+ continue;
+
+ if (entry->haspos)
+ {
+ dimt = POSDATALEN(txt, entry);
+ post = POSDATAPTR(txt, entry);
+ }
+ else
+ {
+ dimt = *(uint16 *) POSNULL;
+ post = POSNULL + 1;
+ }
+
+ while (cur + dimt >= len)
+ {
+ len *= 2;
+ doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
+ }
+
+ for (j = 0; j < dimt; j++)
+ {
+ if (j == 0)
+ {
+ QueryItem *kptr,
+ *iptr = item + i;
+ int k;
+
+ doc[cur].needfree = false;
+ doc[cur].nitem = 0;
+ doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * query->size);
+
+ for (k = 0; k < query->size; k++)
+ {
+ kptr = item + k;
+ if (k == i ||
+ (item[k].type == VAL &&
+ compareQueryItem(&kptr, &iptr, operand) == 0))
+ {
+ doc[cur].item[doc[cur].nitem] = item + k;
+ doc[cur].nitem++;
+ kptr->istrue = 1;
+ }
+ }
+ }
+ else
+ {
+ doc[cur].needfree = false;
+ doc[cur].nitem = doc[cur - 1].nitem;
+ doc[cur].item = doc[cur - 1].item;
+ }
+ doc[cur].pos = WEP_GETPOS(post[j]);
+ doc[cur].wclass = WEP_GETWEIGHT(post[j]);
+ cur++;
+ }
+ }
+
+ *doclen = cur;
+
+ if (cur > 0)
+ {
+ if (cur > 1)
+ qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+ return doc;
+ }
+
+ pfree(doc);
+ return NULL;
+}
+
+static float4
+calc_rank_cd(float4 *arrdata, TSVector txt, TSQuery query, int method)
+{
+ DocRepresentation *doc;
+ int len,
+ i,
+ doclen = 0;
+ Extention ext;
+ double Wdoc = 0.0;
+ double invws[lengthof(weights)];
+ double SumDist = 0.0,
+ PrevExtPos = 0.0,
+ CurExtPos = 0.0;
+ int NExtent = 0;
+
+ for (i = 0; i < lengthof(weights); i++)
+ {
+ invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
+ if (invws[i] > 1.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("weight out of range")));
+ invws[i] = 1.0 / invws[i];
+ }
+
+ doc = get_docrep(txt, query, &doclen);
+ if (!doc)
+ return 0.0;
+
+ MemSet(&ext, 0, sizeof(Extention));
+ while (Cover(doc, doclen, query, &ext))
+ {
+ double Cpos = 0.0;
+ double InvSum = 0.0;
+ int nNoise;
+ DocRepresentation *ptr = ext.begin;
+
+ while (ptr <= ext.end)
+ {
+ InvSum += invws[ptr->wclass];
+ ptr++;
+ }
+
+ Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
+
+ /*
+ * if doc are big enough then ext.q may be equal to ext.p due to limit
+ * of posional information. In this case we approximate number of
+ * noise word as half cover's length
+ */
+ nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
+ if (nNoise < 0)
+ nNoise = (ext.end - ext.begin) / 2;
+ Wdoc += Cpos / ((double) (1 + nNoise));
+
+ CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
+ if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent devision by
+ * zero in a case of
+ multiple lexize */ )
+ SumDist += 1.0 / (CurExtPos - PrevExtPos);
+
+ PrevExtPos = CurExtPos;
+ NExtent++;
+ }
+
+ if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
+ Wdoc /= log((double) (cnt_length(txt) + 1));
+
+ if (method & RANK_NORM_LENGTH)
+ {
+ len = cnt_length(txt);
+ if (len > 0)
+ Wdoc /= (double) len;
+ }
+
+ if ((method & RANK_NORM_EXTDIST) && SumDist > 0)
+ Wdoc /= ((double) NExtent) / SumDist;
+
+ if ((method & RANK_NORM_UNIQ) && txt->size > 0)
+ Wdoc /= (double) (txt->size);
+
+ if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
+ Wdoc /= log((double) (txt->size + 1)) / log(2.0);
+
+ for (i = 0; i < doclen; i++)
+ if (doc[i].needfree)
+ pfree(doc[i].item);
+ pfree(doc);
+
+ return (float4) Wdoc;
+}
+
+Datum
+ts_rankcd_wttf(PG_FUNCTION_ARGS)
+{
+ ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+ TSVector txt = PG_GETARG_TSVECTOR(1);
+ TSQuery query = PG_GETARG_TSQUERY_COPY(2);
+ int method = PG_GETARG_INT32(3);
+ float res;
+
+ res = calc_rank_cd(getWeights(win), txt, query, method);
+
+ PG_FREE_IF_COPY(win, 0);
+ PG_FREE_IF_COPY(txt, 1);
+ PG_FREE_IF_COPY(query, 2);
+ PG_RETURN_FLOAT4(res);
+}
+
+Datum
+ts_rankcd_wtt(PG_FUNCTION_ARGS)
+{
+ ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+ TSVector txt = PG_GETARG_TSVECTOR(1);
+ TSQuery query = PG_GETARG_TSQUERY_COPY(2);
+ float res;
+
+ res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
+
+ PG_FREE_IF_COPY(win, 0);
+ PG_FREE_IF_COPY(txt, 1);
+ PG_FREE_IF_COPY(query, 2);
+ PG_RETURN_FLOAT4(res);
+}
+
+Datum
+ts_rankcd_ttf(PG_FUNCTION_ARGS)
+{
+ TSVector txt = PG_GETARG_TSVECTOR(0);
+ TSQuery query = PG_GETARG_TSQUERY_COPY(1);
+ int method = PG_GETARG_INT32(2);
+ float res;
+
+ res = calc_rank_cd(getWeights(NULL), txt, query, method);
+
+ PG_FREE_IF_COPY(txt, 0);
+ PG_FREE_IF_COPY(query, 1);
+ PG_RETURN_FLOAT4(res);
+}
+
+Datum
+ts_rankcd_tt(PG_FUNCTION_ARGS)
+{
+ TSVector txt = PG_GETARG_TSVECTOR(0);
+ TSQuery query = PG_GETARG_TSQUERY_COPY(1);
+ float res;
+
+ res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
+
+ PG_FREE_IF_COPY(txt, 0);
+ PG_FREE_IF_COPY(query, 1);
+ PG_RETURN_FLOAT4(res);
+}
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
new file mode 100644
index 00000000000..04b6345e162
--- /dev/null
+++ b/src/backend/utils/adt/tsvector.c
@@ -0,0 +1,683 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsvector.c
+ * I/O functions for tsvector
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "libpq/pqformat.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+#include "utils/memutils.h"
+
+
+static int
+comparePos(const void *a, const void *b)
+{
+ if (WEP_GETPOS(*(WordEntryPos *) a) == WEP_GETPOS(*(WordEntryPos *) b))
+ return 0;
+ return (WEP_GETPOS(*(WordEntryPos *) a) > WEP_GETPOS(*(WordEntryPos *) b)) ? 1 : -1;
+}
+
+static int
+uniquePos(WordEntryPos * a, int4 l)
+{
+ WordEntryPos *ptr,
+ *res;
+
+ if (l == 1)
+ return l;
+
+ res = a;
+ qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+ ptr = a + 1;
+ while (ptr - a < l)
+ {
+ if (WEP_GETPOS(*ptr) != WEP_GETPOS(*res))
+ {
+ res++;
+ *res = *ptr;
+ if (res - a >= MAXNUMPOS - 1 || WEP_GETPOS(*res) == MAXENTRYPOS - 1)
+ break;
+ }
+ else if (WEP_GETWEIGHT(*ptr) > WEP_GETWEIGHT(*res))
+ WEP_SETWEIGHT(*res, WEP_GETWEIGHT(*ptr));
+ ptr++;
+ }
+
+ return res + 1 - a;
+}
+
+static int
+compareentry(const void *a, const void *b, void *arg)
+{
+ char *BufferStr = (char *) arg;
+
+ if (((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len)
+ {
+ return strncmp(&BufferStr[((WordEntryIN *) a)->entry.pos],
+ &BufferStr[((WordEntryIN *) b)->entry.pos],
+ ((WordEntryIN *) a)->entry.len);
+ }
+
+ return (((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len) ? 1 : -1;
+}
+
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+ WordEntryIN *ptr,
+ *res;
+
+ res = a;
+ if (l == 1)
+ {
+ if (a->entry.haspos)
+ {
+ *(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos));
+ *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos);
+ }
+ return l;
+ }
+
+ ptr = a + 1;
+ qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf);
+
+ while (ptr - a < l)
+ {
+ if (!(ptr->entry.len == res->entry.len &&
+ strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+ {
+ if (res->entry.haspos)
+ {
+ *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
+ *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
+ }
+ *outbuflen += SHORTALIGN(res->entry.len);
+ res++;
+ memcpy(res, ptr, sizeof(WordEntryIN));
+ }
+ else if (ptr->entry.haspos)
+ {
+ if (res->entry.haspos)
+ {
+ int4 len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);
+
+ res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
+ memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
+ &(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
+ *(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
+ pfree(ptr->pos);
+ }
+ else
+ {
+ res->entry.haspos = 1;
+ res->pos = ptr->pos;
+ }
+ }
+ ptr++;
+ }
+ if (res->entry.haspos)
+ {
+ *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
+ *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
+ }
+ *outbuflen += SHORTALIGN(res->entry.len);
+
+ return res + 1 - a;
+}
+
+static int
+WordEntryCMP(WordEntry * a, WordEntry * b, char *buf)
+{
+ return compareentry(a, b, buf);
+}
+
+#define WAITWORD 1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR 3
+#define WAITENDCMPLX 4
+#define WAITPOSINFO 5
+#define INPOSINFO 6
+#define WAITPOSDELIM 7
+#define WAITCHARCMPLX 8
+
+#define RESIZEPRSBUF \
+do { \
+ if ( state->curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
+ { \
+ int4 clen = state->curpos - state->word; \
+ state->len *= 2; \
+ state->word = (char*)repalloc( (void*)state->word, state->len ); \
+ state->curpos = state->word + clen; \
+ } \
+} while (0)
+
+bool
+gettoken_tsvector(TSVectorParseState *state)
+{
+ int4 oldstate = 0;
+
+ state->curpos = state->word;
+ state->state = WAITWORD;
+ state->alen = 0;
+
+ while (1)
+ {
+ if (state->state == WAITWORD)
+ {
+ if (*(state->prsbuf) == '\0')
+ return false;
+ else if (t_iseq(state->prsbuf, '\''))
+ state->state = WAITENDCMPLX;
+ else if (t_iseq(state->prsbuf, '\\'))
+ {
+ state->state = WAITNEXTCHAR;
+ oldstate = WAITENDWORD;
+ }
+ else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ else if (!t_isspace(state->prsbuf))
+ {
+ COPYCHAR(state->curpos, state->prsbuf);
+ state->curpos += pg_mblen(state->prsbuf);
+ state->state = WAITENDWORD;
+ }
+ }
+ else if (state->state == WAITNEXTCHAR)
+ {
+ if (*(state->prsbuf) == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("there is no escaped character")));
+ else
+ {
+ RESIZEPRSBUF;
+ COPYCHAR(state->curpos, state->prsbuf);
+ state->curpos += pg_mblen(state->prsbuf);
+ state->state = oldstate;
+ }
+ }
+ else if (state->state == WAITENDWORD)
+ {
+ if (t_iseq(state->prsbuf, '\\'))
+ {
+ state->state = WAITNEXTCHAR;
+ oldstate = WAITENDWORD;
+ }
+ else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
+ (state->oprisdelim && ISOPERATOR(state->prsbuf)))
+ {
+ RESIZEPRSBUF;
+ if (state->curpos == state->word)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ *(state->curpos) = '\0';
+ return true;
+ }
+ else if (t_iseq(state->prsbuf, ':'))
+ {
+ if (state->curpos == state->word)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ *(state->curpos) = '\0';
+ if (state->oprisdelim)
+ return true;
+ else
+ state->state = INPOSINFO;
+ }
+ else
+ {
+ RESIZEPRSBUF;
+ COPYCHAR(state->curpos, state->prsbuf);
+ state->curpos += pg_mblen(state->prsbuf);
+ }
+ }
+ else if (state->state == WAITENDCMPLX)
+ {
+ if (t_iseq(state->prsbuf, '\''))
+ {
+ state->state = WAITCHARCMPLX;
+ }
+ else if (t_iseq(state->prsbuf, '\\'))
+ {
+ state->state = WAITNEXTCHAR;
+ oldstate = WAITENDCMPLX;
+ }
+ else if (*(state->prsbuf) == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ else
+ {
+ RESIZEPRSBUF;
+ COPYCHAR(state->curpos, state->prsbuf);
+ state->curpos += pg_mblen(state->prsbuf);
+ }
+ }
+ else if (state->state == WAITCHARCMPLX)
+ {
+ if (t_iseq(state->prsbuf, '\''))
+ {
+ RESIZEPRSBUF;
+ COPYCHAR(state->curpos, state->prsbuf);
+ state->curpos += pg_mblen(state->prsbuf);
+ state->state = WAITENDCMPLX;
+ }
+ else
+ {
+ RESIZEPRSBUF;
+ *(state->curpos) = '\0';
+ if (state->curpos == state->word)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ if (state->oprisdelim)
+ {
+ /* state->prsbuf+=pg_mblen(state->prsbuf); */
+ return true;
+ }
+ else
+ state->state = WAITPOSINFO;
+ continue; /* recheck current character */
+ }
+ }
+ else if (state->state == WAITPOSINFO)
+ {
+ if (t_iseq(state->prsbuf, ':'))
+ state->state = INPOSINFO;
+ else
+ return true;
+ }
+ else if (state->state == INPOSINFO)
+ {
+ if (t_isdigit(state->prsbuf))
+ {
+ if (state->alen == 0)
+ {
+ state->alen = 4;
+ state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
+ *(uint16 *) (state->pos) = 0;
+ }
+ else if (*(uint16 *) (state->pos) + 1 >= state->alen)
+ {
+ state->alen *= 2;
+ state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
+ }
+ (*(uint16 *) (state->pos))++;
+ WEP_SETPOS(state->pos[*(uint16 *) (state->pos)], LIMITPOS(atoi(state->prsbuf)));
+ if (WEP_GETPOS(state->pos[*(uint16 *) (state->pos)]) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("wrong position info in tsvector")));
+ WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
+ state->state = WAITPOSDELIM;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ }
+ else if (state->state == WAITPOSDELIM)
+ {
+ if (t_iseq(state->prsbuf, ','))
+ state->state = INPOSINFO;
+ else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
+ {
+ if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3);
+ }
+ else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
+ {
+ if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2);
+ }
+ else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
+ {
+ if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1);
+ }
+ else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
+ {
+ if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
+ }
+ else if (t_isspace(state->prsbuf) ||
+ *(state->prsbuf) == '\0')
+ return true;
+ else if (!t_isdigit(state->prsbuf))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsvector")));
+ }
+ else /* internal error */
+ elog(ERROR, "internal error in gettoken_tsvector");
+
+ /* get next char */
+ state->prsbuf += pg_mblen(state->prsbuf);
+ }
+
+ return false;
+}
+
+Datum
+tsvectorin(PG_FUNCTION_ARGS)
+{
+ char *buf = PG_GETARG_CSTRING(0);
+ TSVectorParseState state;
+ WordEntryIN *arr;
+ WordEntry *inarr;
+ int4 len = 0,
+ totallen = 64;
+ TSVector in;
+ char *tmpbuf,
+ *cur;
+ int4 i,
+ buflen = 256;
+
+ pg_verifymbstr(buf, strlen(buf), false);
+ state.prsbuf = buf;
+ state.len = 32;
+ state.word = (char *) palloc(state.len);
+ state.oprisdelim = false;
+
+ arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+ cur = tmpbuf = (char *) palloc(buflen);
+
+ while (gettoken_tsvector(&state))
+ {
+ /*
+ * Realloc buffers if it's needed
+ */
+ if (len >= totallen)
+ {
+ totallen *= 2;
+ arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+ }
+
+ while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+ {
+ int4 dist = cur - tmpbuf;
+
+ buflen *= 2;
+ tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+ cur = tmpbuf + dist;
+ }
+
+ if (state.curpos - state.word >= MAXSTRLEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("word is too long (%d bytes, max %d bytes)",
+ state.curpos - state.word, MAXSTRLEN)));
+
+ arr[len].entry.len = state.curpos - state.word;
+ if (cur - tmpbuf > MAXSTRPOS)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("position value too large")));
+ arr[len].entry.pos = cur - tmpbuf;
+ memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+ cur += arr[len].entry.len;
+
+ if (state.alen)
+ {
+ arr[len].entry.haspos = 1;
+ arr[len].pos = state.pos;
+ }
+ else
+ arr[len].entry.haspos = 0;
+ len++;
+ }
+ pfree(state.word);
+
+ if (len > 0)
+ len = uniqueentry(arr, len, tmpbuf, &buflen);
+ else
+ buflen = 0;
+ totallen = CALCDATASIZE(len, buflen);
+ in = (TSVector) palloc0(totallen);
+
+ SET_VARSIZE(in, totallen);
+ in->size = len;
+ cur = STRPTR(in);
+ inarr = ARRPTR(in);
+ for (i = 0; i < len; i++)
+ {
+ memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+ arr[i].entry.pos = cur - STRPTR(in);
+ cur += SHORTALIGN(arr[i].entry.len);
+ if (arr[i].entry.haspos)
+ {
+ memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
+ cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
+ pfree(arr[i].pos);
+ }
+ inarr[i] = arr[i].entry;
+ }
+
+ PG_RETURN_TSVECTOR(in);
+}
+
+Datum
+tsvectorout(PG_FUNCTION_ARGS)
+{
+ TSVector out = PG_GETARG_TSVECTOR(0);
+ char *outbuf;
+ int4 i,
+ lenbuf = 0,
+ pp;
+ WordEntry *ptr = ARRPTR(out);
+ char *curbegin,
+ *curin,
+ *curout;
+
+ lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
+ for (i = 0; i < out->size; i++)
+ {
+ lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ;
+ if (ptr[i].haspos)
+ lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * POSDATALEN(out, &(ptr[i]));
+ }
+
+ curout = outbuf = (char *) palloc(lenbuf);
+ for (i = 0; i < out->size; i++)
+ {
+ curbegin = curin = STRPTR(out) + ptr->pos;
+ if (i != 0)
+ *curout++ = ' ';
+ *curout++ = '\'';
+ while (curin - curbegin < ptr->len)
+ {
+ int len = pg_mblen(curin);
+
+ if (t_iseq(curin, '\''))
+ *curout++ = '\'';
+
+ while (len--)
+ *curout++ = *curin++;
+ }
+
+ *curout++ = '\'';
+ if ((pp = POSDATALEN(out, ptr)) != 0)
+ {
+ WordEntryPos *wptr;
+
+ *curout++ = ':';
+ wptr = POSDATAPTR(out, ptr);
+ while (pp)
+ {
+ curout += sprintf(curout, "%d", WEP_GETPOS(*wptr));
+ switch (WEP_GETWEIGHT(*wptr))
+ {
+ case 3:
+ *curout++ = 'A';
+ break;
+ case 2:
+ *curout++ = 'B';
+ break;
+ case 1:
+ *curout++ = 'C';
+ break;
+ case 0:
+ default:
+ break;
+ }
+
+ if (pp > 1)
+ *curout++ = ',';
+ pp--;
+ wptr++;
+ }
+ }
+ ptr++;
+ }
+
+ *curout = '\0';
+ PG_FREE_IF_COPY(out, 0);
+ PG_RETURN_CSTRING(outbuf);
+}
+
+Datum
+tsvectorsend(PG_FUNCTION_ARGS)
+{
+ TSVector vec = PG_GETARG_TSVECTOR(0);
+ StringInfoData buf;
+ int i,
+ j;
+ WordEntry *weptr = ARRPTR(vec);
+
+ pq_begintypsend(&buf);
+
+ pq_sendint(&buf, vec->size, sizeof(int32));
+ for (i = 0; i < vec->size; i++)
+ {
+ /*
+ * We are sure that sizeof(WordEntry) == sizeof(int32)
+ */
+ pq_sendint(&buf, *(int32 *) weptr, sizeof(int32));
+
+ pq_sendbytes(&buf, STRPTR(vec) + weptr->pos, weptr->len);
+ if (weptr->haspos)
+ {
+ WordEntryPos *wepptr = POSDATAPTR(vec, weptr);
+
+ pq_sendint(&buf, POSDATALEN(vec, weptr), sizeof(WordEntryPos));
+ for (j = 0; j < POSDATALEN(vec, weptr); j++)
+ pq_sendint(&buf, wepptr[j], sizeof(WordEntryPos));
+ }
+ weptr++;
+ }
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+Datum
+tsvectorrecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ TSVector vec;
+ int i,
+ size,
+ len = DATAHDRSIZE;
+ WordEntry *weptr;
+ int datalen = 0;
+
+ size = pq_getmsgint(buf, sizeof(uint32));
+ if (size < 0 || size > (MaxAllocSize / sizeof(WordEntry)))
+ elog(ERROR, "invalid size of tsvector");
+
+ len += sizeof(WordEntry) * size;
+
+ len *= 2;
+ vec = (TSVector) palloc0(len);
+ vec->size = size;
+
+ weptr = ARRPTR(vec);
+ for (i = 0; i < size; i++)
+ {
+ int tmp;
+
+ weptr = ARRPTR(vec) + i;
+
+ /*
+ * We are sure that sizeof(WordEntry) == sizeof(int32)
+ */
+ tmp = pq_getmsgint(buf, sizeof(int32));
+ *weptr = *(WordEntry *) & tmp;
+
+ while (CALCDATASIZE(size, datalen + SHORTALIGN(weptr->len)) >= len)
+ {
+ len *= 2;
+ vec = (TSVector) repalloc(vec, len);
+ weptr = ARRPTR(vec) + i;
+ }
+
+ memcpy(STRPTR(vec) + weptr->pos,
+ pq_getmsgbytes(buf, weptr->len),
+ weptr->len);
+ datalen += SHORTALIGN(weptr->len);
+
+ if (i > 0 && WordEntryCMP(weptr, weptr - 1, STRPTR(vec)) <= 0)
+ elog(ERROR, "lexemes are unordered");
+
+ if (weptr->haspos)
+ {
+ uint16 j,
+ npos;
+ WordEntryPos *wepptr;
+
+ npos = (uint16) pq_getmsgint(buf, sizeof(int16));
+ if (npos > MAXNUMPOS)
+ elog(ERROR, "unexpected number of positions");
+
+ while (CALCDATASIZE(size, datalen + (npos + 1) * sizeof(WordEntryPos)) >= len)
+ {
+ len *= 2;
+ vec = (TSVector) repalloc(vec, len);
+ weptr = ARRPTR(vec) + i;
+ }
+
+ memcpy(_POSDATAPTR(vec, weptr), &npos, sizeof(int16));
+ wepptr = POSDATAPTR(vec, weptr);
+ for (j = 0; j < npos; j++)
+ {
+ wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(int16));
+ if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
+ elog(ERROR, "position information is unordered");
+ }
+
+ datalen += (npos + 1) * sizeof(WordEntry);
+ }
+ }
+
+ SET_VARSIZE(vec, CALCDATASIZE(vec->size, datalen));
+
+ PG_RETURN_TSVECTOR(vec);
+}
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
new file mode 100644
index 00000000000..341247bec75
--- /dev/null
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -0,0 +1,1334 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsvector_op.c
+ * operations over tsvector
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "catalog/namespace.h"
+#include "commands/trigger.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+#include "mb/pg_wchar.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+
+
+typedef struct
+{
+ WordEntry *arrb;
+ WordEntry *arre;
+ char *values;
+ char *operand;
+} CHKVAL;
+
+typedef struct
+{
+ uint32 cur;
+ TSVector stat;
+} StatStorage;
+
+typedef struct
+{
+ uint32 len;
+ uint32 pos;
+ uint32 ndoc;
+ uint32 nentry;
+} StatEntry;
+
+typedef struct
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ int4 size;
+ int4 weight;
+ char data[1];
+} tsstat;
+
+#define STATHDRSIZE (sizeof(int4) * 4)
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+#define STATSTRPTR(x) ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((TSVector)(x))->size ) )
+#define STATSTRSIZE(x) ( VARSIZE((TSVector)(x)) - STATHDRSIZE - ( sizeof(StatEntry) * ((TSVector)(x))->size ) )
+
+
+static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
+
+
+static int
+silly_cmp_tsvector(const TSVector a, const TSVector b)
+{
+ if (VARSIZE(a) < VARSIZE(b))
+ return -1;
+ else if (VARSIZE(a) > VARSIZE(b))
+ return 1;
+ else if (a->size < b->size)
+ return -1;
+ else if (a->size > b->size)
+ return 1;
+ else
+ {
+ WordEntry *aptr = ARRPTR(a);
+ WordEntry *bptr = ARRPTR(b);
+ int i = 0;
+ int res;
+
+
+ for (i = 0; i < a->size; i++)
+ {
+ if (aptr->haspos != bptr->haspos)
+ {
+ return (aptr->haspos > bptr->haspos) ? -1 : 1;
+ }
+ else if (aptr->len != bptr->len)
+ {
+ return (aptr->len > bptr->len) ? -1 : 1;
+ }
+ else if ((res = strncmp(STRPTR(a) + aptr->pos, STRPTR(b) + bptr->pos, bptr->len)) != 0)
+ {
+ return res;
+ }
+ else if (aptr->haspos)
+ {
+ WordEntryPos *ap = POSDATAPTR(a, aptr);
+ WordEntryPos *bp = POSDATAPTR(b, bptr);
+ int j;
+
+ if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
+ return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
+
+ for (j = 0; j < POSDATALEN(a, aptr); j++)
+ {
+ if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
+ {
+ return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
+ }
+ else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
+ {
+ return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
+ }
+ ap++, bp++;
+ }
+ }
+
+ aptr++;
+ bptr++;
+ }
+ }
+
+ return 0;
+}
+
+#define TSVECTORCMPFUNC( type, action, ret ) \
+Datum \
+tsvector_##type(PG_FUNCTION_ARGS) \
+{ \
+ TSVector a = PG_GETARG_TSVECTOR(0); \
+ TSVector b = PG_GETARG_TSVECTOR(1); \
+ int res = silly_cmp_tsvector(a, b); \
+ PG_FREE_IF_COPY(a,0); \
+ PG_FREE_IF_COPY(b,1); \
+ PG_RETURN_##ret( res action 0 ); \
+}
+
+TSVECTORCMPFUNC(lt, <, BOOL);
+TSVECTORCMPFUNC(le, <=, BOOL);
+TSVECTORCMPFUNC(eq, ==, BOOL);
+TSVECTORCMPFUNC(ge, >=, BOOL);
+TSVECTORCMPFUNC(gt, >, BOOL);
+TSVECTORCMPFUNC(ne, !=, BOOL);
+TSVECTORCMPFUNC(cmp, +, INT32);
+
+Datum
+tsvector_strip(PG_FUNCTION_ARGS)
+{
+ TSVector in = PG_GETARG_TSVECTOR(0);
+ TSVector out;
+ int i,
+ len = 0;
+ WordEntry *arrin = ARRPTR(in),
+ *arrout;
+ char *cur;
+
+ for (i = 0; i < in->size; i++)
+ len += SHORTALIGN(arrin[i].len);
+
+ len = CALCDATASIZE(in->size, len);
+ out = (TSVector) palloc0(len);
+ SET_VARSIZE(out, len);
+ out->size = in->size;
+ arrout = ARRPTR(out);
+ cur = STRPTR(out);
+ for (i = 0; i < in->size; i++)
+ {
+ memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
+ arrout[i].haspos = 0;
+ arrout[i].len = arrin[i].len;
+ arrout[i].pos = cur - STRPTR(out);
+ cur += SHORTALIGN(arrout[i].len);
+ }
+
+ PG_FREE_IF_COPY(in, 0);
+ PG_RETURN_POINTER(out);
+}
+
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+ TSVector in = PG_GETARG_TSVECTOR(0);
+ int4 ret = in->size;
+
+ PG_FREE_IF_COPY(in, 0);
+ PG_RETURN_INT32(ret);
+}
+
+Datum
+tsvector_setweight(PG_FUNCTION_ARGS)
+{
+ TSVector in = PG_GETARG_TSVECTOR(0);
+ char cw = PG_GETARG_CHAR(1);
+ TSVector out;
+ int i,
+ j;
+ WordEntry *entry;
+ WordEntryPos *p;
+ int w = 0;
+
+ switch (cw)
+ {
+ case 'A':
+ case 'a':
+ w = 3;
+ break;
+ case 'B':
+ case 'b':
+ w = 2;
+ break;
+ case 'C':
+ case 'c':
+ w = 1;
+ break;
+ case 'D':
+ case 'd':
+ w = 0;
+ break;
+ /* internal error */
+ default:
+ elog(ERROR, "unrecognized weight");
+ }
+
+ out = (TSVector) palloc(VARSIZE(in));
+ memcpy(out, in, VARSIZE(in));
+ entry = ARRPTR(out);
+ i = out->size;
+ while (i--)
+ {
+ if ((j = POSDATALEN(out, entry)) != 0)
+ {
+ p = POSDATAPTR(out, entry);
+ while (j--)
+ {
+ WEP_SETWEIGHT(*p, w);
+ p++;
+ }
+ }
+ entry++;
+ }
+
+ PG_FREE_IF_COPY(in, 0);
+ PG_RETURN_POINTER(out);
+}
+
+static int
+compareEntry(char *ptra, WordEntry * a, char *ptrb, WordEntry * b)
+{
+ if (a->len == b->len)
+ {
+ return strncmp(
+ ptra + a->pos,
+ ptrb + b->pos,
+ a->len);
+ }
+ return (a->len > b->len) ? 1 : -1;
+}
+
+static int4
+add_pos(TSVector src, WordEntry * srcptr, TSVector dest, WordEntry * destptr, int4 maxpos)
+{
+ uint16 *clen = (uint16 *) _POSDATAPTR(dest, destptr);
+ int i;
+ uint16 slen = POSDATALEN(src, srcptr),
+ startlen;
+ WordEntryPos *spos = POSDATAPTR(src, srcptr),
+ *dpos = POSDATAPTR(dest, destptr);
+
+ if (!destptr->haspos)
+ *clen = 0;
+
+ startlen = *clen;
+ for (i = 0; i < slen && *clen < MAXNUMPOS && (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1); i++)
+ {
+ WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
+ WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
+ (*clen)++;
+ }
+
+ if (*clen != startlen)
+ destptr->haspos = 1;
+ return *clen - startlen;
+}
+
+
+Datum
+tsvector_concat(PG_FUNCTION_ARGS)
+{
+ TSVector in1 = PG_GETARG_TSVECTOR(0);
+ TSVector in2 = PG_GETARG_TSVECTOR(1);
+ TSVector out;
+ WordEntry *ptr;
+ WordEntry *ptr1,
+ *ptr2;
+ WordEntryPos *p;
+ int maxpos = 0,
+ i,
+ j,
+ i1,
+ i2;
+ char *cur;
+ char *data,
+ *data1,
+ *data2;
+
+ ptr = ARRPTR(in1);
+ i = in1->size;
+ while (i--)
+ {
+ if ((j = POSDATALEN(in1, ptr)) != 0)
+ {
+ p = POSDATAPTR(in1, ptr);
+ while (j--)
+ {
+ if (WEP_GETPOS(*p) > maxpos)
+ maxpos = WEP_GETPOS(*p);
+ p++;
+ }
+ }
+ ptr++;
+ }
+
+ ptr1 = ARRPTR(in1);
+ ptr2 = ARRPTR(in2);
+ data1 = STRPTR(in1);
+ data2 = STRPTR(in2);
+ i1 = in1->size;
+ i2 = in2->size;
+ out = (TSVector) palloc0(VARSIZE(in1) + VARSIZE(in2));
+ SET_VARSIZE(out, VARSIZE(in1) + VARSIZE(in2));
+ out->size = in1->size + in2->size;
+ data = cur = STRPTR(out);
+ ptr = ARRPTR(out);
+ while (i1 && i2)
+ {
+ int cmp = compareEntry(data1, ptr1, data2, ptr2);
+
+ if (cmp < 0)
+ { /* in1 first */
+ ptr->haspos = ptr1->haspos;
+ ptr->len = ptr1->len;
+ memcpy(cur, data1 + ptr1->pos, ptr1->len);
+ ptr->pos = cur - data;
+ cur += SHORTALIGN(ptr1->len);
+ if (ptr->haspos)
+ {
+ memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
+ cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
+ }
+ ptr++;
+ ptr1++;
+ i1--;
+ }
+ else if (cmp > 0)
+ { /* in2 first */
+ ptr->haspos = ptr2->haspos;
+ ptr->len = ptr2->len;
+ memcpy(cur, data2 + ptr2->pos, ptr2->len);
+ ptr->pos = cur - data;
+ cur += SHORTALIGN(ptr2->len);
+ if (ptr->haspos)
+ {
+ int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+
+ if (addlen == 0)
+ ptr->haspos = 0;
+ else
+ cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+ }
+ ptr++;
+ ptr2++;
+ i2--;
+ }
+ else
+ {
+ ptr->haspos = ptr1->haspos | ptr2->haspos;
+ ptr->len = ptr1->len;
+ memcpy(cur, data1 + ptr1->pos, ptr1->len);
+ ptr->pos = cur - data;
+ cur += SHORTALIGN(ptr1->len);
+ if (ptr->haspos)
+ {
+ if (ptr1->haspos)
+ {
+ memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
+ cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
+ if (ptr2->haspos)
+ cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
+ }
+ else if (ptr2->haspos)
+ {
+ int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+
+ if (addlen == 0)
+ ptr->haspos = 0;
+ else
+ cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+ }
+ }
+ ptr++;
+ ptr1++;
+ ptr2++;
+ i1--;
+ i2--;
+ }
+ }
+
+ while (i1)
+ {
+ ptr->haspos = ptr1->haspos;
+ ptr->len = ptr1->len;
+ memcpy(cur, data1 + ptr1->pos, ptr1->len);
+ ptr->pos = cur - data;
+ cur += SHORTALIGN(ptr1->len);
+ if (ptr->haspos)
+ {
+ memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
+ cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
+ }
+ ptr++;
+ ptr1++;
+ i1--;
+ }
+
+ while (i2)
+ {
+ ptr->haspos = ptr2->haspos;
+ ptr->len = ptr2->len;
+ memcpy(cur, data2 + ptr2->pos, ptr2->len);
+ ptr->pos = cur - data;
+ cur += SHORTALIGN(ptr2->len);
+ if (ptr->haspos)
+ {
+ int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+
+ if (addlen == 0)
+ ptr->haspos = 0;
+ else
+ cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+ }
+ ptr++;
+ ptr2++;
+ i2--;
+ }
+
+ out->size = ptr - ARRPTR(out);
+ SET_VARSIZE(out, CALCDATASIZE(out->size, cur - data));
+ if (data != STRPTR(out))
+ memmove(STRPTR(out), data, cur - data);
+
+ PG_FREE_IF_COPY(in1, 0);
+ PG_FREE_IF_COPY(in2, 1);
+ PG_RETURN_POINTER(out);
+}
+
+/*
+ * compare 2 string values
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item)
+{
+ if (ptr->len == item->length)
+ return strncmp(
+ &(chkval->values[ptr->pos]),
+ &(chkval->operand[item->distance]),
+ item->length);
+
+ return (ptr->len > item->length) ? 1 : -1;
+}
+
+/*
+ * check weight info
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item)
+{
+ WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16));
+ uint16 len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len)));
+
+ while (len--)
+ {
+ if (item->weight & (1 << WEP_GETWEIGHT(*ptr)))
+ return true;
+ ptr++;
+ }
+ return false;
+}
+
+/*
+ * is there value 'val' in array or not ?
+ */
+static bool
+checkcondition_str(void *checkval, QueryItem * val)
+{
+ WordEntry *StopLow = ((CHKVAL *) checkval)->arrb;
+ WordEntry *StopHigh = ((CHKVAL *) checkval)->arre;
+ WordEntry *StopMiddle;
+ int difference;
+
+ /* Loop invariant: StopLow <= val < StopHigh */
+
+ while (StopLow < StopHigh)
+ {
+ StopMiddle = StopLow + (StopHigh - StopLow) / 2;
+ difference = ValCompare((CHKVAL *) checkval, StopMiddle, val);
+ if (difference == 0)
+ return (val->weight && StopMiddle->haspos) ?
+ checkclass_str((CHKVAL *) checkval, StopMiddle, val) : true;
+ else if (difference < 0)
+ StopLow = StopMiddle + 1;
+ else
+ StopHigh = StopMiddle;
+ }
+
+ return (false);
+}
+
+/*
+ * check for boolean condition
+ */
+bool
+TS_execute(QueryItem * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, QueryItem * val))
+{
+ if (curitem->type == VAL)
+ return chkcond(checkval, curitem);
+ else if (curitem->val == (int4) '!')
+ {
+ return (calcnot) ?
+ ((TS_execute(curitem + 1, checkval, calcnot, chkcond)) ? false : true)
+ : true;
+ }
+ else if (curitem->val == (int4) '&')
+ {
+ if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+ return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+ else
+ return false;
+ }
+ else
+ { /* |-operator */
+ if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+ return true;
+ else
+ return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+ }
+ return false;
+}
+
+/*
+ * boolean operations
+ */
+Datum
+ts_match_qv(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
+ PG_GETARG_DATUM(1),
+ PG_GETARG_DATUM(0)));
+}
+
+Datum
+ts_match_vq(PG_FUNCTION_ARGS)
+{
+ TSVector val = PG_GETARG_TSVECTOR(0);
+ TSQuery query = PG_GETARG_TSQUERY(1);
+ CHKVAL chkval;
+ bool result;
+
+ if (!val->size || !query->size)
+ {
+ PG_FREE_IF_COPY(val, 0);
+ PG_FREE_IF_COPY(query, 1);
+ PG_RETURN_BOOL(false);
+ }
+
+ chkval.arrb = ARRPTR(val);
+ chkval.arre = chkval.arrb + val->size;
+ chkval.values = STRPTR(val);
+ chkval.operand = GETOPERAND(query);
+ result = TS_execute(
+ GETQUERY(query),
+ &chkval,
+ true,
+ checkcondition_str
+ );
+
+ PG_FREE_IF_COPY(val, 0);
+ PG_FREE_IF_COPY(query, 1);
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+ts_match_tt(PG_FUNCTION_ARGS)
+{
+ TSVector vector;
+ TSQuery query;
+ bool res;
+
+ vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
+ PG_GETARG_DATUM(0)));
+ query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
+ PG_GETARG_DATUM(1)));
+
+ res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
+ TSVectorGetDatum(vector),
+ TSQueryGetDatum(query)));
+
+ pfree(vector);
+ pfree(query);
+
+ PG_RETURN_BOOL(res);
+}
+
+Datum
+ts_match_tq(PG_FUNCTION_ARGS)
+{
+ TSVector vector;
+ TSQuery query = PG_GETARG_TSQUERY(1);
+ bool res;
+
+ vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
+ PG_GETARG_DATUM(0)));
+
+ res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
+ TSVectorGetDatum(vector),
+ TSQueryGetDatum(query)));
+
+ pfree(vector);
+ PG_FREE_IF_COPY(query, 1);
+
+ PG_RETURN_BOOL(res);
+}
+
+/*
+ * Statistics of tsvector
+ */
+static int
+check_weight(TSVector txt, WordEntry * wptr, int8 weight)
+{
+ int len = POSDATALEN(txt, wptr);
+ int num = 0;
+ WordEntryPos *ptr = POSDATAPTR(txt, wptr);
+
+ while (len--)
+ {
+ if (weight & (1 << WEP_GETWEIGHT(*ptr)))
+ num++;
+ ptr++;
+ }
+ return num;
+}
+
+static WordEntry **
+SEI_realloc(WordEntry ** in, uint32 *len)
+{
+ if (*len == 0 || in == NULL)
+ {
+ *len = 8;
+ in = palloc(sizeof(WordEntry *) * (*len));
+ }
+ else
+ {
+ *len *= 2;
+ in = repalloc(in, sizeof(WordEntry *) * (*len));
+ }
+ return in;
+}
+
+static int
+compareStatWord(StatEntry * a, WordEntry * b, tsstat * stat, TSVector txt)
+{
+ if (a->len == b->len)
+ return strncmp(
+ STATSTRPTR(stat) + a->pos,
+ STRPTR(txt) + b->pos,
+ a->len
+ );
+ return (a->len > b->len) ? 1 : -1;
+}
+
+static tsstat *
+formstat(tsstat * stat, TSVector txt, WordEntry ** entry, uint32 len)
+{
+ tsstat *newstat;
+ uint32 totallen,
+ nentry;
+ uint32 slen = 0;
+ WordEntry **ptr = entry;
+ char *curptr;
+ StatEntry *sptr,
+ *nptr;
+
+ while (ptr - entry < len)
+ {
+ slen += (*ptr)->len;
+ ptr++;
+ }
+
+ nentry = stat->size + len;
+ slen += STATSTRSIZE(stat);
+ totallen = CALCSTATSIZE(nentry, slen);
+ newstat = palloc(totallen);
+ SET_VARSIZE(newstat, totallen);
+ newstat->weight = stat->weight;
+ newstat->size = nentry;
+
+ memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+ curptr = STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+ ptr = entry;
+ sptr = STATPTR(stat);
+ nptr = STATPTR(newstat);
+
+ if (len == 1)
+ {
+ StatEntry *StopLow = STATPTR(stat);
+ StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
+
+ while (StopLow < StopHigh)
+ {
+ sptr = StopLow + (StopHigh - StopLow) / 2;
+ if (compareStatWord(sptr, *ptr, stat, txt) < 0)
+ StopLow = sptr + 1;
+ else
+ StopHigh = sptr;
+ }
+ nptr = STATPTR(newstat) + (StopLow - STATPTR(stat));
+ memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat)));
+ if ((*ptr)->haspos)
+ nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
+ else
+ nptr->nentry = 1;
+ nptr->ndoc = 1;
+ nptr->len = (*ptr)->len;
+ memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+ nptr->pos = curptr - STATSTRPTR(newstat);
+ memcpy(nptr + 1, StopLow, sizeof(StatEntry) * (((StatEntry *) STATSTRPTR(stat)) - StopLow));
+ }
+ else
+ {
+ while (sptr - STATPTR(stat) < stat->size && ptr - entry < len)
+ {
+ if (compareStatWord(sptr, *ptr, stat, txt) < 0)
+ {
+ memcpy(nptr, sptr, sizeof(StatEntry));
+ sptr++;
+ }
+ else
+ {
+ if ((*ptr)->haspos)
+ nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
+ else
+ nptr->nentry = 1;
+ nptr->ndoc = 1;
+ nptr->len = (*ptr)->len;
+ memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+ nptr->pos = curptr - STATSTRPTR(newstat);
+ curptr += nptr->len;
+ ptr++;
+ }
+ nptr++;
+ }
+
+ memcpy(nptr, sptr, sizeof(StatEntry) * (stat->size - (sptr - STATPTR(stat))));
+
+ while (ptr - entry < len)
+ {
+ if ((*ptr)->haspos)
+ nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
+ else
+ nptr->nentry = 1;
+ nptr->ndoc = 1;
+ nptr->len = (*ptr)->len;
+ memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+ nptr->pos = curptr - STATSTRPTR(newstat);
+ curptr += nptr->len;
+ ptr++;
+ nptr++;
+ }
+ }
+
+ return newstat;
+}
+
+static tsstat *
+ts_accum(tsstat * stat, Datum data)
+{
+ tsstat *newstat;
+ TSVector txt = DatumGetTSVector(data);
+ WordEntry **newentry = NULL;
+ uint32 len = 0,
+ cur = 0;
+ StatEntry *sptr;
+ WordEntry *wptr;
+ int n = 0;
+
+ if (stat == NULL)
+ { /* Init in first */
+ stat = palloc(STATHDRSIZE);
+ SET_VARSIZE(stat, STATHDRSIZE);
+ stat->size = 0;
+ stat->weight = 0;
+ }
+
+ /* simple check of correctness */
+ if (txt == NULL || txt->size == 0)
+ {
+ if (txt != (TSVector) DatumGetPointer(data))
+ pfree(txt);
+ return stat;
+ }
+
+ sptr = STATPTR(stat);
+ wptr = ARRPTR(txt);
+
+ if (stat->size < 100 * txt->size)
+ { /* merge */
+ while (sptr - STATPTR(stat) < stat->size && wptr - ARRPTR(txt) < txt->size)
+ {
+ int cmp = compareStatWord(sptr, wptr, stat, txt);
+
+ if (cmp < 0)
+ sptr++;
+ else if (cmp == 0)
+ {
+ if (stat->weight == 0)
+ {
+ sptr->ndoc++;
+ sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1;
+ }
+ else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0)
+ {
+ sptr->ndoc++;
+ sptr->nentry += n;
+ }
+ sptr++;
+ wptr++;
+ }
+ else
+ {
+ if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
+ {
+ if (cur == len)
+ newentry = SEI_realloc(newentry, &len);
+ newentry[cur] = wptr;
+ cur++;
+ }
+ wptr++;
+ }
+ }
+
+ while (wptr - ARRPTR(txt) < txt->size)
+ {
+ if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
+ {
+ if (cur == len)
+ newentry = SEI_realloc(newentry, &len);
+ newentry[cur] = wptr;
+ cur++;
+ }
+ wptr++;
+ }
+ }
+ else
+ { /* search */
+ while (wptr - ARRPTR(txt) < txt->size)
+ {
+ StatEntry *StopLow = STATPTR(stat);
+ StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
+ int cmp;
+
+ while (StopLow < StopHigh)
+ {
+ sptr = StopLow + (StopHigh - StopLow) / 2;
+ cmp = compareStatWord(sptr, wptr, stat, txt);
+ if (cmp == 0)
+ {
+ if (stat->weight == 0)
+ {
+ sptr->ndoc++;
+ sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1;
+ }
+ else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0)
+ {
+ sptr->ndoc++;
+ sptr->nentry += n;
+ }
+ break;
+ }
+ else if (cmp < 0)
+ StopLow = sptr + 1;
+ else
+ StopHigh = sptr;
+ }
+
+ if (StopLow >= StopHigh)
+ { /* not found */
+ if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
+ {
+ if (cur == len)
+ newentry = SEI_realloc(newentry, &len);
+ newentry[cur] = wptr;
+ cur++;
+ }
+ }
+ wptr++;
+ }
+ }
+
+
+ if (cur == 0)
+ { /* no new words */
+ if (txt != (TSVector) DatumGetPointer(data))
+ pfree(txt);
+ return stat;
+ }
+
+ newstat = formstat(stat, txt, newentry, cur);
+ pfree(newentry);
+
+ if (txt != (TSVector) DatumGetPointer(data))
+ pfree(txt);
+ return newstat;
+}
+
+static void
+ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
+ tsstat * stat)
+{
+ TupleDesc tupdesc;
+ MemoryContext oldcontext;
+ StatStorage *st;
+
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+ st = palloc(sizeof(StatStorage));
+ st->cur = 0;
+ st->stat = palloc(VARSIZE(stat));
+ memcpy(st->stat, stat, VARSIZE(stat));
+ funcctx->user_fctx = (void *) st;
+
+ tupdesc = CreateTemplateTupleDesc(3, false);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
+ INT4OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
+ INT4OID, -1, 0);
+ funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+ funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+
+ MemoryContextSwitchTo(oldcontext);
+}
+
+
+static Datum
+ts_process_call(FuncCallContext *funcctx)
+{
+ StatStorage *st;
+
+ st = (StatStorage *) funcctx->user_fctx;
+
+ if (st->cur < st->stat->size)
+ {
+ Datum result;
+ char *values[3];
+ char ndoc[16];
+ char nentry[16];
+ StatEntry *entry = STATPTR(st->stat) + st->cur;
+ HeapTuple tuple;
+
+ values[0] = palloc(entry->len + 1);
+ memcpy(values[0], STATSTRPTR(st->stat) + entry->pos, entry->len);
+ (values[0])[entry->len] = '\0';
+ sprintf(ndoc, "%d", entry->ndoc);
+ values[1] = ndoc;
+ sprintf(nentry, "%d", entry->nentry);
+ values[2] = nentry;
+
+ tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+ result = HeapTupleGetDatum(tuple);
+
+ pfree(values[0]);
+ st->cur++;
+ return result;
+ }
+ else
+ {
+ pfree(st->stat);
+ pfree(st);
+ }
+
+ return (Datum) 0;
+}
+
+static tsstat *
+ts_stat_sql(text *txt, text *ws)
+{
+ char *query = TextPGetCString(txt);
+ int i;
+ tsstat *newstat,
+ *stat;
+ bool isnull;
+ Portal portal;
+ void *plan;
+
+ if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
+ /* internal error */
+ elog(ERROR, "SPI_prepare(\"%s\") failed", query);
+
+ if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, false)) == NULL)
+ /* internal error */
+ elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
+
+ SPI_cursor_fetch(portal, true, 100);
+
+ if (SPI_tuptable->tupdesc->natts != 1 ||
+ SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSVECTOROID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("ts_stat query must return one tsvector column")));
+
+ stat = palloc(STATHDRSIZE);
+ SET_VARSIZE(stat, STATHDRSIZE);
+ stat->size = 0;
+ stat->weight = 0;
+
+ if (ws)
+ {
+ char *buf;
+
+ buf = VARDATA(ws);
+ while (buf - VARDATA(ws) < VARSIZE(ws) - VARHDRSZ)
+ {
+ if (pg_mblen(buf) == 1)
+ {
+ switch (*buf)
+ {
+ case 'A':
+ case 'a':
+ stat->weight |= 1 << 3;
+ break;
+ case 'B':
+ case 'b':
+ stat->weight |= 1 << 2;
+ break;
+ case 'C':
+ case 'c':
+ stat->weight |= 1 << 1;
+ break;
+ case 'D':
+ case 'd':
+ stat->weight |= 1;
+ break;
+ default:
+ stat->weight |= 0;
+ }
+ }
+ buf += pg_mblen(buf);
+ }
+ }
+
+ while (SPI_processed > 0)
+ {
+ for (i = 0; i < SPI_processed; i++)
+ {
+ Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+ if (!isnull)
+ {
+ newstat = ts_accum(stat, data);
+ if (stat != newstat && stat)
+ pfree(stat);
+ stat = newstat;
+ }
+ }
+
+ SPI_freetuptable(SPI_tuptable);
+ SPI_cursor_fetch(portal, true, 100);
+ }
+
+ SPI_freetuptable(SPI_tuptable);
+ SPI_cursor_close(portal);
+ SPI_freeplan(plan);
+ pfree(query);
+
+ return stat;
+}
+
+Datum
+ts_stat1(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ Datum result;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ tsstat *stat;
+ text *txt = PG_GETARG_TEXT_P(0);
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ SPI_connect();
+ stat = ts_stat_sql(txt, NULL);
+ PG_FREE_IF_COPY(txt, 0);
+ ts_setup_firstcall(fcinfo, funcctx, stat);
+ SPI_finish();
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ if ((result = ts_process_call(funcctx)) != (Datum) 0)
+ SRF_RETURN_NEXT(funcctx, result);
+ SRF_RETURN_DONE(funcctx);
+}
+
+Datum
+ts_stat2(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ Datum result;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ tsstat *stat;
+ text *txt = PG_GETARG_TEXT_P(0);
+ text *ws = PG_GETARG_TEXT_P(1);
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ SPI_connect();
+ stat = ts_stat_sql(txt, ws);
+ PG_FREE_IF_COPY(txt, 0);
+ PG_FREE_IF_COPY(ws, 1);
+ ts_setup_firstcall(fcinfo, funcctx, stat);
+ SPI_finish();
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ if ((result = ts_process_call(funcctx)) != (Datum) 0)
+ SRF_RETURN_NEXT(funcctx, result);
+ SRF_RETURN_DONE(funcctx);
+}
+
+
+/* Check if datatype is TEXT or binary-equivalent to it */
+static bool
+istexttype(Oid typid)
+{
+ /* varchar(n) and char(n) are binary-compatible with text */
+ if (typid==TEXTOID || typid==VARCHAROID || typid==BPCHAROID)
+ return true;
+ /* Allow domains over these types, too */
+ typid = getBaseType(typid);
+ if (typid==TEXTOID || typid==VARCHAROID || typid==BPCHAROID)
+ return true;
+ return false;
+}
+
+
+/*
+ * Triggers for automatic update of a tsvector column from text column(s)
+ *
+ * Trigger arguments are either
+ * name of tsvector col, name of tsconfig to use, name(s) of text col(s)
+ * name of tsvector col, name of regconfig col, name(s) of text col(s)
+ * ie, tsconfig can either be specified by name, or indirectly as the
+ * contents of a regconfig field in the row. If the name is used, it must
+ * be explicitly schema-qualified.
+ */
+Datum
+tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
+{
+ return tsvector_update_trigger(fcinfo, false);
+}
+
+Datum
+tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
+{
+ return tsvector_update_trigger(fcinfo, true);
+}
+
+static Datum
+tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
+{
+ TriggerData *trigdata;
+ Trigger *trigger;
+ Relation rel;
+ HeapTuple rettuple = NULL;
+ int tsvector_attr_num,
+ i;
+ ParsedText prs;
+ Datum datum;
+ bool isnull;
+ text *txt;
+ Oid cfgId;
+
+ /* Check call context */
+ if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
+ elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
+
+ trigdata = (TriggerData *) fcinfo->context;
+ if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+ elog(ERROR, "tsvector_update_trigger: can't process STATEMENT events");
+ if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+ elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
+
+ if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+ rettuple = trigdata->tg_trigtuple;
+ else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+ rettuple = trigdata->tg_newtuple;
+ else
+ elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
+
+ trigger = trigdata->tg_trigger;
+ rel = trigdata->tg_relation;
+
+ if (trigger->tgnargs < 3)
+ elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
+
+ /* Find the target tsvector column */
+ tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+ if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("tsvector column \"%s\" does not exist",
+ trigger->tgargs[0])));
+ if (SPI_gettypeid(rel->rd_att, tsvector_attr_num) != TSVECTOROID)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("column \"%s\" is not of tsvector type",
+ trigger->tgargs[0])));
+
+ /* Find the configuration to use */
+ if (config_column)
+ {
+ int config_attr_num;
+
+ config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
+ if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("config column \"%s\" does not exist",
+ trigger->tgargs[1])));
+ if (SPI_gettypeid(rel->rd_att, config_attr_num) != REGCONFIGOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("column \"%s\" is not of regconfig type",
+ trigger->tgargs[1])));
+
+ datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
+ if (isnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("config column \"%s\" must not be NULL",
+ trigger->tgargs[1])));
+ cfgId = DatumGetObjectId(datum);
+ }
+ else
+ {
+ List *names;
+
+ names = stringToQualifiedNameList(trigger->tgargs[1]);
+ /* require a schema so that results are not search path dependent */
+ if (list_length(names) < 2)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("text search configuration name \"%s\" must be schema-qualified",
+ trigger->tgargs[1])));
+ cfgId = TSConfigGetCfgid(names, false);
+ }
+
+ /* initialize parse state */
+ prs.lenwords = 32;
+ prs.curwords = 0;
+ prs.pos = 0;
+ prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
+
+ /* find all words in indexable column(s) */
+ for (i = 2; i < trigger->tgnargs; i++)
+ {
+ int numattr;
+
+ numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+ if (numattr == SPI_ERROR_NOATTRIBUTE)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" does not exist",
+ trigger->tgargs[i])));
+ if (!istexttype(SPI_gettypeid(rel->rd_att, numattr)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("column \"%s\" is not of character type",
+ trigger->tgargs[i])));
+
+ datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+ if (isnull)
+ continue;
+
+ txt = DatumGetTextP(datum);
+
+ parsetext(cfgId, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+
+ if (txt != (text *) DatumGetPointer(datum))
+ pfree(txt);
+ }
+
+ /* make tsvector value */
+ if (prs.curwords)
+ {
+ datum = PointerGetDatum(make_tsvector(&prs));
+ rettuple = SPI_modifytuple(rel, rettuple, 1, &tsvector_attr_num,
+ &datum, NULL);
+ pfree(DatumGetPointer(datum));
+ }
+ else
+ {
+ TSVector out = palloc(CALCDATASIZE(0, 0));
+
+ SET_VARSIZE(out, CALCDATASIZE(0, 0));
+ out->size = 0;
+ datum = PointerGetDatum(out);
+ rettuple = SPI_modifytuple(rel, rettuple, 1, &tsvector_attr_num,
+ &datum, NULL);
+ pfree(prs.words);
+ }
+
+ if (rettuple == NULL) /* internal error */
+ elog(ERROR, "tsvector_update_trigger: %d returned by SPI_modifytuple",
+ SPI_result);
+
+ return PointerGetDatum(rettuple);
+}