aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/tsquery.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/tsquery.c')
-rw-r--r--src/backend/utils/adt/tsquery.c582
1 files changed, 376 insertions, 206 deletions
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index 83759728ff9..27b93eb64d7 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.2 2007/08/31 02:26:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
@@ -23,6 +23,29 @@
#include "utils/pg_crc.h"
+struct TSQueryParserStateData
+{
+ /* State for gettoken_query */
+ char *buffer; /* entire string we are scanning */
+ char *buf; /* current scan point */
+ int state;
+ int count; /* nesting count, incremented by (,
+ decremented by ) */
+
+ /* polish (prefix) notation in list, filled in by push* functions */
+ List *polstr;
+
+ /* Strings from operands are collected in op. curop is a pointer to
+ * the end of used space of op. */
+ char *op;
+ char *curop;
+ int lenop; /* allocated size of op */
+ int sumlen; /* used size of op */
+
+ /* state for value's parser */
+ TSVectorParseState valstate;
+};
+
/* parser's states */
#define WAITOPERAND 1
#define WAITOPERATOR 2
@@ -30,21 +53,10 @@
#define WAITSINGLEOPERAND 4
/*
- * node of query tree, also used
- * for storing polish notation in parser
+ * subroutine to parse the weight part, like ':1AB' of a query.
*/
-typedef struct ParseQueryNode
-{
- int2 weight;
- int2 type;
- int4 val;
- int2 distance;
- int2 length;
- struct ParseQueryNode *next;
-} ParseQueryNode;
-
static char *
-get_weight(char *buf, int2 *weight)
+get_weight(char *buf, int16 *weight)
{
*weight = 0;
@@ -82,10 +94,27 @@ get_weight(char *buf, int2 *weight)
}
/*
+ * token types for parsing
+ */
+typedef enum {
+ PT_END = 0,
+ PT_ERR = 1,
+ PT_VAL = 2,
+ PT_OPR = 3,
+ PT_OPEN = 4,
+ PT_CLOSE = 5,
+} ts_tokentype;
+
+/*
* get token from query string
+ *
+ * *operator is filled in with OP_* when return values is PT_OPR
+ * *strval, *lenval and *weight are filled in when return value is PT_VAL
*/
-static int4
-gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+static ts_tokentype
+gettoken_query(TSQueryParserState state,
+ int8 *operator,
+ int *lenval, char **strval, int16 *weight)
{
while (1)
{
@@ -97,16 +126,16 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
{
(state->buf)++; /* can safely ++, t_iseq guarantee
* that pg_mblen()==1 */
- *val = (int4) '!';
+ *operator = OP_NOT;
state->state = WAITOPERAND;
- return OPR;
+ return PT_OPR;
}
else if (t_iseq(state->buf, '('))
{
state->count++;
(state->buf)++;
state->state = WAITOPERAND;
- return OPEN;
+ return PT_OPEN;
}
else if (t_iseq(state->buf, ':'))
{
@@ -117,17 +146,16 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
}
else if (!t_isspace(state->buf))
{
- state->valstate.prsbuf = state->buf;
- if (gettoken_tsvector(&(state->valstate)))
+ /* We rely on the tsvector parser to parse the value for us */
+ reset_tsvector_parser(state->valstate, state->buf);
+ if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
{
- *strval = state->valstate.word;
- *lenval = state->valstate.curpos - state->valstate.word;
- state->buf = get_weight(state->valstate.prsbuf, weight);
+ state->buf = get_weight(state->buf, weight);
state->state = WAITOPERATOR;
- return VAL;
+ return PT_VAL;
}
else if (state->state == WAITFIRSTOPERAND)
- return END;
+ return PT_END;
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -136,52 +164,71 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
}
break;
case WAITOPERATOR:
- if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))
+ if (t_iseq(state->buf, '&'))
+ {
+ state->state = WAITOPERAND;
+ *operator = OP_AND;
+ (state->buf)++;
+ return PT_OPR;
+ }
+ if (t_iseq(state->buf, '|'))
{
state->state = WAITOPERAND;
- *val = (int4) *(state->buf);
+ *operator = OP_OR;
(state->buf)++;
- return OPR;
+ return PT_OPR;
}
else if (t_iseq(state->buf, ')'))
{
(state->buf)++;
state->count--;
- return (state->count < 0) ? ERR : CLOSE;
+ return (state->count < 0) ? PT_ERR : PT_CLOSE;
}
else if (*(state->buf) == '\0')
- return (state->count) ? ERR : END;
+ return (state->count) ? PT_ERR : PT_END;
else if (!t_isspace(state->buf))
- return ERR;
+ return PT_ERR;
break;
case WAITSINGLEOPERAND:
if (*(state->buf) == '\0')
- return END;
+ return PT_END;
*strval = state->buf;
*lenval = strlen(state->buf);
state->buf += strlen(state->buf);
state->count++;
- return VAL;
+ return PT_VAL;
default:
- return ERR;
+ return PT_ERR;
break;
}
state->buf += pg_mblen(state->buf);
}
- return END;
+ return PT_END;
}
/*
- * push new one in polish notation reverse view
+ * Push an operator to state->polstr
*/
void
-pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+pushOperator(TSQueryParserState state, int8 oper)
{
- ParseQueryNode *tmp = (ParseQueryNode *) palloc(sizeof(ParseQueryNode));
+ QueryOperator *tmp;
+
+ Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR);
+
+ tmp = (QueryOperator *) palloc(sizeof(QueryOperator));
+ tmp->type = QI_OPR;
+ tmp->oper = oper;
+ /* left is filled in later with findoprnd */
+
+ state->polstr = lcons(tmp, state->polstr);
+}
+
+static void
+pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight)
+{
+ QueryOperand *tmp;
- tmp->weight = weight;
- tmp->type = type;
- tmp->val = val;
if (distance >= MAXSTRPOS)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -192,20 +239,27 @@ pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 l
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("operand is too long in tsearch query: \"%s\"",
state->buffer)));
- tmp->distance = distance;
+
+ tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
+ tmp->type = QI_VAL;
+ tmp->weight = weight;
+ tmp->valcrc = (int32) valcrc;
tmp->length = lenval;
- tmp->next = state->str;
- state->str = tmp;
- state->num++;
+ tmp->distance = distance;
+
+ state->polstr = lcons(tmp, state->polstr);
}
/*
- * This function is used for tsquery parsing
+ * Push an operand to state->polstr.
+ *
+ * strval must point to a string equal to state->curop. lenval is the length
+ * of the string.
*/
void
-pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int2 weight)
+pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
{
- pg_crc32 c;
+ pg_crc32 valcrc;
if (lenval >= MAXSTRLEN)
ereport(ERROR,
@@ -213,162 +267,202 @@ pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int
errmsg("word is too long in tsearch query: \"%s\"",
state->buffer)));
- INIT_CRC32(c);
- COMP_CRC32(c, strval, lenval);
- FIN_CRC32(c);
- pushquery(state, type, *(int4 *) &c,
- state->curop - state->op, lenval, weight);
+ INIT_CRC32(valcrc);
+ COMP_CRC32(valcrc, strval, lenval);
+ FIN_CRC32(valcrc);
+ pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight);
+ /* append the value string to state.op, enlarging buffer if needed first */
while (state->curop - state->op + lenval + 1 >= state->lenop)
{
- int4 tmp = state->curop - state->op;
+ int used = state->curop - state->op;
state->lenop *= 2;
state->op = (char *) repalloc((void *) state->op, state->lenop);
- state->curop = state->op + tmp;
+ state->curop = state->op + used;
}
memcpy((void *) state->curop, (void *) strval, lenval);
state->curop += lenval;
*(state->curop) = '\0';
state->curop++;
state->sumlen += lenval + 1 /* \0 */ ;
- return;
}
+
+/*
+ * Push a stopword placeholder to state->polstr
+ */
+void
+pushStop(TSQueryParserState state)
+{
+ QueryOperand *tmp;
+
+ tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
+ tmp->type = QI_VALSTOP;
+
+ state->polstr = lcons(tmp, state->polstr);
+}
+
+
#define STACKDEPTH 32
/*
- * make polish notation of query
+ * Make polish (prefix) notation of query.
+ *
+ * See parse_tsquery for explanation of pushval.
*/
-static int4
-makepol(TSQueryParserState * state,
- void (*pushval) (TSQueryParserState *, int, char *, int, int2))
+static void
+makepol(TSQueryParserState state,
+ PushFunction pushval,
+ void *opaque)
{
- int4 val = 0,
- type;
- int4 lenval = 0;
+ int8 operator = 0;
+ ts_tokentype type;
+ int lenval = 0;
char *strval = NULL;
- int4 stack[STACKDEPTH];
- int4 lenstack = 0;
- int2 weight = 0;
+ int8 opstack[STACKDEPTH];
+ int lenstack = 0;
+ int16 weight = 0;
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
- while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+ while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END)
{
switch (type)
{
- case VAL:
- pushval(state, VAL, strval, lenval, weight);
- while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
- stack[lenstack - 1] == (int4) '!'))
+ case PT_VAL:
+ pushval(opaque, state, strval, lenval, weight);
+ while (lenstack && (opstack[lenstack - 1] == OP_AND ||
+ opstack[lenstack - 1] == OP_NOT))
{
lenstack--;
- pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ pushOperator(state, opstack[lenstack]);
}
break;
- case OPR:
- if (lenstack && val == (int4) '|')
- pushquery(state, OPR, val, 0, 0, 0);
+ case PT_OPR:
+ if (lenstack && operator == OP_OR)
+ pushOperator(state, OP_OR);
else
{
if (lenstack == STACKDEPTH) /* internal error */
elog(ERROR, "tsquery stack too small");
- stack[lenstack] = val;
+ opstack[lenstack] = operator;
lenstack++;
}
break;
- case OPEN:
- if (makepol(state, pushval) == ERR)
- return ERR;
- if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
- stack[lenstack - 1] == (int4) '!'))
+ case PT_OPEN:
+ makepol(state, pushval, opaque);
+
+ if (lenstack && (opstack[lenstack - 1] == OP_AND ||
+ opstack[lenstack - 1] == OP_NOT))
{
lenstack--;
- pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ pushOperator(state, opstack[lenstack]);
}
break;
- case CLOSE:
+ case PT_CLOSE:
while (lenstack)
{
lenstack--;
- pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ pushOperator(state, opstack[lenstack]);
};
- return END;
- break;
- case ERR:
+ return;
+ case PT_ERR:
default:
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsearch query: \"%s\"",
state->buffer)));
- return ERR;
-
}
}
while (lenstack)
{
lenstack--;
- pushquery(state, OPR, stack[lenstack], 0, 0, 0);
- };
- return END;
+ pushOperator(state, opstack[lenstack]);
+ }
}
+/*
+ * Fills in the left-fields previously left unfilled. The input
+ * QueryItems must be in polish (prefix) notation.
+ */
static void
-findoprnd(QueryItem * ptr, int4 *pos)
+findoprnd(QueryItem *ptr, int *pos)
{
- if (ptr[*pos].type == VAL || ptr[*pos].type == VALSTOP)
- {
- ptr[*pos].left = 0;
- (*pos)++;
- }
- else if (ptr[*pos].val == (int4) '!')
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (ptr[*pos].type == QI_VAL ||
+ ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here,
+ * they haven't been cleansed
+ * away yet.
+ */
{
- ptr[*pos].left = 1;
(*pos)++;
- findoprnd(ptr, pos);
}
- else
+ else
{
- QueryItem *curitem = &ptr[*pos];
- int4 tmp = *pos;
+ Assert(ptr[*pos].type == QI_OPR);
- (*pos)++;
- findoprnd(ptr, pos);
- curitem->left = *pos - tmp;
- findoprnd(ptr, pos);
+ if (ptr[*pos].operator.oper == OP_NOT)
+ {
+ ptr[*pos].operator.left = 1;
+ (*pos)++;
+ findoprnd(ptr, pos);
+ }
+ else
+ {
+ QueryOperator *curitem = &ptr[*pos].operator;
+ int tmp = *pos;
+
+ Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
+
+ (*pos)++;
+ findoprnd(ptr, pos);
+ curitem->left = *pos - tmp;
+ findoprnd(ptr, pos);
+ }
}
}
-
/*
- * input
+ * Each value (operand) in the query is be passed to pushval. pushval can
+ * transform the simple value to an arbitrarily complex expression using
+ * pushValue and pushOperator. It must push a single value with pushValue,
+ * a complete expression with all operands, or a a stopword placeholder
+ * with pushStop, otherwise the prefix notation representation will be broken,
+ * having an operator with no operand.
+ *
+ * opaque is passed on to pushval as is, pushval can use it to store its
+ * private state.
+ *
+ * The returned query might contain QI_STOPVAL nodes. The caller is responsible
+ * for cleaning them up (with clean_fakeval)
*/
TSQuery
-parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int, int2), Oid cfg_id, bool isplain)
+parse_tsquery(char *buf,
+ PushFunction pushval,
+ void *opaque,
+ bool isplain)
{
- TSQueryParserState state;
- int4 i;
+ struct TSQueryParserStateData state;
+ int i;
TSQuery query;
- int4 commonlen;
+ int commonlen;
QueryItem *ptr;
- ParseQueryNode *tmp;
- int4 pos = 0;
+ int pos = 0;
+ ListCell *cell;
/* init state */
state.buffer = buf;
state.buf = buf;
state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
state.count = 0;
- state.num = 0;
- state.str = NULL;
- state.cfg_id = cfg_id;
+ state.polstr = NIL;
/* init value parser's state */
- state.valstate.oprisdelim = true;
- state.valstate.len = 32;
- state.valstate.word = (char *) palloc(state.valstate.len);
+ state.valstate = init_tsvector_parser(NULL, true);
/* init list of operand */
state.sumlen = 0;
@@ -377,9 +471,11 @@ parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int
*(state.curop) = '\0';
/* parse query & make polish notation (postfix, but in reverse order) */
- makepol(&state, pushval);
- pfree(state.valstate.word);
- if (!state.num)
+ makepol(&state, pushval, opaque);
+
+ close_tsvector_parser(state.valstate);
+
+ if (list_length(state.polstr) == 0)
{
ereport(NOTICE,
(errmsg("tsearch query doesn't contain lexeme(s): \"%s\"",
@@ -390,37 +486,54 @@ parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int
return query;
}
- /* make finish struct */
- commonlen = COMPUTESIZE(state.num, state.sumlen);
- query = (TSQuery) palloc(commonlen);
+ /* Pack the QueryItems in the final TSQuery struct to return to caller */
+ commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
+ query = (TSQuery) palloc0(commonlen);
SET_VARSIZE(query, commonlen);
- query->size = state.num;
+ query->size = list_length(state.polstr);
ptr = GETQUERY(query);
- /* set item in polish notation */
- for (i = 0; i < state.num; i++)
+ /* Copy QueryItems to TSQuery */
+ i = 0;
+ foreach(cell, state.polstr)
{
- ptr[i].weight = state.str->weight;
- ptr[i].type = state.str->type;
- ptr[i].val = state.str->val;
- ptr[i].distance = state.str->distance;
- ptr[i].length = state.str->length;
- tmp = state.str->next;
- pfree(state.str);
- state.str = tmp;
+ QueryItem *item = (QueryItem *) lfirst(cell);
+
+ switch(item->type)
+ {
+ case QI_VAL:
+ memcpy(&ptr[i], item, sizeof(QueryOperand));
+ break;
+ case QI_VALSTOP:
+ ptr[i].type = QI_VALSTOP;
+ break;
+ case QI_OPR:
+ memcpy(&ptr[i], item, sizeof(QueryOperator));
+ break;
+ default:
+ elog(ERROR, "unknown QueryItem type %d", item->type);
+ }
+ i++;
}
- /* set user friendly-operand view */
+ /* Copy all the operand strings to TSQuery */
memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
pfree(state.op);
- /* set left operand's position for every operator */
+ /* Set left operand pointers for every operator. */
pos = 0;
findoprnd(ptr, &pos);
return query;
}
+static void
+pushval_asis(void *opaque, TSQueryParserState state, char *strval, int lenval,
+ int16 weight)
+{
+ pushValue(state, strval, lenval, weight);
+}
+
/*
* in without morphology
*/
@@ -431,7 +544,7 @@ tsqueryin(PG_FUNCTION_ARGS)
pg_verifymbstr(in, strlen(in), false);
- PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, InvalidOid, false));
+ PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, NULL, false));
}
/*
@@ -443,13 +556,14 @@ typedef struct
char *buf;
char *cur;
char *op;
- int4 buflen;
+ int buflen;
} INFIX;
-#define RESIZEBUF(inf,addsize) \
+/* Makes sure inf->buf is large enough for adding 'addsize' bytes */
+#define RESIZEBUF(inf, addsize) \
while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
{ \
- int4 len = (inf)->cur - (inf)->buf; \
+ int len = (inf)->cur - (inf)->buf; \
(inf)->buflen *= 2; \
(inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
(inf)->cur = (inf)->buf + len; \
@@ -462,12 +576,16 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
static void
infix(INFIX * in, bool first)
{
- if (in->curpol->type == VAL)
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (in->curpol->type == QI_VAL)
{
- char *op = in->op + in->curpol->distance;
+ QueryOperand *curpol = &in->curpol->operand;
+ char *op = in->op + curpol->distance;
int clen;
- RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
+ RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
*(in->cur) = '\'';
in->cur++;
while (*op)
@@ -485,26 +603,26 @@ infix(INFIX * in, bool first)
}
*(in->cur) = '\'';
in->cur++;
- if (in->curpol->weight)
+ if (curpol->weight)
{
*(in->cur) = ':';
in->cur++;
- if (in->curpol->weight & (1 << 3))
+ if (curpol->weight & (1 << 3))
{
*(in->cur) = 'A';
in->cur++;
}
- if (in->curpol->weight & (1 << 2))
+ if (curpol->weight & (1 << 2))
{
*(in->cur) = 'B';
in->cur++;
}
- if (in->curpol->weight & (1 << 1))
+ if (curpol->weight & (1 << 1))
{
*(in->cur) = 'C';
in->cur++;
}
- if (in->curpol->weight & 1)
+ if (curpol->weight & 1)
{
*(in->cur) = 'D';
in->cur++;
@@ -513,7 +631,7 @@ infix(INFIX * in, bool first)
*(in->cur) = '\0';
in->curpol++;
}
- else if (in->curpol->val == (int4) '!')
+ else if (in->curpol->operator.oper == OP_NOT)
{
bool isopr = false;
@@ -522,13 +640,15 @@ infix(INFIX * in, bool first)
in->cur++;
*(in->cur) = '\0';
in->curpol++;
- if (in->curpol->type == OPR)
+
+ if (in->curpol->type == QI_OPR)
{
isopr = true;
RESIZEBUF(in, 2);
sprintf(in->cur, "( ");
in->cur = strchr(in->cur, '\0');
}
+
infix(in, isopr);
if (isopr)
{
@@ -539,11 +659,11 @@ infix(INFIX * in, bool first)
}
else
{
- int4 op = in->curpol->val;
+ int8 op = in->curpol->operator.oper;
INFIX nrm;
in->curpol++;
- if (op == (int4) '|' && !first)
+ if (op == OP_OR && !first)
{
RESIZEBUF(in, 2);
sprintf(in->cur, "( ");
@@ -564,11 +684,22 @@ infix(INFIX * in, bool first)
/* print operator & right operand */
RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
- sprintf(in->cur, " %c %s", op, nrm.buf);
+ switch(op)
+ {
+ case OP_OR:
+ sprintf(in->cur, " | %s", nrm.buf);
+ break;
+ case OP_AND:
+ sprintf(in->cur, " & %s", nrm.buf);
+ break;
+ default:
+ /* OP_NOT is handled in above if-branch*/
+ elog(ERROR, "unexpected operator type %d", op);
+ }
in->cur = strchr(in->cur, '\0');
pfree(nrm.buf);
- if (op == (int4) '|' && !first)
+ if (op == OP_OR && !first)
{
RESIZEBUF(in, 2);
sprintf(in->cur, " )");
@@ -615,28 +746,33 @@ tsquerysend(PG_FUNCTION_ARGS)
pq_sendint(&buf, query->size, sizeof(int32));
for (i = 0; i < query->size; i++)
{
- int tmp;
-
pq_sendint(&buf, item->type, sizeof(item->type));
- pq_sendint(&buf, item->weight, sizeof(item->weight));
- pq_sendint(&buf, item->left, sizeof(item->left));
- pq_sendint(&buf, item->val, sizeof(item->val));
-
- /*
- * We are sure that sizeof(WordEntry) == sizeof(int32), and about
- * layout of QueryItem
- */
- tmp = *(int32 *) (((char *) item) + HDRSIZEQI);
- pq_sendint(&buf, tmp, sizeof(tmp));
+ switch(item->type)
+ {
+ case QI_VAL:
+ pq_sendint(&buf, item->operand.weight, sizeof(item->operand.weight));
+ pq_sendint(&buf, item->operand.valcrc, sizeof(item->operand.valcrc));
+ pq_sendint(&buf, item->operand.length, sizeof(int16));
+ /* istrue flag is just for temporary use in tsrank.c/Cover,
+ * so we don't need to transfer that */
+ break;
+ case QI_OPR:
+ pq_sendint(&buf, item->operator.oper, sizeof(item->operator.oper));
+ if (item->operator.oper != OP_NOT)
+ pq_sendint(&buf, item->operator.left, sizeof(item->operator.left));
+ break;
+ default:
+ elog(ERROR, "unknown tsquery node type %d", item->type);
+ }
item++;
}
item = GETQUERY(query);
for (i = 0; i < query->size; i++)
{
- if (item->type == VAL)
- pq_sendbytes(&buf, GETOPERAND(query) + item->distance, item->length);
+ if (item->type == QI_VAL)
+ pq_sendbytes(&buf, GETOPERAND(query) + item->operand.distance, item->operand.length);
item++;
}
@@ -652,8 +788,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
TSQuery query;
int i,
size,
- tmp,
- len = HDRSIZETQ;
+ len;
QueryItem *item;
int datalen = 0;
char *ptr;
@@ -661,7 +796,8 @@ tsqueryrecv(PG_FUNCTION_ARGS)
size = pq_getmsgint(buf, sizeof(uint32));
if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
elog(ERROR, "invalid size of tsquery");
- len += sizeof(QueryItem) * size;
+
+ len = HDRSIZETQ + sizeof(QueryItem) * size;
query = (TSQuery) palloc(len);
query->size = size;
@@ -670,32 +806,67 @@ tsqueryrecv(PG_FUNCTION_ARGS)
for (i = 0; i < size; i++)
{
item->type = (int8) pq_getmsgint(buf, sizeof(int8));
- item->weight = (int8) pq_getmsgint(buf, sizeof(int8));
- item->left = (int16) pq_getmsgint(buf, sizeof(int16));
- item->val = (int32) pq_getmsgint(buf, sizeof(int32));
- tmp = pq_getmsgint(buf, sizeof(int32));
- memcpy((((char *) item) + HDRSIZEQI), &tmp, sizeof(int32));
-
- /*
- * Sanity checks
- */
- if (item->type == VAL)
- {
- datalen += item->length + 1; /* \0 */
- }
- else if (item->type == OPR)
+
+ switch(item->type)
{
- if (item->val == '|' || item->val == '&')
- {
- if (item->left <= 0 || i + item->left >= size)
- elog(ERROR, "invalid pointer to left operand");
- }
+ case QI_VAL:
+ item->operand.weight = (int8) pq_getmsgint(buf, sizeof(int8));
+ item->operand.valcrc = (int32) pq_getmsgint(buf, sizeof(int32));
+ item->operand.length = pq_getmsgint(buf, sizeof(int16));
+
+ /*
+ * Check that datalen doesn't grow too large. Without the
+ * check, a malicious client could induce a buffer overflow
+ * by sending a tsquery whose size exceeds 2GB. datalen
+ * would overflow, we would allocate a too small buffer below,
+ * and overflow the buffer. Because operand.length is a 20-bit
+ * field, adding one such value to datalen must exceed
+ * MaxAllocSize before wrapping over the 32-bit datalen field,
+ * so this check will protect from it.
+ */
+ if (datalen > MAXSTRLEN)
+ elog(ERROR, "invalid tsquery; total operand length exceeded");
+
+ /* We can calculate distance from datalen, no need to send it
+ * through the wire. If we did, we would have to check that
+ * it's valid anyway.
+ */
+ item->operand.distance = datalen;
+
+ datalen += item->operand.length + 1; /* \0 */
- if (i == size - 1)
- elog(ERROR, "invalid pointer to right operand");
+ break;
+ case QI_OPR:
+ item->operator.oper = (int8) pq_getmsgint(buf, sizeof(int8));
+ if (item->operator.oper != OP_NOT &&
+ item->operator.oper != OP_OR &&
+ item->operator.oper != OP_AND)
+ elog(ERROR, "unknown operator type %d", (int) item->operator.oper);
+ if(item->operator.oper != OP_NOT)
+ {
+ item->operator.left = (int16) pq_getmsgint(buf, sizeof(int16));
+ /*
+ * Sanity checks
+ */
+ if (item->operator.left <= 0 || i + item->operator.left >= size)
+ elog(ERROR, "invalid pointer to left operand");
+
+ /* XXX: Though there's no way to construct a TSQuery that's
+ * not in polish notation, we don't enforce that for
+ * queries received from client in binary mode. Is there
+ * anything that relies on it?
+ *
+ * XXX: The tree could be malformed in other ways too,
+ * a node could have two parents, for example.
+ */
+ }
+
+ if (i == size - 1)
+ elog(ERROR, "invalid pointer to right operand");
+ break;
+ default:
+ elog(ERROR, "unknown tsquery node type %d", item->type);
}
- else
- elog(ERROR, "unknown tsquery node type");
item++;
}
@@ -706,13 +877,12 @@ tsqueryrecv(PG_FUNCTION_ARGS)
ptr = GETOPERAND(query);
for (i = 0; i < size; i++)
{
- if (item->type == VAL)
+ if (item->type == QI_VAL)
{
- item->distance = ptr - GETOPERAND(query);
memcpy(ptr,
- pq_getmsgbytes(buf, item->length),
- item->length);
- ptr += item->length;
+ pq_getmsgbytes(buf, item->operand.length),
+ item->operand.length);
+ ptr += item->operand.length;
*ptr++ = '\0';
}
item++;
@@ -736,7 +906,7 @@ tsquerytree(PG_FUNCTION_ARGS)
INFIX nrm;
text *res;
QueryItem *q;
- int4 len;
+ int len;
if (query->size == 0)
{