diff options
Diffstat (limited to 'src/backend/utils/adt/tsquery.c')
-rw-r--r-- | src/backend/utils/adt/tsquery.c | 582 |
1 files changed, 376 insertions, 206 deletions
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c index 83759728ff9..27b93eb64d7 100644 --- a/src/backend/utils/adt/tsquery.c +++ b/src/backend/utils/adt/tsquery.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.2 2007/08/31 02:26:29 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.3 2007/09/07 15:09:56 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -23,6 +23,29 @@ #include "utils/pg_crc.h" +struct TSQueryParserStateData +{ + /* State for gettoken_query */ + char *buffer; /* entire string we are scanning */ + char *buf; /* current scan point */ + int state; + int count; /* nesting count, incremented by (, + decremented by ) */ + + /* polish (prefix) notation in list, filled in by push* functions */ + List *polstr; + + /* Strings from operands are collected in op. curop is a pointer to + * the end of used space of op. */ + char *op; + char *curop; + int lenop; /* allocated size of op */ + int sumlen; /* used size of op */ + + /* state for value's parser */ + TSVectorParseState valstate; +}; + /* parser's states */ #define WAITOPERAND 1 #define WAITOPERATOR 2 @@ -30,21 +53,10 @@ #define WAITSINGLEOPERAND 4 /* - * node of query tree, also used - * for storing polish notation in parser + * subroutine to parse the weight part, like ':1AB' of a query. */ -typedef struct ParseQueryNode -{ - int2 weight; - int2 type; - int4 val; - int2 distance; - int2 length; - struct ParseQueryNode *next; -} ParseQueryNode; - static char * -get_weight(char *buf, int2 *weight) +get_weight(char *buf, int16 *weight) { *weight = 0; @@ -82,10 +94,27 @@ get_weight(char *buf, int2 *weight) } /* + * token types for parsing + */ +typedef enum { + PT_END = 0, + PT_ERR = 1, + PT_VAL = 2, + PT_OPR = 3, + PT_OPEN = 4, + PT_CLOSE = 5, +} ts_tokentype; + +/* * get token from query string + * + * *operator is filled in with OP_* when return values is PT_OPR + * *strval, *lenval and *weight are filled in when return value is PT_VAL */ -static int4 -gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strval, int2 *weight) +static ts_tokentype +gettoken_query(TSQueryParserState state, + int8 *operator, + int *lenval, char **strval, int16 *weight) { while (1) { @@ -97,16 +126,16 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva { (state->buf)++; /* can safely ++, t_iseq guarantee * that pg_mblen()==1 */ - *val = (int4) '!'; + *operator = OP_NOT; state->state = WAITOPERAND; - return OPR; + return PT_OPR; } else if (t_iseq(state->buf, '(')) { state->count++; (state->buf)++; state->state = WAITOPERAND; - return OPEN; + return PT_OPEN; } else if (t_iseq(state->buf, ':')) { @@ -117,17 +146,16 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva } else if (!t_isspace(state->buf)) { - state->valstate.prsbuf = state->buf; - if (gettoken_tsvector(&(state->valstate))) + /* We rely on the tsvector parser to parse the value for us */ + reset_tsvector_parser(state->valstate, state->buf); + if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf)) { - *strval = state->valstate.word; - *lenval = state->valstate.curpos - state->valstate.word; - state->buf = get_weight(state->valstate.prsbuf, weight); + state->buf = get_weight(state->buf, weight); state->state = WAITOPERATOR; - return VAL; + return PT_VAL; } else if (state->state == WAITFIRSTOPERAND) - return END; + return PT_END; else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -136,52 +164,71 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva } break; case WAITOPERATOR: - if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')) + if (t_iseq(state->buf, '&')) + { + state->state = WAITOPERAND; + *operator = OP_AND; + (state->buf)++; + return PT_OPR; + } + if (t_iseq(state->buf, '|')) { state->state = WAITOPERAND; - *val = (int4) *(state->buf); + *operator = OP_OR; (state->buf)++; - return OPR; + return PT_OPR; } else if (t_iseq(state->buf, ')')) { (state->buf)++; state->count--; - return (state->count < 0) ? ERR : CLOSE; + return (state->count < 0) ? PT_ERR : PT_CLOSE; } else if (*(state->buf) == '\0') - return (state->count) ? ERR : END; + return (state->count) ? PT_ERR : PT_END; else if (!t_isspace(state->buf)) - return ERR; + return PT_ERR; break; case WAITSINGLEOPERAND: if (*(state->buf) == '\0') - return END; + return PT_END; *strval = state->buf; *lenval = strlen(state->buf); state->buf += strlen(state->buf); state->count++; - return VAL; + return PT_VAL; default: - return ERR; + return PT_ERR; break; } state->buf += pg_mblen(state->buf); } - return END; + return PT_END; } /* - * push new one in polish notation reverse view + * Push an operator to state->polstr */ void -pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight) +pushOperator(TSQueryParserState state, int8 oper) { - ParseQueryNode *tmp = (ParseQueryNode *) palloc(sizeof(ParseQueryNode)); + QueryOperator *tmp; + + Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR); + + tmp = (QueryOperator *) palloc(sizeof(QueryOperator)); + tmp->type = QI_OPR; + tmp->oper = oper; + /* left is filled in later with findoprnd */ + + state->polstr = lcons(tmp, state->polstr); +} + +static void +pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight) +{ + QueryOperand *tmp; - tmp->weight = weight; - tmp->type = type; - tmp->val = val; if (distance >= MAXSTRPOS) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -192,20 +239,27 @@ pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 l (errcode(ERRCODE_SYNTAX_ERROR), errmsg("operand is too long in tsearch query: \"%s\"", state->buffer))); - tmp->distance = distance; + + tmp = (QueryOperand *) palloc(sizeof(QueryOperand)); + tmp->type = QI_VAL; + tmp->weight = weight; + tmp->valcrc = (int32) valcrc; tmp->length = lenval; - tmp->next = state->str; - state->str = tmp; - state->num++; + tmp->distance = distance; + + state->polstr = lcons(tmp, state->polstr); } /* - * This function is used for tsquery parsing + * Push an operand to state->polstr. + * + * strval must point to a string equal to state->curop. lenval is the length + * of the string. */ void -pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int2 weight) +pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight) { - pg_crc32 c; + pg_crc32 valcrc; if (lenval >= MAXSTRLEN) ereport(ERROR, @@ -213,162 +267,202 @@ pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int errmsg("word is too long in tsearch query: \"%s\"", state->buffer))); - INIT_CRC32(c); - COMP_CRC32(c, strval, lenval); - FIN_CRC32(c); - pushquery(state, type, *(int4 *) &c, - state->curop - state->op, lenval, weight); + INIT_CRC32(valcrc); + COMP_CRC32(valcrc, strval, lenval); + FIN_CRC32(valcrc); + pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight); + /* append the value string to state.op, enlarging buffer if needed first */ while (state->curop - state->op + lenval + 1 >= state->lenop) { - int4 tmp = state->curop - state->op; + int used = state->curop - state->op; state->lenop *= 2; state->op = (char *) repalloc((void *) state->op, state->lenop); - state->curop = state->op + tmp; + state->curop = state->op + used; } memcpy((void *) state->curop, (void *) strval, lenval); state->curop += lenval; *(state->curop) = '\0'; state->curop++; state->sumlen += lenval + 1 /* \0 */ ; - return; } + +/* + * Push a stopword placeholder to state->polstr + */ +void +pushStop(TSQueryParserState state) +{ + QueryOperand *tmp; + + tmp = (QueryOperand *) palloc(sizeof(QueryOperand)); + tmp->type = QI_VALSTOP; + + state->polstr = lcons(tmp, state->polstr); +} + + #define STACKDEPTH 32 /* - * make polish notation of query + * Make polish (prefix) notation of query. + * + * See parse_tsquery for explanation of pushval. */ -static int4 -makepol(TSQueryParserState * state, - void (*pushval) (TSQueryParserState *, int, char *, int, int2)) +static void +makepol(TSQueryParserState state, + PushFunction pushval, + void *opaque) { - int4 val = 0, - type; - int4 lenval = 0; + int8 operator = 0; + ts_tokentype type; + int lenval = 0; char *strval = NULL; - int4 stack[STACKDEPTH]; - int4 lenstack = 0; - int2 weight = 0; + int8 opstack[STACKDEPTH]; + int lenstack = 0; + int16 weight = 0; /* since this function recurses, it could be driven to stack overflow */ check_stack_depth(); - while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END) + while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END) { switch (type) { - case VAL: - pushval(state, VAL, strval, lenval, weight); - while (lenstack && (stack[lenstack - 1] == (int4) '&' || - stack[lenstack - 1] == (int4) '!')) + case PT_VAL: + pushval(opaque, state, strval, lenval, weight); + while (lenstack && (opstack[lenstack - 1] == OP_AND || + opstack[lenstack - 1] == OP_NOT)) { lenstack--; - pushquery(state, OPR, stack[lenstack], 0, 0, 0); + pushOperator(state, opstack[lenstack]); } break; - case OPR: - if (lenstack && val == (int4) '|') - pushquery(state, OPR, val, 0, 0, 0); + case PT_OPR: + if (lenstack && operator == OP_OR) + pushOperator(state, OP_OR); else { if (lenstack == STACKDEPTH) /* internal error */ elog(ERROR, "tsquery stack too small"); - stack[lenstack] = val; + opstack[lenstack] = operator; lenstack++; } break; - case OPEN: - if (makepol(state, pushval) == ERR) - return ERR; - if (lenstack && (stack[lenstack - 1] == (int4) '&' || - stack[lenstack - 1] == (int4) '!')) + case PT_OPEN: + makepol(state, pushval, opaque); + + if (lenstack && (opstack[lenstack - 1] == OP_AND || + opstack[lenstack - 1] == OP_NOT)) { lenstack--; - pushquery(state, OPR, stack[lenstack], 0, 0, 0); + pushOperator(state, opstack[lenstack]); } break; - case CLOSE: + case PT_CLOSE: while (lenstack) { lenstack--; - pushquery(state, OPR, stack[lenstack], 0, 0, 0); + pushOperator(state, opstack[lenstack]); }; - return END; - break; - case ERR: + return; + case PT_ERR: default: ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("syntax error in tsearch query: \"%s\"", state->buffer))); - return ERR; - } } while (lenstack) { lenstack--; - pushquery(state, OPR, stack[lenstack], 0, 0, 0); - }; - return END; + pushOperator(state, opstack[lenstack]); + } } +/* + * Fills in the left-fields previously left unfilled. The input + * QueryItems must be in polish (prefix) notation. + */ static void -findoprnd(QueryItem * ptr, int4 *pos) +findoprnd(QueryItem *ptr, int *pos) { - if (ptr[*pos].type == VAL || ptr[*pos].type == VALSTOP) - { - ptr[*pos].left = 0; - (*pos)++; - } - else if (ptr[*pos].val == (int4) '!') + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + if (ptr[*pos].type == QI_VAL || + ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here, + * they haven't been cleansed + * away yet. + */ { - ptr[*pos].left = 1; (*pos)++; - findoprnd(ptr, pos); } - else + else { - QueryItem *curitem = &ptr[*pos]; - int4 tmp = *pos; + Assert(ptr[*pos].type == QI_OPR); - (*pos)++; - findoprnd(ptr, pos); - curitem->left = *pos - tmp; - findoprnd(ptr, pos); + if (ptr[*pos].operator.oper == OP_NOT) + { + ptr[*pos].operator.left = 1; + (*pos)++; + findoprnd(ptr, pos); + } + else + { + QueryOperator *curitem = &ptr[*pos].operator; + int tmp = *pos; + + Assert(curitem->oper == OP_AND || curitem->oper == OP_OR); + + (*pos)++; + findoprnd(ptr, pos); + curitem->left = *pos - tmp; + findoprnd(ptr, pos); + } } } - /* - * input + * Each value (operand) in the query is be passed to pushval. pushval can + * transform the simple value to an arbitrarily complex expression using + * pushValue and pushOperator. It must push a single value with pushValue, + * a complete expression with all operands, or a a stopword placeholder + * with pushStop, otherwise the prefix notation representation will be broken, + * having an operator with no operand. + * + * opaque is passed on to pushval as is, pushval can use it to store its + * private state. + * + * The returned query might contain QI_STOPVAL nodes. The caller is responsible + * for cleaning them up (with clean_fakeval) */ TSQuery -parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int, int2), Oid cfg_id, bool isplain) +parse_tsquery(char *buf, + PushFunction pushval, + void *opaque, + bool isplain) { - TSQueryParserState state; - int4 i; + struct TSQueryParserStateData state; + int i; TSQuery query; - int4 commonlen; + int commonlen; QueryItem *ptr; - ParseQueryNode *tmp; - int4 pos = 0; + int pos = 0; + ListCell *cell; /* init state */ state.buffer = buf; state.buf = buf; state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND; state.count = 0; - state.num = 0; - state.str = NULL; - state.cfg_id = cfg_id; + state.polstr = NIL; /* init value parser's state */ - state.valstate.oprisdelim = true; - state.valstate.len = 32; - state.valstate.word = (char *) palloc(state.valstate.len); + state.valstate = init_tsvector_parser(NULL, true); /* init list of operand */ state.sumlen = 0; @@ -377,9 +471,11 @@ parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int *(state.curop) = '\0'; /* parse query & make polish notation (postfix, but in reverse order) */ - makepol(&state, pushval); - pfree(state.valstate.word); - if (!state.num) + makepol(&state, pushval, opaque); + + close_tsvector_parser(state.valstate); + + if (list_length(state.polstr) == 0) { ereport(NOTICE, (errmsg("tsearch query doesn't contain lexeme(s): \"%s\"", @@ -390,37 +486,54 @@ parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int return query; } - /* make finish struct */ - commonlen = COMPUTESIZE(state.num, state.sumlen); - query = (TSQuery) palloc(commonlen); + /* Pack the QueryItems in the final TSQuery struct to return to caller */ + commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen); + query = (TSQuery) palloc0(commonlen); SET_VARSIZE(query, commonlen); - query->size = state.num; + query->size = list_length(state.polstr); ptr = GETQUERY(query); - /* set item in polish notation */ - for (i = 0; i < state.num; i++) + /* Copy QueryItems to TSQuery */ + i = 0; + foreach(cell, state.polstr) { - ptr[i].weight = state.str->weight; - ptr[i].type = state.str->type; - ptr[i].val = state.str->val; - ptr[i].distance = state.str->distance; - ptr[i].length = state.str->length; - tmp = state.str->next; - pfree(state.str); - state.str = tmp; + QueryItem *item = (QueryItem *) lfirst(cell); + + switch(item->type) + { + case QI_VAL: + memcpy(&ptr[i], item, sizeof(QueryOperand)); + break; + case QI_VALSTOP: + ptr[i].type = QI_VALSTOP; + break; + case QI_OPR: + memcpy(&ptr[i], item, sizeof(QueryOperator)); + break; + default: + elog(ERROR, "unknown QueryItem type %d", item->type); + } + i++; } - /* set user friendly-operand view */ + /* Copy all the operand strings to TSQuery */ memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen); pfree(state.op); - /* set left operand's position for every operator */ + /* Set left operand pointers for every operator. */ pos = 0; findoprnd(ptr, &pos); return query; } +static void +pushval_asis(void *opaque, TSQueryParserState state, char *strval, int lenval, + int16 weight) +{ + pushValue(state, strval, lenval, weight); +} + /* * in without morphology */ @@ -431,7 +544,7 @@ tsqueryin(PG_FUNCTION_ARGS) pg_verifymbstr(in, strlen(in), false); - PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, InvalidOid, false)); + PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, NULL, false)); } /* @@ -443,13 +556,14 @@ typedef struct char *buf; char *cur; char *op; - int4 buflen; + int buflen; } INFIX; -#define RESIZEBUF(inf,addsize) \ +/* Makes sure inf->buf is large enough for adding 'addsize' bytes */ +#define RESIZEBUF(inf, addsize) \ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \ { \ - int4 len = (inf)->cur - (inf)->buf; \ + int len = (inf)->cur - (inf)->buf; \ (inf)->buflen *= 2; \ (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \ (inf)->cur = (inf)->buf + len; \ @@ -462,12 +576,16 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \ static void infix(INFIX * in, bool first) { - if (in->curpol->type == VAL) + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + if (in->curpol->type == QI_VAL) { - char *op = in->op + in->curpol->distance; + QueryOperand *curpol = &in->curpol->operand; + char *op = in->op + curpol->distance; int clen; - RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5); + RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5); *(in->cur) = '\''; in->cur++; while (*op) @@ -485,26 +603,26 @@ infix(INFIX * in, bool first) } *(in->cur) = '\''; in->cur++; - if (in->curpol->weight) + if (curpol->weight) { *(in->cur) = ':'; in->cur++; - if (in->curpol->weight & (1 << 3)) + if (curpol->weight & (1 << 3)) { *(in->cur) = 'A'; in->cur++; } - if (in->curpol->weight & (1 << 2)) + if (curpol->weight & (1 << 2)) { *(in->cur) = 'B'; in->cur++; } - if (in->curpol->weight & (1 << 1)) + if (curpol->weight & (1 << 1)) { *(in->cur) = 'C'; in->cur++; } - if (in->curpol->weight & 1) + if (curpol->weight & 1) { *(in->cur) = 'D'; in->cur++; @@ -513,7 +631,7 @@ infix(INFIX * in, bool first) *(in->cur) = '\0'; in->curpol++; } - else if (in->curpol->val == (int4) '!') + else if (in->curpol->operator.oper == OP_NOT) { bool isopr = false; @@ -522,13 +640,15 @@ infix(INFIX * in, bool first) in->cur++; *(in->cur) = '\0'; in->curpol++; - if (in->curpol->type == OPR) + + if (in->curpol->type == QI_OPR) { isopr = true; RESIZEBUF(in, 2); sprintf(in->cur, "( "); in->cur = strchr(in->cur, '\0'); } + infix(in, isopr); if (isopr) { @@ -539,11 +659,11 @@ infix(INFIX * in, bool first) } else { - int4 op = in->curpol->val; + int8 op = in->curpol->operator.oper; INFIX nrm; in->curpol++; - if (op == (int4) '|' && !first) + if (op == OP_OR && !first) { RESIZEBUF(in, 2); sprintf(in->cur, "( "); @@ -564,11 +684,22 @@ infix(INFIX * in, bool first) /* print operator & right operand */ RESIZEBUF(in, 3 + (nrm.cur - nrm.buf)); - sprintf(in->cur, " %c %s", op, nrm.buf); + switch(op) + { + case OP_OR: + sprintf(in->cur, " | %s", nrm.buf); + break; + case OP_AND: + sprintf(in->cur, " & %s", nrm.buf); + break; + default: + /* OP_NOT is handled in above if-branch*/ + elog(ERROR, "unexpected operator type %d", op); + } in->cur = strchr(in->cur, '\0'); pfree(nrm.buf); - if (op == (int4) '|' && !first) + if (op == OP_OR && !first) { RESIZEBUF(in, 2); sprintf(in->cur, " )"); @@ -615,28 +746,33 @@ tsquerysend(PG_FUNCTION_ARGS) pq_sendint(&buf, query->size, sizeof(int32)); for (i = 0; i < query->size; i++) { - int tmp; - pq_sendint(&buf, item->type, sizeof(item->type)); - pq_sendint(&buf, item->weight, sizeof(item->weight)); - pq_sendint(&buf, item->left, sizeof(item->left)); - pq_sendint(&buf, item->val, sizeof(item->val)); - - /* - * We are sure that sizeof(WordEntry) == sizeof(int32), and about - * layout of QueryItem - */ - tmp = *(int32 *) (((char *) item) + HDRSIZEQI); - pq_sendint(&buf, tmp, sizeof(tmp)); + switch(item->type) + { + case QI_VAL: + pq_sendint(&buf, item->operand.weight, sizeof(item->operand.weight)); + pq_sendint(&buf, item->operand.valcrc, sizeof(item->operand.valcrc)); + pq_sendint(&buf, item->operand.length, sizeof(int16)); + /* istrue flag is just for temporary use in tsrank.c/Cover, + * so we don't need to transfer that */ + break; + case QI_OPR: + pq_sendint(&buf, item->operator.oper, sizeof(item->operator.oper)); + if (item->operator.oper != OP_NOT) + pq_sendint(&buf, item->operator.left, sizeof(item->operator.left)); + break; + default: + elog(ERROR, "unknown tsquery node type %d", item->type); + } item++; } item = GETQUERY(query); for (i = 0; i < query->size; i++) { - if (item->type == VAL) - pq_sendbytes(&buf, GETOPERAND(query) + item->distance, item->length); + if (item->type == QI_VAL) + pq_sendbytes(&buf, GETOPERAND(query) + item->operand.distance, item->operand.length); item++; } @@ -652,8 +788,7 @@ tsqueryrecv(PG_FUNCTION_ARGS) TSQuery query; int i, size, - tmp, - len = HDRSIZETQ; + len; QueryItem *item; int datalen = 0; char *ptr; @@ -661,7 +796,8 @@ tsqueryrecv(PG_FUNCTION_ARGS) size = pq_getmsgint(buf, sizeof(uint32)); if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem))) elog(ERROR, "invalid size of tsquery"); - len += sizeof(QueryItem) * size; + + len = HDRSIZETQ + sizeof(QueryItem) * size; query = (TSQuery) palloc(len); query->size = size; @@ -670,32 +806,67 @@ tsqueryrecv(PG_FUNCTION_ARGS) for (i = 0; i < size; i++) { item->type = (int8) pq_getmsgint(buf, sizeof(int8)); - item->weight = (int8) pq_getmsgint(buf, sizeof(int8)); - item->left = (int16) pq_getmsgint(buf, sizeof(int16)); - item->val = (int32) pq_getmsgint(buf, sizeof(int32)); - tmp = pq_getmsgint(buf, sizeof(int32)); - memcpy((((char *) item) + HDRSIZEQI), &tmp, sizeof(int32)); - - /* - * Sanity checks - */ - if (item->type == VAL) - { - datalen += item->length + 1; /* \0 */ - } - else if (item->type == OPR) + + switch(item->type) { - if (item->val == '|' || item->val == '&') - { - if (item->left <= 0 || i + item->left >= size) - elog(ERROR, "invalid pointer to left operand"); - } + case QI_VAL: + item->operand.weight = (int8) pq_getmsgint(buf, sizeof(int8)); + item->operand.valcrc = (int32) pq_getmsgint(buf, sizeof(int32)); + item->operand.length = pq_getmsgint(buf, sizeof(int16)); + + /* + * Check that datalen doesn't grow too large. Without the + * check, a malicious client could induce a buffer overflow + * by sending a tsquery whose size exceeds 2GB. datalen + * would overflow, we would allocate a too small buffer below, + * and overflow the buffer. Because operand.length is a 20-bit + * field, adding one such value to datalen must exceed + * MaxAllocSize before wrapping over the 32-bit datalen field, + * so this check will protect from it. + */ + if (datalen > MAXSTRLEN) + elog(ERROR, "invalid tsquery; total operand length exceeded"); + + /* We can calculate distance from datalen, no need to send it + * through the wire. If we did, we would have to check that + * it's valid anyway. + */ + item->operand.distance = datalen; + + datalen += item->operand.length + 1; /* \0 */ - if (i == size - 1) - elog(ERROR, "invalid pointer to right operand"); + break; + case QI_OPR: + item->operator.oper = (int8) pq_getmsgint(buf, sizeof(int8)); + if (item->operator.oper != OP_NOT && + item->operator.oper != OP_OR && + item->operator.oper != OP_AND) + elog(ERROR, "unknown operator type %d", (int) item->operator.oper); + if(item->operator.oper != OP_NOT) + { + item->operator.left = (int16) pq_getmsgint(buf, sizeof(int16)); + /* + * Sanity checks + */ + if (item->operator.left <= 0 || i + item->operator.left >= size) + elog(ERROR, "invalid pointer to left operand"); + + /* XXX: Though there's no way to construct a TSQuery that's + * not in polish notation, we don't enforce that for + * queries received from client in binary mode. Is there + * anything that relies on it? + * + * XXX: The tree could be malformed in other ways too, + * a node could have two parents, for example. + */ + } + + if (i == size - 1) + elog(ERROR, "invalid pointer to right operand"); + break; + default: + elog(ERROR, "unknown tsquery node type %d", item->type); } - else - elog(ERROR, "unknown tsquery node type"); item++; } @@ -706,13 +877,12 @@ tsqueryrecv(PG_FUNCTION_ARGS) ptr = GETOPERAND(query); for (i = 0; i < size; i++) { - if (item->type == VAL) + if (item->type == QI_VAL) { - item->distance = ptr - GETOPERAND(query); memcpy(ptr, - pq_getmsgbytes(buf, item->length), - item->length); - ptr += item->length; + pq_getmsgbytes(buf, item->operand.length), + item->operand.length); + ptr += item->operand.length; *ptr++ = '\0'; } item++; @@ -736,7 +906,7 @@ tsquerytree(PG_FUNCTION_ARGS) INFIX nrm; text *res; QueryItem *q; - int4 len; + int len; if (query->size == 0) { |