aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/like_support.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2019-02-14 10:51:59 -0500
committerTom Lane <tgl@sss.pgh.pa.us>2019-02-14 10:51:59 -0500
commit49fa99e54ec0ded180b52a4a14e543702d53e8c9 (patch)
tree64af70fc403c2354c5b05106d2afb5bb465142ad /src/backend/utils/adt/like_support.c
parent109de05cbb034b032cd60f50708716c8ff0afdf2 (diff)
downloadpostgresql-49fa99e54ec0ded180b52a4a14e543702d53e8c9.tar.gz
postgresql-49fa99e54ec0ded180b52a4a14e543702d53e8c9.zip
Move pattern selectivity code from selfuncs.c to like_support.c.
While at it, refactor patternsel() a bit so that it can be used from the LIKE/regex planner support functions as well. This makes the planner able to deal equally well with either operator or function syntax for these operations. I'm not excited about that as a feature in itself, but it provides a nice model for extensions to follow if they want such behavior for their operations. This change localizes the use of pattern_fixed_prefix() and make_greater_string() so that they no longer need be exported. (We might get pushback from extensions about that, perhaps, in which case I'd be inclined to re-export them in a new header file like_support.h.) This reduces the bulk of selfuncs.c a fair amount, removing ~1370 lines or about one-sixth of that file; it's still too big, but this is progress. Discussion: https://postgr.es/m/24537.1550093915@sss.pgh.pa.us
Diffstat (limited to 'src/backend/utils/adt/like_support.c')
-rw-r--r--src/backend/utils/adt/like_support.c1414
1 files changed, 1413 insertions, 1 deletions
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index b001dde5fc7..69509811ef7 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -34,17 +34,39 @@
*/
#include "postgres.h"
+#include <math.h>
+
+#include "access/htup_details.h"
#include "access/stratnum.h"
+#include "catalog/pg_collation.h"
#include "catalog/pg_opfamily.h"
+#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
+#include "mb/pg_wchar.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/supportnodes.h"
#include "utils/builtins.h"
+#include "utils/datum.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
#include "utils/selfuncs.h"
+#include "utils/varlena.h"
+
+
+typedef enum
+{
+ Pattern_Type_Like,
+ Pattern_Type_Like_IC,
+ Pattern_Type_Regex,
+ Pattern_Type_Regex_IC,
+ Pattern_Type_Prefix
+} Pattern_Type;
+typedef enum
+{
+ Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact
+} Pattern_Prefix_Status;
static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
static List *match_pattern_prefix(Node *leftop,
@@ -53,6 +75,34 @@ static List *match_pattern_prefix(Node *leftop,
Oid expr_coll,
Oid opfamily,
Oid indexcollation);
+static double patternsel_common(PlannerInfo *root,
+ Oid oprid,
+ Oid opfuncid,
+ List *args,
+ int varRelid,
+ Oid collation,
+ Pattern_Type ptype,
+ bool negate);
+static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
+ Pattern_Type ptype,
+ Oid collation,
+ Const **prefix,
+ Selectivity *rest_selec);
+static Selectivity prefix_selectivity(PlannerInfo *root,
+ VariableStatData *vardata,
+ Oid vartype, Oid opfamily, Const *prefixcon);
+static Selectivity like_selectivity(const char *patt, int pattlen,
+ bool case_insensitive);
+static Selectivity regex_selectivity(const char *patt, int pattlen,
+ bool case_insensitive,
+ int fixed_prefix_len);
+static int pattern_char_isalpha(char c, bool is_multibyte,
+ pg_locale_t locale, bool locale_is_c);
+static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
+ Oid collation);
+static Datum string_to_datum(const char *str, Oid datatype);
+static Const *string_to_const(const char *str, Oid datatype);
+static Const *string_to_bytea_const(const char *str, size_t str_len);
/*
@@ -96,7 +146,39 @@ like_regex_support(Node *rawreq, Pattern_Type ptype)
{
Node *ret = NULL;
- if (IsA(rawreq, SupportRequestIndexCondition))
+ if (IsA(rawreq, SupportRequestSelectivity))
+ {
+ /*
+ * Make a selectivity estimate for a function call, just as we'd do if
+ * the call was via the corresponding operator.
+ */
+ SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq;
+ Selectivity s1;
+
+ if (req->is_join)
+ {
+ /*
+ * For the moment we just punt. If patternjoinsel is ever
+ * improved to do better, this should be made to call it.
+ */
+ s1 = DEFAULT_MATCH_SEL;
+ }
+ else
+ {
+ /* Share code with operator restriction selectivity functions */
+ s1 = patternsel_common(req->root,
+ InvalidOid,
+ req->funcid,
+ req->args,
+ req->varRelid,
+ req->inputcollid,
+ ptype,
+ false);
+ }
+ req->selectivity = s1;
+ ret = (Node *) req;
+ }
+ else if (IsA(rawreq, SupportRequestIndexCondition))
{
/* Try to convert operator/function call to index conditions */
SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq;
@@ -311,3 +393,1333 @@ match_pattern_prefix(Node *leftop,
return result;
}
+
+
+/*
+ * patternsel_common - generic code for pattern-match restriction selectivity.
+ *
+ * To support using this from either the operator or function paths, caller
+ * may pass either operator OID or underlying function OID; we look up the
+ * latter from the former if needed. (We could just have patternsel() call
+ * get_opcode(), but the work would be wasted if we don't have a need to
+ * compare a fixed prefix to the pg_statistic data.)
+ *
+ * Note that oprid and/or opfuncid should be for the positive-match operator
+ * even when negate is true.
+ */
+static double
+patternsel_common(PlannerInfo *root,
+ Oid oprid,
+ Oid opfuncid,
+ List *args,
+ int varRelid,
+ Oid collation,
+ Pattern_Type ptype,
+ bool negate)
+{
+ VariableStatData vardata;
+ Node *other;
+ bool varonleft;
+ Datum constval;
+ Oid consttype;
+ Oid vartype;
+ Oid opfamily;
+ Pattern_Prefix_Status pstatus;
+ Const *patt;
+ Const *prefix = NULL;
+ Selectivity rest_selec = 0;
+ double nullfrac = 0.0;
+ double result;
+
+ /*
+ * Initialize result to the appropriate default estimate depending on
+ * whether it's a match or not-match operator.
+ */
+ if (negate)
+ result = 1.0 - DEFAULT_MATCH_SEL;
+ else
+ result = DEFAULT_MATCH_SEL;
+
+ /*
+ * If expression is not variable op constant, then punt and return the
+ * default estimate.
+ */
+ if (!get_restriction_variable(root, args, varRelid,
+ &vardata, &other, &varonleft))
+ return result;
+ if (!varonleft || !IsA(other, Const))
+ {
+ ReleaseVariableStats(vardata);
+ return result;
+ }
+
+ /*
+ * If the constant is NULL, assume operator is strict and return zero, ie,
+ * operator will never return TRUE. (It's zero even for a negator op.)
+ */
+ if (((Const *) other)->constisnull)
+ {
+ ReleaseVariableStats(vardata);
+ return 0.0;
+ }
+ constval = ((Const *) other)->constvalue;
+ consttype = ((Const *) other)->consttype;
+
+ /*
+ * The right-hand const is type text or bytea for all supported operators.
+ * We do not expect to see binary-compatible types here, since
+ * const-folding should have relabeled the const to exactly match the
+ * operator's declared type.
+ */
+ if (consttype != TEXTOID && consttype != BYTEAOID)
+ {
+ ReleaseVariableStats(vardata);
+ return result;
+ }
+
+ /*
+ * Similarly, the exposed type of the left-hand side should be one of
+ * those we know. (Do not look at vardata.atttype, which might be
+ * something binary-compatible but different.) We can use it to choose
+ * the index opfamily from which we must draw the comparison operators.
+ *
+ * NOTE: It would be more correct to use the PATTERN opfamilies than the
+ * simple ones, but at the moment ANALYZE will not generate statistics for
+ * the PATTERN operators. But our results are so approximate anyway that
+ * it probably hardly matters.
+ */
+ vartype = vardata.vartype;
+
+ switch (vartype)
+ {
+ case TEXTOID:
+ case NAMEOID:
+ opfamily = TEXT_BTREE_FAM_OID;
+ break;
+ case BPCHAROID:
+ opfamily = BPCHAR_BTREE_FAM_OID;
+ break;
+ case BYTEAOID:
+ opfamily = BYTEA_BTREE_FAM_OID;
+ break;
+ default:
+ ReleaseVariableStats(vardata);
+ return result;
+ }
+
+ /*
+ * Grab the nullfrac for use below.
+ */
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ Form_pg_statistic stats;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+ nullfrac = stats->stanullfrac;
+ }
+
+ /*
+ * Pull out any fixed prefix implied by the pattern, and estimate the
+ * fractional selectivity of the remainder of the pattern. Unlike many
+ * other selectivity estimators, we use the pattern operator's actual
+ * collation for this step. This is not because we expect the collation
+ * to make a big difference in the selectivity estimate (it seldom would),
+ * but because we want to be sure we cache compiled regexps under the
+ * right cache key, so that they can be re-used at runtime.
+ */
+ patt = (Const *) other;
+ pstatus = pattern_fixed_prefix(patt, ptype, collation,
+ &prefix, &rest_selec);
+
+ /*
+ * If necessary, coerce the prefix constant to the right type.
+ */
+ if (prefix && prefix->consttype != vartype)
+ {
+ char *prefixstr;
+
+ switch (prefix->consttype)
+ {
+ case TEXTOID:
+ prefixstr = TextDatumGetCString(prefix->constvalue);
+ break;
+ case BYTEAOID:
+ prefixstr = DatumGetCString(DirectFunctionCall1(byteaout,
+ prefix->constvalue));
+ break;
+ default:
+ elog(ERROR, "unrecognized consttype: %u",
+ prefix->consttype);
+ ReleaseVariableStats(vardata);
+ return result;
+ }
+ prefix = string_to_const(prefixstr, vartype);
+ pfree(prefixstr);
+ }
+
+ if (pstatus == Pattern_Prefix_Exact)
+ {
+ /*
+ * Pattern specifies an exact match, so pretend operator is '='
+ */
+ Oid eqopr = get_opfamily_member(opfamily, vartype, vartype,
+ BTEqualStrategyNumber);
+
+ if (eqopr == InvalidOid)
+ elog(ERROR, "no = operator for opfamily %u", opfamily);
+ result = var_eq_const(&vardata, eqopr, prefix->constvalue,
+ false, true, false);
+ }
+ else
+ {
+ /*
+ * Not exact-match pattern. If we have a sufficiently large
+ * histogram, estimate selectivity for the histogram part of the
+ * population by counting matches in the histogram. If not, estimate
+ * selectivity of the fixed prefix and remainder of pattern
+ * separately, then combine the two to get an estimate of the
+ * selectivity for the part of the column population represented by
+ * the histogram. (For small histograms, we combine these
+ * approaches.)
+ *
+ * We then add up data for any most-common-values values; these are
+ * not in the histogram population, and we can get exact answers for
+ * them by applying the pattern operator, so there's no reason to
+ * approximate. (If the MCVs cover a significant part of the total
+ * population, this gives us a big leg up in accuracy.)
+ */
+ Selectivity selec;
+ int hist_size;
+ FmgrInfo opproc;
+ double mcv_selec,
+ sumcommon;
+
+ /* Try to use the histogram entries to get selectivity */
+ if (!OidIsValid(opfuncid))
+ opfuncid = get_opcode(oprid);
+ fmgr_info(opfuncid, &opproc);
+
+ selec = histogram_selectivity(&vardata, &opproc, constval, true,
+ 10, 1, &hist_size);
+
+ /* If not at least 100 entries, use the heuristic method */
+ if (hist_size < 100)
+ {
+ Selectivity heursel;
+ Selectivity prefixsel;
+
+ if (pstatus == Pattern_Prefix_Partial)
+ prefixsel = prefix_selectivity(root, &vardata, vartype,
+ opfamily, prefix);
+ else
+ prefixsel = 1.0;
+ heursel = prefixsel * rest_selec;
+
+ if (selec < 0) /* fewer than 10 histogram entries? */
+ selec = heursel;
+ else
+ {
+ /*
+ * For histogram sizes from 10 to 100, we combine the
+ * histogram and heuristic selectivities, putting increasingly
+ * more trust in the histogram for larger sizes.
+ */
+ double hist_weight = hist_size / 100.0;
+
+ selec = selec * hist_weight + heursel * (1.0 - hist_weight);
+ }
+ }
+
+ /* In any case, don't believe extremely small or large estimates. */
+ if (selec < 0.0001)
+ selec = 0.0001;
+ else if (selec > 0.9999)
+ selec = 0.9999;
+
+ /*
+ * If we have most-common-values info, add up the fractions of the MCV
+ * entries that satisfy MCV OP PATTERN. These fractions contribute
+ * directly to the result selectivity. Also add up the total fraction
+ * represented by MCV entries.
+ */
+ mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
+ &sumcommon);
+
+ /*
+ * Now merge the results from the MCV and histogram calculations,
+ * realizing that the histogram covers only the non-null values that
+ * are not listed in MCV.
+ */
+ selec *= 1.0 - nullfrac - sumcommon;
+ selec += mcv_selec;
+ result = selec;
+ }
+
+ /* now adjust if we wanted not-match rather than match */
+ if (negate)
+ result = 1.0 - result - nullfrac;
+
+ /* result should be in range, but make sure... */
+ CLAMP_PROBABILITY(result);
+
+ if (prefix)
+ {
+ pfree(DatumGetPointer(prefix->constvalue));
+ pfree(prefix);
+ }
+
+ ReleaseVariableStats(vardata);
+
+ return result;
+}
+
+/*
+ * Fix impedance mismatch between SQL-callable functions and patternsel_common
+ */
+static double
+patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+ int varRelid = PG_GETARG_INT32(3);
+ Oid collation = PG_GET_COLLATION();
+
+ /*
+ * If this is for a NOT LIKE or similar operator, get the corresponding
+ * positive-match operator and work with that.
+ */
+ if (negate)
+ {
+ operator = get_negator(operator);
+ if (!OidIsValid(operator))
+ elog(ERROR, "patternsel called for operator without a negator");
+ }
+
+ return patternsel_common(root,
+ operator,
+ InvalidOid,
+ args,
+ varRelid,
+ collation,
+ ptype,
+ negate);
+}
+
+/*
+ * regexeqsel - Selectivity of regular-expression pattern match.
+ */
+Datum
+regexeqsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
+}
+
+/*
+ * icregexeqsel - Selectivity of case-insensitive regex match.
+ */
+Datum
+icregexeqsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
+}
+
+/*
+ * likesel - Selectivity of LIKE pattern match.
+ */
+Datum
+likesel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
+}
+
+/*
+ * prefixsel - selectivity of prefix operator
+ */
+Datum
+prefixsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ *
+ * iclikesel - Selectivity of ILIKE pattern match.
+ */
+Datum
+iclikesel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
+}
+
+/*
+ * regexnesel - Selectivity of regular-expression pattern non-match.
+ */
+Datum
+regexnesel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
+}
+
+/*
+ * icregexnesel - Selectivity of case-insensitive regex non-match.
+ */
+Datum
+icregexnesel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
+}
+
+/*
+ * nlikesel - Selectivity of LIKE pattern non-match.
+ */
+Datum
+nlikesel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
+}
+
+/*
+ * icnlikesel - Selectivity of ILIKE pattern non-match.
+ */
+Datum
+icnlikesel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
+}
+
+/*
+ * patternjoinsel - Generic code for pattern-match join selectivity.
+ */
+static double
+patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
+{
+ /* For the moment we just punt. */
+ return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
+}
+
+/*
+ * regexeqjoinsel - Join selectivity of regular-expression pattern match.
+ */
+Datum
+regexeqjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
+}
+
+/*
+ * icregexeqjoinsel - Join selectivity of case-insensitive regex match.
+ */
+Datum
+icregexeqjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
+}
+
+/*
+ * likejoinsel - Join selectivity of LIKE pattern match.
+ */
+Datum
+likejoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
+}
+
+/*
+ * prefixjoinsel - Join selectivity of prefix operator
+ */
+Datum
+prefixjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ * iclikejoinsel - Join selectivity of ILIKE pattern match.
+ */
+Datum
+iclikejoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
+}
+
+/*
+ * regexnejoinsel - Join selectivity of regex non-match.
+ */
+Datum
+regexnejoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
+}
+
+/*
+ * icregexnejoinsel - Join selectivity of case-insensitive regex non-match.
+ */
+Datum
+icregexnejoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
+}
+
+/*
+ * nlikejoinsel - Join selectivity of LIKE pattern non-match.
+ */
+Datum
+nlikejoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
+}
+
+/*
+ * icnlikejoinsel - Join selectivity of ILIKE pattern non-match.
+ */
+Datum
+icnlikejoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
+}
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Pattern analysis functions
+ *
+ * These routines support analysis of LIKE and regular-expression patterns
+ * by the planner/optimizer. It's important that they agree with the
+ * regular-expression code in backend/regex/ and the LIKE code in
+ * backend/utils/adt/like.c. Also, the computation of the fixed prefix
+ * must be conservative: if we report a string longer than the true fixed
+ * prefix, the query may produce actually wrong answers, rather than just
+ * getting a bad selectivity estimate!
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Extract the fixed prefix, if any, for a pattern.
+ *
+ * *prefix is set to a palloc'd prefix string (in the form of a Const node),
+ * or to NULL if no fixed prefix exists for the pattern.
+ * If rest_selec is not NULL, *rest_selec is set to an estimate of the
+ * selectivity of the remainder of the pattern (without any fixed prefix).
+ * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
+ *
+ * The return value distinguishes no fixed prefix, a partial prefix,
+ * or an exact-match-only pattern.
+ */
+
+static Pattern_Prefix_Status
+like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
+ Const **prefix_const, Selectivity *rest_selec)
+{
+ char *match;
+ char *patt;
+ int pattlen;
+ Oid typeid = patt_const->consttype;
+ int pos,
+ match_pos;
+ bool is_multibyte = (pg_database_encoding_max_length() > 1);
+ pg_locale_t locale = 0;
+ bool locale_is_c = false;
+
+ /* the right-hand const is type text or bytea */
+ Assert(typeid == BYTEAOID || typeid == TEXTOID);
+
+ if (case_insensitive)
+ {
+ if (typeid == BYTEAOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("case insensitive matching not supported on type bytea")));
+
+ /* If case-insensitive, we need locale info */
+ if (lc_ctype_is_c(collation))
+ locale_is_c = true;
+ else if (collation != DEFAULT_COLLATION_OID)
+ {
+ if (!OidIsValid(collation))
+ {
+ /*
+ * This typically means that the parser could not resolve a
+ * conflict of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for ILIKE"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+ locale = pg_newlocale_from_collation(collation);
+ }
+ }
+
+ if (typeid != BYTEAOID)
+ {
+ patt = TextDatumGetCString(patt_const->constvalue);
+ pattlen = strlen(patt);
+ }
+ else
+ {
+ bytea *bstr = DatumGetByteaPP(patt_const->constvalue);
+
+ pattlen = VARSIZE_ANY_EXHDR(bstr);
+ patt = (char *) palloc(pattlen);
+ memcpy(patt, VARDATA_ANY(bstr), pattlen);
+ Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
+ }
+
+ match = palloc(pattlen + 1);
+ match_pos = 0;
+ for (pos = 0; pos < pattlen; pos++)
+ {
+ /* % and _ are wildcard characters in LIKE */
+ if (patt[pos] == '%' ||
+ patt[pos] == '_')
+ break;
+
+ /* Backslash escapes the next character */
+ if (patt[pos] == '\\')
+ {
+ pos++;
+ if (pos >= pattlen)
+ break;
+ }
+
+ /* Stop if case-varying character (it's sort of a wildcard) */
+ if (case_insensitive &&
+ pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
+ break;
+
+ match[match_pos++] = patt[pos];
+ }
+
+ match[match_pos] = '\0';
+
+ if (typeid != BYTEAOID)
+ *prefix_const = string_to_const(match, typeid);
+ else
+ *prefix_const = string_to_bytea_const(match, match_pos);
+
+ if (rest_selec != NULL)
+ *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
+ case_insensitive);
+
+ pfree(patt);
+ pfree(match);
+
+ /* in LIKE, an empty pattern is an exact match! */
+ if (pos == pattlen)
+ return Pattern_Prefix_Exact; /* reached end of pattern, so exact */
+
+ if (match_pos > 0)
+ return Pattern_Prefix_Partial;
+
+ return Pattern_Prefix_None;
+}
+
+static Pattern_Prefix_Status
+regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
+ Const **prefix_const, Selectivity *rest_selec)
+{
+ Oid typeid = patt_const->consttype;
+ char *prefix;
+ bool exact;
+
+ /*
+ * Should be unnecessary, there are no bytea regex operators defined. As
+ * such, it should be noted that the rest of this function has *not* been
+ * made safe for binary (possibly NULL containing) strings.
+ */
+ if (typeid == BYTEAOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("regular-expression matching not supported on type bytea")));
+
+ /* Use the regexp machinery to extract the prefix, if any */
+ prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
+ case_insensitive, collation,
+ &exact);
+
+ if (prefix == NULL)
+ {
+ *prefix_const = NULL;
+
+ if (rest_selec != NULL)
+ {
+ char *patt = TextDatumGetCString(patt_const->constvalue);
+
+ *rest_selec = regex_selectivity(patt, strlen(patt),
+ case_insensitive,
+ 0);
+ pfree(patt);
+ }
+
+ return Pattern_Prefix_None;
+ }
+
+ *prefix_const = string_to_const(prefix, typeid);
+
+ if (rest_selec != NULL)
+ {
+ if (exact)
+ {
+ /* Exact match, so there's no additional selectivity */
+ *rest_selec = 1.0;
+ }
+ else
+ {
+ char *patt = TextDatumGetCString(patt_const->constvalue);
+
+ *rest_selec = regex_selectivity(patt, strlen(patt),
+ case_insensitive,
+ strlen(prefix));
+ pfree(patt);
+ }
+ }
+
+ pfree(prefix);
+
+ if (exact)
+ return Pattern_Prefix_Exact; /* pattern specifies exact match */
+ else
+ return Pattern_Prefix_Partial;
+}
+
+static Pattern_Prefix_Status
+pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
+ Const **prefix, Selectivity *rest_selec)
+{
+ Pattern_Prefix_Status result;
+
+ switch (ptype)
+ {
+ case Pattern_Type_Like:
+ result = like_fixed_prefix(patt, false, collation,
+ prefix, rest_selec);
+ break;
+ case Pattern_Type_Like_IC:
+ result = like_fixed_prefix(patt, true, collation,
+ prefix, rest_selec);
+ break;
+ case Pattern_Type_Regex:
+ result = regex_fixed_prefix(patt, false, collation,
+ prefix, rest_selec);
+ break;
+ case Pattern_Type_Regex_IC:
+ result = regex_fixed_prefix(patt, true, collation,
+ prefix, rest_selec);
+ break;
+ case Pattern_Type_Prefix:
+ /* Prefix type work is trivial. */
+ result = Pattern_Prefix_Partial;
+ *rest_selec = 1.0; /* all */
+ *prefix = makeConst(patt->consttype,
+ patt->consttypmod,
+ patt->constcollid,
+ patt->constlen,
+ datumCopy(patt->constvalue,
+ patt->constbyval,
+ patt->constlen),
+ patt->constisnull,
+ patt->constbyval);
+ break;
+ default:
+ elog(ERROR, "unrecognized ptype: %d", (int) ptype);
+ result = Pattern_Prefix_None; /* keep compiler quiet */
+ break;
+ }
+ return result;
+}
+
+/*
+ * Estimate the selectivity of a fixed prefix for a pattern match.
+ *
+ * A fixed prefix "foo" is estimated as the selectivity of the expression
+ * "variable >= 'foo' AND variable < 'fop'" (see also indxpath.c).
+ *
+ * The selectivity estimate is with respect to the portion of the column
+ * population represented by the histogram --- the caller must fold this
+ * together with info about MCVs and NULLs.
+ *
+ * We use the >= and < operators from the specified btree opfamily to do the
+ * estimation. The given variable and Const must be of the associated
+ * datatype.
+ *
+ * XXX Note: we make use of the upper bound to estimate operator selectivity
+ * even if the locale is such that we cannot rely on the upper-bound string.
+ * The selectivity only needs to be approximately right anyway, so it seems
+ * more useful to use the upper-bound code than not.
+ */
+static Selectivity
+prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
+ Oid vartype, Oid opfamily, Const *prefixcon)
+{
+ Selectivity prefixsel;
+ Oid cmpopr;
+ FmgrInfo opproc;
+ AttStatsSlot sslot;
+ Const *greaterstrcon;
+ Selectivity eq_sel;
+
+ cmpopr = get_opfamily_member(opfamily, vartype, vartype,
+ BTGreaterEqualStrategyNumber);
+ if (cmpopr == InvalidOid)
+ elog(ERROR, "no >= operator for opfamily %u", opfamily);
+ fmgr_info(get_opcode(cmpopr), &opproc);
+
+ prefixsel = ineq_histogram_selectivity(root, vardata,
+ &opproc, true, true,
+ prefixcon->constvalue,
+ prefixcon->consttype);
+
+ if (prefixsel < 0.0)
+ {
+ /* No histogram is present ... return a suitable default estimate */
+ return DEFAULT_MATCH_SEL;
+ }
+
+ /*-------
+ * If we can create a string larger than the prefix, say
+ * "x < greaterstr". We try to generate the string referencing the
+ * collation of the var's statistics, but if that's not available,
+ * use DEFAULT_COLLATION_OID.
+ *-------
+ */
+ if (HeapTupleIsValid(vardata->statsTuple) &&
+ get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_HISTOGRAM, InvalidOid, 0))
+ /* sslot.stacoll is set up */ ;
+ else
+ sslot.stacoll = DEFAULT_COLLATION_OID;
+ cmpopr = get_opfamily_member(opfamily, vartype, vartype,
+ BTLessStrategyNumber);
+ if (cmpopr == InvalidOid)
+ elog(ERROR, "no < operator for opfamily %u", opfamily);
+ fmgr_info(get_opcode(cmpopr), &opproc);
+ greaterstrcon = make_greater_string(prefixcon, &opproc, sslot.stacoll);
+ if (greaterstrcon)
+ {
+ Selectivity topsel;
+
+ topsel = ineq_histogram_selectivity(root, vardata,
+ &opproc, false, false,
+ greaterstrcon->constvalue,
+ greaterstrcon->consttype);
+
+ /* ineq_histogram_selectivity worked before, it shouldn't fail now */
+ Assert(topsel >= 0.0);
+
+ /*
+ * Merge the two selectivities in the same way as for a range query
+ * (see clauselist_selectivity()). Note that we don't need to worry
+ * about double-exclusion of nulls, since ineq_histogram_selectivity
+ * doesn't count those anyway.
+ */
+ prefixsel = topsel + prefixsel - 1.0;
+ }
+
+ /*
+ * If the prefix is long then the two bounding values might be too close
+ * together for the histogram to distinguish them usefully, resulting in a
+ * zero estimate (plus or minus roundoff error). To avoid returning a
+ * ridiculously small estimate, compute the estimated selectivity for
+ * "variable = 'foo'", and clamp to that. (Obviously, the resultant
+ * estimate should be at least that.)
+ *
+ * We apply this even if we couldn't make a greater string. That case
+ * suggests that the prefix is near the maximum possible, and thus
+ * probably off the end of the histogram, and thus we probably got a very
+ * small estimate from the >= condition; so we still need to clamp.
+ */
+ cmpopr = get_opfamily_member(opfamily, vartype, vartype,
+ BTEqualStrategyNumber);
+ if (cmpopr == InvalidOid)
+ elog(ERROR, "no = operator for opfamily %u", opfamily);
+ eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
+ false, true, false);
+
+ prefixsel = Max(prefixsel, eq_sel);
+
+ return prefixsel;
+}
+
+
+/*
+ * Estimate the selectivity of a pattern of the specified type.
+ * Note that any fixed prefix of the pattern will have been removed already,
+ * so actually we may be looking at just a fragment of the pattern.
+ *
+ * For now, we use a very simplistic approach: fixed characters reduce the
+ * selectivity a good deal, character ranges reduce it a little,
+ * wildcards (such as % for LIKE or .* for regex) increase it.
+ */
+
+#define FIXED_CHAR_SEL 0.20 /* about 1/5 */
+#define CHAR_RANGE_SEL 0.25
+#define ANY_CHAR_SEL 0.9 /* not 1, since it won't match end-of-string */
+#define FULL_WILDCARD_SEL 5.0
+#define PARTIAL_WILDCARD_SEL 2.0
+
+static Selectivity
+like_selectivity(const char *patt, int pattlen, bool case_insensitive)
+{
+ Selectivity sel = 1.0;
+ int pos;
+
+ /* Skip any leading wildcard; it's already factored into initial sel */
+ for (pos = 0; pos < pattlen; pos++)
+ {
+ if (patt[pos] != '%' && patt[pos] != '_')
+ break;
+ }
+
+ for (; pos < pattlen; pos++)
+ {
+ /* % and _ are wildcard characters in LIKE */
+ if (patt[pos] == '%')
+ sel *= FULL_WILDCARD_SEL;
+ else if (patt[pos] == '_')
+ sel *= ANY_CHAR_SEL;
+ else if (patt[pos] == '\\')
+ {
+ /* Backslash quotes the next character */
+ pos++;
+ if (pos >= pattlen)
+ break;
+ sel *= FIXED_CHAR_SEL;
+ }
+ else
+ sel *= FIXED_CHAR_SEL;
+ }
+ /* Could get sel > 1 if multiple wildcards */
+ if (sel > 1.0)
+ sel = 1.0;
+ return sel;
+}
+
+static Selectivity
+regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
+{
+ Selectivity sel = 1.0;
+ int paren_depth = 0;
+ int paren_pos = 0; /* dummy init to keep compiler quiet */
+ int pos;
+
+ for (pos = 0; pos < pattlen; pos++)
+ {
+ if (patt[pos] == '(')
+ {
+ if (paren_depth == 0)
+ paren_pos = pos; /* remember start of parenthesized item */
+ paren_depth++;
+ }
+ else if (patt[pos] == ')' && paren_depth > 0)
+ {
+ paren_depth--;
+ if (paren_depth == 0)
+ sel *= regex_selectivity_sub(patt + (paren_pos + 1),
+ pos - (paren_pos + 1),
+ case_insensitive);
+ }
+ else if (patt[pos] == '|' && paren_depth == 0)
+ {
+ /*
+ * If unquoted | is present at paren level 0 in pattern, we have
+ * multiple alternatives; sum their probabilities.
+ */
+ sel += regex_selectivity_sub(patt + (pos + 1),
+ pattlen - (pos + 1),
+ case_insensitive);
+ break; /* rest of pattern is now processed */
+ }
+ else if (patt[pos] == '[')
+ {
+ bool negclass = false;
+
+ if (patt[++pos] == '^')
+ {
+ negclass = true;
+ pos++;
+ }
+ if (patt[pos] == ']') /* ']' at start of class is not special */
+ pos++;
+ while (pos < pattlen && patt[pos] != ']')
+ pos++;
+ if (paren_depth == 0)
+ sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
+ }
+ else if (patt[pos] == '.')
+ {
+ if (paren_depth == 0)
+ sel *= ANY_CHAR_SEL;
+ }
+ else if (patt[pos] == '*' ||
+ patt[pos] == '?' ||
+ patt[pos] == '+')
+ {
+ /* Ought to be smarter about quantifiers... */
+ if (paren_depth == 0)
+ sel *= PARTIAL_WILDCARD_SEL;
+ }
+ else if (patt[pos] == '{')
+ {
+ while (pos < pattlen && patt[pos] != '}')
+ pos++;
+ if (paren_depth == 0)
+ sel *= PARTIAL_WILDCARD_SEL;
+ }
+ else if (patt[pos] == '\\')
+ {
+ /* backslash quotes the next character */
+ pos++;
+ if (pos >= pattlen)
+ break;
+ if (paren_depth == 0)
+ sel *= FIXED_CHAR_SEL;
+ }
+ else
+ {
+ if (paren_depth == 0)
+ sel *= FIXED_CHAR_SEL;
+ }
+ }
+ /* Could get sel > 1 if multiple wildcards */
+ if (sel > 1.0)
+ sel = 1.0;
+ return sel;
+}
+
+static Selectivity
+regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
+ int fixed_prefix_len)
+{
+ Selectivity sel;
+
+ /* If patt doesn't end with $, consider it to have a trailing wildcard */
+ if (pattlen > 0 && patt[pattlen - 1] == '$' &&
+ (pattlen == 1 || patt[pattlen - 2] != '\\'))
+ {
+ /* has trailing $ */
+ sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
+ }
+ else
+ {
+ /* no trailing $ */
+ sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
+ sel *= FULL_WILDCARD_SEL;
+ }
+
+ /* If there's a fixed prefix, discount its selectivity */
+ if (fixed_prefix_len > 0)
+ sel /= pow(FIXED_CHAR_SEL, fixed_prefix_len);
+
+ /* Make sure result stays in range */
+ CLAMP_PROBABILITY(sel);
+ return sel;
+}
+
+/*
+ * Check whether char is a letter (and, hence, subject to case-folding)
+ *
+ * In multibyte character sets or with ICU, we can't use isalpha, and it does
+ * not seem worth trying to convert to wchar_t to use iswalpha. Instead, just
+ * assume any multibyte char is potentially case-varying.
+ */
+static int
+pattern_char_isalpha(char c, bool is_multibyte,
+ pg_locale_t locale, bool locale_is_c)
+{
+ if (locale_is_c)
+ return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+ else if (is_multibyte && IS_HIGHBIT_SET(c))
+ return true;
+ else if (locale && locale->provider == COLLPROVIDER_ICU)
+ return IS_HIGHBIT_SET(c) ? true : false;
+#ifdef HAVE_LOCALE_T
+ else if (locale && locale->provider == COLLPROVIDER_LIBC)
+ return isalpha_l((unsigned char) c, locale->info.lt);
+#endif
+ else
+ return isalpha((unsigned char) c);
+}
+
+
+/*
+ * For bytea, the increment function need only increment the current byte
+ * (there are no multibyte characters to worry about).
+ */
+static bool
+byte_increment(unsigned char *ptr, int len)
+{
+ if (*ptr >= 255)
+ return false;
+ (*ptr)++;
+ return true;
+}
+
+/*
+ * Try to generate a string greater than the given string or any
+ * string it is a prefix of. If successful, return a palloc'd string
+ * in the form of a Const node; else return NULL.
+ *
+ * The caller must provide the appropriate "less than" comparison function
+ * for testing the strings, along with the collation to use.
+ *
+ * The key requirement here is that given a prefix string, say "foo",
+ * we must be able to generate another string "fop" that is greater than
+ * all strings "foobar" starting with "foo". We can test that we have
+ * generated a string greater than the prefix string, but in non-C collations
+ * that is not a bulletproof guarantee that an extension of the string might
+ * not sort after it; an example is that "foo " is less than "foo!", but it
+ * is not clear that a "dictionary" sort ordering will consider "foo!" less
+ * than "foo bar". CAUTION: Therefore, this function should be used only for
+ * estimation purposes when working in a non-C collation.
+ *
+ * To try to catch most cases where an extended string might otherwise sort
+ * before the result value, we determine which of the strings "Z", "z", "y",
+ * and "9" is seen as largest by the collation, and append that to the given
+ * prefix before trying to find a string that compares as larger.
+ *
+ * To search for a greater string, we repeatedly "increment" the rightmost
+ * character, using an encoding-specific character incrementer function.
+ * When it's no longer possible to increment the last character, we truncate
+ * off that character and start incrementing the next-to-rightmost.
+ * For example, if "z" were the last character in the sort order, then we
+ * could produce "foo" as a string greater than "fonz".
+ *
+ * This could be rather slow in the worst case, but in most cases we
+ * won't have to try more than one or two strings before succeeding.
+ *
+ * Note that it's important for the character incrementer not to be too anal
+ * about producing every possible character code, since in some cases the only
+ * way to get a larger string is to increment a previous character position.
+ * So we don't want to spend too much time trying every possible character
+ * code at the last position. A good rule of thumb is to be sure that we
+ * don't try more than 256*K values for a K-byte character (and definitely
+ * not 256^K, which is what an exhaustive search would approach).
+ */
+static Const *
+make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
+{
+ Oid datatype = str_const->consttype;
+ char *workstr;
+ int len;
+ Datum cmpstr;
+ char *cmptxt = NULL;
+ mbcharacter_incrementer charinc;
+
+ /*
+ * Get a modifiable copy of the prefix string in C-string format, and set
+ * up the string we will compare to as a Datum. In C locale this can just
+ * be the given prefix string, otherwise we need to add a suffix. Type
+ * BYTEA sorts bytewise so it never needs a suffix either.
+ */
+ if (datatype == BYTEAOID)
+ {
+ bytea *bstr = DatumGetByteaPP(str_const->constvalue);
+
+ len = VARSIZE_ANY_EXHDR(bstr);
+ workstr = (char *) palloc(len);
+ memcpy(workstr, VARDATA_ANY(bstr), len);
+ Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
+ cmpstr = str_const->constvalue;
+ }
+ else
+ {
+ if (datatype == NAMEOID)
+ workstr = DatumGetCString(DirectFunctionCall1(nameout,
+ str_const->constvalue));
+ else
+ workstr = TextDatumGetCString(str_const->constvalue);
+ len = strlen(workstr);
+ if (lc_collate_is_c(collation) || len == 0)
+ cmpstr = str_const->constvalue;
+ else
+ {
+ /* If first time through, determine the suffix to use */
+ static char suffixchar = 0;
+ static Oid suffixcollation = 0;
+
+ if (!suffixchar || suffixcollation != collation)
+ {
+ char *best;
+
+ best = "Z";
+ if (varstr_cmp(best, 1, "z", 1, collation) < 0)
+ best = "z";
+ if (varstr_cmp(best, 1, "y", 1, collation) < 0)
+ best = "y";
+ if (varstr_cmp(best, 1, "9", 1, collation) < 0)
+ best = "9";
+ suffixchar = *best;
+ suffixcollation = collation;
+ }
+
+ /* And build the string to compare to */
+ if (datatype == NAMEOID)
+ {
+ cmptxt = palloc(len + 2);
+ memcpy(cmptxt, workstr, len);
+ cmptxt[len] = suffixchar;
+ cmptxt[len + 1] = '\0';
+ cmpstr = PointerGetDatum(cmptxt);
+ }
+ else
+ {
+ cmptxt = palloc(VARHDRSZ + len + 1);
+ SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
+ memcpy(VARDATA(cmptxt), workstr, len);
+ *(VARDATA(cmptxt) + len) = suffixchar;
+ cmpstr = PointerGetDatum(cmptxt);
+ }
+ }
+ }
+
+ /* Select appropriate character-incrementer function */
+ if (datatype == BYTEAOID)
+ charinc = byte_increment;
+ else
+ charinc = pg_database_encoding_character_incrementer();
+
+ /* And search ... */
+ while (len > 0)
+ {
+ int charlen;
+ unsigned char *lastchar;
+
+ /* Identify the last character --- for bytea, just the last byte */
+ if (datatype == BYTEAOID)
+ charlen = 1;
+ else
+ charlen = len - pg_mbcliplen(workstr, len, len - 1);
+ lastchar = (unsigned char *) (workstr + len - charlen);
+
+ /*
+ * Try to generate a larger string by incrementing the last character
+ * (for BYTEA, we treat each byte as a character).
+ *
+ * Note: the incrementer function is expected to return true if it's
+ * generated a valid-per-the-encoding new character, otherwise false.
+ * The contents of the character on false return are unspecified.
+ */
+ while (charinc(lastchar, charlen))
+ {
+ Const *workstr_const;
+
+ if (datatype == BYTEAOID)
+ workstr_const = string_to_bytea_const(workstr, len);
+ else
+ workstr_const = string_to_const(workstr, datatype);
+
+ if (DatumGetBool(FunctionCall2Coll(ltproc,
+ collation,
+ cmpstr,
+ workstr_const->constvalue)))
+ {
+ /* Successfully made a string larger than cmpstr */
+ if (cmptxt)
+ pfree(cmptxt);
+ pfree(workstr);
+ return workstr_const;
+ }
+
+ /* No good, release unusable value and try again */
+ pfree(DatumGetPointer(workstr_const->constvalue));
+ pfree(workstr_const);
+ }
+
+ /*
+ * No luck here, so truncate off the last character and try to
+ * increment the next one.
+ */
+ len -= charlen;
+ workstr[len] = '\0';
+ }
+
+ /* Failed... */
+ if (cmptxt)
+ pfree(cmptxt);
+ pfree(workstr);
+
+ return NULL;
+}
+
+/*
+ * Generate a Datum of the appropriate type from a C string.
+ * Note that all of the supported types are pass-by-ref, so the
+ * returned value should be pfree'd if no longer needed.
+ */
+static Datum
+string_to_datum(const char *str, Oid datatype)
+{
+ Assert(str != NULL);
+
+ /*
+ * We cheat a little by assuming that CStringGetTextDatum() will do for
+ * bpchar and varchar constants too...
+ */
+ if (datatype == NAMEOID)
+ return DirectFunctionCall1(namein, CStringGetDatum(str));
+ else if (datatype == BYTEAOID)
+ return DirectFunctionCall1(byteain, CStringGetDatum(str));
+ else
+ return CStringGetTextDatum(str);
+}
+
+/*
+ * Generate a Const node of the appropriate type from a C string.
+ */
+static Const *
+string_to_const(const char *str, Oid datatype)
+{
+ Datum conval = string_to_datum(str, datatype);
+ Oid collation;
+ int constlen;
+
+ /*
+ * We only need to support a few datatypes here, so hard-wire properties
+ * instead of incurring the expense of catalog lookups.
+ */
+ switch (datatype)
+ {
+ case TEXTOID:
+ case VARCHAROID:
+ case BPCHAROID:
+ collation = DEFAULT_COLLATION_OID;
+ constlen = -1;
+ break;
+
+ case NAMEOID:
+ collation = C_COLLATION_OID;
+ constlen = NAMEDATALEN;
+ break;
+
+ case BYTEAOID:
+ collation = InvalidOid;
+ constlen = -1;
+ break;
+
+ default:
+ elog(ERROR, "unexpected datatype in string_to_const: %u",
+ datatype);
+ return NULL;
+ }
+
+ return makeConst(datatype, -1, collation, constlen,
+ conval, false, false);
+}
+
+/*
+ * Generate a Const node of bytea type from a binary C string and a length.
+ */
+static Const *
+string_to_bytea_const(const char *str, size_t str_len)
+{
+ bytea *bstr = palloc(VARHDRSZ + str_len);
+ Datum conval;
+
+ memcpy(VARDATA(bstr), str, str_len);
+ SET_VARSIZE(bstr, VARHDRSZ + str_len);
+ conval = PointerGetDatum(bstr);
+
+ return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
+}