1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
|
/*-------------------------------------------------------------------------
*
* like_support.c
* Planner support functions for LIKE, regex, and related operators.
*
* These routines handle special optimization of operators that can be
* used with index scans even though they are not known to the executor's
* indexscan machinery. The key idea is that these operators allow us
* to derive approximate indexscan qual clauses, such that any tuples
* that pass the operator clause itself must also satisfy the simpler
* indexscan condition(s). Then we can use the indexscan machinery
* to avoid scanning as much of the table as we'd otherwise have to,
* while applying the original operator as a qpqual condition to ensure
* we deliver only the tuples we want. (In essence, we're using a regular
* index as if it were a lossy index.)
*
* An example of what we're doing is
* textfield LIKE 'abc%def'
* from which we can generate the indexscannable conditions
* textfield >= 'abc' AND textfield < 'abd'
* which allow efficient scanning of an index on textfield.
* (In reality, character set and collation issues make the transformation
* from LIKE to indexscan limits rather harder than one might think ...
* but that's the basic idea.)
*
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/utils/adt/like_support.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/stratnum.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/supportnodes.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
#include "utils/selfuncs.h"
/*
 * Forward declarations for the file-local helpers defined further down.
 *
 * like_regex_support() is the common worker called by each of the
 * SQL-callable *_support entry points; match_pattern_prefix() is what it
 * calls to build the list of derived index conditions.
 */
static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
static List *match_pattern_prefix(Node *leftop,
Node *rightop,
Pattern_Type ptype,
Oid expr_coll,
Oid opfamily,
Oid indexcollation);
/*
* Planner support functions for LIKE, regex, and related operators
*/
Datum
textlike_support(PG_FUNCTION_ARGS)
{
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like));
}
Datum
texticlike_support(PG_FUNCTION_ARGS)
{
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like_IC));
}
Datum
textregexeq_support(PG_FUNCTION_ARGS)
{
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex));
}
Datum
texticregexeq_support(PG_FUNCTION_ARGS)
{
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC));
}
/*
 * like_regex_support
 *		Common worker for the *_support entry points.
 *
 * rawreq is the support request node and ptype the pattern flavor supplied
 * by the calling entry point.  Only SupportRequestIndexCondition requests
 * are acted on; anything else yields NULL.  For a two-argument operator or
 * function clause whose index key is the first argument, the result is the
 * List returned by match_pattern_prefix(), cast to Node *.
 */
static Node *
like_regex_support(Node *rawreq, Pattern_Type ptype)
{
	SupportRequestIndexCondition *req;
	List	   *clause_args;
	Oid			clause_coll;

	/* We only know how to answer index-condition requests */
	if (!IsA(rawreq, SupportRequestIndexCondition))
		return NULL;

	req = (SupportRequestIndexCondition *) rawreq;

	/*
	 * Currently we have no "reverse" match operators with the pattern on the
	 * left, so we only need consider cases with the indexkey on the left.
	 */
	if (req->indexarg != 0)
		return NULL;

	/* Pull the argument list and input collation out of the clause */
	if (is_opclause(req->node))
	{
		OpExpr	   *opex = (OpExpr *) req->node;

		clause_args = opex->args;
		clause_coll = opex->inputcollid;
	}
	else if (is_funcclause(req->node))	/* be paranoid */
	{
		FuncExpr   *fnex = (FuncExpr *) req->node;

		clause_args = fnex->args;
		clause_coll = fnex->inputcollid;
	}
	else
		return NULL;

	Assert(list_length(clause_args) == 2);

	/* Try to convert operator/function call to index conditions */
	return (Node *) match_pattern_prefix((Node *) linitial(clause_args),
										 (Node *) lsecond(clause_args),
										 ptype,
										 clause_coll,
										 req->opfamily,
										 req->indexcollation);
}
/*
 * match_pattern_prefix
 *	  Try to generate an indexqual for a LIKE or regex operator.
 *
 * leftop is the clause's first argument (the index key side, per the
 * caller's indexarg check) and rightop is the pattern argument.  ptype
 * selects the pattern flavor, expr_coll is the clause's input collation,
 * and opfamily/indexcollation describe the index column being matched.
 *
 * Returns NIL when no index condition can be derived: the pattern is not a
 * non-null Const, it has no fixed prefix, or the opfamily is not one of the
 * ones handled below (or is unusable for a prefix match with this index
 * collation).  Otherwise returns a list containing either a single
 * "leftop = prefix" clause (exact-match pattern) or a "leftop >= prefix"
 * clause, followed by "leftop < greaterstr" when make_greater_string() is
 * able to produce an upper bound.
 *
 * Raises an ERROR if the opfamily lacks an operator we expect it to have.
 */
static List *
match_pattern_prefix(Node *leftop,
					 Node *rightop,
					 Pattern_Type ptype,
					 Oid expr_coll,
					 Oid opfamily,
					 Oid indexcollation)
{
	List	   *result;
	Const	   *patt;
	Const	   *prefix;
	Pattern_Prefix_Status pstatus;
	Oid			ldatatype;
	Oid			rdatatype;
	Oid			oproid;
	Expr	   *expr;
	FmgrInfo	ltproc;
	Const	   *greaterstr;

	/*
	 * Can't do anything with a non-constant or NULL pattern argument.
	 *
	 * Note that since we restrict ourselves to cases with a hard constant on
	 * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
	 * about verifying that.
	 */
	if (!IsA(rightop, Const) ||
		((Const *) rightop)->constisnull)
		return NIL;
	patt = (Const *) rightop;

	/*
	 * Try to extract a fixed prefix from the pattern.
	 */
	pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
								   &prefix, NULL);

	/* fail if no fixed prefix */
	if (pstatus == Pattern_Prefix_None)
		return NIL;

	/*
	 * Must also check that index's opfamily supports the operators we will
	 * want to apply.  (A hash index, for example, will not support ">=".)
	 * Currently, only btree and spgist support the operators we need.
	 *
	 * Note: actually, in the Pattern_Prefix_Exact case, we only need "=" so a
	 * hash index would work.  Currently it doesn't seem worth checking for
	 * that, however.
	 *
	 * We insist on the opfamily being one of the specific ones we expect,
	 * else we'd do the wrong thing if someone were to make a reverse-sort
	 * opfamily with the same operators.
	 *
	 * The non-pattern opclasses will not sort the way we need in most non-C
	 * locales.  We can use such an index anyway for an exact match (simple
	 * equality), but not for prefix-match cases.  Note that here we are
	 * looking at the index's collation, not the expression's collation --
	 * this test is *not* dependent on the LIKE/regex operator's collation.
	 *
	 * While we're at it, identify the type the comparison constant(s) should
	 * have, based on the opfamily.
	 */
	switch (opfamily)
	{
		case TEXT_BTREE_FAM_OID:
			if (!(pstatus == Pattern_Prefix_Exact ||
				  lc_collate_is_c(indexcollation)))
				return NIL;
			rdatatype = TEXTOID;
			break;

		case TEXT_PATTERN_BTREE_FAM_OID:
		case TEXT_SPGIST_FAM_OID:
			rdatatype = TEXTOID;
			break;

		case BPCHAR_BTREE_FAM_OID:
			if (!(pstatus == Pattern_Prefix_Exact ||
				  lc_collate_is_c(indexcollation)))
				return NIL;
			rdatatype = BPCHAROID;
			break;

		case BPCHAR_PATTERN_BTREE_FAM_OID:
			rdatatype = BPCHAROID;
			break;

		case BYTEA_BTREE_FAM_OID:
			rdatatype = BYTEAOID;
			break;

		default:
			return NIL;
	}

	/* OK, prepare to create the indexqual(s) */
	ldatatype = exprType(leftop);

	/*
	 * If necessary, coerce the prefix constant to the right type.  The given
	 * prefix constant is either text or bytea type, therefore the only case
	 * where we need to do anything is when converting text to bpchar.  Those
	 * two types are binary-compatible, so relabeling the Const node is
	 * sufficient.
	 */
	if (prefix->consttype != rdatatype)
	{
		Assert(prefix->consttype == TEXTOID &&
			   rdatatype == BPCHAROID);
		prefix->consttype = rdatatype;
	}

	/*
	 * If we found an exact-match pattern, generate an "=" indexqual.
	 */
	if (pstatus == Pattern_Prefix_Exact)
	{
		oproid = get_opfamily_member(opfamily, ldatatype, rdatatype,
									 BTEqualStrategyNumber);
		if (oproid == InvalidOid)
			elog(ERROR, "no = operator for opfamily %u", opfamily);
		expr = make_opclause(oproid, BOOLOID, false,
							 (Expr *) leftop, (Expr *) prefix,
							 InvalidOid, indexcollation);
		result = list_make1(expr);
		return result;
	}

	/*
	 * Otherwise, we have a nonempty required prefix of the values.
	 *
	 * We can always say "x >= prefix".
	 */
	oproid = get_opfamily_member(opfamily, ldatatype, rdatatype,
								 BTGreaterEqualStrategyNumber);
	if (oproid == InvalidOid)
		elog(ERROR, "no >= operator for opfamily %u", opfamily);
	expr = make_opclause(oproid, BOOLOID, false,
						 (Expr *) leftop, (Expr *) prefix,
						 InvalidOid, indexcollation);
	result = list_make1(expr);

	/*-------
	 * If we can create a string larger than the prefix, we can say
	 * "x < greaterstr".  NB: we rely on make_greater_string() to generate
	 * a guaranteed-greater string, not just a probably-greater string.
	 * In general this is only guaranteed in C locale, so we'd better be
	 * using a C-locale index collation.
	 *-------
	 */
	oproid = get_opfamily_member(opfamily, ldatatype, rdatatype,
								 BTLessStrategyNumber);
	if (oproid == InvalidOid)
		elog(ERROR, "no < operator for opfamily %u", opfamily);

	/*
	 * Look up the "<" operator's underlying function into ltproc and pass it
	 * by address to make_greater_string() along with the prefix and index
	 * collation.
	 */
	fmgr_info(get_opcode(oproid), &ltproc);
	greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
	if (greaterstr)
	{
		expr = make_opclause(oproid, BOOLOID, false,
							 (Expr *) leftop, (Expr *) greaterstr,
							 InvalidOid, indexcollation);
		result = lappend(result, expr);
	}

	return result;
}
|