aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/geqo/geqo_eval.c
blob: 847afd465dd5c40097cb35ec1a1df885c13429bb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
/*------------------------------------------------------------------------
 *
 * geqo_eval.c
 *	  Routines to evaluate query trees
 *
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.84 2007/02/13 02:31:02 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */

/* contributed by:
   =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
   *  Martin Utesch				 * Institute of Automatic Control	   *
   =							 = University of Mining and Technology =
   *  utesch@aut.tu-freiberg.de  * Freiberg, Germany				   *
   =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
 */

#include "postgres.h"

#include <float.h>
#include <limits.h>
#include <math.h>

#include "optimizer/geqo.h"
#include "optimizer/joininfo.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "utils/memutils.h"


static bool desirable_join(PlannerInfo *root,
			   RelOptInfo *outer_rel, RelOptInfo *inner_rel);


/*
 * geqo_eval
 *
 * Returns cost of a query tree as an individual of the population.
 */
Cost
geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
{
	MemoryContext mycontext;
	MemoryContext oldcxt;
	RelOptInfo *joinrel;
	Cost		fitness;
	int			savelength;
	struct HTAB *savehash;

	/*
	 * Because gimme_tree considers both left- and right-sided trees, there is
	 * no difference between a tour (a,b,c,d,...) and a tour (b,a,c,d,...) ---
	 * the same join orders will be considered. To avoid redundant cost
	 * calculations, we simply reject tours where tour[0] > tour[1], assigning
	 * them an artificially bad fitness.
	 *
	 * init_tour() is aware of this rule and so we should never reject a tour
	 * during the initial filling of the pool.	It seems difficult to persuade
	 * the recombination logic never to break the rule, however.
	 */
	if (num_gene >= 2 && tour[0] > tour[1])
		return DBL_MAX;

	/*
	 * Create a private memory context that will hold all temp storage
	 * allocated inside gimme_tree().
	 *
	 * Since geqo_eval() will be called many times, we can't afford to let all
	 * that memory go unreclaimed until end of statement.  Note we make the
	 * temp context a child of the planner's normal context, so that it will
	 * be freed even if we abort via ereport(ERROR).
	 */
	mycontext = AllocSetContextCreate(CurrentMemoryContext,
									  "GEQO",
									  ALLOCSET_DEFAULT_MINSIZE,
									  ALLOCSET_DEFAULT_INITSIZE,
									  ALLOCSET_DEFAULT_MAXSIZE);
	oldcxt = MemoryContextSwitchTo(mycontext);

	/*
	 * gimme_tree will add entries to root->join_rel_list, which may or may
	 * not already contain some entries.  The newly added entries will be
	 * recycled by the MemoryContextDelete below, so we must ensure that the
	 * list is restored to its former state before exiting.  We can do this by
	 * truncating the list to its original length.	NOTE this assumes that any
	 * added entries are appended at the end!
	 *
	 * We also must take care not to mess up the outer join_rel_hash, if there
	 * is one.	We can do this by just temporarily setting the link to NULL.
	 * (If we are dealing with enough join rels, which we very likely are, a
	 * new hash table will get built and used locally.)
	 */
	savelength = list_length(evaldata->root->join_rel_list);
	savehash = evaldata->root->join_rel_hash;

	evaldata->root->join_rel_hash = NULL;

	/* construct the best path for the given combination of relations */
	joinrel = gimme_tree(tour, num_gene, evaldata);

	/*
	 * compute fitness
	 *
	 * XXX geqo does not currently support optimization for partial result
	 * retrieval --- how to fix?
	 */
	if (joinrel)
		fitness = joinrel->cheapest_total_path->total_cost;
	else
		fitness = DBL_MAX;

	/*
	 * Restore join_rel_list to its former state, and put back original
	 * hashtable if any.
	 */
	evaldata->root->join_rel_list = list_truncate(evaldata->root->join_rel_list,
												  savelength);
	evaldata->root->join_rel_hash = savehash;

	/* release all the memory acquired within gimme_tree */
	MemoryContextSwitchTo(oldcxt);
	MemoryContextDelete(mycontext);

	return fitness;
}

/*
 * gimme_tree
 *	  Form planner estimates for a join tree constructed in the specified
 *	  order.
 *
 *	 'tour' is the proposed join order, of length 'num_gene'
 *	 'evaldata' contains the context we need
 *
 * Returns a new join relation whose cheapest path is the best plan for
 * this join order.  NB: will return NULL if join order is invalid.
 *
 * The original implementation of this routine always joined in the specified
 * order, and so could only build left-sided plans (and right-sided and
 * mixtures, as a byproduct of the fact that make_join_rel() is symmetric).
 * It could never produce a "bushy" plan.  This had a couple of big problems,
 * of which the worst was that as of 7.4, there are situations involving IN
 * subqueries where the only valid plans are bushy.
 *
 * The present implementation takes the given tour as a guideline, but
 * postpones joins that seem unsuitable according to some heuristic rules.
 * This allows correct bushy plans to be generated at need, and as a nice
 * side-effect it seems to materially improve the quality of the generated
 * plans.
 */
RelOptInfo *
gimme_tree(Gene *tour, int num_gene, GeqoEvalData *evaldata)
{
	RelOptInfo **stack;
	int			stack_depth;
	RelOptInfo *joinrel;
	int			rel_count;

	/*
	 * Create a stack to hold not-yet-joined relations.
	 */
	stack = (RelOptInfo **) palloc(num_gene * sizeof(RelOptInfo *));
	stack_depth = 0;

	/*
	 * Push each relation onto the stack in the specified order.  After
	 * pushing each relation, see whether the top two stack entries are
	 * joinable according to the desirable_join() heuristics.  If so, join
	 * them into one stack entry, and try again to combine with the next stack
	 * entry down (if any).  When the stack top is no longer joinable,
	 * continue to the next input relation.  After we have pushed the last
	 * input relation, the heuristics are disabled and we force joining all
	 * the remaining stack entries.
	 *
	 * If desirable_join() always returns true, this produces a straight
	 * left-to-right join just like the old code.  Otherwise we may produce a
	 * bushy plan or a left/right-sided plan that really corresponds to some
	 * tour other than the one given.  To the extent that the heuristics are
	 * helpful, however, this will be a better plan than the raw tour.
	 *
	 * Also, when a join attempt fails (because of OJ or IN constraints), we
	 * may be able to recover and produce a workable plan, where the old code
	 * just had to give up.  This case acts the same as a false result from
	 * desirable_join().
	 */
	for (rel_count = 0; rel_count < num_gene; rel_count++)
	{
		int			cur_rel_index;

		/* Get the next input relation and push it */
		cur_rel_index = (int) tour[rel_count];
		stack[stack_depth] = (RelOptInfo *) list_nth(evaldata->initial_rels,
													 cur_rel_index - 1);
		stack_depth++;

		/*
		 * While it's feasible, pop the top two stack entries and replace with
		 * their join.
		 */
		while (stack_depth >= 2)
		{
			RelOptInfo *outer_rel = stack[stack_depth - 2];
			RelOptInfo *inner_rel = stack[stack_depth - 1];

			/*
			 * Don't pop if heuristics say not to join now.  However, once we
			 * have exhausted the input, the heuristics can't prevent popping.
			 */
			if (rel_count < num_gene - 1 &&
				!desirable_join(evaldata->root, outer_rel, inner_rel))
				break;

			/*
			 * Construct a RelOptInfo representing the join of these two input
			 * relations.  Note that we expect the joinrel not to exist in
			 * root->join_rel_list yet, and so the paths constructed for it
			 * will only include the ones we want.
			 */
			joinrel = make_join_rel(evaldata->root, outer_rel, inner_rel);

			/* Can't pop stack here if join order is not valid */
			if (!joinrel)
				break;

			/* Find and save the cheapest paths for this rel */
			set_cheapest(joinrel);

			/* Pop the stack and replace the inputs with their join */
			stack_depth--;
			stack[stack_depth - 1] = joinrel;
		}
	}

	/* Did we succeed in forming a single join relation? */
	if (stack_depth == 1)
		joinrel = stack[0];
	else
		joinrel = NULL;

	pfree(stack);

	return joinrel;
}

/*
 * Heuristics for gimme_tree: do we want to join these two relations?
 */
static bool
desirable_join(PlannerInfo *root,
			   RelOptInfo *outer_rel, RelOptInfo *inner_rel)
{
	ListCell   *l;

	/*
	 * Join if there is an applicable join clause.
	 */
	if (have_relevant_joinclause(root, outer_rel, inner_rel))
		return true;

	/*
	 * Join if the rels overlap the same outer-join side and don't already
	 * implement the outer join. This is needed to ensure that we can find a
	 * valid solution in a case where an OJ contains a clauseless join.
	 */
	foreach(l, root->oj_info_list)
	{
		OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);

		/* ignore full joins --- other mechanisms preserve their ordering */
		if (ojinfo->is_full_join)
			continue;
		if (bms_overlap(outer_rel->relids, ojinfo->min_righthand) &&
			bms_overlap(inner_rel->relids, ojinfo->min_righthand) &&
			!bms_overlap(outer_rel->relids, ojinfo->min_lefthand) &&
			!bms_overlap(inner_rel->relids, ojinfo->min_lefthand))
			return true;
		if (bms_overlap(outer_rel->relids, ojinfo->min_lefthand) &&
			bms_overlap(inner_rel->relids, ojinfo->min_lefthand) &&
			!bms_overlap(outer_rel->relids, ojinfo->min_righthand) &&
			!bms_overlap(inner_rel->relids, ojinfo->min_righthand))
			return true;
	}

	/*
	 * Join if the rels are members of the same IN sub-select. This is needed
	 * to ensure that we can find a valid solution in a case where an IN
	 * sub-select has a clauseless join.
	 */
	foreach(l, root->in_info_list)
	{
		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);

		if (bms_is_subset(outer_rel->relids, ininfo->righthand) &&
			bms_is_subset(inner_rel->relids, ininfo->righthand))
			return true;
	}

	/* Otherwise postpone the join till later. */
	return false;
}