/*-------------------------------------------------------------------------
 *
 * planagg.c
 *	  Special planning for aggregate queries.
 *
 * This module tries to replace MIN/MAX aggregate functions by subqueries
 * of the form
 *		(SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1)
 * Given a suitable index on tab.col, this can be much faster than the
 * generic scan-all-the-rows aggregation plan.  We can handle multiple
 * MIN/MAX aggregates by generating multiple subqueries, and their
 * orderings can be different.  However, if the query contains any
 * non-optimizable aggregates, there's no point since we'll have to
 * scan all the rows anyway.
 *
 *
 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/optimizer/plan/planagg.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "catalog/pg_aggregate.h"
#include "catalog/pg_am.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/subselect.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"


/* Per-aggregate info during optimize_minmax_aggregates() */
typedef struct
{
	MinMaxAggInfo *mminfo;		/* info gathered by preprocessing */
	Path	   *path;			/* access path for ordered scan */
	Cost		pathcost;		/* estimated cost to fetch first row */
	Param	   *param;			/* param for subplan's output */
} PrivateMMAggInfo;

static bool find_minmax_aggs_walker(Node *node, List **context);
static PrivateMMAggInfo *find_minmax_path(PlannerInfo *root, RelOptInfo *rel,
										  MinMaxAggInfo *mminfo);
static bool path_usable_for_agg(Path *path);
static void make_agg_subplan(PlannerInfo *root, RelOptInfo *rel,
							 PrivateMMAggInfo *info);
static void add_notnull_qual(PlannerInfo *root, RelOptInfo *rel,
							 PrivateMMAggInfo *info, Path *path);
static Node *replace_aggs_with_params_mutator(Node *node, List **context);
static Oid	fetch_agg_sort_op(Oid aggfnoid);


/*
 * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
 *
 * Check to see whether the query contains MIN/MAX aggregate functions that
 * might be optimizable via indexscans.  If it does, and all the aggregates
 * are potentially optimizable, then set up root->minmax_aggs with a list of
 * these aggregates.
 *
 * Note: we are passed the preprocessed targetlist separately, because it's
 * not necessarily equal to root->parse->targetList.
 */
void
preprocess_minmax_aggregates(PlannerInfo *root, List *tlist)
{
	Query	   *parse = root->parse;
	FromExpr   *jtnode;
	RangeTblRef *rtr;
	RangeTblEntry *rte;
	List	   *aggs_list;
	ListCell   *lc;

	/* minmax_aggs list should be empty at this point */
	Assert(root->minmax_aggs == NIL);

	/* Nothing to do if query has no aggregates */
	if (!parse->hasAggs)
		return;

	Assert(!parse->setOperations);		/* shouldn't get here if a setop */
	Assert(parse->rowMarks == NIL);		/* nor if FOR UPDATE */

	/*
	 * Reject unoptimizable cases.
	 *
	 * We don't handle GROUP BY or windowing, because our current
	 * implementations of grouping require looking at all the rows anyway, and
	 * so there's not much point in optimizing MIN/MAX.
	 */
	if (parse->groupClause || parse->hasWindowFuncs)
		return;

	/*
	 * We also restrict the query to reference exactly one table, since join
	 * conditions can't be handled reasonably.  (We could perhaps handle a
	 * query containing cartesian-product joins, but it hardly seems worth the
	 * trouble.)  However, the single real table could be buried in several
	 * levels of FromExpr due to subqueries.  Note the single table could be
	 * an inheritance parent, too.
	 */
	jtnode = parse->jointree;
	while (IsA(jtnode, FromExpr))
	{
		if (list_length(jtnode->fromlist) != 1)
			return;
		jtnode = linitial(jtnode->fromlist);
	}
	if (!IsA(jtnode, RangeTblRef))
		return;
	rtr = (RangeTblRef *) jtnode;
	rte = planner_rt_fetch(rtr->rtindex, root);
	if (rte->rtekind != RTE_RELATION)
		return;

	/*
	 * Scan the tlist and HAVING qual to find all the aggregates and verify
	 * all are MIN/MAX aggregates.  Stop as soon as we find one that isn't.
	 */
	aggs_list = NIL;
	if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
		return;
	if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
		return;

	/*
	 * OK, there is at least the possibility of performing the optimization.
	 * Build pathkeys (and thereby EquivalenceClasses) for each aggregate.
	 * The existence of the EquivalenceClasses will prompt the path generation
	 * logic to try to build paths matching the desired sort ordering(s).
	 *
	 * Note: the pathkeys are non-canonical at this point.  They'll be fixed
	 * later by canonicalize_all_pathkeys().
	 */
	foreach(lc, aggs_list)
	{
		MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);

		mminfo->pathkeys = make_pathkeys_for_aggregate(root,
													   mminfo->target,
													   mminfo->aggsortop);
	}

	/*
	 * We're done until path generation is complete.  Save info for later.
	 */
	root->minmax_aggs = aggs_list;
}

/*
 * optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes
 *
 * Check to see whether all the aggregates are in fact optimizable into
 * indexscans. If so, and the result is estimated to be cheaper than the
 * generic aggregate method, then generate and return a Plan that does it
 * that way.  Otherwise, return NULL.
 *
 * We are passed the preprocessed tlist, as well as the best path devised for
 * computing the input of a standard Agg node.
 */
Plan *
optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
{
	Query	   *parse = root->parse;
	FromExpr   *jtnode;
	RangeTblRef *rtr;
	RelOptInfo *rel;
	List	   *aggs_list;
	ListCell   *lc;
	Cost		total_cost;
	Path		agg_p;
	Plan	   *plan;
	Node	   *hqual;
	QualCost	tlist_cost;

	/* Nothing to do if preprocess_minmax_aggs rejected the query */
	if (root->minmax_aggs == NIL)
		return NULL;

	/* Re-locate the one real table identified by preprocess_minmax_aggs */
	jtnode = parse->jointree;
	while (IsA(jtnode, FromExpr))
	{
		Assert(list_length(jtnode->fromlist) == 1);
		jtnode = linitial(jtnode->fromlist);
	}
	Assert(IsA(jtnode, RangeTblRef));
	rtr = (RangeTblRef *) jtnode;
	rel = find_base_rel(root, rtr->rtindex);

	/*
	 * Examine each agg to see if we can find a suitable ordered path for it.
	 * Give up if any agg isn't indexable.
	 */
	aggs_list = NIL;
	total_cost = 0;
	foreach(lc, root->minmax_aggs)
	{
		MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
		PrivateMMAggInfo *info;

		info = find_minmax_path(root, rel, mminfo);
		if (!info)
			return NULL;
		aggs_list = lappend(aggs_list, info);
		total_cost += info->pathcost;
	}

	/*
	 * Now we have enough info to compare costs against the generic aggregate
	 * implementation.
	 *
	 * Note that we don't include evaluation cost of the tlist here; this is
	 * OK since it isn't included in best_path's cost either, and should be
	 * the same in either case.
	 */
	cost_agg(&agg_p, root, AGG_PLAIN, list_length(aggs_list),
			 0, 0,
			 best_path->startup_cost, best_path->total_cost,
			 best_path->parent->rows);

	if (total_cost > agg_p.total_cost)
		return NULL;			/* too expensive */

	/*
	 * OK, we are going to generate an optimized plan.
	 *
	 * First, generate a subplan and output Param node for each agg.
	 */
	foreach(lc, aggs_list)
	{
		make_agg_subplan(root, rel, (PrivateMMAggInfo *) lfirst(lc));
	}

	/*
	 * Modify the targetlist and HAVING qual to reference subquery outputs
	 */
	tlist = (List *) replace_aggs_with_params_mutator((Node *) tlist,
													  &aggs_list);
	hqual = replace_aggs_with_params_mutator(parse->havingQual,
											 &aggs_list);

	/*
	 * We have to replace Aggrefs with Params in equivalence classes too, else
	 * ORDER BY or DISTINCT on an optimized aggregate will fail.
	 *
	 * Note: at some point it might become necessary to mutate other data
	 * structures too, such as the query's sortClause or distinctClause. Right
	 * now, those won't be examined after this point.
	 */
	mutate_eclass_expressions(root,
							  replace_aggs_with_params_mutator,
							  &aggs_list);

	/*
	 * Generate the output plan --- basically just a Result
	 */
	plan = (Plan *) make_result(root, tlist, hqual, NULL);

	/* Account for evaluation cost of the tlist (make_result did the rest) */
	cost_qual_eval(&tlist_cost, tlist, root);
	plan->startup_cost += tlist_cost.startup;
	plan->total_cost += tlist_cost.startup + tlist_cost.per_tuple;

	return plan;
}

/*
 * find_minmax_aggs_walker
 *		Recursively scan the Aggref nodes in an expression tree, and check
 *		that each one is a MIN/MAX aggregate.  If so, build a list of the
 *		distinct aggregate calls in the tree.
 *
 * Returns TRUE if a non-MIN/MAX aggregate is found, FALSE otherwise.
 * (This seemingly-backward definition is used because expression_tree_walker
 * aborts the scan on TRUE return, which is what we want.)
 *
 * Found aggregates are added to the list at *context; it's up to the caller
 * to initialize the list to NIL.
 *
 * This does not descend into subqueries, and so should be used only after
 * reduction of sublinks to subplans.  There mustn't be outer-aggregate
 * references either.
 */
static bool
find_minmax_aggs_walker(Node *node, List **context)
{
	if (node == NULL)
		return false;
	if (IsA(node, Aggref))
	{
		Aggref	   *aggref = (Aggref *) node;
		Oid			aggsortop;
		TargetEntry *curTarget;
		MinMaxAggInfo *mminfo;
		ListCell   *l;

		Assert(aggref->agglevelsup == 0);
		if (list_length(aggref->args) != 1 || aggref->aggorder != NIL)
			return true;		/* it couldn't be MIN/MAX */
		/* note: we do not care if DISTINCT is mentioned ... */
		curTarget = (TargetEntry *) linitial(aggref->args);

		aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
		if (!OidIsValid(aggsortop))
			return true;		/* not a MIN/MAX aggregate */

		if (contain_mutable_functions((Node *) curTarget->expr))
			return true;		/* not potentially indexable */

		if (type_is_rowtype(exprType((Node *) curTarget->expr)))
			return true;		/* IS NOT NULL would have weird semantics */

		/*
		 * Check whether it's already in the list, and add it if not.
		 */
		foreach(l, *context)
		{
			mminfo = (MinMaxAggInfo *) lfirst(l);
			if (mminfo->aggfnoid == aggref->aggfnoid &&
				equal(mminfo->target, curTarget->expr))
				return false;
		}

		mminfo = makeNode(MinMaxAggInfo);
		mminfo->aggfnoid = aggref->aggfnoid;
		mminfo->aggsortop = aggsortop;
		mminfo->target = curTarget->expr;
		mminfo->pathkeys = NIL;				/* don't compute pathkeys yet */

		*context = lappend(*context, mminfo);

		/*
		 * We need not recurse into the argument, since it can't contain any
		 * aggregates.
		 */
		return false;
	}
	Assert(!IsA(node, SubLink));
	return expression_tree_walker(node, find_minmax_aggs_walker,
								  (void *) context);
}

/*
 * find_minmax_path
 *		Given a MIN/MAX aggregate, try to find an ordered Path it can be
 *		optimized with.
 *
 * If successful, build and return a PrivateMMAggInfo struct.  Otherwise,
 * return NULL.
 */
static PrivateMMAggInfo *
find_minmax_path(PlannerInfo *root, RelOptInfo *rel, MinMaxAggInfo *mminfo)
{
	PrivateMMAggInfo *info;
	Path	   *best_path = NULL;
	Cost		best_cost = 0;
	double		path_fraction;
	PathKey    *mmpathkey;
	ListCell   *lc;

	/*
	 * Punt if the aggregate's pathkey turned out to be redundant, ie its
	 * pathkeys list is now empty.  This would happen with something like
	 * "SELECT max(x) ... WHERE x = constant".  There's no need to try to
	 * optimize such a case, because if there is an index that would help,
	 * it should already have been used with the WHERE clause.
	 */
	if (mminfo->pathkeys == NIL)
		return NULL;

	/*
	 * Search the paths that were generated for the rel to see if there are
	 * any with the desired ordering.  There could be multiple such paths,
	 * in which case take the cheapest (as measured according to how fast it
	 * will be to fetch the first row).
	 *
	 * We can't use pathkeys_contained_in() to check the ordering, because we
	 * would like to match pathkeys regardless of the nulls_first setting.
	 * However, we know that MIN/MAX aggregates will have at most one item in
	 * their pathkeys, so it's not too complicated to match by brute force.
	 *
	 * Note: this test ignores the possible costs associated with skipping
	 * NULL tuples.  We assume that adding the not-null criterion to the
	 * indexqual doesn't really cost anything.
	 */
	if (rel->rows > 1.0)
		path_fraction = 1.0 / rel->rows;
	else
		path_fraction = 1.0;

	Assert(list_length(mminfo->pathkeys) == 1);
	mmpathkey = (PathKey *) linitial(mminfo->pathkeys);

	foreach(lc, rel->pathlist)
	{
		Path   *path = (Path *) lfirst(lc);
		PathKey *pathkey;
		Cost	path_cost;

		if (path->pathkeys == NIL)
			continue;				/* unordered path */
		pathkey = (PathKey *) linitial(path->pathkeys);

		if (mmpathkey->pk_eclass == pathkey->pk_eclass &&
			mmpathkey->pk_opfamily == pathkey->pk_opfamily &&
			mmpathkey->pk_strategy == pathkey->pk_strategy)
		{
			/*
			 * OK, it has the right ordering; is it acceptable otherwise?
			 * (We test in this order because the pathkey check is cheap.)
			 */
			if (path_usable_for_agg(path))
			{
				/*
				 * It'll work; but is it the cheapest?
				 *
				 * Note: cost calculation here should match
				 * compare_fractional_path_costs().
				 */
				path_cost = path->startup_cost +
					path_fraction * (path->total_cost - path->startup_cost);

				if (best_path == NULL || path_cost < best_cost)
				{
					best_path = path;
					best_cost = path_cost;
				}
			}
		}
	}

	/* Fail if no suitable path */
	if (best_path == NULL)
		return NULL;

	/* Construct private state for further processing */
	info = (PrivateMMAggInfo *) palloc(sizeof(PrivateMMAggInfo));
	info->mminfo = mminfo;
	info->path = best_path;
	info->pathcost = best_cost;
	info->param = NULL;			/* will be set later */

	return info;
}

/*
 * To be usable, a Path needs to be an IndexPath on a btree index, or be a
 * MergeAppendPath of such IndexPaths.  This restriction is mainly because
 * we need to be sure the index can handle an added NOT NULL constraint at
 * minimal additional cost.  If you wish to relax it, you'll need to improve
 * add_notnull_qual() too.
 */
static bool
path_usable_for_agg(Path *path)
{
	if (IsA(path, IndexPath))
	{
		IndexPath  *ipath = (IndexPath *) path;

		/* OK if it's a btree index */
		if (ipath->indexinfo->relam == BTREE_AM_OID)
			return true;
	}
	else if (IsA(path, MergeAppendPath))
	{
		MergeAppendPath *mpath = (MergeAppendPath *) path;
		ListCell   *lc;

		foreach(lc, mpath->subpaths)
		{
			if (!path_usable_for_agg((Path *) lfirst(lc)))
				return false;
		}
		return true;
	}
	return false;
}

/*
 * Construct a suitable plan for a converted aggregate query
 */
static void
make_agg_subplan(PlannerInfo *root, RelOptInfo *rel, PrivateMMAggInfo *info)
{
	PlannerInfo subroot;
	Query	   *subparse;
	Plan	   *plan;
	TargetEntry *tle;

	/*
	 * Generate a suitably modified query.	Much of the work here is probably
	 * unnecessary in the normal case, but we want to make it look good if
	 * someone tries to EXPLAIN the result.
	 */
	memcpy(&subroot, root, sizeof(PlannerInfo));
	subroot.parse = subparse = (Query *) copyObject(root->parse);
	subparse->commandType = CMD_SELECT;
	subparse->resultRelation = 0;
	subparse->returningList = NIL;
	subparse->utilityStmt = NULL;
	subparse->intoClause = NULL;
	subparse->hasAggs = false;
	subparse->hasDistinctOn = false;
	subparse->groupClause = NIL;
	subparse->havingQual = NULL;
	subparse->distinctClause = NIL;
	subparse->sortClause = NIL;
	subroot.hasHavingQual = false;

	/* single tlist entry that is the aggregate target */
	tle = makeTargetEntry(copyObject(info->mminfo->target),
						  1,
						  pstrdup("agg_target"),
						  false);
	subparse->targetList = list_make1(tle);

	/* set up expressions for LIMIT 1 */
	subparse->limitOffset = NULL;
	subparse->limitCount = (Node *) makeConst(INT8OID, -1, sizeof(int64),
											  Int64GetDatum(1), false,
											  FLOAT8PASSBYVAL);

	/*
	 * Modify the ordered Path to add an indexed "target IS NOT NULL"
	 * condition to each scan.  We need this to ensure we don't return a NULL,
	 * which'd be contrary to the standard behavior of MIN/MAX.  We insist on
	 * it being indexed, else the Path might not be as cheap as we thought.
	 */
	add_notnull_qual(root, rel, info, info->path);

	/*
	 * Generate the plan for the subquery. We already have a Path, but we have
	 * to convert it to a Plan and attach a LIMIT node above it.
	 */
	plan = create_plan(&subroot, info->path);

	plan->targetlist = subparse->targetList;

	plan = (Plan *) make_limit(plan,
							   subparse->limitOffset,
							   subparse->limitCount,
							   0, 1);

	/*
	 * Convert the plan into an InitPlan, and make a Param for its result.
	 */
	info->param = SS_make_initplan_from_plan(&subroot, plan,
											 exprType((Node *) tle->expr),
											 -1);

	/*
	 * Put the updated list of InitPlans back into the outer PlannerInfo.
	 */
	root->init_plans = subroot.init_plans;
}

/*
 * Attach a suitable NOT NULL qual to the IndexPath, or each of the member
 * IndexPaths.  Note we assume we can modify the paths in-place.
 */
static void
add_notnull_qual(PlannerInfo *root, RelOptInfo *rel, PrivateMMAggInfo *info,
				 Path *path)
{
	if (IsA(path, IndexPath))
	{
		IndexPath  *ipath = (IndexPath *) path;
		Expr	   *target;
		NullTest   *ntest;
		RestrictInfo *rinfo;
		List	   *newquals;
		bool		found_clause;

		/*
		 * If we are looking at a child of the original rel, we have to adjust
		 * the agg target expression to match the child.
		 */
		if (ipath->path.parent != rel)
		{
			AppendRelInfo *appinfo = NULL;
			ListCell *lc;

			/* Search for the appropriate AppendRelInfo */
			foreach(lc, root->append_rel_list)
			{
				appinfo = (AppendRelInfo *) lfirst(lc);
				if (appinfo->parent_relid == rel->relid &&
					appinfo->child_relid == ipath->path.parent->relid)
					break;
				appinfo = NULL;
			}
			if (!appinfo)
				elog(ERROR, "failed to find AppendRelInfo for child rel");
			target = (Expr *)
				adjust_appendrel_attrs((Node *) info->mminfo->target,
									   appinfo);
		}
		else
		{
			/* Otherwise, just make a copy (may not be necessary) */
			target = copyObject(info->mminfo->target);
		}

		/* Build "target IS NOT NULL" expression */
		ntest = makeNode(NullTest);
		ntest->nulltesttype = IS_NOT_NULL;
		ntest->arg = target;
		/* we checked it wasn't a rowtype in find_minmax_aggs_walker */
		ntest->argisrow = false;

		/*
		 * We can skip adding the NOT NULL qual if it duplicates either an
		 * already-given index condition, or a clause of the index predicate.
		 */
		if (list_member(get_actual_clauses(ipath->indexquals), ntest) ||
			list_member(ipath->indexinfo->indpred, ntest))
			return;

		/* Wrap it in a RestrictInfo and prepend to existing indexquals */
		rinfo = make_restrictinfo((Expr *) ntest,
								  true,
								  false,
								  false,
								  NULL,
								  NULL);

		newquals = list_concat(list_make1(rinfo), ipath->indexquals);

		/*
		 * We can't just stick the IS NOT NULL at the front of the list,
		 * though.  It has to go in the right position corresponding to its
		 * index column, which might not be the first one.  Easiest way to fix
		 * this is to run the quals through group_clauses_by_indexkey again.
		 */
		newquals = group_clauses_by_indexkey(ipath->indexinfo,
											 newquals,
											 NIL,
											 NULL,
											 SAOP_FORBID,
											 &found_clause);

		newquals = flatten_clausegroups_list(newquals);

		/* Trouble if we lost any quals */
		if (list_length(newquals) != list_length(ipath->indexquals) + 1)
			elog(ERROR, "add_notnull_qual failed to add NOT NULL qual");

		/*
		 * And update the path's indexquals.  Note we don't bother adding
		 * to indexclauses, which is OK since this is like a generated
		 * index qual.
		 */
		ipath->indexquals = newquals;
	}
	else if (IsA(path, MergeAppendPath))
	{
		MergeAppendPath *mpath = (MergeAppendPath *) path;
		ListCell   *lc;

		foreach(lc, mpath->subpaths)
		{
			add_notnull_qual(root, rel, info, (Path *) lfirst(lc));
		}
	}
	else
	{
		/* shouldn't get here, because of path_usable_for_agg checks */
		elog(ERROR, "add_notnull_qual failed");
	}
}

/*
 * Replace original aggregate calls with subplan output Params
 */
static Node *
replace_aggs_with_params_mutator(Node *node, List **context)
{
	if (node == NULL)
		return NULL;
	if (IsA(node, Aggref))
	{
		Aggref	   *aggref = (Aggref *) node;
		TargetEntry *curTarget = (TargetEntry *) linitial(aggref->args);
		ListCell   *l;

		foreach(l, *context)
		{
			PrivateMMAggInfo *info = (PrivateMMAggInfo *) lfirst(l);

			if (info->mminfo->aggfnoid == aggref->aggfnoid &&
				equal(info->mminfo->target, curTarget->expr))
				return (Node *) info->param;
		}
		elog(ERROR, "failed to re-find PrivateMMAggInfo record");
	}
	Assert(!IsA(node, SubLink));
	return expression_tree_mutator(node, replace_aggs_with_params_mutator,
								   (void *) context);
}

/*
 * Get the OID of the sort operator, if any, associated with an aggregate.
 * Returns InvalidOid if there is no such operator.
 */
static Oid
fetch_agg_sort_op(Oid aggfnoid)
{
	HeapTuple	aggTuple;
	Form_pg_aggregate aggform;
	Oid			aggsortop;

	/* fetch aggregate entry from pg_aggregate */
	aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
	if (!HeapTupleIsValid(aggTuple))
		return InvalidOid;
	aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
	aggsortop = aggform->aggsortop;
	ReleaseSysCache(aggTuple);

	return aggsortop;
}