diff options
Diffstat (limited to 'src/backend/partitioning/partbounds.c')
-rw-r--r-- | src/backend/partitioning/partbounds.c | 2315 |
1 files changed, 2315 insertions, 0 deletions
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c new file mode 100644 index 00000000000..229ce3ff5c9 --- /dev/null +++ b/src/backend/partitioning/partbounds.c @@ -0,0 +1,2315 @@ +/*------------------------------------------------------------------------- + * + * partbounds.c + * Support routines for manipulating partition bounds + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/partitioning/partbounds.c + * + *------------------------------------------------------------------------- +*/ +#include "postgres.h" + +#include "catalog/partition.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_type.h" +#include "commands/tablecmds.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "parser/parse_coerce.h" +#include "partitioning/partprune.h" +#include "partitioning/partbounds.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/fmgroids.h" +#include "utils/hashutils.h" +#include "utils/lsyscache.h" +#include "utils/partcache.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/ruleutils.h" +#include "utils/syscache.h" + +static int get_partition_bound_num_indexes(PartitionBoundInfo b); +static Expr *make_partition_op_expr(PartitionKey key, int keynum, + uint16 strategy, Expr *arg1, Expr *arg2); +static Oid get_partition_operator(PartitionKey key, int col, + StrategyNumber strategy, bool *need_relabel); +static List *get_qual_for_hash(Relation parent, PartitionBoundSpec *spec); +static List *get_qual_for_list(Relation parent, PartitionBoundSpec *spec); +static List *get_qual_for_range(Relation parent, PartitionBoundSpec *spec, + bool for_default); +static void get_range_key_properties(PartitionKey key, int keynum, + PartitionRangeDatum *ldatum, + PartitionRangeDatum *udatum, + ListCell **partexprs_item, + Expr **keyCol, + Const **lower_val, Const **upper_val); +static List *get_range_nulltest(PartitionKey key); + +/* + * get_qual_from_partbound + * Given a parser node for partition bound, return the list of executable + * expressions as partition constraint + */ +List * +get_qual_from_partbound(Relation rel, Relation parent, + PartitionBoundSpec *spec) +{ + PartitionKey key = RelationGetPartitionKey(parent); + List *my_qual = NIL; + + Assert(key != NULL); + + switch (key->strategy) + { + case PARTITION_STRATEGY_HASH: + Assert(spec->strategy == PARTITION_STRATEGY_HASH); + my_qual = get_qual_for_hash(parent, spec); + break; + + case PARTITION_STRATEGY_LIST: + Assert(spec->strategy == PARTITION_STRATEGY_LIST); + my_qual = get_qual_for_list(parent, spec); + break; + + case PARTITION_STRATEGY_RANGE: + Assert(spec->strategy == PARTITION_STRATEGY_RANGE); + my_qual = get_qual_for_range(parent, spec, false); + break; + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + return my_qual; +} + +/* + * Are two partition bound collections logically equal? + * + * Used in the keep logic of relcache.c (ie, in RelationClearRelation()). + * This is also useful when b1 and b2 are bound collections of two separate + * relations, respectively, because PartitionBoundInfo is a canonical + * representation of partition bounds. + */ +bool +partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval, + PartitionBoundInfo b1, PartitionBoundInfo b2) +{ + int i; + + if (b1->strategy != b2->strategy) + return false; + + if (b1->ndatums != b2->ndatums) + return false; + + if (b1->null_index != b2->null_index) + return false; + + if (b1->default_index != b2->default_index) + return false; + + if (b1->strategy == PARTITION_STRATEGY_HASH) + { + int greatest_modulus = get_hash_partition_greatest_modulus(b1); + + /* + * If two hash partitioned tables have different greatest moduli, + * their partition schemes don't match. + */ + if (greatest_modulus != get_hash_partition_greatest_modulus(b2)) + return false; + + /* + * We arrange the partitions in the ascending order of their modulus + * and remainders. Also every modulus is factor of next larger + * modulus. Therefore we can safely store index of a given partition + * in indexes array at remainder of that partition. Also entries at + * (remainder + N * modulus) positions in indexes array are all same + * for (modulus, remainder) specification for any partition. Thus + * datums array from both the given bounds are same, if and only if + * their indexes array will be same. So, it suffices to compare + * indexes array. + */ + for (i = 0; i < greatest_modulus; i++) + if (b1->indexes[i] != b2->indexes[i]) + return false; + +#ifdef USE_ASSERT_CHECKING + + /* + * Nonetheless make sure that the bounds are indeed same when the + * indexes match. Hash partition bound stores modulus and remainder + * at b1->datums[i][0] and b1->datums[i][1] position respectively. + */ + for (i = 0; i < b1->ndatums; i++) + Assert((b1->datums[i][0] == b2->datums[i][0] && + b1->datums[i][1] == b2->datums[i][1])); +#endif + } + else + { + for (i = 0; i < b1->ndatums; i++) + { + int j; + + for (j = 0; j < partnatts; j++) + { + /* For range partitions, the bounds might not be finite. */ + if (b1->kind != NULL) + { + /* The different kinds of bound all differ from each other */ + if (b1->kind[i][j] != b2->kind[i][j]) + return false; + + /* + * Non-finite bounds are equal without further + * examination. + */ + if (b1->kind[i][j] != PARTITION_RANGE_DATUM_VALUE) + continue; + } + + /* + * Compare the actual values. Note that it would be both + * incorrect and unsafe to invoke the comparison operator + * derived from the partitioning specification here. It would + * be incorrect because we want the relcache entry to be + * updated for ANY change to the partition bounds, not just + * those that the partitioning operator thinks are + * significant. It would be unsafe because we might reach + * this code in the context of an aborted transaction, and an + * arbitrary partitioning operator might not be safe in that + * context. datumIsEqual() should be simple enough to be + * safe. + */ + if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j], + parttypbyval[j], parttyplen[j])) + return false; + } + + if (b1->indexes[i] != b2->indexes[i]) + return false; + } + + /* There are ndatums+1 indexes in case of range partitions */ + if (b1->strategy == PARTITION_STRATEGY_RANGE && + b1->indexes[i] != b2->indexes[i]) + return false; + } + return true; +} + +/* + * Return a copy of given PartitionBoundInfo structure. The data types of bounds + * are described by given partition key specification. + */ +PartitionBoundInfo +partition_bounds_copy(PartitionBoundInfo src, + PartitionKey key) +{ + PartitionBoundInfo dest; + int i; + int ndatums; + int partnatts; + int num_indexes; + + dest = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData)); + + dest->strategy = src->strategy; + ndatums = dest->ndatums = src->ndatums; + partnatts = key->partnatts; + + num_indexes = get_partition_bound_num_indexes(src); + + /* List partitioned tables have only a single partition key. */ + Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1); + + dest->datums = (Datum **) palloc(sizeof(Datum *) * ndatums); + + if (src->kind != NULL) + { + dest->kind = (PartitionRangeDatumKind **) palloc(ndatums * + sizeof(PartitionRangeDatumKind *)); + for (i = 0; i < ndatums; i++) + { + dest->kind[i] = (PartitionRangeDatumKind *) palloc(partnatts * + sizeof(PartitionRangeDatumKind)); + + memcpy(dest->kind[i], src->kind[i], + sizeof(PartitionRangeDatumKind) * key->partnatts); + } + } + else + dest->kind = NULL; + + for (i = 0; i < ndatums; i++) + { + int j; + + /* + * For a corresponding to hash partition, datums array will have two + * elements - modulus and remainder. + */ + bool hash_part = (key->strategy == PARTITION_STRATEGY_HASH); + int natts = hash_part ? 2 : partnatts; + + dest->datums[i] = (Datum *) palloc(sizeof(Datum) * natts); + + for (j = 0; j < natts; j++) + { + bool byval; + int typlen; + + if (hash_part) + { + typlen = sizeof(int32); /* Always int4 */ + byval = true; /* int4 is pass-by-value */ + } + else + { + byval = key->parttypbyval[j]; + typlen = key->parttyplen[j]; + } + + if (dest->kind == NULL || + dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE) + dest->datums[i][j] = datumCopy(src->datums[i][j], + byval, typlen); + } + } + + dest->indexes = (int *) palloc(sizeof(int) * num_indexes); + memcpy(dest->indexes, src->indexes, sizeof(int) * num_indexes); + + dest->null_index = src->null_index; + dest->default_index = src->default_index; + + return dest; +} + +/* + * check_new_partition_bound + * + * Checks if the new partition's bound overlaps any of the existing partitions + * of parent. Also performs additional checks as necessary per strategy. + */ +void +check_new_partition_bound(char *relname, Relation parent, + PartitionBoundSpec *spec) +{ + PartitionKey key = RelationGetPartitionKey(parent); + PartitionDesc partdesc = RelationGetPartitionDesc(parent); + PartitionBoundInfo boundinfo = partdesc->boundinfo; + ParseState *pstate = make_parsestate(NULL); + int with = -1; + bool overlap = false; + + if (spec->is_default) + { + /* + * The default partition bound never conflicts with any other + * partition's; if that's what we're attaching, the only possible + * problem is that one already exists, so check for that and we're + * done. + */ + if (boundinfo == NULL || !partition_bound_has_default(boundinfo)) + return; + + /* Default partition already exists, error out. */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("partition \"%s\" conflicts with existing default partition \"%s\"", + relname, get_rel_name(partdesc->oids[boundinfo->default_index])), + parser_errposition(pstate, spec->location))); + } + + switch (key->strategy) + { + case PARTITION_STRATEGY_HASH: + { + Assert(spec->strategy == PARTITION_STRATEGY_HASH); + Assert(spec->remainder >= 0 && spec->remainder < spec->modulus); + + if (partdesc->nparts > 0) + { + Datum **datums = boundinfo->datums; + int ndatums = boundinfo->ndatums; + int greatest_modulus; + int remainder; + int offset; + bool valid_modulus = true; + int prev_modulus, /* Previous largest modulus */ + next_modulus; /* Next largest modulus */ + + /* + * Check rule that every modulus must be a factor of the + * next larger modulus. For example, if you have a bunch + * of partitions that all have modulus 5, you can add a + * new partition with modulus 10 or a new partition with + * modulus 15, but you cannot add both a partition with + * modulus 10 and a partition with modulus 15, because 10 + * is not a factor of 15. + * + * Get the greatest (modulus, remainder) pair contained in + * boundinfo->datums that is less than or equal to the + * (spec->modulus, spec->remainder) pair. + */ + offset = partition_hash_bsearch(boundinfo, + spec->modulus, + spec->remainder); + if (offset < 0) + { + next_modulus = DatumGetInt32(datums[0][0]); + valid_modulus = (next_modulus % spec->modulus) == 0; + } + else + { + prev_modulus = DatumGetInt32(datums[offset][0]); + valid_modulus = (spec->modulus % prev_modulus) == 0; + + if (valid_modulus && (offset + 1) < ndatums) + { + next_modulus = DatumGetInt32(datums[offset + 1][0]); + valid_modulus = (next_modulus % spec->modulus) == 0; + } + } + + if (!valid_modulus) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("every hash partition modulus must be a factor of the next larger modulus"))); + + greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); + remainder = spec->remainder; + + /* + * Normally, the lowest remainder that could conflict with + * the new partition is equal to the remainder specified + * for the new partition, but when the new partition has a + * modulus higher than any used so far, we need to adjust. + */ + if (remainder >= greatest_modulus) + remainder = remainder % greatest_modulus; + + /* Check every potentially-conflicting remainder. */ + do + { + if (boundinfo->indexes[remainder] != -1) + { + overlap = true; + with = boundinfo->indexes[remainder]; + break; + } + remainder += spec->modulus; + } while (remainder < greatest_modulus); + } + + break; + } + + case PARTITION_STRATEGY_LIST: + { + Assert(spec->strategy == PARTITION_STRATEGY_LIST); + + if (partdesc->nparts > 0) + { + ListCell *cell; + + Assert(boundinfo && + boundinfo->strategy == PARTITION_STRATEGY_LIST && + (boundinfo->ndatums > 0 || + partition_bound_accepts_nulls(boundinfo) || + partition_bound_has_default(boundinfo))); + + foreach(cell, spec->listdatums) + { + Const *val = castNode(Const, lfirst(cell)); + + if (!val->constisnull) + { + int offset; + bool equal; + + offset = partition_list_bsearch(&key->partsupfunc[0], + key->partcollation, + boundinfo, + val->constvalue, + &equal); + if (offset >= 0 && equal) + { + overlap = true; + with = boundinfo->indexes[offset]; + break; + } + } + else if (partition_bound_accepts_nulls(boundinfo)) + { + overlap = true; + with = boundinfo->null_index; + break; + } + } + } + + break; + } + + case PARTITION_STRATEGY_RANGE: + { + PartitionRangeBound *lower, + *upper; + + Assert(spec->strategy == PARTITION_STRATEGY_RANGE); + lower = make_one_range_bound(key, -1, spec->lowerdatums, true); + upper = make_one_range_bound(key, -1, spec->upperdatums, false); + + /* + * First check if the resulting range would be empty with + * specified lower and upper bounds + */ + if (partition_rbound_cmp(key->partnatts, key->partsupfunc, + key->partcollation, lower->datums, + lower->kind, true, upper) >= 0) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("empty range bound specified for partition \"%s\"", + relname), + errdetail("Specified lower bound %s is greater than or equal to upper bound %s.", + get_range_partbound_string(spec->lowerdatums), + get_range_partbound_string(spec->upperdatums)), + parser_errposition(pstate, spec->location))); + } + + if (partdesc->nparts > 0) + { + int offset; + bool equal; + + Assert(boundinfo && + boundinfo->strategy == PARTITION_STRATEGY_RANGE && + (boundinfo->ndatums > 0 || + partition_bound_has_default(boundinfo))); + + /* + * Test whether the new lower bound (which is treated + * inclusively as part of the new partition) lies inside + * an existing partition, or in a gap. + * + * If it's inside an existing partition, the bound at + * offset + 1 will be the upper bound of that partition, + * and its index will be >= 0. + * + * If it's in a gap, the bound at offset + 1 will be the + * lower bound of the next partition, and its index will + * be -1. This is also true if there is no next partition, + * since the index array is initialised with an extra -1 + * at the end. + */ + offset = partition_range_bsearch(key->partnatts, + key->partsupfunc, + key->partcollation, + boundinfo, lower, + &equal); + + if (boundinfo->indexes[offset + 1] < 0) + { + /* + * Check that the new partition will fit in the gap. + * For it to fit, the new upper bound must be less + * than or equal to the lower bound of the next + * partition, if there is one. + */ + if (offset + 1 < boundinfo->ndatums) + { + int32 cmpval; + Datum *datums; + PartitionRangeDatumKind *kind; + bool is_lower; + + datums = boundinfo->datums[offset + 1]; + kind = boundinfo->kind[offset + 1]; + is_lower = (boundinfo->indexes[offset + 1] == -1); + + cmpval = partition_rbound_cmp(key->partnatts, + key->partsupfunc, + key->partcollation, + datums, kind, + is_lower, upper); + if (cmpval < 0) + { + /* + * The new partition overlaps with the + * existing partition between offset + 1 and + * offset + 2. + */ + overlap = true; + with = boundinfo->indexes[offset + 2]; + } + } + } + else + { + /* + * The new partition overlaps with the existing + * partition between offset and offset + 1. + */ + overlap = true; + with = boundinfo->indexes[offset + 1]; + } + } + + break; + } + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + if (overlap) + { + Assert(with >= 0); + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("partition \"%s\" would overlap partition \"%s\"", + relname, get_rel_name(partdesc->oids[with])), + parser_errposition(pstate, spec->location))); + } +} + +/* + * check_default_allows_bound + * + * This function checks if there exists a row in the default partition that + * would properly belong to the new partition being added. If it finds one, + * it throws an error. + */ +void +check_default_allows_bound(Relation parent, Relation default_rel, + PartitionBoundSpec *new_spec) +{ + List *new_part_constraints; + List *def_part_constraints; + List *all_parts; + ListCell *lc; + + new_part_constraints = (new_spec->strategy == PARTITION_STRATEGY_LIST) + ? get_qual_for_list(parent, new_spec) + : get_qual_for_range(parent, new_spec, false); + def_part_constraints = + get_proposed_default_constraint(new_part_constraints); + + /* + * If the existing constraints on the default partition imply that it will + * not contain any row that would belong to the new partition, we can + * avoid scanning the default partition. + */ + if (PartConstraintImpliedByRelConstraint(default_rel, def_part_constraints)) + { + ereport(INFO, + (errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints", + RelationGetRelationName(default_rel)))); + return; + } + + /* + * Scan the default partition and its subpartitions, and check for rows + * that do not satisfy the revised partition constraints. + */ + if (default_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + all_parts = find_all_inheritors(RelationGetRelid(default_rel), + AccessExclusiveLock, NULL); + else + all_parts = list_make1_oid(RelationGetRelid(default_rel)); + + foreach(lc, all_parts) + { + Oid part_relid = lfirst_oid(lc); + Relation part_rel; + Expr *constr; + Expr *partition_constraint; + EState *estate; + HeapTuple tuple; + ExprState *partqualstate = NULL; + Snapshot snapshot; + TupleDesc tupdesc; + ExprContext *econtext; + HeapScanDesc scan; + MemoryContext oldCxt; + TupleTableSlot *tupslot; + + /* Lock already taken above. */ + if (part_relid != RelationGetRelid(default_rel)) + { + part_rel = heap_open(part_relid, NoLock); + + /* + * If the partition constraints on default partition child imply + * that it will not contain any row that would belong to the new + * partition, we can avoid scanning the child table. + */ + if (PartConstraintImpliedByRelConstraint(part_rel, + def_part_constraints)) + { + ereport(INFO, + (errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints", + RelationGetRelationName(part_rel)))); + + heap_close(part_rel, NoLock); + continue; + } + } + else + part_rel = default_rel; + + /* + * Only RELKIND_RELATION relations (i.e. leaf partitions) need to be + * scanned. + */ + if (part_rel->rd_rel->relkind != RELKIND_RELATION) + { + if (part_rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + ereport(WARNING, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("skipped scanning foreign table \"%s\" which is a partition of default partition \"%s\"", + RelationGetRelationName(part_rel), + RelationGetRelationName(default_rel)))); + + if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel)) + heap_close(part_rel, NoLock); + + continue; + } + + tupdesc = CreateTupleDescCopy(RelationGetDescr(part_rel)); + constr = linitial(def_part_constraints); + partition_constraint = (Expr *) + map_partition_varattnos((List *) constr, + 1, part_rel, parent, NULL); + estate = CreateExecutorState(); + + /* Build expression execution states for partition check quals */ + partqualstate = ExecPrepareExpr(partition_constraint, estate); + + econtext = GetPerTupleExprContext(estate); + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = heap_beginscan(part_rel, snapshot, 0, NULL); + tupslot = MakeSingleTupleTableSlot(tupdesc); + + /* + * Switch to per-tuple memory context and reset it for each tuple + * produced, so we don't leak memory. + */ + oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); + + while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + { + ExecStoreTuple(tuple, tupslot, InvalidBuffer, false); + econtext->ecxt_scantuple = tupslot; + + if (!ExecCheck(partqualstate, econtext)) + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("updated partition constraint for default partition \"%s\" would be violated by some row", + RelationGetRelationName(default_rel)))); + + ResetExprContext(econtext); + CHECK_FOR_INTERRUPTS(); + } + + MemoryContextSwitchTo(oldCxt); + heap_endscan(scan); + UnregisterSnapshot(snapshot); + ExecDropSingleTupleTableSlot(tupslot); + FreeExecutorState(estate); + + if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel)) + heap_close(part_rel, NoLock); /* keep the lock until commit */ + } +} + +/* + * get_hash_partition_greatest_modulus + * + * Returns the greatest modulus of the hash partition bound. The greatest + * modulus will be at the end of the datums array because hash partitions are + * arranged in the ascending order of their modulus and remainders. + */ +int +get_hash_partition_greatest_modulus(PartitionBoundInfo bound) +{ + Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH); + Assert(bound->datums && bound->ndatums > 0); + Assert(DatumGetInt32(bound->datums[bound->ndatums - 1][0]) > 0); + + return DatumGetInt32(bound->datums[bound->ndatums - 1][0]); +} + +/* + * make_one_range_bound + * + * Return a PartitionRangeBound given a list of PartitionRangeDatum elements + * and a flag telling whether the bound is lower or not. Made into a function + * because there are multiple sites that want to use this facility. + */ +PartitionRangeBound * +make_one_range_bound(PartitionKey key, int index, List *datums, bool lower) +{ + PartitionRangeBound *bound; + ListCell *lc; + int i; + + Assert(datums != NIL); + + bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound)); + bound->index = index; + bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum)); + bound->kind = (PartitionRangeDatumKind *) palloc0(key->partnatts * + sizeof(PartitionRangeDatumKind)); + bound->lower = lower; + + i = 0; + foreach(lc, datums) + { + PartitionRangeDatum *datum = castNode(PartitionRangeDatum, lfirst(lc)); + + /* What's contained in this range datum? */ + bound->kind[i] = datum->kind; + + if (datum->kind == PARTITION_RANGE_DATUM_VALUE) + { + Const *val = castNode(Const, datum->value); + + if (val->constisnull) + elog(ERROR, "invalid range bound datum"); + bound->datums[i] = val->constvalue; + } + + i++; + } + + return bound; +} + +/* + * partition_rbound_cmp + * + * Return for two range bounds whether the 1st one (specified in datums1, + * kind1, and lower1) is <, =, or > the bound specified in *b2. + * + * partnatts, partsupfunc and partcollation give the number of attributes in the + * bounds to be compared, comparison function to be used and the collations of + * attributes, respectively. + * + * Note that if the values of the two range bounds compare equal, then we take + * into account whether they are upper or lower bounds, and an upper bound is + * considered to be smaller than a lower bound. This is important to the way + * that RelationBuildPartitionDesc() builds the PartitionBoundInfoData + * structure, which only stores the upper bound of a common boundary between + * two contiguous partitions. + */ +int32 +partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc, + Oid *partcollation, + Datum *datums1, PartitionRangeDatumKind *kind1, + bool lower1, PartitionRangeBound *b2) +{ + int32 cmpval = 0; /* placate compiler */ + int i; + Datum *datums2 = b2->datums; + PartitionRangeDatumKind *kind2 = b2->kind; + bool lower2 = b2->lower; + + for (i = 0; i < partnatts; i++) + { + /* + * First, handle cases where the column is unbounded, which should not + * invoke the comparison procedure, and should not consider any later + * columns. Note that the PartitionRangeDatumKind enum elements + * compare the same way as the values they represent. + */ + if (kind1[i] < kind2[i]) + return -1; + else if (kind1[i] > kind2[i]) + return 1; + else if (kind1[i] != PARTITION_RANGE_DATUM_VALUE) + + /* + * The column bounds are both MINVALUE or both MAXVALUE. No later + * columns should be considered, but we still need to compare + * whether they are upper or lower bounds. + */ + break; + + cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i], + partcollation[i], + datums1[i], + datums2[i])); + if (cmpval != 0) + break; + } + + /* + * If the comparison is anything other than equal, we're done. If they + * compare equal though, we still have to consider whether the boundaries + * are inclusive or exclusive. Exclusive one is considered smaller of the + * two. + */ + if (cmpval == 0 && lower1 != lower2) + cmpval = lower1 ? 1 : -1; + + return cmpval; +} + +/* + * partition_rbound_datum_cmp + * + * Return whether range bound (specified in rb_datums, rb_kind, and rb_lower) + * is <, =, or > partition key of tuple (tuple_datums) + * + * n_tuple_datums, partsupfunc and partcollation give number of attributes in + * the bounds to be compared, comparison function to be used and the collations + * of attributes resp. + * + */ +int32 +partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation, + Datum *rb_datums, PartitionRangeDatumKind *rb_kind, + Datum *tuple_datums, int n_tuple_datums) +{ + int i; + int32 cmpval = -1; + + for (i = 0; i < n_tuple_datums; i++) + { + if (rb_kind[i] == PARTITION_RANGE_DATUM_MINVALUE) + return -1; + else if (rb_kind[i] == PARTITION_RANGE_DATUM_MAXVALUE) + return 1; + + cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i], + partcollation[i], + rb_datums[i], + tuple_datums[i])); + if (cmpval != 0) + break; + } + + return cmpval; +} + +/* + * partition_hbound_cmp + * + * Compares modulus first, then remainder if modulus are equal. + */ +int32 +partition_hbound_cmp(int modulus1, int remainder1, int modulus2, int remainder2) +{ + if (modulus1 < modulus2) + return -1; + if (modulus1 > modulus2) + return 1; + if (modulus1 == modulus2 && remainder1 != remainder2) + return (remainder1 > remainder2) ? 1 : -1; + return 0; +} + +/* + * partition_list_bsearch + * Returns the index of the greatest bound datum that is less than equal + * to the given value or -1 if all of the bound datums are greater + * + * *is_equal is set to true if the bound datum at the returned index is equal + * to the input value. + */ +int +partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, + PartitionBoundInfo boundinfo, + Datum value, bool *is_equal) +{ + int lo, + hi, + mid; + + lo = -1; + hi = boundinfo->ndatums - 1; + while (lo < hi) + { + int32 cmpval; + + mid = (lo + hi + 1) / 2; + cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0], + partcollation[0], + boundinfo->datums[mid][0], + value)); + if (cmpval <= 0) + { + lo = mid; + *is_equal = (cmpval == 0); + if (*is_equal) + break; + } + else + hi = mid - 1; + } + + return lo; +} + +/* + * partition_range_bsearch + * Returns the index of the greatest range bound that is less than or + * equal to the given range bound or -1 if all of the range bounds are + * greater + * + * *is_equal is set to true if the range bound at the returned index is equal + * to the input range bound + */ +int +partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc, + Oid *partcollation, + PartitionBoundInfo boundinfo, + PartitionRangeBound *probe, bool *is_equal) +{ + int lo, + hi, + mid; + + lo = -1; + hi = boundinfo->ndatums - 1; + while (lo < hi) + { + int32 cmpval; + + mid = (lo + hi + 1) / 2; + cmpval = partition_rbound_cmp(partnatts, partsupfunc, + partcollation, + boundinfo->datums[mid], + boundinfo->kind[mid], + (boundinfo->indexes[mid] == -1), + probe); + if (cmpval <= 0) + { + lo = mid; + *is_equal = (cmpval == 0); + + if (*is_equal) + break; + } + else + hi = mid - 1; + } + + return lo; +} + +/* + * partition_range_bsearch + * Returns the index of the greatest range bound that is less than or + * equal to the given tuple or -1 if all of the range bounds are greater + * + * *is_equal is set to true if the range bound at the returned index is equal + * to the input tuple. + */ +int +partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, + PartitionBoundInfo boundinfo, + int nvalues, Datum *values, bool *is_equal) +{ + int lo, + hi, + mid; + + lo = -1; + hi = boundinfo->ndatums - 1; + while (lo < hi) + { + int32 cmpval; + + mid = (lo + hi + 1) / 2; + cmpval = partition_rbound_datum_cmp(partsupfunc, + partcollation, + boundinfo->datums[mid], + boundinfo->kind[mid], + values, + nvalues); + if (cmpval <= 0) + { + lo = mid; + *is_equal = (cmpval == 0); + + if (*is_equal) + break; + } + else + hi = mid - 1; + } + + return lo; +} + +/* + * partition_hash_bsearch + * Returns the index of the greatest (modulus, remainder) pair that is + * less than or equal to the given (modulus, remainder) pair or -1 if + * all of them are greater + */ +int +partition_hash_bsearch(PartitionBoundInfo boundinfo, + int modulus, int remainder) +{ + int lo, + hi, + mid; + + lo = -1; + hi = boundinfo->ndatums - 1; + while (lo < hi) + { + int32 cmpval, + bound_modulus, + bound_remainder; + + mid = (lo + hi + 1) / 2; + bound_modulus = DatumGetInt32(boundinfo->datums[mid][0]); + bound_remainder = DatumGetInt32(boundinfo->datums[mid][1]); + cmpval = partition_hbound_cmp(bound_modulus, bound_remainder, + modulus, remainder); + if (cmpval <= 0) + { + lo = mid; + + if (cmpval == 0) + break; + } + else + hi = mid - 1; + } + + return lo; +} + +/* + * get_partition_bound_num_indexes + * + * Returns the number of the entries in the partition bound indexes array. + */ +static int +get_partition_bound_num_indexes(PartitionBoundInfo bound) +{ + int num_indexes; + + Assert(bound); + + switch (bound->strategy) + { + case PARTITION_STRATEGY_HASH: + + /* + * The number of the entries in the indexes array is same as the + * greatest modulus. + */ + num_indexes = get_hash_partition_greatest_modulus(bound); + break; + + case PARTITION_STRATEGY_LIST: + num_indexes = bound->ndatums; + break; + + case PARTITION_STRATEGY_RANGE: + /* Range partitioned table has an extra index. */ + num_indexes = bound->ndatums + 1; + break; + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) bound->strategy); + } + + return num_indexes; +} + +/* + * get_partition_operator + * + * Return oid of the operator of given strategy for a given partition key + * column. + */ +static Oid +get_partition_operator(PartitionKey key, int col, StrategyNumber strategy, + bool *need_relabel) +{ + Oid operoid; + + /* + * First check if there exists an operator of the given strategy, with + * this column's type as both its lefttype and righttype, in the + * partitioning operator family specified for the column. + */ + operoid = get_opfamily_member(key->partopfamily[col], + key->parttypid[col], + key->parttypid[col], + strategy); + + /* + * If one doesn't exist, we must resort to using an operator in the same + * operator family but with the operator class declared input type. It is + * OK to do so, because the column's type is known to be binary-coercible + * with the operator class input type (otherwise, the operator class in + * question would not have been accepted as the partitioning operator + * class). We must however inform the caller to wrap the non-Const + * expression with a RelabelType node to denote the implicit coercion. It + * ensures that the resulting expression structurally matches similarly + * processed expressions within the optimizer. + */ + if (!OidIsValid(operoid)) + { + operoid = get_opfamily_member(key->partopfamily[col], + key->partopcintype[col], + key->partopcintype[col], + strategy); + if (!OidIsValid(operoid)) + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + strategy, key->partopcintype[col], key->partopcintype[col], + key->partopfamily[col]); + *need_relabel = true; + } + else + *need_relabel = false; + + return operoid; +} + +/* + * make_partition_op_expr + * Returns an Expr for the given partition key column with arg1 and + * arg2 as its leftop and rightop, respectively + */ +static Expr * +make_partition_op_expr(PartitionKey key, int keynum, + uint16 strategy, Expr *arg1, Expr *arg2) +{ + Oid operoid; + bool need_relabel = false; + Expr *result = NULL; + + /* Get the correct btree operator for this partitioning column */ + operoid = get_partition_operator(key, keynum, strategy, &need_relabel); + + /* + * Chosen operator may be such that the non-Const operand needs to be + * coerced, so apply the same; see the comment in + * get_partition_operator(). + */ + if (!IsA(arg1, Const) && + (need_relabel || + key->partcollation[keynum] != key->parttypcoll[keynum])) + arg1 = (Expr *) makeRelabelType(arg1, + key->partopcintype[keynum], + -1, + key->partcollation[keynum], + COERCE_EXPLICIT_CAST); + + /* Generate the actual expression */ + switch (key->strategy) + { + case PARTITION_STRATEGY_LIST: + { + List *elems = (List *) arg2; + int nelems = list_length(elems); + + Assert(nelems >= 1); + Assert(keynum == 0); + + if (nelems > 1 && + !type_is_array(key->parttypid[keynum])) + { + ArrayExpr *arrexpr; + ScalarArrayOpExpr *saopexpr; + + /* Construct an ArrayExpr for the right-hand inputs */ + arrexpr = makeNode(ArrayExpr); + arrexpr->array_typeid = + get_array_type(key->parttypid[keynum]); + arrexpr->array_collid = key->parttypcoll[keynum]; + arrexpr->element_typeid = key->parttypid[keynum]; + arrexpr->elements = elems; + arrexpr->multidims = false; + arrexpr->location = -1; + + /* Build leftop = ANY (rightop) */ + saopexpr = makeNode(ScalarArrayOpExpr); + saopexpr->opno = operoid; + saopexpr->opfuncid = get_opcode(operoid); + saopexpr->useOr = true; + saopexpr->inputcollid = key->partcollation[keynum]; + saopexpr->args = list_make2(arg1, arrexpr); + saopexpr->location = -1; + + result = (Expr *) saopexpr; + } + else + { + List *elemops = NIL; + ListCell *lc; + + foreach(lc, elems) + { + Expr *elem = lfirst(lc), + *elemop; + + elemop = make_opclause(operoid, + BOOLOID, + false, + arg1, elem, + InvalidOid, + key->partcollation[keynum]); + elemops = lappend(elemops, elemop); + } + + result = nelems > 1 ? makeBoolExpr(OR_EXPR, elemops, -1) : linitial(elemops); + } + break; + } + + case PARTITION_STRATEGY_RANGE: + result = make_opclause(operoid, + BOOLOID, + false, + arg1, arg2, + InvalidOid, + key->partcollation[keynum]); + break; + + default: + elog(ERROR, "invalid partitioning strategy"); + break; + } + + return result; +} + +/* + * get_qual_for_hash + * + * Returns a CHECK constraint expression to use as a hash partition's + * constraint, given the parent relation and partition bound structure. + * + * The partition constraint for a hash partition is always a call to the + * built-in function satisfies_hash_partition(). + */ +static List * +get_qual_for_hash(Relation parent, PartitionBoundSpec *spec) +{ + PartitionKey key = RelationGetPartitionKey(parent); + FuncExpr *fexpr; + Node *relidConst; + Node *modulusConst; + Node *remainderConst; + List *args; + ListCell *partexprs_item; + int i; + + /* Fixed arguments. */ + relidConst = (Node *) makeConst(OIDOID, + -1, + InvalidOid, + sizeof(Oid), + ObjectIdGetDatum(RelationGetRelid(parent)), + false, + true); + + modulusConst = (Node *) makeConst(INT4OID, + -1, + InvalidOid, + sizeof(int32), + Int32GetDatum(spec->modulus), + false, + true); + + remainderConst = (Node *) makeConst(INT4OID, + -1, + InvalidOid, + sizeof(int32), + Int32GetDatum(spec->remainder), + false, + true); + + args = list_make3(relidConst, modulusConst, remainderConst); + partexprs_item = list_head(key->partexprs); + + /* Add an argument for each key column. */ + for (i = 0; i < key->partnatts; i++) + { + Node *keyCol; + + /* Left operand */ + if (key->partattrs[i] != 0) + { + keyCol = (Node *) makeVar(1, + key->partattrs[i], + key->parttypid[i], + key->parttypmod[i], + key->parttypcoll[i], + 0); + } + else + { + keyCol = (Node *) copyObject(lfirst(partexprs_item)); + partexprs_item = lnext(partexprs_item); + } + + args = lappend(args, keyCol); + } + + fexpr = makeFuncExpr(F_SATISFIES_HASH_PARTITION, + BOOLOID, + args, + InvalidOid, + InvalidOid, + COERCE_EXPLICIT_CALL); + + return list_make1(fexpr); +} + +/* + * get_qual_for_list + * + * Returns an implicit-AND list of expressions to use as a list partition's + * constraint, given the parent relation and partition bound structure. + * + * The function returns NIL for a default partition when it's the only + * partition since in that case there is no constraint. + */ +static List * +get_qual_for_list(Relation parent, PartitionBoundSpec *spec) +{ + PartitionKey key = RelationGetPartitionKey(parent); + List *result; + Expr *keyCol; + Expr *opexpr; + NullTest *nulltest; + ListCell *cell; + List *elems = NIL; + bool list_has_null = false; + + /* + * Only single-column list partitioning is supported, so we are worried + * only about the partition key with index 0. + */ + Assert(key->partnatts == 1); + + /* Construct Var or expression representing the partition column */ + if (key->partattrs[0] != 0) + keyCol = (Expr *) makeVar(1, + key->partattrs[0], + key->parttypid[0], + key->parttypmod[0], + key->parttypcoll[0], + 0); + else + keyCol = (Expr *) copyObject(linitial(key->partexprs)); + + /* + * For default list partition, collect datums for all the partitions. The + * default partition constraint should check that the partition key is + * equal to none of those. + */ + if (spec->is_default) + { + int i; + int ndatums = 0; + PartitionDesc pdesc = RelationGetPartitionDesc(parent); + PartitionBoundInfo boundinfo = pdesc->boundinfo; + + if (boundinfo) + { + ndatums = boundinfo->ndatums; + + if (partition_bound_accepts_nulls(boundinfo)) + list_has_null = true; + } + + /* + * If default is the only partition, there need not be any partition + * constraint on it. + */ + if (ndatums == 0 && !list_has_null) + return NIL; + + for (i = 0; i < ndatums; i++) + { + Const *val; + + /* + * Construct Const from known-not-null datum. We must be careful + * to copy the value, because our result has to be able to outlive + * the relcache entry we're copying from. + */ + val = makeConst(key->parttypid[0], + key->parttypmod[0], + key->parttypcoll[0], + key->parttyplen[0], + datumCopy(*boundinfo->datums[i], + key->parttypbyval[0], + key->parttyplen[0]), + false, /* isnull */ + key->parttypbyval[0]); + + elems = lappend(elems, val); + } + } + else + { + /* + * Create list of Consts for the allowed values, excluding any nulls. + */ + foreach(cell, spec->listdatums) + { + Const *val = castNode(Const, lfirst(cell)); + + if (val->constisnull) + list_has_null = true; + else + elems = lappend(elems, copyObject(val)); + } + } + + if (elems) + { + /* + * Generate the operator expression from the non-null partition + * values. + */ + opexpr = make_partition_op_expr(key, 0, BTEqualStrategyNumber, + keyCol, (Expr *) elems); + } + else + { + /* + * If there are no partition values, we don't need an operator + * expression. + */ + opexpr = NULL; + } + + if (!list_has_null) + { + /* + * Gin up a "col IS NOT NULL" test that will be AND'd with the main + * expression. This might seem redundant, but the partition routing + * machinery needs it. + */ + nulltest = makeNode(NullTest); + nulltest->arg = keyCol; + nulltest->nulltesttype = IS_NOT_NULL; + nulltest->argisrow = false; + nulltest->location = -1; + + result = opexpr ? list_make2(nulltest, opexpr) : list_make1(nulltest); + } + else + { + /* + * Gin up a "col IS NULL" test that will be OR'd with the main + * expression. + */ + nulltest = makeNode(NullTest); + nulltest->arg = keyCol; + nulltest->nulltesttype = IS_NULL; + nulltest->argisrow = false; + nulltest->location = -1; + + if (opexpr) + { + Expr *or; + + or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1); + result = list_make1(or); + } + else + result = list_make1(nulltest); + } + + /* + * Note that, in general, applying NOT to a constraint expression doesn't + * necessarily invert the set of rows it accepts, because NOT (NULL) is + * NULL. However, the partition constraints we construct here never + * evaluate to NULL, so applying NOT works as intended. + */ + if (spec->is_default) + { + result = list_make1(make_ands_explicit(result)); + result = list_make1(makeBoolExpr(NOT_EXPR, result, -1)); + } + + return result; +} + +/* + * get_qual_for_range + * + * Returns an implicit-AND list of expressions to use as a range partition's + * constraint, given the parent relation and partition bound structure. + * + * For a multi-column range partition key, say (a, b, c), with (al, bl, cl) + * as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we + * generate an expression tree of the following form: + * + * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL) + * AND + * (a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl)) + * AND + * (a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu)) + * + * It is often the case that a prefix of lower and upper bound tuples contains + * the same values, for example, (al = au), in which case, we will emit an + * expression tree of the following form: + * + * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL) + * AND + * (a = al) + * AND + * (b > bl OR (b = bl AND c >= cl)) + * AND + * (b < bu) OR (b = bu AND c < cu)) + * + * If a bound datum is either MINVALUE or MAXVALUE, these expressions are + * simplified using the fact that any value is greater than MINVALUE and less + * than MAXVALUE. So, for example, if cu = MAXVALUE, c < cu is automatically + * true, and we need not emit any expression for it, and the last line becomes + * + * (b < bu) OR (b = bu), which is simplified to (b <= bu) + * + * In most common cases with only one partition column, say a, the following + * expression tree will be generated: a IS NOT NULL AND a >= al AND a < au + * + * For default partition, it returns the negation of the constraints of all + * the other partitions. + * + * External callers should pass for_default as false; we set it to true only + * when recursing. + */ +static List * +get_qual_for_range(Relation parent, PartitionBoundSpec *spec, + bool for_default) +{ + List *result = NIL; + ListCell *cell1, + *cell2, + *partexprs_item, + *partexprs_item_saved; + int i, + j; + PartitionRangeDatum *ldatum, + *udatum; + PartitionKey key = RelationGetPartitionKey(parent); + Expr *keyCol; + Const *lower_val, + *upper_val; + List *lower_or_arms, + *upper_or_arms; + int num_or_arms, + current_or_arm; + ListCell *lower_or_start_datum, + *upper_or_start_datum; + bool need_next_lower_arm, + need_next_upper_arm; + + if (spec->is_default) + { + List *or_expr_args = NIL; + PartitionDesc pdesc = RelationGetPartitionDesc(parent); + Oid *inhoids = pdesc->oids; + int nparts = pdesc->nparts, + i; + + for (i = 0; i < nparts; i++) + { + Oid inhrelid = inhoids[i]; + HeapTuple tuple; + Datum datum; + bool isnull; + PartitionBoundSpec *bspec; + + tuple = SearchSysCache1(RELOID, inhrelid); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", inhrelid); + + datum = SysCacheGetAttr(RELOID, tuple, + Anum_pg_class_relpartbound, + &isnull); + + Assert(!isnull); + bspec = (PartitionBoundSpec *) + stringToNode(TextDatumGetCString(datum)); + if (!IsA(bspec, PartitionBoundSpec)) + elog(ERROR, "expected PartitionBoundSpec"); + + if (!bspec->is_default) + { + List *part_qual; + + part_qual = get_qual_for_range(parent, bspec, true); + + /* + * AND the constraints of the partition and add to + * or_expr_args + */ + or_expr_args = lappend(or_expr_args, list_length(part_qual) > 1 + ? makeBoolExpr(AND_EXPR, part_qual, -1) + : linitial(part_qual)); + } + ReleaseSysCache(tuple); + } + + if (or_expr_args != NIL) + { + Expr *other_parts_constr; + + /* + * Combine the constraints obtained for non-default partitions + * using OR. As requested, each of the OR's args doesn't include + * the NOT NULL test for partition keys (which is to avoid its + * useless repetition). Add the same now. + */ + other_parts_constr = + makeBoolExpr(AND_EXPR, + lappend(get_range_nulltest(key), + list_length(or_expr_args) > 1 + ? makeBoolExpr(OR_EXPR, or_expr_args, + -1) + : linitial(or_expr_args)), + -1); + + /* + * Finally, the default partition contains everything *NOT* + * contained in the non-default partitions. + */ + result = list_make1(makeBoolExpr(NOT_EXPR, + list_make1(other_parts_constr), -1)); + } + + return result; + } + + lower_or_start_datum = list_head(spec->lowerdatums); + upper_or_start_datum = list_head(spec->upperdatums); + num_or_arms = key->partnatts; + + /* + * If it is the recursive call for default, we skip the get_range_nulltest + * to avoid accumulating the NullTest on the same keys for each partition. + */ + if (!for_default) + result = get_range_nulltest(key); + + /* + * Iterate over the key columns and check if the corresponding lower and + * upper datums are equal using the btree equality operator for the + * column's type. If equal, we emit single keyCol = common_value + * expression. Starting from the first column for which the corresponding + * lower and upper bound datums are not equal, we generate OR expressions + * as shown in the function's header comment. + */ + i = 0; + partexprs_item = list_head(key->partexprs); + partexprs_item_saved = partexprs_item; /* placate compiler */ + forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums) + { + EState *estate; + MemoryContext oldcxt; + Expr *test_expr; + ExprState *test_exprstate; + Datum test_result; + bool isNull; + + ldatum = castNode(PartitionRangeDatum, lfirst(cell1)); + udatum = castNode(PartitionRangeDatum, lfirst(cell2)); + + /* + * Since get_range_key_properties() modifies partexprs_item, and we + * might need to start over from the previous expression in the later + * part of this function, save away the current value. + */ + partexprs_item_saved = partexprs_item; + + get_range_key_properties(key, i, ldatum, udatum, + &partexprs_item, + &keyCol, + &lower_val, &upper_val); + + /* + * If either value is NULL, the corresponding partition bound is + * either MINVALUE or MAXVALUE, and we treat them as unequal, because + * even if they're the same, there is no common value to equate the + * key column with. + */ + if (!lower_val || !upper_val) + break; + + /* Create the test expression */ + estate = CreateExecutorState(); + oldcxt = MemoryContextSwitchTo(estate->es_query_cxt); + test_expr = make_partition_op_expr(key, i, BTEqualStrategyNumber, + (Expr *) lower_val, + (Expr *) upper_val); + fix_opfuncids((Node *) test_expr); + test_exprstate = ExecInitExpr(test_expr, NULL); + test_result = ExecEvalExprSwitchContext(test_exprstate, + GetPerTupleExprContext(estate), + &isNull); + MemoryContextSwitchTo(oldcxt); + FreeExecutorState(estate); + + /* If not equal, go generate the OR expressions */ + if (!DatumGetBool(test_result)) + break; + + /* + * The bounds for the last key column can't be equal, because such a + * range partition would never be allowed to be defined (it would have + * an empty range otherwise). + */ + if (i == key->partnatts - 1) + elog(ERROR, "invalid range bound specification"); + + /* Equal, so generate keyCol = lower_val expression */ + result = lappend(result, + make_partition_op_expr(key, i, BTEqualStrategyNumber, + keyCol, (Expr *) lower_val)); + + i++; + } + + /* First pair of lower_val and upper_val that are not equal. */ + lower_or_start_datum = cell1; + upper_or_start_datum = cell2; + + /* OR will have as many arms as there are key columns left. */ + num_or_arms = key->partnatts - i; + current_or_arm = 0; + lower_or_arms = upper_or_arms = NIL; + need_next_lower_arm = need_next_upper_arm = true; + while (current_or_arm < num_or_arms) + { + List *lower_or_arm_args = NIL, + *upper_or_arm_args = NIL; + + /* Restart scan of columns from the i'th one */ + j = i; + partexprs_item = partexprs_item_saved; + + for_both_cell(cell1, lower_or_start_datum, cell2, upper_or_start_datum) + { + PartitionRangeDatum *ldatum_next = NULL, + *udatum_next = NULL; + + ldatum = castNode(PartitionRangeDatum, lfirst(cell1)); + if (lnext(cell1)) + ldatum_next = castNode(PartitionRangeDatum, + lfirst(lnext(cell1))); + udatum = castNode(PartitionRangeDatum, lfirst(cell2)); + if (lnext(cell2)) + udatum_next = castNode(PartitionRangeDatum, + lfirst(lnext(cell2))); + get_range_key_properties(key, j, ldatum, udatum, + &partexprs_item, + &keyCol, + &lower_val, &upper_val); + + if (need_next_lower_arm && lower_val) + { + uint16 strategy; + + /* + * For the non-last columns of this arm, use the EQ operator. + * For the last column of this arm, use GT, unless this is the + * last column of the whole bound check, or the next bound + * datum is MINVALUE, in which case use GE. + */ + if (j - i < current_or_arm) + strategy = BTEqualStrategyNumber; + else if (j == key->partnatts - 1 || + (ldatum_next && + ldatum_next->kind == PARTITION_RANGE_DATUM_MINVALUE)) + strategy = BTGreaterEqualStrategyNumber; + else + strategy = BTGreaterStrategyNumber; + + lower_or_arm_args = lappend(lower_or_arm_args, + make_partition_op_expr(key, j, + strategy, + keyCol, + (Expr *) lower_val)); + } + + if (need_next_upper_arm && upper_val) + { + uint16 strategy; + + /* + * For the non-last columns of this arm, use the EQ operator. + * For the last column of this arm, use LT, unless the next + * bound datum is MAXVALUE, in which case use LE. + */ + if (j - i < current_or_arm) + strategy = BTEqualStrategyNumber; + else if (udatum_next && + udatum_next->kind == PARTITION_RANGE_DATUM_MAXVALUE) + strategy = BTLessEqualStrategyNumber; + else + strategy = BTLessStrategyNumber; + + upper_or_arm_args = lappend(upper_or_arm_args, + make_partition_op_expr(key, j, + strategy, + keyCol, + (Expr *) upper_val)); + } + + /* + * Did we generate enough of OR's arguments? First arm considers + * the first of the remaining columns, second arm considers first + * two of the remaining columns, and so on. + */ + ++j; + if (j - i > current_or_arm) + { + /* + * We must not emit any more arms if the new column that will + * be considered is unbounded, or this one was. + */ + if (!lower_val || !ldatum_next || + ldatum_next->kind != PARTITION_RANGE_DATUM_VALUE) + need_next_lower_arm = false; + if (!upper_val || !udatum_next || + udatum_next->kind != PARTITION_RANGE_DATUM_VALUE) + need_next_upper_arm = false; + break; + } + } + + if (lower_or_arm_args != NIL) + lower_or_arms = lappend(lower_or_arms, + list_length(lower_or_arm_args) > 1 + ? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1) + : linitial(lower_or_arm_args)); + + if (upper_or_arm_args != NIL) + upper_or_arms = lappend(upper_or_arms, + list_length(upper_or_arm_args) > 1 + ? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1) + : linitial(upper_or_arm_args)); + + /* If no work to do in the next iteration, break away. */ + if (!need_next_lower_arm && !need_next_upper_arm) + break; + + ++current_or_arm; + } + + /* + * Generate the OR expressions for each of lower and upper bounds (if + * required), and append to the list of implicitly ANDed list of + * expressions. + */ + if (lower_or_arms != NIL) + result = lappend(result, + list_length(lower_or_arms) > 1 + ? makeBoolExpr(OR_EXPR, lower_or_arms, -1) + : linitial(lower_or_arms)); + if (upper_or_arms != NIL) + result = lappend(result, + list_length(upper_or_arms) > 1 + ? makeBoolExpr(OR_EXPR, upper_or_arms, -1) + : linitial(upper_or_arms)); + + /* + * As noted above, for non-default, we return list with constant TRUE. If + * the result is NIL during the recursive call for default, it implies + * this is the only other partition which can hold every value of the key + * except NULL. Hence we return the NullTest result skipped earlier. + */ + if (result == NIL) + result = for_default + ? get_range_nulltest(key) + : list_make1(makeBoolConst(true, false)); + + return result; +} + +/* + * get_range_key_properties + * Returns range partition key information for a given column + * + * This is a subroutine for get_qual_for_range, and its API is pretty + * specialized to that caller. + * + * Constructs an Expr for the key column (returned in *keyCol) and Consts + * for the lower and upper range limits (returned in *lower_val and + * *upper_val). For MINVALUE/MAXVALUE limits, NULL is returned instead of + * a Const. All of these structures are freshly palloc'd. + * + * *partexprs_item points to the cell containing the next expression in + * the key->partexprs list, or NULL. It may be advanced upon return. + */ +static void +get_range_key_properties(PartitionKey key, int keynum, + PartitionRangeDatum *ldatum, + PartitionRangeDatum *udatum, + ListCell **partexprs_item, + Expr **keyCol, + Const **lower_val, Const **upper_val) +{ + /* Get partition key expression for this column */ + if (key->partattrs[keynum] != 0) + { + *keyCol = (Expr *) makeVar(1, + key->partattrs[keynum], + key->parttypid[keynum], + key->parttypmod[keynum], + key->parttypcoll[keynum], + 0); + } + else + { + if (*partexprs_item == NULL) + elog(ERROR, "wrong number of partition key expressions"); + *keyCol = copyObject(lfirst(*partexprs_item)); + *partexprs_item = lnext(*partexprs_item); + } + + /* Get appropriate Const nodes for the bounds */ + if (ldatum->kind == PARTITION_RANGE_DATUM_VALUE) + *lower_val = castNode(Const, copyObject(ldatum->value)); + else + *lower_val = NULL; + + if (udatum->kind == PARTITION_RANGE_DATUM_VALUE) + *upper_val = castNode(Const, copyObject(udatum->value)); + else + *upper_val = NULL; +} + +/* + * get_range_nulltest + * + * A non-default range partition table does not currently allow partition + * keys to be null, so emit an IS NOT NULL expression for each key column. + */ +static List * +get_range_nulltest(PartitionKey key) +{ + List *result = NIL; + NullTest *nulltest; + ListCell *partexprs_item; + int i; + + partexprs_item = list_head(key->partexprs); + for (i = 0; i < key->partnatts; i++) + { + Expr *keyCol; + + if (key->partattrs[i] != 0) + { + keyCol = (Expr *) makeVar(1, + key->partattrs[i], + key->parttypid[i], + key->parttypmod[i], + key->parttypcoll[i], + 0); + } + else + { + if (partexprs_item == NULL) + elog(ERROR, "wrong number of partition key expressions"); + keyCol = copyObject(lfirst(partexprs_item)); + partexprs_item = lnext(partexprs_item); + } + + nulltest = makeNode(NullTest); + nulltest->arg = keyCol; + nulltest->nulltesttype = IS_NOT_NULL; + nulltest->argisrow = false; + nulltest->location = -1; + result = lappend(result, nulltest); + } + + return result; +} + +/* + * compute_hash_value + * + * Compute the hash value for given not null partition key values. + */ +uint64 +compute_hash_value(int partnatts, FmgrInfo *partsupfunc, + Datum *values, bool *isnull) +{ + int i; + uint64 rowHash = 0; + Datum seed = UInt64GetDatum(HASH_PARTITION_SEED); + + for (i = 0; i < partnatts; i++) + { + if (!isnull[i]) + { + Datum hash; + + Assert(OidIsValid(partsupfunc[i].fn_oid)); + + /* + * Compute hash for each datum value by calling respective + * datatype-specific hash functions of each partition key + * attribute. + */ + hash = FunctionCall2(&partsupfunc[i], values[i], seed); + + /* Form a single 64-bit hash value */ + rowHash = hash_combine64(rowHash, DatumGetUInt64(hash)); + } + } + + return rowHash; +} + +/* + * satisfies_hash_partition + * + * This is an SQL-callable function for use in hash partition constraints. + * The first three arguments are the parent table OID, modulus, and remainder. + * The remaining arguments are the value of the partitioning columns (or + * expressions); these are hashed and the results are combined into a single + * hash value by calling hash_combine64. + * + * Returns true if remainder produced when this computed single hash value is + * divided by the given modulus is equal to given remainder, otherwise false. + * + * See get_qual_for_hash() for usage. + */ +Datum +satisfies_hash_partition(PG_FUNCTION_ARGS) +{ + typedef struct ColumnsHashData + { + Oid relid; + int nkeys; + Oid variadic_type; + int16 variadic_typlen; + bool variadic_typbyval; + char variadic_typalign; + FmgrInfo partsupfunc[PARTITION_MAX_KEYS]; + } ColumnsHashData; + Oid parentId; + int modulus; + int remainder; + Datum seed = UInt64GetDatum(HASH_PARTITION_SEED); + ColumnsHashData *my_extra; + uint64 rowHash = 0; + + /* Return null if the parent OID, modulus, or remainder is NULL. */ + if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2)) + PG_RETURN_NULL(); + parentId = PG_GETARG_OID(0); + modulus = PG_GETARG_INT32(1); + remainder = PG_GETARG_INT32(2); + + /* Sanity check modulus and remainder. */ + if (modulus <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("modulus for hash partition must be a positive integer"))); + if (remainder < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("remainder for hash partition must be a non-negative integer"))); + if (remainder >= modulus) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("remainder for hash partition must be less than modulus"))); + + /* + * Cache hash function information. + */ + my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL || my_extra->relid != parentId) + { + Relation parent; + PartitionKey key; + int j; + + /* Open parent relation and fetch partition keyinfo */ + parent = try_relation_open(parentId, AccessShareLock); + if (parent == NULL) + PG_RETURN_NULL(); + key = RelationGetPartitionKey(parent); + + /* Reject parent table that is not hash-partitioned. */ + if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE || + key->strategy != PARTITION_STRATEGY_HASH) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("\"%s\" is not a hash partitioned table", + get_rel_name(parentId)))); + + if (!get_fn_expr_variadic(fcinfo->flinfo)) + { + int nargs = PG_NARGS() - 3; + + /* complain if wrong number of column values */ + if (key->partnatts != nargs) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)", + key->partnatts, nargs))); + + /* allocate space for our cache */ + fcinfo->flinfo->fn_extra = + MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt, + offsetof(ColumnsHashData, partsupfunc) + + sizeof(FmgrInfo) * nargs); + my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra; + my_extra->relid = parentId; + my_extra->nkeys = key->partnatts; + + /* check argument types and save fmgr_infos */ + for (j = 0; j < key->partnatts; ++j) + { + Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, j + 3); + + if (argtype != key->parttypid[j] && !IsBinaryCoercible(argtype, key->parttypid[j])) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"", + j + 1, format_type_be(key->parttypid[j]), format_type_be(argtype)))); + + fmgr_info_copy(&my_extra->partsupfunc[j], + &key->partsupfunc[j], + fcinfo->flinfo->fn_mcxt); + } + + } + else + { + ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3); + + /* allocate space for our cache -- just one FmgrInfo in this case */ + fcinfo->flinfo->fn_extra = + MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt, + offsetof(ColumnsHashData, partsupfunc) + + sizeof(FmgrInfo)); + my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra; + my_extra->relid = parentId; + my_extra->nkeys = key->partnatts; + my_extra->variadic_type = ARR_ELEMTYPE(variadic_array); + get_typlenbyvalalign(my_extra->variadic_type, + &my_extra->variadic_typlen, + &my_extra->variadic_typbyval, + &my_extra->variadic_typalign); + + /* check argument types */ + for (j = 0; j < key->partnatts; ++j) + if (key->parttypid[j] != my_extra->variadic_type) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"", + j + 1, + format_type_be(key->parttypid[j]), + format_type_be(my_extra->variadic_type)))); + + fmgr_info_copy(&my_extra->partsupfunc[0], + &key->partsupfunc[0], + fcinfo->flinfo->fn_mcxt); + } + + /* Hold lock until commit */ + relation_close(parent, NoLock); + } + + if (!OidIsValid(my_extra->variadic_type)) + { + int nkeys = my_extra->nkeys; + int i; + + /* + * For a non-variadic call, neither the number of arguments nor their + * types can change across calls, so avoid the expense of rechecking + * here. + */ + + for (i = 0; i < nkeys; i++) + { + Datum hash; + + /* keys start from fourth argument of function. */ + int argno = i + 3; + + if (PG_ARGISNULL(argno)) + continue; + + Assert(OidIsValid(my_extra->partsupfunc[i].fn_oid)); + + hash = FunctionCall2(&my_extra->partsupfunc[i], + PG_GETARG_DATUM(argno), + seed); + + /* Form a single 64-bit hash value */ + rowHash = hash_combine64(rowHash, DatumGetUInt64(hash)); + } + } + else + { + ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3); + int i; + int nelems; + Datum *datum; + bool *isnull; + + deconstruct_array(variadic_array, + my_extra->variadic_type, + my_extra->variadic_typlen, + my_extra->variadic_typbyval, + my_extra->variadic_typalign, + &datum, &isnull, &nelems); + + /* complain if wrong number of column values */ + if (nelems != my_extra->nkeys) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)", + my_extra->nkeys, nelems))); + + for (i = 0; i < nelems; i++) + { + Datum hash; + + if (isnull[i]) + continue; + + Assert(OidIsValid(my_extra->partsupfunc[0].fn_oid)); + + hash = FunctionCall2(&my_extra->partsupfunc[0], + datum[i], + seed); + + /* Form a single 64-bit hash value */ + rowHash = hash_combine64(rowHash, DatumGetUInt64(hash)); + } + } + + PG_RETURN_BOOL(rowHash % modulus == remainder); +} |