diff options
Diffstat (limited to 'src/backend')
43 files changed, 5686 insertions, 195 deletions
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 83a97b06ab8..34018cac7c8 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -930,6 +930,7 @@ extractRelOptions(HeapTuple tuple, TupleDesc tupdesc, case RELKIND_RELATION: case RELKIND_TOASTVALUE: case RELKIND_MATVIEW: + case RELKIND_PARTITIONED_TABLE: options = heap_reloptions(classForm->relkind, datum, false); break; case RELKIND_VIEW: @@ -1381,6 +1382,7 @@ heap_reloptions(char relkind, Datum reloptions, bool validate) return (bytea *) rdopts; case RELKIND_RELATION: case RELKIND_MATVIEW: + case RELKIND_PARTITIONED_TABLE: return default_reloptions(reloptions, validate, RELOPT_KIND_HEAP); default: /* other relkinds are not supported */ diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile index 1ce76100497..2d5ac09bece 100644 --- a/src/backend/catalog/Makefile +++ b/src/backend/catalog/Makefile @@ -11,7 +11,7 @@ top_builddir = ../../.. include $(top_builddir)/src/Makefile.global OBJS = catalog.o dependency.o heap.o index.o indexing.o namespace.o aclchk.o \ - objectaccess.o objectaddress.o pg_aggregate.o pg_collation.o \ + objectaccess.o objectaddress.o partition.o pg_aggregate.o pg_collation.o \ pg_constraint.o pg_conversion.o \ pg_depend.o pg_enum.o pg_inherits.o pg_largeobject.o pg_namespace.o \ pg_operator.o pg_proc.o pg_range.o pg_db_role_setting.o pg_shdepend.o \ @@ -41,7 +41,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\ pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \ pg_foreign_table.h pg_policy.h pg_replication_origin.h \ pg_default_acl.h pg_init_privs.h pg_seclabel.h pg_shseclabel.h \ - pg_collation.h pg_range.h pg_transform.h \ + pg_collation.h pg_partitioned_table.h pg_range.h pg_transform.h \ toasting.h indexing.h \ ) diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index c0df6710d1d..3086021432a 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -768,6 +768,8 @@ objectsInSchemaToOids(GrantObjectType objtype, List *nspnames) objects = list_concat(objects, objs); objs = getRelationsInNamespace(namespaceId, RELKIND_FOREIGN_TABLE); objects = list_concat(objects, objs); + objs = getRelationsInNamespace(namespaceId, RELKIND_PARTITIONED_TABLE); + objects = list_concat(objects, objs); break; case ACL_OBJECT_SEQUENCE: objs = getRelationsInNamespace(namespaceId, RELKIND_SEQUENCE); diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index b697e88ef09..0cdd1c5c6cb 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -1352,7 +1352,8 @@ void recordDependencyOnSingleRelExpr(const ObjectAddress *depender, Node *expr, Oid relId, DependencyType behavior, - DependencyType self_behavior) + DependencyType self_behavior, + bool ignore_self) { find_expr_references_context context; RangeTblEntry rte; @@ -1407,9 +1408,10 @@ recordDependencyOnSingleRelExpr(const ObjectAddress *depender, context.addrs->numrefs = outrefs; /* Record the self-dependencies */ - recordMultipleDependencies(depender, - self_addrs->refs, self_addrs->numrefs, - self_behavior); + if (!ignore_self) + recordMultipleDependencies(depender, + self_addrs->refs, self_addrs->numrefs, + self_behavior); free_object_addresses(self_addrs); } diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 0b804e7ac60..7f5bad0b5da 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -41,6 +41,7 @@ #include "catalog/heap.h" #include "catalog/index.h" #include "catalog/objectaccess.h" +#include "catalog/partition.h" #include "catalog/pg_attrdef.h" #include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" @@ -48,6 +49,8 @@ #include "catalog/pg_foreign_table.h" #include "catalog/pg_inherits.h" #include "catalog/pg_namespace.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_partitioned_table.h" #include "catalog/pg_statistic.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" @@ -808,6 +811,7 @@ InsertPgClassTuple(Relation pg_class_desc, values[Anum_pg_class_relhassubclass - 1] = BoolGetDatum(rd_rel->relhassubclass); values[Anum_pg_class_relispopulated - 1] = BoolGetDatum(rd_rel->relispopulated); values[Anum_pg_class_relreplident - 1] = CharGetDatum(rd_rel->relreplident); + values[Anum_pg_class_relispartition - 1] = BoolGetDatum(rd_rel->relispartition); values[Anum_pg_class_relfrozenxid - 1] = TransactionIdGetDatum(rd_rel->relfrozenxid); values[Anum_pg_class_relminmxid - 1] = MultiXactIdGetDatum(rd_rel->relminmxid); if (relacl != (Datum) 0) @@ -819,6 +823,9 @@ InsertPgClassTuple(Relation pg_class_desc, else nulls[Anum_pg_class_reloptions - 1] = true; + /* relpartbound is set by updating this tuple, if necessary */ + nulls[Anum_pg_class_relpartbound - 1] = true; + tup = heap_form_tuple(RelationGetDescr(pg_class_desc), values, nulls); /* @@ -924,6 +931,9 @@ AddNewRelationTuple(Relation pg_class_desc, new_rel_reltup->reltype = new_type_oid; new_rel_reltup->reloftype = reloftype; + /* relispartition is always set by updating this tuple later */ + new_rel_reltup->relispartition = false; + new_rel_desc->rd_att->tdtypeid = new_type_oid; /* Now build and insert the tuple */ @@ -1104,7 +1114,8 @@ heap_create_with_catalog(const char *relname, if (IsBinaryUpgrade && (relkind == RELKIND_RELATION || relkind == RELKIND_SEQUENCE || relkind == RELKIND_VIEW || relkind == RELKIND_MATVIEW || - relkind == RELKIND_COMPOSITE_TYPE || relkind == RELKIND_FOREIGN_TABLE)) + relkind == RELKIND_COMPOSITE_TYPE || relkind == RELKIND_FOREIGN_TABLE || + relkind == RELKIND_PARTITIONED_TABLE)) { if (!OidIsValid(binary_upgrade_next_heap_pg_class_oid)) ereport(ERROR, @@ -1138,6 +1149,7 @@ heap_create_with_catalog(const char *relname, case RELKIND_VIEW: case RELKIND_MATVIEW: case RELKIND_FOREIGN_TABLE: + case RELKIND_PARTITIONED_TABLE: relacl = get_user_default_acl(ACL_OBJECT_RELATION, ownerid, relnamespace); break; @@ -1182,7 +1194,8 @@ heap_create_with_catalog(const char *relname, relkind == RELKIND_VIEW || relkind == RELKIND_MATVIEW || relkind == RELKIND_FOREIGN_TABLE || - relkind == RELKIND_COMPOSITE_TYPE)) + relkind == RELKIND_COMPOSITE_TYPE || + relkind == RELKIND_PARTITIONED_TABLE)) new_array_oid = AssignTypeArrayOid(); /* @@ -1349,7 +1362,9 @@ heap_create_with_catalog(const char *relname, if (relpersistence == RELPERSISTENCE_UNLOGGED) { Assert(relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW || - relkind == RELKIND_TOASTVALUE); + relkind == RELKIND_TOASTVALUE || + relkind == RELKIND_PARTITIONED_TABLE); + heap_create_init_fork(new_rel_desc); } @@ -1754,6 +1769,8 @@ void heap_drop_with_catalog(Oid relid) { Relation rel; + Oid parentOid; + Relation parent = NULL; /* * Open and lock the relation. @@ -1761,6 +1778,21 @@ heap_drop_with_catalog(Oid relid) rel = relation_open(relid, AccessExclusiveLock); /* + * If the relation is a partition, we must grab exclusive lock on its + * parent because we need to update its partition descriptor. We must + * take a table lock strong enough to prevent all queries on the parent + * from proceeding until we commit and send out a shared-cache-inval + * notice that will make them update their partition descriptor. + * Sometimes, doing this is cycles spent uselessly, especially if the + * parent will be dropped as part of the same command anyway. + */ + if (rel->rd_rel->relispartition) + { + parentOid = get_partition_parent(relid); + parent = heap_open(parentOid, AccessExclusiveLock); + } + + /* * There can no longer be anyone *else* touching the relation, but we * might still have open queries or cursors, or pending trigger events, in * our own session. @@ -1796,6 +1828,12 @@ heap_drop_with_catalog(Oid relid) } /* + * If a partitioned table, delete the pg_partitioned_table tuple. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + RemovePartitionKeyByRelId(relid); + + /* * Schedule unlinking of the relation's physical files at commit. */ if (rel->rd_rel->relkind != RELKIND_VIEW && @@ -1845,6 +1883,12 @@ heap_drop_with_catalog(Oid relid) * delete relation tuple */ DeleteRelationTuple(relid); + + if (parent) + { + CacheInvalidateRelcache(parent); + heap_close(parent, NoLock); /* keep the lock */ + } } @@ -2028,6 +2072,17 @@ StoreRelCheck(Relation rel, char *ccname, Node *expr, attNos = NULL; /* + * Partitioned tables do not contain any rows themselves, so a NO INHERIT + * constraint makes no sense. + */ + if (is_no_inherit && + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot add NO INHERIT constraint to partitioned table \"%s\"", + RelationGetRelationName(rel)))); + + /* * Create the Check Constraint */ constrOid = @@ -2440,8 +2495,11 @@ MergeWithExistingConstraint(Relation rel, char *ccname, Node *expr, * definition) then interpret addition of a local constraint as a * legal merge. This allows ALTER ADD CONSTRAINT on parent and * child tables to be given in either order with same end state. + * However if the relation is a partition, all inherited + * constraints are always non-local, including those that were + * merged. */ - if (is_local && !con->conislocal) + if (is_local && !con->conislocal && !rel->rd_rel->relispartition) allow_merge = true; if (!found || !allow_merge) @@ -2486,10 +2544,24 @@ MergeWithExistingConstraint(Relation rel, char *ccname, Node *expr, tup = heap_copytuple(tup); con = (Form_pg_constraint) GETSTRUCT(tup); - if (is_local) - con->conislocal = true; + /* + * In case of partitions, an inherited constraint must be + * inherited only once since it cannot have multiple parents and + * it is never considered local. + */ + if (rel->rd_rel->relispartition) + { + con->coninhcount = 1; + con->conislocal = false; + } else - con->coninhcount++; + { + if (is_local) + con->conislocal = true; + else + con->coninhcount++; + } + if (is_no_inherit) { Assert(is_local); @@ -3013,3 +3085,187 @@ insert_ordered_unique_oid(List *list, Oid datum) lappend_cell_oid(list, prev, datum); return list; } + +/* + * StorePartitionKey + * Store information about the partition key rel into the catalog + */ +void +StorePartitionKey(Relation rel, + char strategy, + int16 partnatts, + AttrNumber *partattrs, + List *partexprs, + Oid *partopclass, + Oid *partcollation) +{ + int i; + int2vector *partattrs_vec; + oidvector *partopclass_vec; + oidvector *partcollation_vec; + Datum partexprDatum; + Relation pg_partitioned_table; + HeapTuple tuple; + Datum values[Natts_pg_partitioned_table]; + bool nulls[Natts_pg_partitioned_table]; + ObjectAddress myself; + ObjectAddress referenced; + + Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); + + tuple = SearchSysCache1(PARTRELID, + ObjectIdGetDatum(RelationGetRelid(rel))); + + /* Copy the partition attribute numbers, opclass OIDs into arrays */ + partattrs_vec = buildint2vector(partattrs, partnatts); + partopclass_vec = buildoidvector(partopclass, partnatts); + partcollation_vec = buildoidvector(partcollation, partnatts); + + /* Convert the expressions (if any) to a text datum */ + if (partexprs) + { + char *exprString; + + exprString = nodeToString(partexprs); + partexprDatum = CStringGetTextDatum(exprString); + pfree(exprString); + } + else + partexprDatum = (Datum) 0; + + pg_partitioned_table = heap_open(PartitionedRelationId, RowExclusiveLock); + + MemSet(nulls, false, sizeof(nulls)); + + /* Only this can ever be NULL */ + if (!partexprDatum) + nulls[Anum_pg_partitioned_table_partexprs - 1] = true; + + values[Anum_pg_partitioned_table_partrelid - 1] = ObjectIdGetDatum(RelationGetRelid(rel)); + values[Anum_pg_partitioned_table_partstrat - 1] = CharGetDatum(strategy); + values[Anum_pg_partitioned_table_partnatts - 1] = Int16GetDatum(partnatts); + values[Anum_pg_partitioned_table_partattrs - 1] = PointerGetDatum(partattrs_vec); + values[Anum_pg_partitioned_table_partclass - 1] = PointerGetDatum(partopclass_vec); + values[Anum_pg_partitioned_table_partcollation - 1] = PointerGetDatum(partcollation_vec); + values[Anum_pg_partitioned_table_partexprs - 1] = partexprDatum; + + tuple = heap_form_tuple(RelationGetDescr(pg_partitioned_table), values, nulls); + + simple_heap_insert(pg_partitioned_table, tuple); + + /* Update the indexes on pg_partitioned_table */ + CatalogUpdateIndexes(pg_partitioned_table, tuple); + heap_close(pg_partitioned_table, RowExclusiveLock); + + /* Mark this relation as dependent on a few things as follows */ + myself.classId = RelationRelationId; + myself.objectId = RelationGetRelid(rel);; + myself.objectSubId = 0; + + /* Operator class and collation per key column */ + for (i = 0; i < partnatts; i++) + { + referenced.classId = OperatorClassRelationId; + referenced.objectId = partopclass[i]; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + referenced.classId = CollationRelationId; + referenced.objectId = partcollation[i]; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } + + /* + * Anything mentioned in the expressions. We must ignore the column + * references, which will depend on the table itself; there is no + * separate partition key object. + */ + if (partexprs) + recordDependencyOnSingleRelExpr(&myself, + (Node *) partexprs, + RelationGetRelid(rel), + DEPENDENCY_NORMAL, + DEPENDENCY_AUTO, true); + + /* + * We must invalidate the relcache so that the next + * CommandCounterIncrement() will cause the same to be rebuilt using the + * information in just created catalog entry. + */ + CacheInvalidateRelcache(rel); +} + +/* + * RemovePartitionKeyByRelId + * Remove pg_partitioned_table entry for a relation + */ +void +RemovePartitionKeyByRelId(Oid relid) +{ + Relation rel; + HeapTuple tuple; + + rel = heap_open(PartitionedRelationId, RowExclusiveLock); + + tuple = SearchSysCache1(PARTRELID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for partition key of relation %u", + relid); + + simple_heap_delete(rel, &tuple->t_self); + + ReleaseSysCache(tuple); + heap_close(rel, RowExclusiveLock); +} + +/* + * StorePartitionBound + * Update pg_class tuple of rel to store the partition bound and set + * relispartition to true + */ +void +StorePartitionBound(Relation rel, Node *bound) +{ + Relation classRel; + HeapTuple tuple, + newtuple; + Datum new_val[Natts_pg_class]; + bool new_null[Natts_pg_class], + new_repl[Natts_pg_class]; + + /* Update pg_class tuple */ + classRel = heap_open(RelationRelationId, RowExclusiveLock); + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(rel))); +#ifdef USE_ASSERT_CHECKING + { + Form_pg_class classForm; + bool isnull; + + classForm = (Form_pg_class) GETSTRUCT(tuple); + Assert(!classForm->relispartition); + (void) SysCacheGetAttr(RELOID, tuple, Anum_pg_class_relpartbound, + &isnull); + Assert(isnull); + } +#endif + + /* Fill in relpartbound value */ + memset(new_val, 0, sizeof(new_val)); + memset(new_null, false, sizeof(new_null)); + memset(new_repl, false, sizeof(new_repl)); + new_val[Anum_pg_class_relpartbound - 1] = CStringGetTextDatum(nodeToString(bound)); + new_null[Anum_pg_class_relpartbound - 1] = false; + new_repl[Anum_pg_class_relpartbound - 1] = true; + newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel), + new_val, new_null, new_repl); + /* Also set the flag */ + ((Form_pg_class) GETSTRUCT(newtuple))->relispartition = true; + simple_heap_update(classRel, &newtuple->t_self, newtuple); + CatalogUpdateIndexes(classRel, newtuple); + heap_freetuple(newtuple); + heap_close(classRel, RowExclusiveLock); +} diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 08b646d8f33..08b0989112b 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -1043,7 +1043,7 @@ index_create(Relation heapRelation, (Node *) indexInfo->ii_Expressions, heapRelationId, DEPENDENCY_NORMAL, - DEPENDENCY_AUTO); + DEPENDENCY_AUTO, false); } /* Store dependencies on anything mentioned in predicate */ @@ -1053,7 +1053,7 @@ index_create(Relation heapRelation, (Node *) indexInfo->ii_Predicate, heapRelationId, DEPENDENCY_NORMAL, - DEPENDENCY_AUTO); + DEPENDENCY_AUTO, false); } } else diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index d531d17cdbc..bb4b080b007 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -1204,7 +1204,8 @@ get_relation_by_qualified_name(ObjectType objtype, List *objname, RelationGetRelationName(relation)))); break; case OBJECT_TABLE: - if (relation->rd_rel->relkind != RELKIND_RELATION) + if (relation->rd_rel->relkind != RELKIND_RELATION && + relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table", @@ -3244,6 +3245,7 @@ getRelationDescription(StringInfo buffer, Oid relid) switch (relForm->relkind) { case RELKIND_RELATION: + case RELKIND_PARTITIONED_TABLE: appendStringInfo(buffer, _("table %s"), relname); break; @@ -3701,6 +3703,7 @@ getRelationTypeDescription(StringInfo buffer, Oid relid, int32 objectSubId) switch (relForm->relkind) { case RELKIND_RELATION: + case RELKIND_PARTITIONED_TABLE: appendStringInfoString(buffer, "table"); break; case RELKIND_INDEX: diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c new file mode 100644 index 00000000000..6dab45f0edf --- /dev/null +++ b/src/backend/catalog/partition.c @@ -0,0 +1,1917 @@ +/*------------------------------------------------------------------------- + * + * partition.c + * Partitioning related data structures and functions. + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/catalog/partition.c + * + *------------------------------------------------------------------------- +*/ + +#include "postgres.h" + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "access/sysattr.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/objectaddress.h" +#include "catalog/partition.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_inherits_fn.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_type.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/parsenodes.h" +#include "optimizer/clauses.h" +#include "optimizer/planmain.h" +#include "optimizer/var.h" +#include "rewrite/rewriteManip.h" +#include "storage/lmgr.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/memutils.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/ruleutils.h" +#include "utils/syscache.h" + +/* + * Information about bounds of a partitioned relation + * + * A list partition datum that is known to be NULL is never put into the + * datums array. Instead, it is tracked using has_null and null_index fields. + * + * In the case of range partitioning, ndatums will typically be far less than + * 2 * nparts, because a partition's upper bound and the next partition's lower + * bound are the same in most common cases, and we only store one of them. + * + * In the case of list partitioning, the indexes array stores one entry for + * every datum, which is the index of the partition that accepts a given datum. + * In case of range partitioning, it stores one entry per distinct range + * datum, which is the index of the partition for which a given datum + * is an upper bound. + */ + +/* Ternary value to represent what's contained in a range bound datum */ +typedef enum RangeDatumContent +{ + RANGE_DATUM_FINITE = 0, /* actual datum stored elsewhere */ + RANGE_DATUM_NEG_INF, /* negative infinity */ + RANGE_DATUM_POS_INF /* positive infinity */ +} RangeDatumContent; + +typedef struct PartitionBoundInfoData +{ + char strategy; /* list or range bounds? */ + int ndatums; /* Length of the datums following array */ + Datum **datums; /* Array of datum-tuples with key->partnatts + * datums each */ + RangeDatumContent **content;/* what's contained in each range bound datum? + * (see the above enum); NULL for list + * partitioned tables */ + int *indexes; /* Partition indexes; one entry per member of + * the datums array (plus one if range + * partitioned table) */ + bool has_null; /* Is there a null-accepting partition? false + * for range partitioned tables */ + int null_index; /* Index of the null-accepting partition; -1 + * for range partitioned tables */ +} PartitionBoundInfoData; + +/* + * When qsort'ing partition bounds after reading from the catalog, each bound + * is represented with one of the following structs. + */ + +/* One value coming from some (index'th) list partition */ +typedef struct PartitionListValue +{ + int index; + Datum value; +} PartitionListValue; + +/* One bound of a range partition */ +typedef struct PartitionRangeBound +{ + int index; + Datum *datums; /* range bound datums */ + RangeDatumContent *content; /* what's contained in each datum? */ + bool lower; /* this is the lower (vs upper) bound */ +} PartitionRangeBound; + +static int32 qsort_partition_list_value_cmp(const void *a, const void *b, + void *arg); +static int32 qsort_partition_rbound_cmp(const void *a, const void *b, + void *arg); + +static List *get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec); +static List *get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec); +static Oid get_partition_operator(PartitionKey key, int col, + StrategyNumber strategy, bool *need_relabel); +static List *generate_partition_qual(Relation rel, bool recurse); + +static PartitionRangeBound *make_one_range_bound(PartitionKey key, int index, + List *datums, bool lower); +static int32 partition_rbound_cmp(PartitionKey key, + Datum *datums1, RangeDatumContent *content1, bool lower1, + PartitionRangeBound *b2); +static int32 partition_rbound_datum_cmp(PartitionKey key, + Datum *rb_datums, RangeDatumContent *rb_content, + Datum *tuple_datums); + +static int32 partition_bound_cmp(PartitionKey key, + PartitionBoundInfo boundinfo, + int offset, void *probe, bool probe_is_bound); +static int partition_bound_bsearch(PartitionKey key, + PartitionBoundInfo boundinfo, + void *probe, bool probe_is_bound, bool *is_equal); + +/* Support get_partition_for_tuple() */ +static void FormPartitionKeyDatum(PartitionDispatch pd, + TupleTableSlot *slot, + EState *estate, + Datum *values, + bool *isnull); + +/* + * RelationBuildPartitionDesc + * Form rel's partition descriptor + * + * Not flushed from the cache by RelationClearRelation() unless changed because + * of addition or removal of partition. + */ +void +RelationBuildPartitionDesc(Relation rel) +{ + List *inhoids, + *partoids; + Oid *oids = NULL; + List *boundspecs = NIL; + ListCell *cell; + int i, + nparts; + PartitionKey key = RelationGetPartitionKey(rel); + PartitionDesc result; + MemoryContext oldcxt; + + int ndatums = 0; + + /* List partitioning specific */ + PartitionListValue **all_values = NULL; + bool found_null = false; + int null_index = -1; + + /* Range partitioning specific */ + PartitionRangeBound **rbounds = NULL; + + /* + * The following could happen in situations where rel has a pg_class entry + * but not the pg_partitioned_table entry yet. + */ + if (key == NULL) + return; + + /* Get partition oids from pg_inherits */ + inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock); + + /* Collect bound spec nodes in a list */ + i = 0; + partoids = NIL; + foreach(cell, inhoids) + { + Oid inhrelid = lfirst_oid(cell); + HeapTuple tuple; + Datum datum; + bool isnull; + Node *boundspec; + + tuple = SearchSysCache1(RELOID, inhrelid); + + /* + * It is possible that the pg_class tuple of a partition has not been + * updated yet to set its relpartbound field. The only case where + * this happens is when we open the parent relation to check using its + * partition descriptor that a new partition's bound does not overlap + * some existing partition. + */ + if (!((Form_pg_class) GETSTRUCT(tuple))->relispartition) + { + ReleaseSysCache(tuple); + continue; + } + + datum = SysCacheGetAttr(RELOID, tuple, + Anum_pg_class_relpartbound, + &isnull); + Assert(!isnull); + boundspec = (Node *) stringToNode(TextDatumGetCString(datum)); + boundspecs = lappend(boundspecs, boundspec); + partoids = lappend_oid(partoids, inhrelid); + ReleaseSysCache(tuple); + } + + nparts = list_length(partoids); + + if (nparts > 0) + { + oids = (Oid *) palloc(nparts * sizeof(Oid)); + i = 0; + foreach(cell, partoids) + oids[i++] = lfirst_oid(cell); + + /* Convert from node to the internal representation */ + if (key->strategy == PARTITION_STRATEGY_LIST) + { + List *non_null_values = NIL; + + /* + * Create a unified list of non-null values across all partitions. + */ + i = 0; + found_null = false; + null_index = -1; + foreach(cell, boundspecs) + { + ListCell *c; + PartitionBoundSpec *spec = lfirst(cell); + + if (spec->strategy != PARTITION_STRATEGY_LIST) + elog(ERROR, "invalid strategy in partition bound spec"); + + foreach(c, spec->listdatums) + { + Const *val = lfirst(c); + PartitionListValue *list_value = NULL; + + if (!val->constisnull) + { + list_value = (PartitionListValue *) + palloc0(sizeof(PartitionListValue)); + list_value->index = i; + list_value->value = val->constvalue; + } + else + { + /* + * Never put a null into the values array, flag + * instead for the code further down below where we + * construct the actual relcache struct. + */ + if (found_null) + elog(ERROR, "found null more than once"); + found_null = true; + null_index = i; + } + + if (list_value) + non_null_values = lappend(non_null_values, + list_value); + } + + i++; + } + + ndatums = list_length(non_null_values); + + /* + * Collect all list values in one array. Alongside the value, we + * also save the index of partition the value comes from. + */ + all_values = (PartitionListValue **) palloc(ndatums * + sizeof(PartitionListValue *)); + i = 0; + foreach(cell, non_null_values) + { + PartitionListValue *src = lfirst(cell); + + all_values[i] = (PartitionListValue *) + palloc(sizeof(PartitionListValue)); + all_values[i]->value = src->value; + all_values[i]->index = src->index; + i++; + } + + qsort_arg(all_values, ndatums, sizeof(PartitionListValue *), + qsort_partition_list_value_cmp, (void *) key); + } + else if (key->strategy == PARTITION_STRATEGY_RANGE) + { + int j, + k; + PartitionRangeBound **all_bounds, + *prev; + bool *distinct_indexes; + + all_bounds = (PartitionRangeBound **) palloc0(2 * nparts * + sizeof(PartitionRangeBound *)); + distinct_indexes = (bool *) palloc(2 * nparts * sizeof(bool)); + + /* + * Create a unified list of range bounds across all the + * partitions. + */ + i = j = 0; + foreach(cell, boundspecs) + { + PartitionBoundSpec *spec = lfirst(cell); + PartitionRangeBound *lower, + *upper; + + if (spec->strategy != PARTITION_STRATEGY_RANGE) + elog(ERROR, "invalid strategy in partition bound spec"); + + lower = make_one_range_bound(key, i, spec->lowerdatums, + true); + upper = make_one_range_bound(key, i, spec->upperdatums, + false); + all_bounds[j] = lower; + all_bounds[j + 1] = upper; + j += 2; + i++; + } + Assert(j == 2 * nparts); + + /* Sort all the bounds in ascending order */ + qsort_arg(all_bounds, 2 * nparts, + sizeof(PartitionRangeBound *), + qsort_partition_rbound_cmp, + (void *) key); + + /* + * Count the number of distinct bounds to allocate an array of + * that size. + */ + ndatums = 0; + prev = NULL; + for (i = 0; i < 2 * nparts; i++) + { + PartitionRangeBound *cur = all_bounds[i]; + bool is_distinct = false; + int j; + + /* Is current bound is distinct from the previous? */ + for (j = 0; j < key->partnatts; j++) + { + Datum cmpval; + + if (prev == NULL) + { + is_distinct = true; + break; + } + + /* + * If either of them has infinite element, we can't equate + * them. Even when both are infinite, they'd have + * opposite signs, because only one of cur and prev is a + * lower bound). + */ + if (cur->content[j] != RANGE_DATUM_FINITE || + prev->content[j] != RANGE_DATUM_FINITE) + { + is_distinct = true; + break; + } + cmpval = FunctionCall2Coll(&key->partsupfunc[j], + key->partcollation[j], + cur->datums[j], + prev->datums[j]); + if (DatumGetInt32(cmpval) != 0) + { + is_distinct = true; + break; + } + } + + /* + * Count the current bound if it is distinct from the previous + * one. Also, store if the index i contains a distinct bound + * that we'd like put in the relcache array. + */ + if (is_distinct) + { + distinct_indexes[i] = true; + ndatums++; + } + else + distinct_indexes[i] = false; + + prev = cur; + } + + /* + * Finally save them in an array from where they will be copied + * into the relcache. + */ + rbounds = (PartitionRangeBound **) palloc(ndatums * + sizeof(PartitionRangeBound *)); + k = 0; + for (i = 0; i < 2 * nparts; i++) + { + if (distinct_indexes[i]) + rbounds[k++] = all_bounds[i]; + } + Assert(k == ndatums); + } + else + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + /* Now build the actual relcache partition descriptor */ + rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext, + RelationGetRelationName(rel), + ALLOCSET_DEFAULT_SIZES); + oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt); + + result = (PartitionDescData *) palloc0(sizeof(PartitionDescData)); + result->nparts = nparts; + if (nparts > 0) + { + PartitionBoundInfo boundinfo; + int *mapping; + int next_index = 0; + + result->oids = (Oid *) palloc0(nparts * sizeof(Oid)); + + boundinfo = (PartitionBoundInfoData *) + palloc0(sizeof(PartitionBoundInfoData)); + boundinfo->strategy = key->strategy; + boundinfo->ndatums = ndatums; + boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); + + /* Initialize mapping array with invalid values */ + mapping = (int *) palloc(sizeof(int) * nparts); + for (i = 0; i < nparts; i++) + mapping[i] = -1; + + switch (key->strategy) + { + case PARTITION_STRATEGY_LIST: + { + boundinfo->has_null = found_null; + boundinfo->indexes = (int *) palloc(ndatums * sizeof(int)); + + /* + * Copy values. Indexes of individual values are mapped + * to canonical values so that they match for any two list + * partitioned tables with same number of partitions and + * same lists per partition. One way to canonicalize is + * to assign the index in all_values[] of the smallest + * value of each partition, as the index of all of the + * partition's values. + */ + for (i = 0; i < ndatums; i++) + { + boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum)); + boundinfo->datums[i][0] = datumCopy(all_values[i]->value, + key->parttypbyval[0], + key->parttyplen[0]); + + /* If the old index has no mapping, assign one */ + if (mapping[all_values[i]->index] == -1) + mapping[all_values[i]->index] = next_index++; + + boundinfo->indexes[i] = mapping[all_values[i]->index]; + } + + /* + * If null-accepting partition has no mapped index yet, + * assign one. This could happen if such partition + * accepts only null and hence not covered in the above + * loop which only handled non-null values. + */ + if (found_null) + { + Assert(null_index >= 0); + if (mapping[null_index] == -1) + mapping[null_index] = next_index++; + } + + /* All partition must now have a valid mapping */ + Assert(next_index == nparts); + + if (found_null) + boundinfo->null_index = mapping[null_index]; + else + boundinfo->null_index = -1; + break; + } + + case PARTITION_STRATEGY_RANGE: + { + boundinfo->content = (RangeDatumContent **) palloc(ndatums * + sizeof(RangeDatumContent *)); + boundinfo->indexes = (int *) palloc((ndatums + 1) * + sizeof(int)); + + for (i = 0; i < ndatums; i++) + { + int j; + + boundinfo->datums[i] = (Datum *) palloc(key->partnatts * + sizeof(Datum)); + boundinfo->content[i] = (RangeDatumContent *) + palloc(key->partnatts * + sizeof(RangeDatumContent)); + for (j = 0; j < key->partnatts; j++) + { + if (rbounds[i]->content[j] == RANGE_DATUM_FINITE) + boundinfo->datums[i][j] = + datumCopy(rbounds[i]->datums[j], + key->parttypbyval[j], + key->parttyplen[j]); + /* Remember, we are storing the tri-state value. */ + boundinfo->content[i][j] = rbounds[i]->content[j]; + } + + /* + * There is no mapping for invalid indexes. + * + * Any lower bounds in the rbounds array have invalid + * indexes assigned, because the values between the + * previous bound (if there is one) and this (lower) + * bound are not part of the range of any existing + * partition. + */ + if (rbounds[i]->lower) + boundinfo->indexes[i] = -1; + else + { + int orig_index = rbounds[i]->index; + + /* If the old index is has no mapping, assign one */ + if (mapping[orig_index] == -1) + mapping[orig_index] = next_index++; + + boundinfo->indexes[i] = mapping[orig_index]; + } + } + boundinfo->indexes[i] = -1; + break; + } + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + result->boundinfo = boundinfo; + + /* + * Now assign OIDs from the original array into mapped indexes of the + * result array. Order of OIDs in the former is defined by the + * catalog scan that retrived them, whereas that in the latter is + * defined by canonicalized representation of the list values or the + * range bounds. + */ + for (i = 0; i < nparts; i++) + result->oids[mapping[i]] = oids[i]; + pfree(mapping); + } + + MemoryContextSwitchTo(oldcxt); + rel->rd_partdesc = result; +} + +/* + * Are two partition bound collections logically equal? + * + * Used in the keep logic of relcache.c (ie, in RelationClearRelation()). + * This is also useful when b1 and b2 are bound collections of two separate + * relations, respectively, because PartitionBoundInfo is a canonical + * representation of partition bounds. + */ +bool +partition_bounds_equal(PartitionKey key, + PartitionBoundInfo b1, PartitionBoundInfo b2) +{ + int i; + + if (b1->strategy != b2->strategy) + return false; + + if (b1->ndatums != b2->ndatums) + return false; + + if (b1->has_null != b2->has_null) + return false; + + if (b1->null_index != b2->null_index) + return false; + + for (i = 0; i < b1->ndatums; i++) + { + int j; + + for (j = 0; j < key->partnatts; j++) + { + int32 cmpval; + + cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[j], + key->partcollation[j], + b1->datums[i][j], + b2->datums[i][j])); + if (cmpval != 0) + return false; + + /* Range partitions can have infinite datums */ + if (b1->content != NULL && b1->content[i][j] != b2->content[i][j]) + return false; + } + + if (b1->indexes[i] != b2->indexes[i]) + return false; + } + + /* There are ndatums+1 indexes in case of range partitions */ + if (key->strategy == PARTITION_STRATEGY_RANGE && + b1->indexes[i] != b2->indexes[i]) + return false; + + return true; +} + +/* + * check_new_partition_bound + * + * Checks if the new partition's bound overlaps any of the existing partitions + * of parent. Also performs additional checks as necessary per strategy. + */ +void +check_new_partition_bound(char *relname, Relation parent, Node *bound) +{ + PartitionBoundSpec *spec = (PartitionBoundSpec *) bound; + PartitionKey key = RelationGetPartitionKey(parent); + PartitionDesc partdesc = RelationGetPartitionDesc(parent); + ParseState *pstate = make_parsestate(NULL); + int with = -1; + bool overlap = false; + + switch (key->strategy) + { + case PARTITION_STRATEGY_LIST: + { + Assert(spec->strategy == PARTITION_STRATEGY_LIST); + + if (partdesc->nparts > 0) + { + PartitionBoundInfo boundinfo = partdesc->boundinfo; + ListCell *cell; + + Assert(boundinfo && + boundinfo->strategy == PARTITION_STRATEGY_LIST && + (boundinfo->ndatums > 0 || boundinfo->has_null)); + + foreach(cell, spec->listdatums) + { + Const *val = lfirst(cell); + + if (!val->constisnull) + { + int offset; + bool equal; + + offset = partition_bound_bsearch(key, boundinfo, + &val->constvalue, + true, &equal); + if (offset >= 0 && equal) + { + overlap = true; + with = boundinfo->indexes[offset]; + break; + } + } + else if (boundinfo->has_null) + { + overlap = true; + with = boundinfo->null_index; + break; + } + } + } + + break; + } + + case PARTITION_STRATEGY_RANGE: + { + PartitionRangeBound *lower, + *upper; + + Assert(spec->strategy == PARTITION_STRATEGY_RANGE); + lower = make_one_range_bound(key, -1, spec->lowerdatums, true); + upper = make_one_range_bound(key, -1, spec->upperdatums, false); + + /* + * First check if the resulting range would be empty with + * specified lower and upper bounds + */ + if (partition_rbound_cmp(key, lower->datums, lower->content, true, + upper) >= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot create range partition with empty range"), + parser_errposition(pstate, spec->location))); + + if (partdesc->nparts > 0) + { + PartitionBoundInfo boundinfo = partdesc->boundinfo; + int off1, + off2; + bool equal = false; + + Assert(boundinfo && boundinfo->ndatums > 0 && + boundinfo->strategy == PARTITION_STRATEGY_RANGE); + + /* + * Find the greatest index of a range bound that is less + * than or equal with the new lower bound. + */ + off1 = partition_bound_bsearch(key, boundinfo, lower, true, + &equal); + + /* + * If equal has been set to true, that means the new lower + * bound is found to be equal with the bound at off1, + * which clearly means an overlap with the partition at + * index off1+1). + * + * Otherwise, check if there is a "gap" that could be + * occupied by the new partition. In case of a gap, the + * new upper bound should not cross past the upper + * boundary of the gap, that is, off2 == off1 should be + * true. + */ + if (!equal && boundinfo->indexes[off1 + 1] < 0) + { + off2 = partition_bound_bsearch(key, boundinfo, upper, + true, &equal); + + if (equal || off1 != off2) + { + overlap = true; + with = boundinfo->indexes[off2 + 1]; + } + } + else + { + overlap = true; + with = boundinfo->indexes[off1 + 1]; + } + } + + break; + } + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + if (overlap) + { + Assert(with >= 0); + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("partition \"%s\" would overlap partition \"%s\"", + relname, get_rel_name(partdesc->oids[with])), + parser_errposition(pstate, spec->location))); + } +} + +/* + * get_partition_parent + * + * Returns inheritance parent of a partition by scanning pg_inherits + * + * Note: Because this function assumes that the relation whose OID is passed + * as an argument will have precisely one parent, it should only be called + * when it is known that the relation is a partition. + */ +Oid +get_partition_parent(Oid relid) +{ + Form_pg_inherits form; + Relation catalogRelation; + SysScanDesc scan; + ScanKeyData key[2]; + HeapTuple tuple; + Oid result; + + catalogRelation = heap_open(InheritsRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_inherits_inhrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + ScanKeyInit(&key[1], + Anum_pg_inherits_inhseqno, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(1)); + + scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true, + NULL, 2, key); + + tuple = systable_getnext(scan); + Assert(HeapTupleIsValid(tuple)); + + form = (Form_pg_inherits) GETSTRUCT(tuple); + result = form->inhparent; + + systable_endscan(scan); + heap_close(catalogRelation, AccessShareLock); + + return result; +} + +/* + * get_qual_from_partbound + * Given a parser node for partition bound, return the list of executable + * expressions as partition constraint + */ +List * +get_qual_from_partbound(Relation rel, Relation parent, Node *bound) +{ + PartitionBoundSpec *spec = (PartitionBoundSpec *) bound; + PartitionKey key = RelationGetPartitionKey(parent); + List *my_qual = NIL; + TupleDesc parent_tupdesc = RelationGetDescr(parent); + AttrNumber parent_attno; + AttrNumber *partition_attnos; + bool found_whole_row; + + Assert(key != NULL); + + switch (key->strategy) + { + case PARTITION_STRATEGY_LIST: + Assert(spec->strategy == PARTITION_STRATEGY_LIST); + my_qual = get_qual_for_list(key, spec); + break; + + case PARTITION_STRATEGY_RANGE: + Assert(spec->strategy == PARTITION_STRATEGY_RANGE); + my_qual = get_qual_for_range(key, spec); + break; + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + /* + * Translate vars in the generated expression to have correct attnos. Note + * that the vars in my_qual bear attnos dictated by key which carries + * physical attnos of the parent. We must allow for a case where physical + * attnos of a partition can be different from the parent. + */ + partition_attnos = (AttrNumber *) + palloc0(parent_tupdesc->natts * sizeof(AttrNumber)); + for (parent_attno = 1; parent_attno <= parent_tupdesc->natts; + parent_attno++) + { + Form_pg_attribute attribute = parent_tupdesc->attrs[parent_attno - 1]; + char *attname = NameStr(attribute->attname); + AttrNumber partition_attno; + + if (attribute->attisdropped) + continue; + + partition_attno = get_attnum(RelationGetRelid(rel), attname); + partition_attnos[parent_attno - 1] = partition_attno; + } + + my_qual = (List *) map_variable_attnos((Node *) my_qual, + 1, 0, + partition_attnos, + parent_tupdesc->natts, + &found_whole_row); + /* there can never be a whole-row reference here */ + if (found_whole_row) + elog(ERROR, "unexpected whole-row reference found in partition key"); + + return my_qual; +} + +/* + * RelationGetPartitionQual + * + * Returns a list of partition quals + */ +List * +RelationGetPartitionQual(Relation rel, bool recurse) +{ + /* Quick exit */ + if (!rel->rd_rel->relispartition) + return NIL; + + return generate_partition_qual(rel, recurse); +} + +/* Turn an array of OIDs with N elements into a list */ +#define OID_ARRAY_TO_LIST(arr, N, list) \ + do\ + {\ + int i;\ + for (i = 0; i < (N); i++)\ + (list) = lappend_oid((list), (arr)[i]);\ + } while(0) + +/* + * RelationGetPartitionDispatchInfo + * Returns information necessary to route tuples down a partition tree + * + * All the partitions will be locked with lockmode, unless it is NoLock. + * A list of the OIDs of all the leaf partition of rel is returned in + * *leaf_part_oids. + */ +PartitionDispatch * +RelationGetPartitionDispatchInfo(Relation rel, int lockmode, + int *num_parted, List **leaf_part_oids) +{ + PartitionDesc rootpartdesc = RelationGetPartitionDesc(rel); + PartitionDispatchData **pd; + List *all_parts = NIL, + *parted_rels; + ListCell *lc; + int i, + k; + + /* + * Lock partitions and make a list of the partitioned ones to prepare + * their PartitionDispatch objects below. + * + * Cannot use find_all_inheritors() here, because then the order of OIDs + * in parted_rels list would be unknown, which does not help, because we + * we assign indexes within individual PartitionDispatch in an order that + * is predetermined (determined by the order of OIDs in individual + * partition descriptors). + */ + *num_parted = 1; + parted_rels = list_make1(rel); + OID_ARRAY_TO_LIST(rootpartdesc->oids, rootpartdesc->nparts, all_parts); + foreach(lc, all_parts) + { + Relation partrel = heap_open(lfirst_oid(lc), lockmode); + PartitionDesc partdesc = RelationGetPartitionDesc(partrel); + + /* + * If this partition is a partitioned table, add its children to the + * end of the list, so that they are processed as well. + */ + if (partdesc) + { + (*num_parted)++; + parted_rels = lappend(parted_rels, partrel); + OID_ARRAY_TO_LIST(partdesc->oids, partdesc->nparts, all_parts); + } + else + heap_close(partrel, NoLock); + + /* + * We keep the partitioned ones open until we're done using the + * information being collected here (for example, see + * ExecEndModifyTable). + */ + } + + /* Generate PartitionDispatch objects for all partitioned tables */ + pd = (PartitionDispatchData **) palloc(*num_parted * + sizeof(PartitionDispatchData *)); + *leaf_part_oids = NIL; + i = k = 0; + foreach(lc, parted_rels) + { + Relation partrel = lfirst(lc); + PartitionKey partkey = RelationGetPartitionKey(partrel); + PartitionDesc partdesc = RelationGetPartitionDesc(partrel); + int j, + m; + + pd[i] = (PartitionDispatch) palloc(sizeof(PartitionDispatchData)); + pd[i]->reldesc = partrel; + pd[i]->key = partkey; + pd[i]->keystate = NIL; + pd[i]->partdesc = partdesc; + pd[i]->indexes = (int *) palloc(partdesc->nparts * sizeof(int)); + + m = 0; + for (j = 0; j < partdesc->nparts; j++) + { + Oid partrelid = partdesc->oids[j]; + + if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE) + { + *leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid); + pd[i]->indexes[j] = k++; + } + else + { + /* + * We can assign indexes this way because of the way + * parted_rels has been generated. + */ + pd[i]->indexes[j] = -(i + 1 + m); + m++; + } + } + i++; + } + + return pd; +} + +/* Module-local functions */ + +/* + * get_qual_for_list + * + * Returns a list of expressions to use as a list partition's constraint. + */ +static List * +get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec) +{ + List *result; + ArrayExpr *arr; + ScalarArrayOpExpr *opexpr; + ListCell *cell, + *prev, + *next; + Node *keyCol; + Oid operoid; + bool need_relabel, + list_has_null = false; + NullTest *nulltest1 = NULL, + *nulltest2 = NULL; + + /* Left operand is either a simple Var or arbitrary expression */ + if (key->partattrs[0] != 0) + keyCol = (Node *) makeVar(1, + key->partattrs[0], + key->parttypid[0], + key->parttypmod[0], + key->parttypcoll[0], + 0); + else + keyCol = (Node *) copyObject(linitial(key->partexprs)); + + /* + * We must remove any NULL value in the list; we handle it separately + * below. + */ + prev = NULL; + for (cell = list_head(spec->listdatums); cell; cell = next) + { + Const *val = (Const *) lfirst(cell); + + next = lnext(cell); + + if (val->constisnull) + { + list_has_null = true; + spec->listdatums = list_delete_cell(spec->listdatums, + cell, prev); + } + else + prev = cell; + } + + if (!list_has_null) + { + /* + * Gin up a col IS NOT NULL test that will be AND'd with other + * expressions + */ + nulltest1 = makeNode(NullTest); + nulltest1->arg = (Expr *) keyCol; + nulltest1->nulltesttype = IS_NOT_NULL; + nulltest1->argisrow = false; + nulltest1->location = -1; + } + else + { + /* + * Gin up a col IS NULL test that will be OR'd with other expressions + */ + nulltest2 = makeNode(NullTest); + nulltest2->arg = (Expr *) keyCol; + nulltest2->nulltesttype = IS_NULL; + nulltest2->argisrow = false; + nulltest2->location = -1; + } + + /* Right operand is an ArrayExpr containing this partition's values */ + arr = makeNode(ArrayExpr); + arr->array_typeid = !type_is_array(key->parttypid[0]) + ? get_array_type(key->parttypid[0]) + : key->parttypid[0]; + arr->array_collid = key->parttypcoll[0]; + arr->element_typeid = key->parttypid[0]; + arr->elements = spec->listdatums; + arr->multidims = false; + arr->location = -1; + + /* Get the correct btree equality operator */ + operoid = get_partition_operator(key, 0, BTEqualStrategyNumber, + &need_relabel); + if (need_relabel || key->partcollation[0] != key->parttypcoll[0]) + keyCol = (Node *) makeRelabelType((Expr *) keyCol, + key->partopcintype[0], + -1, + key->partcollation[0], + COERCE_EXPLICIT_CAST); + + /* Build leftop = ANY (rightop) */ + opexpr = makeNode(ScalarArrayOpExpr); + opexpr->opno = operoid; + opexpr->opfuncid = get_opcode(operoid); + opexpr->useOr = true; + opexpr->inputcollid = key->partcollation[0]; + opexpr->args = list_make2(keyCol, arr); + opexpr->location = -1; + + if (nulltest1) + result = list_make2(nulltest1, opexpr); + else if (nulltest2) + { + Expr *or; + + or = makeBoolExpr(OR_EXPR, list_make2(nulltest2, opexpr), -1); + result = list_make1(or); + } + else + result = list_make1(opexpr); + + return result; +} + +/* + * get_qual_for_range + * + * Get a list of OpExpr's to use as a range partition's constraint. + */ +static List * +get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec) +{ + List *result = NIL; + ListCell *cell1, + *cell2, + *partexprs_item; + int i; + + /* + * Iterate over columns of the key, emitting an OpExpr for each using the + * corresponding lower and upper datums as constant operands. + */ + i = 0; + partexprs_item = list_head(key->partexprs); + forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums) + { + PartitionRangeDatum *ldatum = lfirst(cell1), + *udatum = lfirst(cell2); + Node *keyCol; + Const *lower_val = NULL, + *upper_val = NULL; + EState *estate; + MemoryContext oldcxt; + Expr *test_expr; + ExprState *test_exprstate; + Datum test_result; + bool isNull; + bool need_relabel = false; + Oid operoid; + NullTest *nulltest; + + /* Left operand */ + if (key->partattrs[i] != 0) + { + keyCol = (Node *) makeVar(1, + key->partattrs[i], + key->parttypid[i], + key->parttypmod[i], + key->parttypcoll[i], + 0); + } + else + { + keyCol = (Node *) copyObject(lfirst(partexprs_item)); + partexprs_item = lnext(partexprs_item); + } + + /* + * Emit a IS NOT NULL expression for non-Var keys, because whereas + * simple attributes are covered by NOT NULL constraints, expression + * keys are still nullable which is not acceptable in case of range + * partitioning. + */ + if (!IsA(keyCol, Var)) + { + nulltest = makeNode(NullTest); + nulltest->arg = (Expr *) keyCol; + nulltest->nulltesttype = IS_NOT_NULL; + nulltest->argisrow = false; + nulltest->location = -1; + result = lappend(result, nulltest); + } + + /* + * Stop at this column if either of lower or upper datum is infinite, + * but do emit an OpExpr for the non-infinite datum. + */ + if (!ldatum->infinite) + lower_val = (Const *) ldatum->value; + if (!udatum->infinite) + upper_val = (Const *) udatum->value; + + /* + * If lower_val and upper_val are both finite and happen to be equal, + * emit only (keyCol = lower_val) for this column, because all rows in + * this partition could only ever contain this value (ie, lower_val) + * in the current partitioning column. We must consider further + * columns because the above condition does not fully constrain the + * rows of this partition. + */ + if (lower_val && upper_val) + { + /* Get the correct btree equality operator for the test */ + operoid = get_partition_operator(key, i, BTEqualStrategyNumber, + &need_relabel); + + /* Create the test expression */ + estate = CreateExecutorState(); + oldcxt = MemoryContextSwitchTo(estate->es_query_cxt); + test_expr = make_opclause(operoid, + BOOLOID, + false, + (Expr *) lower_val, + (Expr *) upper_val, + InvalidOid, + key->partcollation[i]); + fix_opfuncids((Node *) test_expr); + test_exprstate = ExecInitExpr(test_expr, NULL); + test_result = ExecEvalExprSwitchContext(test_exprstate, + GetPerTupleExprContext(estate), + &isNull, NULL); + MemoryContextSwitchTo(oldcxt); + FreeExecutorState(estate); + + if (DatumGetBool(test_result)) + { + /* This can never be, but it's better to make sure */ + if (i == key->partnatts - 1) + elog(ERROR, "invalid range bound specification"); + + if (need_relabel || key->partcollation[i] != key->parttypcoll[i]) + keyCol = (Node *) makeRelabelType((Expr *) keyCol, + key->partopcintype[i], + -1, + key->partcollation[i], + COERCE_EXPLICIT_CAST); + result = lappend(result, + make_opclause(operoid, + BOOLOID, + false, + (Expr *) keyCol, + (Expr *) lower_val, + InvalidOid, + key->partcollation[i])); + + /* Go over to consider the next column. */ + i++; + continue; + } + } + + /* + * We can say here that lower_val != upper_val. Emit expressions + * (keyCol >= lower_val) and (keyCol < upper_val), then stop. + */ + if (lower_val) + { + operoid = get_partition_operator(key, i, + BTGreaterEqualStrategyNumber, + &need_relabel); + + if (need_relabel || key->partcollation[i] != key->parttypcoll[i]) + keyCol = (Node *) makeRelabelType((Expr *) keyCol, + key->partopcintype[i], + -1, + key->partcollation[i], + COERCE_EXPLICIT_CAST); + result = lappend(result, + make_opclause(operoid, + BOOLOID, + false, + (Expr *) keyCol, + (Expr *) lower_val, + InvalidOid, + key->partcollation[i])); + } + + if (upper_val) + { + operoid = get_partition_operator(key, i, + BTLessStrategyNumber, + &need_relabel); + + if (need_relabel || key->partcollation[i] != key->parttypcoll[i]) + keyCol = (Node *) makeRelabelType((Expr *) keyCol, + key->partopcintype[i], + -1, + key->partcollation[i], + COERCE_EXPLICIT_CAST); + + result = lappend(result, + make_opclause(operoid, + BOOLOID, + false, + (Expr *) keyCol, + (Expr *) upper_val, + InvalidOid, + key->partcollation[i])); + } + + /* + * We can stop at this column, because we would not have checked the + * next column when routing a given row into this partition. + */ + break; + } + + return result; +} + +/* + * get_partition_operator + * + * Return oid of the operator of given strategy for a given partition key + * column. + */ +static Oid +get_partition_operator(PartitionKey key, int col, StrategyNumber strategy, + bool *need_relabel) +{ + Oid operoid; + + /* + * First check if there exists an operator of the given strategy, with + * this column's type as both its lefttype and righttype, in the + * partitioning operator family specified for the column. + */ + operoid = get_opfamily_member(key->partopfamily[col], + key->parttypid[col], + key->parttypid[col], + strategy); + + /* + * If one doesn't exist, we must resort to using an operator in the same + * opreator family but with the operator class declared input type. It is + * OK to do so, because the column's type is known to be binary-coercible + * with the operator class input type (otherwise, the operator class in + * question would not have been accepted as the partitioning operator + * class). We must however inform the caller to wrap the non-Const + * expression with a RelabelType node to denote the implicit coercion. It + * ensures that the resulting expression structurally matches similarly + * processed expressions within the optimizer. + */ + if (!OidIsValid(operoid)) + { + operoid = get_opfamily_member(key->partopfamily[col], + key->partopcintype[col], + key->partopcintype[col], + strategy); + *need_relabel = true; + } + else + *need_relabel = false; + + if (!OidIsValid(operoid)) + elog(ERROR, "could not find operator for partitioning"); + + return operoid; +} + +/* + * generate_partition_qual + * + * Generate partition predicate from rel's partition bound expression + * + * Result expression tree is stored CacheMemoryContext to ensure it survives + * as long as the relcache entry. But we should be running in a less long-lived + * working context. To avoid leaking cache memory if this routine fails partway + * through, we build in working memory and then copy the completed structure + * into cache memory. + */ +static List * +generate_partition_qual(Relation rel, bool recurse) +{ + HeapTuple tuple; + MemoryContext oldcxt; + Datum boundDatum; + bool isnull; + Node *bound; + List *my_qual = NIL, + *result = NIL; + Relation parent; + + /* Guard against stack overflow due to overly deep partition tree */ + check_stack_depth(); + + /* Grab at least an AccessShareLock on the parent table */ + parent = heap_open(get_partition_parent(RelationGetRelid(rel)), + AccessShareLock); + + /* Quick copy */ + if (rel->rd_partcheck) + { + if (parent->rd_rel->relispartition && recurse) + result = list_concat(generate_partition_qual(parent, true), + copyObject(rel->rd_partcheck)); + else + result = copyObject(rel->rd_partcheck); + + heap_close(parent, AccessShareLock); + return result; + } + + /* Get pg_class.relpartbound */ + if (!rel->rd_rel->relispartition) /* should not happen */ + elog(ERROR, "relation \"%s\" has relispartition = false", + RelationGetRelationName(rel)); + tuple = SearchSysCache1(RELOID, RelationGetRelid(rel)); + boundDatum = SysCacheGetAttr(RELOID, tuple, + Anum_pg_class_relpartbound, + &isnull); + if (isnull) /* should not happen */ + elog(ERROR, "relation \"%s\" has relpartbound = null", + RelationGetRelationName(rel)); + bound = stringToNode(TextDatumGetCString(boundDatum)); + ReleaseSysCache(tuple); + + my_qual = get_qual_from_partbound(rel, parent, bound); + + /* If requested, add parent's quals to the list (if any) */ + if (parent->rd_rel->relispartition && recurse) + { + List *parent_check; + + parent_check = generate_partition_qual(parent, true); + result = list_concat(parent_check, my_qual); + } + else + result = my_qual; + + /* Save a copy of my_qual in the relcache */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + rel->rd_partcheck = copyObject(my_qual); + MemoryContextSwitchTo(oldcxt); + + /* Keep the parent locked until commit */ + heap_close(parent, NoLock); + + return result; +} + +/* ---------------- + * FormPartitionKeyDatum + * Construct values[] and isnull[] arrays for the partition key + * of a tuple. + * + * pkinfo partition key execution info + * slot Heap tuple from which to extract partition key + * estate executor state for evaluating any partition key + * expressions (must be non-NULL) + * values Array of partition key Datums (output area) + * isnull Array of is-null indicators (output area) + * + * the ecxt_scantuple slot of estate's per-tuple expr context must point to + * the heap tuple passed in. + * ---------------- + */ +static void +FormPartitionKeyDatum(PartitionDispatch pd, + TupleTableSlot *slot, + EState *estate, + Datum *values, + bool *isnull) +{ + ListCell *partexpr_item; + int i; + + if (pd->key->partexprs != NIL && pd->keystate == NIL) + { + /* Check caller has set up context correctly */ + Assert(estate != NULL && + GetPerTupleExprContext(estate)->ecxt_scantuple == slot); + + /* First time through, set up expression evaluation state */ + pd->keystate = (List *) ExecPrepareExpr((Expr *) pd->key->partexprs, + estate); + } + + partexpr_item = list_head(pd->keystate); + for (i = 0; i < pd->key->partnatts; i++) + { + AttrNumber keycol = pd->key->partattrs[i]; + Datum datum; + bool isNull; + + if (keycol != 0) + { + /* Plain column; get the value directly from the heap tuple */ + datum = slot_getattr(slot, keycol, &isNull); + } + else + { + /* Expression; need to evaluate it */ + if (partexpr_item == NULL) + elog(ERROR, "wrong number of partition key expressions"); + datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item), + GetPerTupleExprContext(estate), + &isNull, + NULL); + partexpr_item = lnext(partexpr_item); + } + values[i] = datum; + isnull[i] = isNull; + } + + if (partexpr_item != NULL) + elog(ERROR, "wrong number of partition key expressions"); +} + +/* + * get_partition_for_tuple + * Finds a leaf partition for tuple contained in *slot + * + * Returned value is the sequence number of the leaf partition thus found, + * or -1 if no leaf partition is found for the tuple. *failed_at is set + * to the OID of the partitioned table whose partition was not found in + * the latter case. + */ +int +get_partition_for_tuple(PartitionDispatch * pd, + TupleTableSlot *slot, + EState *estate, + Oid *failed_at) +{ + PartitionDispatch parent; + Datum values[PARTITION_MAX_KEYS]; + bool isnull[PARTITION_MAX_KEYS]; + int cur_offset, + cur_index; + int i; + + /* start with the root partitioned table */ + parent = pd[0]; + while (true) + { + PartitionKey key = parent->key; + PartitionDesc partdesc = parent->partdesc; + + /* Quick exit */ + if (partdesc->nparts == 0) + { + *failed_at = RelationGetRelid(parent->reldesc); + return -1; + } + + /* Extract partition key from tuple */ + FormPartitionKeyDatum(parent, slot, estate, values, isnull); + + if (key->strategy == PARTITION_STRATEGY_RANGE) + { + /* Disallow nulls in the range partition key of the tuple */ + for (i = 0; i < key->partnatts; i++) + if (isnull[i]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("range partition key of row contains null"))); + } + + if (partdesc->boundinfo->has_null && isnull[0]) + /* Tuple maps to the null-accepting list partition */ + cur_index = partdesc->boundinfo->null_index; + else + { + /* Else bsearch in partdesc->boundinfo */ + bool equal = false; + + cur_offset = partition_bound_bsearch(key, partdesc->boundinfo, + values, false, &equal); + switch (key->strategy) + { + case PARTITION_STRATEGY_LIST: + if (cur_offset >= 0 && equal) + cur_index = partdesc->boundinfo->indexes[cur_offset]; + else + cur_index = -1; + break; + + case PARTITION_STRATEGY_RANGE: + + /* + * Offset returned is such that the bound at offset is + * found to be less or equal with the tuple. So, the bound + * at offset+1 would be the upper bound. + */ + cur_index = partdesc->boundinfo->indexes[cur_offset + 1]; + break; + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + } + + /* + * cur_index < 0 means we failed to find a partition of this parent. + * cur_index >= 0 means we either found the leaf partition, or the + * next parent to find a partition of. + */ + if (cur_index < 0) + { + *failed_at = RelationGetRelid(parent->reldesc); + return -1; + } + else if (parent->indexes[cur_index] < 0) + parent = pd[-parent->indexes[cur_index]]; + else + break; + } + + return parent->indexes[cur_index]; +} + +/* + * qsort_partition_list_value_cmp + * + * Compare two list partition bound datums + */ +static int32 +qsort_partition_list_value_cmp(const void *a, const void *b, void *arg) +{ + Datum val1 = (*(const PartitionListValue **) a)->value, + val2 = (*(const PartitionListValue **) b)->value; + PartitionKey key = (PartitionKey) arg; + + return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0], + key->partcollation[0], + val1, val2)); +} + +/* + * make_one_range_bound + * + * Return a PartitionRangeBound given a list of PartitionRangeDatum elements + * and a flag telling whether the bound is lower or not. Made into a function + * because there are multiple sites that want to use this facility. + */ +static PartitionRangeBound * +make_one_range_bound(PartitionKey key, int index, List *datums, bool lower) +{ + PartitionRangeBound *bound; + ListCell *cell; + int i; + + bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound)); + bound->index = index; + bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum)); + bound->content = (RangeDatumContent *) palloc0(key->partnatts * + sizeof(RangeDatumContent)); + bound->lower = lower; + + i = 0; + foreach(cell, datums) + { + PartitionRangeDatum *datum = lfirst(cell); + + /* What's contained in this range datum? */ + bound->content[i] = !datum->infinite + ? RANGE_DATUM_FINITE + : (lower ? RANGE_DATUM_NEG_INF + : RANGE_DATUM_POS_INF); + + if (bound->content[i] == RANGE_DATUM_FINITE) + { + Const *val = (Const *) datum->value; + + if (val->constisnull) + elog(ERROR, "invalid range bound datum"); + bound->datums[i] = val->constvalue; + } + + i++; + } + + return bound; +} + +/* Used when sorting range bounds across all range partitions */ +static int32 +qsort_partition_rbound_cmp(const void *a, const void *b, void *arg) +{ + PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a); + PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b); + PartitionKey key = (PartitionKey) arg; + + return partition_rbound_cmp(key, b1->datums, b1->content, b1->lower, b2); +} + +/* + * partition_rbound_cmp + * + * Return for two range bounds whether the 1st one (specified in datum1, + * content1, and lower1) is <=, =, >= the bound specified in *b2 + */ +static int32 +partition_rbound_cmp(PartitionKey key, + Datum *datums1, RangeDatumContent *content1, bool lower1, + PartitionRangeBound *b2) +{ + int32 cmpval; + int i; + Datum *datums2 = b2->datums; + RangeDatumContent *content2 = b2->content; + bool lower2 = b2->lower; + + for (i = 0; i < key->partnatts; i++) + { + /* + * First, handle cases involving infinity, which don't require + * invoking the comparison proc. + */ + if (content1[i] != RANGE_DATUM_FINITE && + content2[i] != RANGE_DATUM_FINITE) + + /* + * Both are infinity, so they are equal unless one is negative + * infinity and other positive (or vice versa) + */ + return content1[i] == content2[i] ? 0 + : (content1[i] < content2[i] ? -1 : 1); + else if (content1[i] != RANGE_DATUM_FINITE) + return content1[i] == RANGE_DATUM_NEG_INF ? -1 : 1; + else if (content2[i] != RANGE_DATUM_FINITE) + return content2[i] == RANGE_DATUM_NEG_INF ? 1 : -1; + + cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i], + key->partcollation[i], + datums1[i], + datums2[i])); + if (cmpval != 0) + break; + } + + /* + * If the comparison is anything other than equal, we're done. If they + * compare equal though, we still have to consider whether the boundaries + * are inclusive or exclusive. Exclusive one is considered smaller of the + * two. + */ + if (cmpval == 0 && lower1 != lower2) + cmpval = lower1 ? 1 : -1; + + return cmpval; +} + +/* + * partition_rbound_datum_cmp + * + * Return whether range bound (specified in rb_datums, rb_content, and + * rb_lower) <=, =, >= partition key of tuple (tuple_datums) + */ +static int32 +partition_rbound_datum_cmp(PartitionKey key, + Datum *rb_datums, RangeDatumContent *rb_content, + Datum *tuple_datums) +{ + int i; + int32 cmpval = -1; + + for (i = 0; i < key->partnatts; i++) + { + if (rb_content[i] != RANGE_DATUM_FINITE) + return rb_content[i] == RANGE_DATUM_NEG_INF ? -1 : 1; + + cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i], + key->partcollation[i], + rb_datums[i], + tuple_datums[i])); + if (cmpval != 0) + break; + } + + return cmpval; +} + +/* + * partition_bound_cmp + * + * Return whether the bound at offset in boundinfo is <=, =, >= the argument + * specified in *probe. + */ +static int32 +partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo, + int offset, void *probe, bool probe_is_bound) +{ + Datum *bound_datums = boundinfo->datums[offset]; + int32 cmpval = -1; + + switch (key->strategy) + { + case PARTITION_STRATEGY_LIST: + cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0], + key->partcollation[0], + bound_datums[0], + *(Datum *) probe)); + break; + + case PARTITION_STRATEGY_RANGE: + { + RangeDatumContent *content = boundinfo->content[offset]; + + if (probe_is_bound) + { + /* + * We need to pass whether the existing bound is a lower + * bound, so that two equal-valued lower and upper bounds + * are not regarded equal. + */ + bool lower = boundinfo->indexes[offset] < 0; + + cmpval = partition_rbound_cmp(key, + bound_datums, content, lower, + (PartitionRangeBound *) probe); + } + else + cmpval = partition_rbound_datum_cmp(key, + bound_datums, content, + (Datum *) probe); + break; + } + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + return cmpval; +} + +/* + * Binary search on a collection of partition bounds. Returns greatest index + * of bound in array boundinfo->datums which is less or equal with *probe. + * If all bounds in the array are greater than *probe, -1 is returned. + * + * *probe could either be a partition bound or a Datum array representing + * the partition key of a tuple being routed; probe_is_bound tells which. + * We pass that down to the comparison function so that it can interpret the + * contents of *probe accordingly. + * + * *is_equal is set to whether the bound at the returned index is equal with + * *probe. + */ +static int +partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo, + void *probe, bool probe_is_bound, bool *is_equal) +{ + int lo, + hi, + mid; + + lo = -1; + hi = boundinfo->ndatums - 1; + while (lo < hi) + { + int32 cmpval; + + mid = (lo + hi + 1) / 2; + cmpval = partition_bound_cmp(key, boundinfo, mid, probe, + probe_is_bound); + if (cmpval <= 0) + { + lo = mid; + *is_equal = (cmpval == 0); + } + else + hi = mid - 1; + } + + return lo; +} diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c index 8fabe6899f6..724b41e64cd 100644 --- a/src/backend/catalog/pg_constraint.c +++ b/src/backend/catalog/pg_constraint.c @@ -368,7 +368,7 @@ CreateConstraintEntry(const char *constraintName, */ recordDependencyOnSingleRelExpr(&conobject, conExpr, relId, DEPENDENCY_NORMAL, - DEPENDENCY_NORMAL); + DEPENDENCY_NORMAL, false); } /* Post creation hook for new constraint */ diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index c617abb223b..f4afcd9aae1 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -201,7 +201,8 @@ analyze_rel(Oid relid, RangeVar *relation, int options, * locked the relation. */ if (onerel->rd_rel->relkind == RELKIND_RELATION || - onerel->rd_rel->relkind == RELKIND_MATVIEW) + onerel->rd_rel->relkind == RELKIND_MATVIEW || + onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { /* Regular table, so we'll use the regular row acquisition function */ acquirefunc = acquire_sample_rows; @@ -1317,7 +1318,8 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, /* Check table type (MATVIEW can't happen, but might as well allow) */ if (childrel->rd_rel->relkind == RELKIND_RELATION || - childrel->rd_rel->relkind == RELKIND_MATVIEW) + childrel->rd_rel->relkind == RELKIND_MATVIEW || + childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { /* Regular table, so use the regular row acquisition function */ acquirefunc = acquire_sample_rows; diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index ec5d6f15659..270be0af18e 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -161,6 +161,11 @@ typedef struct CopyStateData ExprState **defexprs; /* array of default att expressions */ bool volatile_defexprs; /* is any of defexprs volatile? */ List *range_table; + PartitionDispatch *partition_dispatch_info; + int num_dispatch; + int num_partitions; + ResultRelInfo *partitions; + TupleConversionMap **partition_tupconv_maps; /* * These variables are used to reduce overhead in textual COPY FROM. @@ -1397,6 +1402,71 @@ BeginCopy(ParseState *pstate, (errcode(ERRCODE_UNDEFINED_COLUMN), errmsg("table \"%s\" does not have OIDs", RelationGetRelationName(cstate->rel)))); + + /* + * Initialize state for CopyFrom tuple routing. Watch out for + * any foreign partitions. + */ + if (is_from && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + PartitionDispatch *pd; + List *leaf_parts; + ListCell *cell; + int i, + num_parted, + num_leaf_parts; + ResultRelInfo *leaf_part_rri; + + /* Get the tuple-routing information and lock partitions */ + pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock, + &num_parted, &leaf_parts); + num_leaf_parts = list_length(leaf_parts); + cstate->partition_dispatch_info = pd; + cstate->num_dispatch = num_parted; + cstate->num_partitions = num_leaf_parts; + cstate->partitions = (ResultRelInfo *) palloc(num_leaf_parts * + sizeof(ResultRelInfo)); + cstate->partition_tupconv_maps = (TupleConversionMap **) + palloc0(num_leaf_parts * sizeof(TupleConversionMap *)); + + leaf_part_rri = cstate->partitions; + i = 0; + foreach(cell, leaf_parts) + { + Relation partrel; + + /* + * We locked all the partitions above including the leaf + * partitions. Note that each of the relations in + * cstate->partitions will be closed by CopyFrom() after + * it's finished with its processing. + */ + partrel = heap_open(lfirst_oid(cell), NoLock); + + /* + * Verify result relation is a valid target for the current + * operation. + */ + CheckValidResultRel(partrel, CMD_INSERT); + + InitResultRelInfo(leaf_part_rri, + partrel, + 1, /* dummy */ + false, /* no partition constraint check */ + 0); + + /* Open partition indices */ + ExecOpenIndices(leaf_part_rri, false); + + if (!equalTupleDescs(tupDesc, RelationGetDescr(partrel))) + cstate->partition_tupconv_maps[i] = + convert_tuples_by_name(tupDesc, + RelationGetDescr(partrel), + gettext_noop("could not convert row type")); + leaf_part_rri++; + i++; + } + } } else { @@ -1751,6 +1821,12 @@ BeginCopyTo(ParseState *pstate, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot copy from sequence \"%s\"", RelationGetRelationName(rel)))); + else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot copy from partitioned table \"%s\"", + RelationGetRelationName(rel)), + errhint("Try the COPY (SELECT ...) TO variant."))); else ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -2249,6 +2325,7 @@ CopyFrom(CopyState cstate) Datum *values; bool *nulls; ResultRelInfo *resultRelInfo; + ResultRelInfo *saved_resultRelInfo = NULL; EState *estate = CreateExecutorState(); /* for ExecConstraints() */ ExprContext *econtext; TupleTableSlot *myslot; @@ -2275,6 +2352,7 @@ CopyFrom(CopyState cstate) * only hint about them in the view case.) */ if (cstate->rel->rd_rel->relkind != RELKIND_RELATION && + cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE && !(cstate->rel->trigdesc && cstate->rel->trigdesc->trig_insert_instead_row)) { @@ -2385,6 +2463,7 @@ CopyFrom(CopyState cstate) InitResultRelInfo(resultRelInfo, cstate->rel, 1, /* dummy rangetable index */ + true, /* do load partition check expression */ 0); ExecOpenIndices(resultRelInfo, false); @@ -2407,11 +2486,13 @@ CopyFrom(CopyState cstate) * BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default * expressions. Such triggers or expressions might query the table we're * inserting to, and act differently if the tuples that have already been - * processed and prepared for insertion are not there. + * processed and prepared for insertion are not there. We also can't + * do it if the table is partitioned. */ if ((resultRelInfo->ri_TrigDesc != NULL && (resultRelInfo->ri_TrigDesc->trig_insert_before_row || resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) || + cstate->partition_dispatch_info != NULL || cstate->volatile_defexprs) { useHeapMultiInsert = false; @@ -2488,6 +2569,59 @@ CopyFrom(CopyState cstate) slot = myslot; ExecStoreTuple(tuple, slot, InvalidBuffer, false); + /* Determine the partition to heap_insert the tuple into */ + if (cstate->partition_dispatch_info) + { + int leaf_part_index; + TupleConversionMap *map; + + /* + * Away we go ... If we end up not finding a partition after all, + * ExecFindPartition() does not return and errors out instead. + * Otherwise, the returned value is to be used as an index into + * arrays mt_partitions[] and mt_partition_tupconv_maps[] that + * will get us the ResultRelInfo and TupleConversionMap for the + * partition, respectively. + */ + leaf_part_index = ExecFindPartition(resultRelInfo, + cstate->partition_dispatch_info, + slot, + estate); + Assert(leaf_part_index >= 0 && + leaf_part_index < cstate->num_partitions); + + /* + * Save the old ResultRelInfo and switch to the one corresponding + * to the selected partition. + */ + saved_resultRelInfo = resultRelInfo; + resultRelInfo = cstate->partitions + leaf_part_index; + + /* We do not yet have a way to insert into a foreign partition */ + if (resultRelInfo->ri_FdwRoutine) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot route inserted tuples to a foreign table"))); + + /* + * For ExecInsertIndexTuples() to work on the partition's indexes + */ + estate->es_result_relation_info = resultRelInfo; + + /* + * We might need to convert from the parent rowtype to the + * partition rowtype. + */ + map = cstate->partition_tupconv_maps[leaf_part_index]; + if (map) + { + tuple = do_convert_tuple(tuple, map); + ExecStoreTuple(tuple, slot, InvalidBuffer, true); + } + + tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc); + } + skip_tuple = false; /* BEFORE ROW INSERT Triggers */ @@ -2513,7 +2647,8 @@ CopyFrom(CopyState cstate) else { /* Check the constraints of the tuple */ - if (cstate->rel->rd_att->constr) + if (cstate->rel->rd_att->constr || + resultRelInfo->ri_PartitionCheck) ExecConstraints(resultRelInfo, slot, estate); if (useHeapMultiInsert) @@ -2546,7 +2681,8 @@ CopyFrom(CopyState cstate) List *recheckIndexes = NIL; /* OK, store the tuple and create index entries for it */ - heap_insert(cstate->rel, tuple, mycid, hi_options, bistate); + heap_insert(resultRelInfo->ri_RelationDesc, tuple, mycid, + hi_options, bistate); if (resultRelInfo->ri_NumIndices > 0) recheckIndexes = ExecInsertIndexTuples(slot, @@ -2570,6 +2706,12 @@ CopyFrom(CopyState cstate) * tuples inserted by an INSERT command. */ processed++; + + if (saved_resultRelInfo) + { + resultRelInfo = saved_resultRelInfo; + estate->es_result_relation_info = resultRelInfo; + } } } @@ -2607,6 +2749,32 @@ CopyFrom(CopyState cstate) ExecCloseIndices(resultRelInfo); + /* Close all the partitioned tables, leaf partitions, and their indices */ + if (cstate->partition_dispatch_info) + { + int i; + + /* + * Remember cstate->partition_dispatch_info[0] corresponds to the root + * partitioned table, which we must not try to close, because it is + * the main target table of COPY that will be closed eventually by + * DoCopy(). + */ + for (i = 1; i < cstate->num_dispatch; i++) + { + PartitionDispatch pd = cstate->partition_dispatch_info[i]; + + heap_close(pd->reldesc, NoLock); + } + for (i = 0; i < cstate->num_partitions; i++) + { + ResultRelInfo *resultRelInfo = cstate->partitions + i; + + ExecCloseIndices(resultRelInfo); + heap_close(resultRelInfo->ri_RelationDesc, NoLock); + } + } + FreeExecutorState(estate); /* diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 5b4f6affcce..d6d52d99295 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -112,7 +112,7 @@ create_ctas_internal(List *attrList, IntoClause *into) * Create the relation. (This will error out if there's an existing view, * so we don't need more code to complain if "replace" is false.) */ - intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL); + intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL); /* * If necessary, create a TOAST table for the target table. Note that diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 85817c65302..eeb2b1fe80d 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -69,8 +69,6 @@ static void ComputeIndexAttrs(IndexInfo *indexInfo, char *accessMethodName, Oid accessMethodId, bool amcanorder, bool isconstraint); -static Oid GetIndexOpClass(List *opclass, Oid attrType, - char *accessMethodName, Oid accessMethodId); static char *ChooseIndexName(const char *tabname, Oid namespaceId, List *colnames, List *exclusionOpNames, bool primary, bool isconstraint); @@ -383,6 +381,11 @@ DefineIndex(Oid relationId, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot create index on foreign table \"%s\"", RelationGetRelationName(rel)))); + else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot create index on partitioned table \"%s\"", + RelationGetRelationName(rel)))); else ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -1145,10 +1148,10 @@ ComputeIndexAttrs(IndexInfo *indexInfo, /* * Identify the opclass to use. */ - classOidP[attn] = GetIndexOpClass(attribute->opclass, - atttype, - accessMethodName, - accessMethodId); + classOidP[attn] = ResolveOpClass(attribute->opclass, + atttype, + accessMethodName, + accessMethodId); /* * Identify the exclusion operator, if any. @@ -1255,10 +1258,13 @@ ComputeIndexAttrs(IndexInfo *indexInfo, /* * Resolve possibly-defaulted operator class specification + * + * Note: This is used to resolve operator class specification in index and + * partition key definitions. */ -static Oid -GetIndexOpClass(List *opclass, Oid attrType, - char *accessMethodName, Oid accessMethodId) +Oid +ResolveOpClass(List *opclass, Oid attrType, + char *accessMethodName, Oid accessMethodId) { char *schemaname; char *opcname; diff --git a/src/backend/commands/lockcmds.c b/src/backend/commands/lockcmds.c index a0c0d75977b..9e62e00b8dc 100644 --- a/src/backend/commands/lockcmds.c +++ b/src/backend/commands/lockcmds.c @@ -87,7 +87,7 @@ RangeVarCallbackForLockTable(const RangeVar *rv, Oid relid, Oid oldrelid, * check */ /* Currently, we only allow plain tables to be locked */ - if (relkind != RELKIND_RELATION) + if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table", diff --git a/src/backend/commands/policy.c b/src/backend/commands/policy.c index 70e22c10000..6da3205c9e1 100644 --- a/src/backend/commands/policy.c +++ b/src/backend/commands/policy.c @@ -88,7 +88,7 @@ RangeVarCallbackForPolicy(const RangeVar *rv, Oid relid, Oid oldrelid, rv->relname))); /* Relation type MUST be a table. */ - if (relkind != RELKIND_RELATION) + if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table", rv->relname))); @@ -384,7 +384,8 @@ RemovePolicyById(Oid policy_id) relid = ((Form_pg_policy) GETSTRUCT(tuple))->polrelid; rel = heap_open(relid, AccessExclusiveLock); - if (rel->rd_rel->relkind != RELKIND_RELATION) + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table", diff --git a/src/backend/commands/seclabel.c b/src/backend/commands/seclabel.c index 5bd7e124c18..2b0ae348301 100644 --- a/src/backend/commands/seclabel.c +++ b/src/backend/commands/seclabel.c @@ -110,7 +110,8 @@ ExecSecLabelStmt(SecLabelStmt *stmt) relation->rd_rel->relkind != RELKIND_VIEW && relation->rd_rel->relkind != RELKIND_MATVIEW && relation->rd_rel->relkind != RELKIND_COMPOSITE_TYPE && - relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE) + relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table, view, materialized view, composite type, or foreign table", diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 7e37108b8d6..d953b4408bd 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -234,7 +234,7 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) stmt->tablespacename = NULL; stmt->if_not_exists = seq->if_not_exists; - address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL); + address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL, NULL); seqoid = address.objectId; Assert(seqoid != InvalidOid); @@ -1475,7 +1475,8 @@ process_owned_by(Relation seqrel, List *owned_by) /* Must be a regular or foreign table */ if (!(tablerel->rd_rel->relkind == RELKIND_RELATION || - tablerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)) + tablerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE || + tablerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("referenced relation \"%s\" is not a table or foreign table", diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 6322fa75a76..c77b216d4f7 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -29,6 +29,7 @@ #include "catalog/indexing.h" #include "catalog/namespace.h" #include "catalog/objectaccess.h" +#include "catalog/partition.h" #include "catalog/pg_am.h" #include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" @@ -65,6 +66,9 @@ #include "nodes/parsenodes.h" #include "optimizer/clauses.h" #include "optimizer/planner.h" +#include "optimizer/predtest.h" +#include "optimizer/prep.h" +#include "optimizer/var.h" #include "parser/parse_clause.h" #include "parser/parse_coerce.h" #include "parser/parse_collate.h" @@ -162,6 +166,7 @@ typedef struct AlteredTableInfo Oid newTableSpace; /* new tablespace; 0 means no change */ bool chgPersistence; /* T if SET LOGGED/UNLOGGED is used */ char newrelpersistence; /* if above is true */ + List *partition_constraint; /* for attach partition validation */ /* Objects to rebuild after completing ALTER TYPE operations */ List *changedConstraintOids; /* OIDs of constraints to rebuild */ List *changedConstraintDefs; /* string definitions of same */ @@ -252,6 +257,12 @@ static const struct dropmsgstrings dropmsgstringarray[] = { gettext_noop("foreign table \"%s\" does not exist, skipping"), gettext_noop("\"%s\" is not a foreign table"), gettext_noop("Use DROP FOREIGN TABLE to remove a foreign table.")}, + {RELKIND_PARTITIONED_TABLE, + ERRCODE_UNDEFINED_TABLE, + gettext_noop("table \"%s\" does not exist"), + gettext_noop("table \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not a table"), + gettext_noop("Use DROP TABLE to remove a table.")}, {'\0', 0, NULL, NULL, NULL, NULL} }; @@ -272,7 +283,8 @@ struct DropRelationCallbackState static void truncate_check_rel(Relation rel); static List *MergeAttributes(List *schema, List *supers, char relpersistence, - List **supOids, List **supconstr, int *supOidCount); + bool is_partition, List **supOids, List **supconstr, + int *supOidCount); static bool MergeCheckConstraint(List *constraints, char *name, Node *expr); static void MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel); static void MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel); @@ -339,7 +351,9 @@ static void add_column_datatype_dependency(Oid relid, int32 attnum, Oid typid); static void add_column_collation_dependency(Oid relid, int32 attnum, Oid collid); static void ATPrepAddOids(List **wqueue, Relation rel, bool recurse, AlterTableCmd *cmd, LOCKMODE lockmode); +static void ATPrepDropNotNull(Relation rel, bool recurse, bool recursing); static ObjectAddress ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode); +static void ATPrepSetNotNull(Relation rel, bool recurse, bool recursing); static ObjectAddress ATExecSetNotNull(AlteredTableInfo *tab, Relation rel, const char *colName, LOCKMODE lockmode); static ObjectAddress ATExecColumnDefault(Relation rel, const char *colName, @@ -433,6 +447,15 @@ static void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, void *arg); static void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid, void *arg); +static bool is_partition_attr(Relation rel, AttrNumber attnum, bool *used_in_expr); +static PartitionSpec *transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy); +static void ComputePartitionAttrs(Relation rel, List *partParams, AttrNumber *partattrs, + List **partexprs, Oid *partopclass, Oid *partcollation); +static void CreateInheritance(Relation child_rel, Relation parent_rel); +static void RemoveInheritance(Relation child_rel, Relation parent_rel); +static ObjectAddress ATExecAttachPartition(List **wqueue, Relation rel, + PartitionCmd *cmd); +static ObjectAddress ATExecDetachPartition(Relation rel, RangeVar *name); /* ---------------------------------------------------------------- @@ -455,7 +478,7 @@ static void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, */ ObjectAddress DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, - ObjectAddress *typaddress) + ObjectAddress *typaddress, const char *queryString) { char relname[NAMEDATALEN]; Oid namespaceId; @@ -492,6 +515,14 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("ON COMMIT can only be used on temporary tables"))); + if (stmt->partspec != NULL) + { + if (relkind != RELKIND_RELATION) + elog(ERROR, "unexpected relkind: %d", (int) relkind); + + relkind = RELKIND_PARTITIONED_TABLE; + } + /* * Look up the namespace in which we are supposed to create the relation, * check we have permission to create there, lock it against concurrent @@ -578,6 +609,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, */ schema = MergeAttributes(schema, stmt->inhRelations, stmt->relation->relpersistence, + stmt->partbound != NULL, &inheritOids, &old_constraints, &parentOidCount); /* @@ -588,17 +620,33 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, descriptor = BuildDescForRelation(schema); /* - * Notice that we allow OIDs here only for plain tables, even though some - * other relkinds can support them. This is necessary because the - * default_with_oids GUC must apply only to plain tables and not any other - * relkind; doing otherwise would break existing pg_dump files. We could - * allow explicit "WITH OIDS" while not allowing default_with_oids to - * affect other relkinds, but it would complicate interpretOidsOption(). + * Notice that we allow OIDs here only for plain tables and partitioned + * tables, even though some other relkinds can support them. This is + * necessary because the default_with_oids GUC must apply only to plain + * tables and not any other relkind; doing otherwise would break existing + * pg_dump files. We could allow explicit "WITH OIDS" while not allowing + * default_with_oids to affect other relkinds, but it would complicate + * interpretOidsOption(). */ localHasOids = interpretOidsOption(stmt->options, - (relkind == RELKIND_RELATION)); + (relkind == RELKIND_RELATION || + relkind == RELKIND_PARTITIONED_TABLE)); descriptor->tdhasoid = (localHasOids || parentOidCount > 0); + if (stmt->partbound) + { + /* If the parent has OIDs, partitions must have them too. */ + if (parentOidCount > 0 && !localHasOids) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot create table without OIDs as partition of table with OIDs"))); + /* If the parent doesn't, partitions must not have them. */ + if (parentOidCount == 0 && localHasOids) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot create table with OIDs as partition of table without OIDs"))); + } + /* * Find columns with default values and prepare for insertion of the * defaults. Pre-cooked (that is, inherited) defaults go into a list of @@ -697,6 +745,110 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, */ rel = relation_open(relationId, AccessExclusiveLock); + /* Process and store partition bound, if any. */ + if (stmt->partbound) + { + Node *bound; + ParseState *pstate; + Oid parentId = linitial_oid(inheritOids); + Relation parent; + + /* Already have strong enough lock on the parent */ + parent = heap_open(parentId, NoLock); + + /* + * We are going to try to validate the partition bound specification + * against the partition key of parentRel, so it better have one. + */ + if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("\"%s\" is not partitioned", + RelationGetRelationName(parent)))); + + /* Tranform the bound values */ + pstate = make_parsestate(NULL); + pstate->p_sourcetext = queryString; + bound = transformPartitionBound(pstate, parent, stmt->partbound); + + /* + * Check first that the new partition's bound is valid and does not + * overlap with any of existing partitions of the parent - note that + * it does not return on error. + */ + check_new_partition_bound(relname, parent, bound); + heap_close(parent, NoLock); + + /* Update the pg_class entry. */ + StorePartitionBound(rel, bound); + + /* + * The code that follows may also update the pg_class tuple to update + * relnumchecks, so bump up the command counter to avoid the "already + * updated by self" error. + */ + CommandCounterIncrement(); + } + + /* + * Process the partitioning specification (if any) and store the + * partition key information into the catalog. + */ + if (stmt->partspec) + { + char strategy; + int partnatts, + i; + AttrNumber partattrs[PARTITION_MAX_KEYS]; + Oid partopclass[PARTITION_MAX_KEYS]; + Oid partcollation[PARTITION_MAX_KEYS]; + List *partexprs = NIL; + List *cmds = NIL; + + /* + * We need to transform the raw parsetrees corresponding to partition + * expressions into executable expression trees. Like column defaults + * and CHECK constraints, we could not have done the transformation + * earlier. + */ + stmt->partspec = transformPartitionSpec(rel, stmt->partspec, + &strategy); + ComputePartitionAttrs(rel, stmt->partspec->partParams, + partattrs, &partexprs, partopclass, + partcollation); + + partnatts = list_length(stmt->partspec->partParams); + StorePartitionKey(rel, strategy, partnatts, partattrs, partexprs, + partopclass, partcollation); + + /* Force key columns to be NOT NULL when using range partitioning */ + if (strategy == PARTITION_STRATEGY_RANGE) + { + for (i = 0; i < partnatts; i++) + { + AttrNumber partattno = partattrs[i]; + Form_pg_attribute attform = descriptor->attrs[partattno-1]; + + if (partattno != 0 && !attform->attnotnull) + { + /* Add a subcommand to make this one NOT NULL */ + AlterTableCmd *cmd = makeNode(AlterTableCmd); + + cmd->subtype = AT_SetNotNull; + cmd->name = pstrdup(NameStr(attform->attname)); + cmds = lappend(cmds, cmd); + } + } + + /* + * Although, there cannot be any partitions yet, we still need to + * pass true for recurse; ATPrepSetNotNull() complains if we don't + */ + if (cmds != NIL) + AlterTableInternal(RelationGetRelid(rel), cmds, true); + } + } + /* * Now add any newly specified column default values and CHECK constraints * to the new relation. These are passed to us in the form of raw @@ -927,6 +1079,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, HeapTuple tuple; struct DropRelationCallbackState *state; char relkind; + char expected_relkind; Form_pg_class classform; LOCKMODE heap_lockmode; @@ -955,7 +1108,19 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, return; /* concurrently dropped, so nothing to do */ classform = (Form_pg_class) GETSTRUCT(tuple); - if (classform->relkind != relkind) + /* + * Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE, + * but RemoveRelations() can only pass one relkind for a given relation. + * It chooses RELKIND_RELATION for both regular and partitioned tables. + * That means we must be careful before giving the wrong type error when + * the relation is RELKIND_PARTITIONED_TABLE. + */ + if (classform->relkind == RELKIND_PARTITIONED_TABLE) + expected_relkind = RELKIND_RELATION; + else + expected_relkind = classform->relkind; + + if (relkind != expected_relkind) DropErrorMsgWrongType(rel->relname, classform->relkind, relkind); /* Allow DROP to either table owner or schema owner */ @@ -1054,6 +1219,10 @@ ExecuteTruncate(TruncateStmt *stmt) relids = lappend_oid(relids, childrelid); } } + else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("must truncate child tables too"))); } /* @@ -1153,6 +1322,7 @@ ExecuteTruncate(TruncateStmt *stmt) InitResultRelInfo(resultRelInfo, rel, 0, /* dummy rangetable index */ + false, 0); resultRelInfo++; } @@ -1293,7 +1463,8 @@ truncate_check_rel(Relation rel) AclResult aclresult; /* Only allow truncate on regular tables */ - if (rel->rd_rel->relkind != RELKIND_RELATION) + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table", @@ -1359,6 +1530,7 @@ storage_name(char c) * of ColumnDef's.) It is destructively changed. * 'supers' is a list of names (as RangeVar nodes) of parent relations. * 'relpersistence' is a persistence type of the table. + * 'is_partition' tells if the table is a partition * * Output arguments: * 'supOids' receives a list of the OIDs of the parent relations. @@ -1410,7 +1582,8 @@ storage_name(char c) */ static List * MergeAttributes(List *schema, List *supers, char relpersistence, - List **supOids, List **supconstr, int *supOidCount) + bool is_partition, List **supOids, List **supconstr, + int *supOidCount) { ListCell *entry; List *inhSchema = NIL; @@ -1420,6 +1593,7 @@ MergeAttributes(List *schema, List *supers, char relpersistence, bool have_bogus_defaults = false; int child_attno; static Node bogus_marker = {0}; /* marks conflicting defaults */ + List *saved_schema = NIL; /* * Check for and reject tables with too many columns. We perform this @@ -1439,6 +1613,17 @@ MergeAttributes(List *schema, List *supers, char relpersistence, MaxHeapAttributeNumber))); /* + * In case of a partition, there are no new column definitions, only + * dummy ColumnDefs created for column constraints. We merge these + * constraints inherited from the parent. + */ + if (is_partition) + { + saved_schema = schema; + schema = NIL; + } + + /* * Check for duplicate names in the explicit list of attributes. * * Although we might consider merging such entries in the same way that we @@ -1518,11 +1703,35 @@ MergeAttributes(List *schema, List *supers, char relpersistence, * on the parent table, which might otherwise be attempting to clear * the parent's relhassubclass field, if its previous children were * recently dropped. + * + * If the child table is a partition, then we instead grab an exclusive + * lock on the parent because its partition descriptor will be changed + * by addition of the new partition. */ - relation = heap_openrv(parent, ShareUpdateExclusiveLock); + if (!is_partition) + relation = heap_openrv(parent, ShareUpdateExclusiveLock); + else + relation = heap_openrv(parent, AccessExclusiveLock); + + /* + * We do not allow partitioned tables and partitions to participate + * in regular inheritance. + */ + if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && + !is_partition) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot inherit from partitioned table \"%s\"", + parent->relname))); + if (relation->rd_rel->relispartition && !is_partition) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot inherit from partition \"%s\"", + parent->relname))); if (relation->rd_rel->relkind != RELKIND_RELATION && - relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE) + relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("inherited relation \"%s\" is not a table or foreign table", @@ -1532,7 +1741,9 @@ MergeAttributes(List *schema, List *supers, char relpersistence, relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("cannot inherit from temporary relation \"%s\"", + errmsg(!is_partition + ? "cannot inherit from temporary relation \"%s\"" + : "cannot create a permanent relation as partition of temporary relation \"%s\"", parent->relname))); /* If existing rel is temp, it must belong to this session */ @@ -1540,7 +1751,9 @@ MergeAttributes(List *schema, List *supers, char relpersistence, !relation->rd_islocaltemp) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("cannot inherit from temporary relation of another session"))); + errmsg(!is_partition + ? "cannot inherit from temporary relation of another session" + : "cannot create as partition of temporary relation of another session"))); /* * We should have an UNDER permission flag for this, but for now, @@ -1777,9 +1990,9 @@ MergeAttributes(List *schema, List *supers, char relpersistence, pfree(newattno); /* - * Close the parent rel, but keep our ShareUpdateExclusiveLock on it - * until xact commit. That will prevent someone else from deleting or - * ALTERing the parent before the child is committed. + * Close the parent rel, but keep our lock on it until xact commit. + * That will prevent someone else from deleting or ALTERing the parent + * before the child is committed. */ heap_close(relation, NoLock); } @@ -1787,7 +2000,8 @@ MergeAttributes(List *schema, List *supers, char relpersistence, /* * If we had no inherited attributes, the result schema is just the * explicitly declared columns. Otherwise, we need to merge the declared - * columns into the inherited schema list. + * columns into the inherited schema list. Although, we never have any + * explicitly declared columns if the table is a partition. */ if (inhSchema != NIL) { @@ -1816,6 +2030,12 @@ MergeAttributes(List *schema, List *supers, char relpersistence, newcollid; /* + * Partitions have only one parent, so conflict should never + * occur + */ + Assert(!is_partition); + + /* * Yes, try to merge the two column definitions. They must * have the same type, typmod, and collation. */ @@ -1897,6 +2117,56 @@ MergeAttributes(List *schema, List *supers, char relpersistence, } /* + * Now that we have the column definition list for a partition, we can + * check whether the columns referenced in column option specifications + * actually exist. Also, we merge the options into the corresponding + * column definitions. + */ + if (is_partition && list_length(saved_schema) > 0) + { + schema = list_concat(schema, saved_schema); + + foreach(entry, schema) + { + ColumnDef *coldef = lfirst(entry); + ListCell *rest = lnext(entry); + ListCell *prev = entry; + + /* + * Partition column option that does not belong to a column from + * the parent. This works because the columns from the parent + * come first in the list (see above). + */ + if (coldef->typeName == NULL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist", + coldef->colname))); + while (rest != NULL) + { + ColumnDef *restdef = lfirst(rest); + ListCell *next = lnext(rest); /* need to save it in case + * we delete it */ + + if (strcmp(coldef->colname, restdef->colname) == 0) + { + /* + * merge the column options into the column from the + * parent + */ + coldef->is_not_null = restdef->is_not_null; + coldef->raw_default = restdef->raw_default; + coldef->cooked_default = restdef->cooked_default; + coldef->constraints = restdef->constraints; + list_delete_cell(schema, rest, prev); + } + prev = rest; + rest = next; + } + } + } + + /* * If we found any conflicting parent default values, check to make sure * they were overridden by the child. */ @@ -2166,7 +2436,8 @@ renameatt_check(Oid myrelid, Form_pg_class classform, bool recursing) relkind != RELKIND_MATVIEW && relkind != RELKIND_COMPOSITE_TYPE && relkind != RELKIND_INDEX && - relkind != RELKIND_FOREIGN_TABLE) + relkind != RELKIND_FOREIGN_TABLE && + relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table, view, materialized view, composite type, index, or foreign table", @@ -3057,6 +3328,11 @@ AlterTableGetLockLevel(List *cmds) cmd_lockmode = AlterTableGetRelOptionsLockLevel((List *) cmd->def); break; + case AT_AttachPartition: + case AT_DetachPartition: + cmd_lockmode = AccessExclusiveLock; + break; + default: /* oops */ elog(ERROR, "unrecognized alter table type: %d", (int) cmd->subtype); @@ -3168,12 +3444,14 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd, break; case AT_DropNotNull: /* ALTER COLUMN DROP NOT NULL */ ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE); + ATPrepDropNotNull(rel, recurse, recursing); ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode); /* No command-specific prep needed */ pass = AT_PASS_DROP; break; case AT_SetNotNull: /* ALTER COLUMN SET NOT NULL */ ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE); + ATPrepSetNotNull(rel, recurse, recursing); ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode); /* No command-specific prep needed */ pass = AT_PASS_ADD_CONSTR; @@ -3374,6 +3652,12 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd, /* No command-specific prep needed */ pass = AT_PASS_MISC; break; + case AT_AttachPartition: + case AT_DetachPartition: + ATSimplePermissions(rel, ATT_TABLE); + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; default: /* oops */ elog(ERROR, "unrecognized alter table type: %d", (int) cmd->subtype); @@ -3444,7 +3728,14 @@ ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode) { AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab); - if (tab->relkind == RELKIND_RELATION || + /* + * If the table is source table of ATTACH PARTITION command, we did + * not modify anything about it that will change its toasting + * requirement, so no need to check. + */ + if (((tab->relkind == RELKIND_RELATION || + tab->relkind == RELKIND_PARTITIONED_TABLE) && + tab->partition_constraint == NIL) || tab->relkind == RELKIND_MATVIEW) AlterTableCreateToastTable(tab->relid, (Datum) 0, lockmode); } @@ -3693,6 +3984,12 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel, case AT_GenericOptions: ATExecGenericOptions(rel, (List *) cmd->def); break; + case AT_AttachPartition: + ATExecAttachPartition(wqueue, rel, (PartitionCmd *) cmd->def); + break; + case AT_DetachPartition: + ATExecDetachPartition(rel, ((PartitionCmd *) cmd->def)->name); + break; default: /* oops */ elog(ERROR, "unrecognized alter table type: %d", (int) cmd->subtype); @@ -3878,7 +4175,8 @@ ATRewriteTables(AlterTableStmt *parsetree, List **wqueue, LOCKMODE lockmode) * Test the current data within the table against new constraints * generated by ALTER TABLE commands, but don't rebuild data. */ - if (tab->constraints != NIL || tab->new_notnull) + if (tab->constraints != NIL || tab->new_notnull || + tab->partition_constraint != NIL) ATRewriteTable(tab, InvalidOid, lockmode); /* @@ -3958,6 +4256,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) CommandId mycid; BulkInsertState bistate; int hi_options; + List *partqualstate = NIL; /* * Open the relation(s). We have surely already locked the existing @@ -4022,6 +4321,15 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) } } + /* Build expression execution states for partition check quals */ + if (tab->partition_constraint) + { + needscan = true; + partqualstate = (List *) + ExecPrepareExpr((Expr *) tab->partition_constraint, + estate); + } + foreach(l, tab->newvals) { NewColumnValue *ex = lfirst(l); @@ -4211,6 +4519,11 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) } } + if (partqualstate && !ExecQual(partqualstate, econtext, true)) + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("partition constraint is violated by some row"))); + /* Write the tuple out to the new relation */ if (newrel) heap_insert(newrel, tuple, mycid, hi_options, bistate); @@ -4291,6 +4604,7 @@ ATSimplePermissions(Relation rel, int allowed_targets) switch (rel->rd_rel->relkind) { case RELKIND_RELATION: + case RELKIND_PARTITIONED_TABLE: actual_target = ATT_TABLE; break; case RELKIND_VIEW: @@ -4407,7 +4721,8 @@ ATSimpleRecursion(List **wqueue, Relation rel, */ if (recurse && (rel->rd_rel->relkind == RELKIND_RELATION || - rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)) + rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE || + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)) { Oid relid = RelationGetRelid(rel); ListCell *child; @@ -4527,7 +4842,8 @@ find_composite_type_dependencies(Oid typeOid, Relation origRelation, att = rel->rd_att->attrs[pg_depend->objsubid - 1]; if (rel->rd_rel->relkind == RELKIND_RELATION || - rel->rd_rel->relkind == RELKIND_MATVIEW) + rel->rd_rel->relkind == RELKIND_MATVIEW || + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { if (origTypeName) ereport(ERROR, @@ -4728,6 +5044,11 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel, if (recursing) ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE); + if (rel->rd_rel->relispartition && !recursing) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot add column to a partition"))); + attrdesc = heap_open(AttributeRelationId, RowExclusiveLock); /* @@ -5174,6 +5495,20 @@ ATPrepAddOids(List **wqueue, Relation rel, bool recurse, AlterTableCmd *cmd, LOC * Return the address of the modified column. If the column was already * nullable, InvalidObjectAddress is returned. */ + +static void +ATPrepDropNotNull(Relation rel, bool recurse, bool recursing) +{ + /* + * If the parent is a partitioned table, like check constraints, NOT NULL + * constraints must be dropped from child tables. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && + !recurse && !recursing) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("constraint must be dropped from child tables too"))); +} static ObjectAddress ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode) { @@ -5249,6 +5584,45 @@ ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode) list_free(indexoidlist); + /* If rel is partition, shouldn't drop NOT NULL if parent has the same */ + if (rel->rd_rel->relispartition) + { + Oid parentId = get_partition_parent(RelationGetRelid(rel)); + Relation parent = heap_open(parentId, AccessShareLock); + TupleDesc tupDesc = RelationGetDescr(parent); + AttrNumber parent_attnum; + + parent_attnum = get_attnum(parentId, colName); + if (tupDesc->attrs[parent_attnum - 1]->attnotnull) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("column \"%s\" is marked NOT NULL in parent table", + colName))); + heap_close(parent, AccessShareLock); + } + + /* + * If the table is a range partitioned table, check that the column + * is not in the partition key. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + PartitionKey key = RelationGetPartitionKey(rel); + int partnatts = get_partition_natts(key), + i; + + for (i = 0; i < partnatts; i++) + { + AttrNumber partattnum = get_partition_col_attnum(key, i); + + if (partattnum == attnum) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("column \"%s\" is in range partition key", + colName))); + } + } + /* * Okay, actually perform the catalog change ... if needed */ @@ -5281,6 +5655,21 @@ ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode) * Return the address of the modified column. If the column was already NOT * NULL, InvalidObjectAddress is returned. */ + +static void +ATPrepSetNotNull(Relation rel, bool recurse, bool recursing) +{ + /* + * If the parent is a partitioned table, like check constraints, NOT NULL + * constraints must be added to the child tables. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && + !recurse && !recursing) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("constraint must be added to child tables too"))); +} + static ObjectAddress ATExecSetNotNull(AlteredTableInfo *tab, Relation rel, const char *colName, LOCKMODE lockmode) @@ -5419,7 +5808,8 @@ ATPrepSetStatistics(Relation rel, const char *colName, Node *newValue, LOCKMODE if (rel->rd_rel->relkind != RELKIND_RELATION && rel->rd_rel->relkind != RELKIND_MATVIEW && rel->rd_rel->relkind != RELKIND_INDEX && - rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE) + rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table, materialized view, index, or foreign table", @@ -5692,6 +6082,68 @@ ATPrepDropColumn(List **wqueue, Relation rel, bool recurse, bool recursing, } /* + * Checks if attnum is a partition attribute for rel + * + * Sets *used_in_expr if attnum is found to be referenced in some partition + * key expression. It's possible for a column to be both used directly and + * as part of an expression; if that happens, *used_in_expr may end up as + * either true or false. That's OK for current uses of this function, because + * *used_in_expr is only used to tailor the error message text. + */ +static bool +is_partition_attr(Relation rel, AttrNumber attnum, bool *used_in_expr) +{ + PartitionKey key; + int partnatts; + List *partexprs; + ListCell *partexprs_item; + int i; + + if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + return false; + + key = RelationGetPartitionKey(rel); + partnatts = get_partition_natts(key); + partexprs = get_partition_exprs(key); + + partexprs_item = list_head(partexprs); + for (i = 0; i < partnatts; i++) + { + AttrNumber partattno = get_partition_col_attnum(key, i); + + if (partattno != 0) + { + if (attnum == partattno) + { + if (used_in_expr) + *used_in_expr = false; + return true; + } + } + else + { + /* Arbitrary expression */ + Node *expr = (Node *) lfirst(partexprs_item); + Bitmapset *expr_attrs = NULL; + + /* Find all attributes referenced */ + pull_varattnos(expr, 1, &expr_attrs); + partexprs_item = lnext(partexprs_item); + + if (bms_is_member(attnum - FirstLowInvalidHeapAttributeNumber, + expr_attrs)) + { + if (used_in_expr) + *used_in_expr = true; + return true; + } + } + } + + return false; +} + +/* * Return value is the address of the dropped column. */ static ObjectAddress @@ -5705,6 +6157,7 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName, AttrNumber attnum; List *children; ObjectAddress object; + bool is_expr; /* At top level, permission check was done in ATPrepCmd, else do it */ if (recursing) @@ -5749,6 +6202,19 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName, errmsg("cannot drop inherited column \"%s\"", colName))); + /* Don't drop columns used in the partition key */ + if (is_partition_attr(rel, attnum, &is_expr)) + { + if (!is_expr) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot drop column named in partition key"))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot drop column referenced in partition key expression"))); + } + ReleaseSysCache(tuple); /* @@ -5763,6 +6229,15 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName, Relation attr_rel; ListCell *child; + /* + * In case of a partitioned table, the column must be dropped from the + * partitions as well. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && !recurse) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("column must be dropped from child tables too"))); + attr_rel = heap_open(AttributeRelationId, RowExclusiveLock); foreach(child, children) { @@ -6267,6 +6742,12 @@ ATAddForeignKeyConstraint(AlteredTableInfo *tab, Relation rel, * Validity checks (permission checks wait till we have the column * numbers) */ + if (pkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot reference partitioned table \"%s\"", + RelationGetRelationName(pkrel)))); + if (pkrel->rd_rel->relkind != RELKIND_RELATION) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -7777,6 +8258,16 @@ ATExecDropConstraint(Relation rel, const char *constrName, } /* + * In case of a partitioned table, the constraint must be dropped from + * the partitions too. There is no such thing as NO INHERIT constraints + * in case of partitioned tables. + */ + if (!recurse && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("constraint must be dropped from child tables too"))); + + /* * Propagate to children as appropriate. Unlike most other ALTER * routines, we have to do this one level of recursion at a time; we can't * use find_all_inheritors to do it in one pass. @@ -7904,6 +8395,7 @@ ATPrepAlterColumnType(List **wqueue, NewColumnValue *newval; ParseState *pstate = make_parsestate(NULL); AclResult aclresult; + bool is_expr; if (rel->rd_rel->reloftype && !recursing) ereport(ERROR, @@ -7934,6 +8426,19 @@ ATPrepAlterColumnType(List **wqueue, errmsg("cannot alter inherited column \"%s\"", colName))); + /* Don't alter columns used in the partition key */ + if (is_partition_attr(rel, attnum, &is_expr)) + { + if (!is_expr) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot alter type of column named in partition key"))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot alter type of column referenced in partition key expression"))); + } + /* Look up the target type */ typenameTypeIdAndMod(NULL, typeName, &targettype, &targettypmod); @@ -7949,7 +8454,8 @@ ATPrepAlterColumnType(List **wqueue, list_make1_oid(rel->rd_rel->reltype), false); - if (tab->relkind == RELKIND_RELATION) + if (tab->relkind == RELKIND_RELATION || + tab->relkind == RELKIND_PARTITIONED_TABLE) { /* * Set up an expression to transform the old data value to the new @@ -8979,6 +9485,7 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock case RELKIND_VIEW: case RELKIND_MATVIEW: case RELKIND_FOREIGN_TABLE: + case RELKIND_PARTITIONED_TABLE: /* ok to change owner */ break; case RELKIND_INDEX: @@ -9440,6 +9947,7 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, case RELKIND_RELATION: case RELKIND_TOASTVALUE: case RELKIND_MATVIEW: + case RELKIND_PARTITIONED_TABLE: (void) heap_reloptions(rel->rd_rel->relkind, newOptions, true); break; case RELKIND_VIEW: @@ -9860,7 +10368,8 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt) /* Only move the object type requested */ if ((stmt->objtype == OBJECT_TABLE && - relForm->relkind != RELKIND_RELATION) || + relForm->relkind != RELKIND_RELATION && + relForm->relkind != RELKIND_PARTITIONED_TABLE) || (stmt->objtype == OBJECT_INDEX && relForm->relkind != RELKIND_INDEX) || (stmt->objtype == OBJECT_MATVIEW && @@ -10059,6 +10568,16 @@ ATPrepAddInherit(Relation child_rel) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot change inheritance of typed table"))); + + if (child_rel->rd_rel->relispartition) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot change inheritance of a partition"))); + + if (child_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot change inheritance of partitioned table"))); } /* @@ -10067,12 +10586,7 @@ ATPrepAddInherit(Relation child_rel) static ObjectAddress ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode) { - Relation parent_rel, - catalogRelation; - SysScanDesc scan; - ScanKeyData key; - HeapTuple inheritsTuple; - int32 inhseqno; + Relation parent_rel; List *children; ObjectAddress address; @@ -10110,37 +10624,18 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode) (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot inherit to temporary relation of another session"))); - /* - * Check for duplicates in the list of parents, and determine the highest - * inhseqno already present; we'll use the next one for the new parent. - * (Note: get RowExclusiveLock because we will write pg_inherits below.) - * - * Note: we do not reject the case where the child already inherits from - * the parent indirectly; CREATE TABLE doesn't reject comparable cases. - */ - catalogRelation = heap_open(InheritsRelationId, RowExclusiveLock); - ScanKeyInit(&key, - Anum_pg_inherits_inhrelid, - BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(RelationGetRelid(child_rel))); - scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, - true, NULL, 1, &key); - - /* inhseqno sequences start at 1 */ - inhseqno = 0; - while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan))) - { - Form_pg_inherits inh = (Form_pg_inherits) GETSTRUCT(inheritsTuple); + /* Prevent partitioned tables from becoming inheritance parents */ + if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot inherit from partitioned table \"%s\"", + parent->relname))); - if (inh->inhparent == RelationGetRelid(parent_rel)) - ereport(ERROR, - (errcode(ERRCODE_DUPLICATE_TABLE), - errmsg("relation \"%s\" would be inherited from more than once", - RelationGetRelationName(parent_rel)))); - if (inh->inhseqno > inhseqno) - inhseqno = inh->inhseqno; - } - systable_endscan(scan); + /* Likewise for partitions */ + if (parent_rel->rd_rel->relispartition) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot inherit from a partition"))); /* * Prevent circularity by seeing if proposed parent inherits from child. @@ -10175,6 +10670,69 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode) RelationGetRelationName(child_rel), RelationGetRelationName(parent_rel)))); + /* OK to create inheritance */ + CreateInheritance(child_rel, parent_rel); + + ObjectAddressSet(address, RelationRelationId, + RelationGetRelid(parent_rel)); + + /* keep our lock on the parent relation until commit */ + heap_close(parent_rel, NoLock); + + return address; +} + +/* + * CreateInheritance + * Catalog manipulation portion of creating inheritance between a child + * table and a parent table. + * + * Common to ATExecAddInherit() and ATExecAttachPartition(). + */ +static void +CreateInheritance(Relation child_rel, Relation parent_rel) +{ + Relation catalogRelation; + SysScanDesc scan; + ScanKeyData key; + HeapTuple inheritsTuple; + int32 inhseqno; + + /* Note: get RowExclusiveLock because we will write pg_inherits below. */ + catalogRelation = heap_open(InheritsRelationId, RowExclusiveLock); + + /* + * Check for duplicates in the list of parents, and determine the highest + * inhseqno already present; we'll use the next one for the new parent. + * Also, if proposed child is a partition, it cannot already be inheriting. + * + * Note: we do not reject the case where the child already inherits from + * the parent indirectly; CREATE TABLE doesn't reject comparable cases. + */ + ScanKeyInit(&key, + Anum_pg_inherits_inhrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(child_rel))); + scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, + true, NULL, 1, &key); + + /* inhseqno sequences start at 1 */ + inhseqno = 0; + while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan))) + { + Form_pg_inherits inh = (Form_pg_inherits) GETSTRUCT(inheritsTuple); + + if (inh->inhparent == RelationGetRelid(parent_rel)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation \"%s\" would be inherited from more than once", + RelationGetRelationName(parent_rel)))); + + if (inh->inhseqno > inhseqno) + inhseqno = inh->inhseqno; + } + systable_endscan(scan); + /* Match up the columns and bump attinhcount as needed */ MergeAttributesIntoExisting(child_rel, parent_rel); @@ -10189,16 +10747,8 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode) inhseqno + 1, catalogRelation); - ObjectAddressSet(address, RelationRelationId, - RelationGetRelid(parent_rel)); - /* Now we're done with pg_inherits */ heap_close(catalogRelation, RowExclusiveLock); - - /* keep our lock on the parent relation until commit */ - heap_close(parent_rel, NoLock); - - return address; } /* @@ -10249,7 +10799,7 @@ constraints_equivalent(HeapTuple a, HeapTuple b, TupleDesc tupleDesc) * Check columns in child table match up with columns in parent, and increment * their attinhcount. * - * Called by ATExecAddInherit + * Called by CreateInheritance * * Currently all parent columns must be found in child. Missing columns are an * error. One day we might consider creating new columns like CREATE TABLE @@ -10267,12 +10817,17 @@ MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel) int parent_natts; TupleDesc tupleDesc; HeapTuple tuple; + bool child_is_partition = false; attrrel = heap_open(AttributeRelationId, RowExclusiveLock); tupleDesc = RelationGetDescr(parent_rel); parent_natts = tupleDesc->natts; + /* If parent_rel is a partitioned table, child_rel must be a partition */ + if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + child_is_partition = true; + for (parent_attno = 1; parent_attno <= parent_natts; parent_attno++) { Form_pg_attribute attribute = tupleDesc->attrs[parent_attno - 1]; @@ -10320,6 +10875,18 @@ MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel) * later on, this change will just roll back.) */ childatt->attinhcount++; + + /* + * In case of partitions, we must enforce that value of attislocal + * is same in all partitions. (Note: there are only inherited + * attributes in partitions) + */ + if (child_is_partition) + { + Assert(childatt->attinhcount == 1); + childatt->attislocal = false; + } + simple_heap_update(attrrel, &tuple->t_self, tuple); CatalogUpdateIndexes(attrrel, tuple); heap_freetuple(tuple); @@ -10342,7 +10909,7 @@ MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel) * * Constraints that are marked ONLY in the parent are ignored. * - * Called by ATExecAddInherit + * Called by CreateInheritance * * Currently all constraints in parent must be present in the child. One day we * may consider adding new constraints like CREATE TABLE does. @@ -10361,10 +10928,15 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel) SysScanDesc parent_scan; ScanKeyData parent_key; HeapTuple parent_tuple; + bool child_is_partition = false; catalog_relation = heap_open(ConstraintRelationId, RowExclusiveLock); tuple_desc = RelationGetDescr(catalog_relation); + /* If parent_rel is a partitioned table, child_rel must be a partition */ + if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + child_is_partition = true; + /* Outer loop scans through the parent's constraint definitions */ ScanKeyInit(&parent_key, Anum_pg_constraint_conrelid, @@ -10441,6 +11013,18 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel) child_copy = heap_copytuple(child_tuple); child_con = (Form_pg_constraint) GETSTRUCT(child_copy); child_con->coninhcount++; + + /* + * In case of partitions, an inherited constraint must be + * inherited only once since it cannot have multiple parents and + * it is never considered local. + */ + if (child_is_partition) + { + Assert(child_con->coninhcount == 1); + child_con->conislocal = false; + } + simple_heap_update(catalog_relation, &child_copy->t_self, child_copy); CatalogUpdateIndexes(catalog_relation, child_copy); heap_freetuple(child_copy); @@ -10465,6 +11049,46 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel) /* * ALTER TABLE NO INHERIT * + * Return value is the address of the relation that is no longer parent. + */ +static ObjectAddress +ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) +{ + ObjectAddress address; + Relation parent_rel; + + if (rel->rd_rel->relispartition) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot change inheritance of a partition"))); + + /* + * AccessShareLock on the parent is probably enough, seeing that DROP + * TABLE doesn't lock parent tables at all. We need some lock since we'll + * be inspecting the parent's schema. + */ + parent_rel = heap_openrv(parent, AccessShareLock); + + /* + * We don't bother to check ownership of the parent table --- ownership of + * the child is presumed enough rights. + */ + + /* Off to RemoveInheritance() where most of the work happens */ + RemoveInheritance(rel, parent_rel); + + /* keep our lock on the parent relation until commit */ + heap_close(parent_rel, NoLock); + + ObjectAddressSet(address, RelationRelationId, + RelationGetRelid(parent_rel)); + + return address; +} + +/* + * RemoveInheritance + * * Drop a parent from the child's parents. This just adjusts the attinhcount * and attislocal of the columns and removes the pg_inherit and pg_depend * entries. @@ -10478,13 +11102,11 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel) * coninhcount and conislocal for inherited constraints are adjusted in * exactly the same way. * - * Return value is the address of the relation that is no longer parent. + * Common to ATExecDropInherit() and ATExecDetachPartition(). */ -static ObjectAddress -ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) +static void +RemoveInheritance(Relation child_rel, Relation parent_rel) { - Relation parent_rel; - Oid parent_oid; Relation catalogRelation; SysScanDesc scan; ScanKeyData key[3]; @@ -10493,19 +11115,11 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) constraintTuple; List *connames; bool found = false; - ObjectAddress address; - - /* - * AccessShareLock on the parent is probably enough, seeing that DROP - * TABLE doesn't lock parent tables at all. We need some lock since we'll - * be inspecting the parent's schema. - */ - parent_rel = heap_openrv(parent, AccessShareLock); + bool child_is_partition = false; - /* - * We don't bother to check ownership of the parent table --- ownership of - * the child is presumed enough rights. - */ + /* If parent_rel is a partitioned table, child_rel must be a partition */ + if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + child_is_partition = true; /* * Find and destroy the pg_inherits entry linking the two, or error out if @@ -10515,7 +11129,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) ScanKeyInit(&key[0], Anum_pg_inherits_inhrelid, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(RelationGetRelid(rel))); + ObjectIdGetDatum(RelationGetRelid(child_rel))); scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true, NULL, 1, key); @@ -10536,11 +11150,20 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) heap_close(catalogRelation, RowExclusiveLock); if (!found) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_TABLE), - errmsg("relation \"%s\" is not a parent of relation \"%s\"", - RelationGetRelationName(parent_rel), - RelationGetRelationName(rel)))); + { + if (child_is_partition) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("relation \"%s\" is not a partition of relation \"%s\"", + RelationGetRelationName(child_rel), + RelationGetRelationName(parent_rel)))); + else + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("relation \"%s\" is not a parent of relation \"%s\"", + RelationGetRelationName(parent_rel), + RelationGetRelationName(child_rel)))); + } /* * Search through child columns looking for ones matching parent rel @@ -10549,7 +11172,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) ScanKeyInit(&key[0], Anum_pg_attribute_attrelid, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(RelationGetRelid(rel))); + ObjectIdGetDatum(RelationGetRelid(child_rel))); scan = systable_beginscan(catalogRelation, AttributeRelidNumIndexId, true, NULL, 1, key); while (HeapTupleIsValid(attributeTuple = systable_getnext(scan))) @@ -10611,7 +11234,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) ScanKeyInit(&key[0], Anum_pg_constraint_conrelid, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(RelationGetRelid(rel))); + ObjectIdGetDatum(RelationGetRelid(child_rel))); scan = systable_beginscan(catalogRelation, ConstraintRelidIndexId, true, NULL, 1, key); @@ -10642,7 +11265,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) if (copy_con->coninhcount <= 0) /* shouldn't happen */ elog(ERROR, "relation %u has non-inherited constraint \"%s\"", - RelationGetRelid(rel), NameStr(copy_con->conname)); + RelationGetRelid(child_rel), NameStr(copy_con->conname)); copy_con->coninhcount--; if (copy_con->coninhcount == 0) @@ -10654,30 +11277,20 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) } } - parent_oid = RelationGetRelid(parent_rel); - systable_endscan(scan); heap_close(catalogRelation, RowExclusiveLock); - drop_parent_dependency(RelationGetRelid(rel), + drop_parent_dependency(RelationGetRelid(child_rel), RelationRelationId, RelationGetRelid(parent_rel)); - /* * Post alter hook of this inherits. Since object_access_hook doesn't take * multiple object identifiers, we relay oid of parent relation using * auxiliary_id argument. */ InvokeObjectPostAlterHookArg(InheritsRelationId, - RelationGetRelid(rel), 0, + RelationGetRelid(child_rel), 0, RelationGetRelid(parent_rel), false); - - /* keep our lock on the parent relation until commit */ - heap_close(parent_rel, NoLock); - - ObjectAddressSet(address, RelationRelationId, parent_oid); - - return address; } /* @@ -11499,7 +12112,8 @@ AlterTableNamespaceInternal(Relation rel, Oid oldNspOid, Oid nspOid, /* Fix other dependent stuff */ if (rel->rd_rel->relkind == RELKIND_RELATION || - rel->rd_rel->relkind == RELKIND_MATVIEW) + rel->rd_rel->relkind == RELKIND_MATVIEW || + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { AlterIndexNamespaces(classRel, rel, oldNspOid, nspOid, objsMoved); AlterSeqNamespaces(classRel, rel, oldNspOid, nspOid, @@ -11948,7 +12562,7 @@ RangeVarCallbackOwnsTable(const RangeVar *relation, if (!relkind) return; if (relkind != RELKIND_RELATION && relkind != RELKIND_TOASTVALUE && - relkind != RELKIND_MATVIEW) + relkind != RELKIND_MATVIEW && relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table or materialized view", relation->relname))); @@ -12105,7 +12719,8 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid, relkind != RELKIND_VIEW && relkind != RELKIND_MATVIEW && relkind != RELKIND_SEQUENCE && - relkind != RELKIND_FOREIGN_TABLE) + relkind != RELKIND_FOREIGN_TABLE && + relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table, view, materialized view, sequence, or foreign table", @@ -12113,3 +12728,701 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid, ReleaseSysCache(tuple); } + +/* + * Transform any expressions present in the partition key + */ +static PartitionSpec * +transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy) +{ + PartitionSpec *newspec; + ParseState *pstate; + RangeTblEntry *rte; + ListCell *l; + + newspec = (PartitionSpec *) makeNode(PartitionSpec); + + newspec->strategy = partspec->strategy; + newspec->location = partspec->location; + newspec->partParams = NIL; + + /* Parse partitioning strategy name */ + if (!pg_strcasecmp(partspec->strategy, "list")) + *strategy = PARTITION_STRATEGY_LIST; + else if (!pg_strcasecmp(partspec->strategy, "range")) + *strategy = PARTITION_STRATEGY_RANGE; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized partitioning strategy \"%s\"", + partspec->strategy))); + + /* + * Create a dummy ParseState and insert the target relation as its sole + * rangetable entry. We need a ParseState for transformExpr. + */ + pstate = make_parsestate(NULL); + rte = addRangeTableEntryForRelation(pstate, rel, NULL, false, true); + addRTEtoQuery(pstate, rte, true, true, true); + + /* take care of any partition expressions */ + foreach(l, partspec->partParams) + { + ListCell *lc; + PartitionElem *pelem = (PartitionElem *) lfirst(l); + + /* Check for PARTITION BY ... (foo, foo) */ + foreach(lc, newspec->partParams) + { + PartitionElem *pparam = (PartitionElem *) lfirst(lc); + + if (pelem->name && pparam->name && + !strcmp(pelem->name, pparam->name)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("column \"%s\" appears more than once in partition key", + pelem->name), + parser_errposition(pstate, pelem->location))); + } + + if (pelem->expr) + { + /* Now do parse transformation of the expression */ + pelem->expr = transformExpr(pstate, pelem->expr, + EXPR_KIND_PARTITION_EXPRESSION); + + /* we have to fix its collations too */ + assign_expr_collations(pstate, pelem->expr); + } + + newspec->partParams = lappend(newspec->partParams, pelem); + } + + return newspec; +} + +/* + * Compute per-partition-column information from a list of PartitionElem's + */ +static void +ComputePartitionAttrs(Relation rel, List *partParams, AttrNumber *partattrs, + List **partexprs, Oid *partopclass, Oid *partcollation) +{ + int attn; + ListCell *lc; + + attn = 0; + foreach(lc, partParams) + { + PartitionElem *pelem = (PartitionElem *) lfirst(lc); + Oid atttype; + Oid attcollation; + + if (pelem->name != NULL) + { + /* Simple attribute reference */ + HeapTuple atttuple; + Form_pg_attribute attform; + + atttuple = SearchSysCacheAttName(RelationGetRelid(rel), pelem->name); + if (!HeapTupleIsValid(atttuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" named in partition key does not exist", + pelem->name))); + attform = (Form_pg_attribute) GETSTRUCT(atttuple); + + if (attform->attnum <= 0) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("cannot use system column \"%s\" in partition key", + pelem->name))); + + partattrs[attn] = attform->attnum; + atttype = attform->atttypid; + attcollation = attform->attcollation; + ReleaseSysCache(atttuple); + + /* Note that whole-row references can't happen here; see below */ + } + else + { + /* Expression */ + Node *expr = pelem->expr; + + Assert(expr != NULL); + atttype = exprType(expr); + attcollation = exprCollation(expr); + + /* + * Strip any top-level COLLATE clause. This ensures that we treat + * "x COLLATE y" and "(x COLLATE y)" alike. + */ + while (IsA(expr, CollateExpr)) + expr = (Node *) ((CollateExpr *) expr)->arg; + + if (IsA(expr, Var) && + ((Var *) expr)->varattno != InvalidAttrNumber) + { + /* + * User wrote "(column)" or "(column COLLATE something)". + * Treat it like simple attribute anyway. + */ + partattrs[attn] = ((Var *) expr)->varattno; + } + else + { + Bitmapset *expr_attrs = NULL; + + partattrs[attn] = 0; /* marks the column as expression */ + *partexprs = lappend(*partexprs, expr); + + /* + * Note that expression_planner does not change the passed in + * expression destructively and we have already saved the + * expression to be stored into the catalog above. + */ + expr = (Node *) expression_planner((Expr *) expr); + + /* + * Partition expression cannot contain mutable functions, + * because a given row must always map to the same partition + * as long as there is no change in the partition boundary + * structure. + */ + if (contain_mutable_functions(expr)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("functions in partition key expression must be marked IMMUTABLE"))); + + /* + * While it is not exactly *wrong* for an expression to be + * a constant value, it seems better to prevent such input. + */ + if (IsA(expr, Const)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot use constant expression as partition key"))); + + /* + * transformPartitionSpec() should have already rejected subqueries, + * aggregates, window functions, and SRFs, based on the EXPR_KIND_ + * for partition expressions. + */ + + /* Cannot have expressions containing whole-row references */ + pull_varattnos(expr, 1, &expr_attrs); + if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, + expr_attrs)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("partition key expressions cannot contain whole-row references"))); + } + } + + /* + * Apply collation override if any + */ + if (pelem->collation) + attcollation = get_collation_oid(pelem->collation, false); + + /* + * Check we have a collation iff it's a collatable type. The only + * expected failures here are (1) COLLATE applied to a noncollatable + * type, or (2) partition expression had an unresolved collation. + * But we might as well code this to be a complete consistency check. + */ + if (type_is_collatable(atttype)) + { + if (!OidIsValid(attcollation)) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for partition expression"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + else + { + if (OidIsValid(attcollation)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("collations are not supported by type %s", + format_type_be(atttype)))); + } + + partcollation[attn] = attcollation; + + /* + * Identify a btree opclass to use. Currently, we use only btree + * operators, which seems enough for list and range partitioning. + */ + if (!pelem->opclass) + { + partopclass[attn] = GetDefaultOpClass(atttype, BTREE_AM_OID); + + if (!OidIsValid(partopclass[attn])) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("data type %s has no default btree operator class", + format_type_be(atttype)), + errhint("You must specify a btree operator class or define a default btree operator class for the data type."))); + } + else + partopclass[attn] = ResolveOpClass(pelem->opclass, + atttype, + "btree", + BTREE_AM_OID); + + attn++; + } +} + +/* + * ALTER TABLE <name> ATTACH PARTITION <partition-name> FOR VALUES + * + * Return the address of the newly attached partition. + */ +static ObjectAddress +ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd) +{ + PartitionKey key = RelationGetPartitionKey(rel); + Relation attachRel, + catalog; + List *childrels; + TupleConstr *attachRel_constr; + List *partConstraint, + *existConstraint; + SysScanDesc scan; + ScanKeyData skey; + HeapTuple tuple; + AttrNumber attno; + int natts; + TupleDesc tupleDesc; + bool skip_validate = false; + ObjectAddress address; + + attachRel = heap_openrv(cmd->name, AccessExclusiveLock); + + /* + * Must be owner of both parent and source table -- parent was checked by + * ATSimplePermissions call in ATPrepCmd + */ + ATSimplePermissions(attachRel, ATT_TABLE | ATT_FOREIGN_TABLE); + + /* A partition can only have one parent */ + if (attachRel->rd_rel->relispartition) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is already a partition", + RelationGetRelationName(attachRel)))); + + if (attachRel->rd_rel->reloftype) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot attach a typed table as partition"))); + + /* + * Table being attached should not already be part of inheritance; either + * as a child table... + */ + catalog = heap_open(InheritsRelationId, AccessShareLock); + ScanKeyInit(&skey, + Anum_pg_inherits_inhrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(attachRel))); + scan = systable_beginscan(catalog, InheritsRelidSeqnoIndexId, true, + NULL, 1, &skey); + if (HeapTupleIsValid(systable_getnext(scan))) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot attach inheritance child as partition"))); + systable_endscan(scan); + + /* ...or as a parent table (except the case when it is partitioned) */ + ScanKeyInit(&skey, + Anum_pg_inherits_inhparent, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(attachRel))); + scan = systable_beginscan(catalog, InheritsParentIndexId, true, NULL, + 1, &skey); + if (HeapTupleIsValid(systable_getnext(scan)) && + attachRel->rd_rel->relkind == RELKIND_RELATION) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot attach inheritance parent as partition"))); + systable_endscan(scan); + heap_close(catalog, AccessShareLock); + + /* + * Prevent circularity by seeing if rel is a partition of attachRel. + * (In particular, this disallows making a rel a partition of itself.) + */ + childrels = find_all_inheritors(RelationGetRelid(attachRel), + AccessShareLock, NULL); + if (list_member_oid(childrels, RelationGetRelid(rel))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("circular inheritance not allowed"), + errdetail("\"%s\" is already a child of \"%s\".", + RelationGetRelationName(rel), + RelationGetRelationName(attachRel)))); + + /* Temp parent cannot have a partition that is itself not a temp */ + if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && + attachRel->rd_rel->relpersistence != RELPERSISTENCE_TEMP) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot attach a permanent relation as partition of temporary relation \"%s\"", + RelationGetRelationName(rel)))); + + /* If the parent is temp, it must belong to this session */ + if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && + !rel->rd_islocaltemp) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot attach as partition of temporary relation of another session"))); + + /* Ditto for the partition */ + if (attachRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && + !attachRel->rd_islocaltemp) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot attach temporary relation of another session as partition"))); + + /* If parent has OIDs then child must have OIDs */ + if (rel->rd_rel->relhasoids && !attachRel->rd_rel->relhasoids) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot attach table \"%s\" without OIDs as partition of" + " table \"%s\" with OIDs", RelationGetRelationName(attachRel), + RelationGetRelationName(rel)))); + + /* OTOH, if parent doesn't have them, do not allow in attachRel either */ + if (attachRel->rd_rel->relhasoids && !rel->rd_rel->relhasoids) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot attach table \"%s\" with OIDs as partition of table" + " \"%s\" without OIDs", RelationGetRelationName(attachRel), + RelationGetRelationName(rel)))); + + /* Check if there are any columns in attachRel that aren't in the parent */ + tupleDesc = RelationGetDescr(attachRel); + natts = tupleDesc->natts; + for (attno = 1; attno <= natts; attno++) + { + Form_pg_attribute attribute = tupleDesc->attrs[attno - 1]; + char *attributeName = NameStr(attribute->attname); + + /* Ignore dropped */ + if (attribute->attisdropped) + continue; + + /* Find same column in parent (matching on column name). */ + tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), attributeName); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("table \"%s\" contains column \"%s\" not found in parent \"%s\"", + RelationGetRelationName(attachRel), attributeName, + RelationGetRelationName(rel)), + errdetail("New partition should contain only the columns present in parent."))); + } + + /* OK to create inheritance. Rest of the checks performed there */ + CreateInheritance(attachRel, rel); + + /* + * Check that the new partition's bound is valid and does not overlap any + * of existing partitions of the parent - note that it does not return + * on error. + */ + check_new_partition_bound(RelationGetRelationName(attachRel), rel, + cmd->bound); + + /* Update the pg_class entry. */ + StorePartitionBound(attachRel, cmd->bound); + + /* + * Generate partition constraint from the partition bound specification. + * If the parent itself is a partition, make sure to include its + * constraint as well. + */ + partConstraint = list_concat(get_qual_from_partbound(attachRel, rel, + cmd->bound), + RelationGetPartitionQual(rel, true)); + partConstraint = (List *) eval_const_expressions(NULL, + (Node *) partConstraint); + partConstraint = (List *) canonicalize_qual((Expr *) partConstraint); + partConstraint = list_make1(make_ands_explicit(partConstraint)); + + /* + * Check if we can do away with having to scan the table being attached + * to validate the partition constraint, by *proving* that the existing + * constraints of the table *imply* the partition predicate. We include + * the table's check constraints and NOT NULL constraints in the list of + * clauses passed to predicate_implied_by(). + * + * There is a case in which we cannot rely on just the result of the + * proof. + */ + tupleDesc = RelationGetDescr(attachRel); + attachRel_constr = tupleDesc->constr; + existConstraint = NIL; + if (attachRel_constr > 0) + { + int num_check = attachRel_constr->num_check; + int i; + Bitmapset *not_null_attrs = NULL; + List *part_constr; + ListCell *lc; + bool partition_accepts_null = true; + int partnatts; + + if (attachRel_constr->has_not_null) + { + int natts = attachRel->rd_att->natts; + + for (i = 1; i <= natts; i++) + { + Form_pg_attribute att = attachRel->rd_att->attrs[i - 1]; + + if (att->attnotnull && !att->attisdropped) + { + NullTest *ntest = makeNode(NullTest); + + ntest->arg = (Expr *) makeVar(1, + i, + att->atttypid, + att->atttypmod, + att->attcollation, + 0); + ntest->nulltesttype = IS_NOT_NULL; + + /* + * argisrow=false is correct even for a composite column, + * because attnotnull does not represent a SQL-spec IS NOT + * NULL test in such a case, just IS DISTINCT FROM NULL. + */ + ntest->argisrow = false; + ntest->location = -1; + existConstraint = lappend(existConstraint, ntest); + not_null_attrs = bms_add_member(not_null_attrs, i); + } + } + } + + for (i = 0; i < num_check; i++) + { + Node *cexpr; + + /* + * If this constraint hasn't been fully validated yet, we must + * ignore it here. + */ + if (!attachRel_constr->check[i].ccvalid) + continue; + + cexpr = stringToNode(attachRel_constr->check[i].ccbin); + + /* + * Run each expression through const-simplification and + * canonicalization. It is necessary, because we will be + * comparing it to similarly-processed qual clauses, and may fail + * to detect valid matches without this. + */ + cexpr = eval_const_expressions(NULL, cexpr); + cexpr = (Node *) canonicalize_qual((Expr *) cexpr); + + existConstraint = list_concat(existConstraint, + make_ands_implicit((Expr *) cexpr)); + } + + existConstraint = list_make1(make_ands_explicit(existConstraint)); + + /* And away we go ... */ + if (predicate_implied_by(partConstraint, existConstraint)) + skip_validate = true; + + /* + * We choose to err on the safer side, ie, give up on skipping the + * the validation scan, if the partition key column doesn't have + * the NOT NULL constraint and the table is to become a list partition + * that does not accept nulls. In this case, the partition predicate + * (partConstraint) does include an 'key IS NOT NULL' expression, + * however, because of the way predicate_implied_by_simple_clause() + * is designed to handle IS NOT NULL predicates in the absence of a + * IS NOT NULL clause, we cannot rely on just the above proof. + * + * That is not an issue in case of a range partition, because if there + * were no NOT NULL constraint defined on the key columns, an error + * would be thrown before we get here anyway. That is not true, + * however, if any of the partition keys is an expression, which is + * handled below. + */ + part_constr = linitial(partConstraint); + part_constr = make_ands_implicit((Expr *) part_constr); + + /* + * part_constr contains an IS NOT NULL expression, if this is a list + * partition that does not accept nulls (in fact, also if this is a + * range partition and some partition key is an expression, but we + * never skip validation in that case anyway; see below) + */ + foreach(lc, part_constr) + { + Node *expr = lfirst(lc); + + if (IsA(expr, NullTest) && + ((NullTest *) expr)->nulltesttype == IS_NOT_NULL) + { + partition_accepts_null = false; + break; + } + } + + partnatts = get_partition_natts(key); + for (i = 0; i < partnatts; i++) + { + AttrNumber partattno; + + partattno = get_partition_col_attnum(key, i); + + /* If partition key is an expression, must not skip validation */ + if (!partition_accepts_null && + (partattno == 0 || + !bms_is_member(partattno, not_null_attrs))) + skip_validate = false; + } + } + + if (skip_validate) + elog(NOTICE, "skipping scan to validate partition constraint"); + + /* + * Set up to have the table to be scanned to validate the partition + * constraint (see partConstraint above). If it's a partitioned table, + * we instead schdule its leaf partitions to be scanned instead. + */ + if (!skip_validate) + { + List *all_parts; + ListCell *lc; + + /* Take an exclusive lock on the partitions to be checked */ + if (attachRel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + all_parts = find_all_inheritors(RelationGetRelid(attachRel), + AccessExclusiveLock, NULL); + else + all_parts = list_make1_oid(RelationGetRelid(attachRel)); + + foreach(lc, all_parts) + { + AlteredTableInfo *tab; + Oid part_relid = lfirst_oid(lc); + Relation part_rel; + Expr *constr; + + /* Lock already taken */ + if (part_relid != RelationGetRelid(attachRel)) + part_rel = heap_open(part_relid, NoLock); + else + part_rel = attachRel; + + /* + * Skip if it's a partitioned table. Only RELKIND_RELATION + * relations (ie, leaf partitions) need to be scanned. + */ + if (part_rel != attachRel && + part_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + heap_close(part_rel, NoLock); + continue; + } + + /* Grab a work queue entry */ + tab = ATGetQueueEntry(wqueue, part_rel); + + constr = linitial(partConstraint); + tab->partition_constraint = make_ands_implicit((Expr *) constr); + + /* keep our lock until commit */ + if (part_rel != attachRel) + heap_close(part_rel, NoLock); + } + } + + /* + * Invalidate the relcache so that the new partition is now included + * in rel's partition descriptor. + */ + CacheInvalidateRelcache(rel); + + ObjectAddressSet(address, RelationRelationId, RelationGetRelid(attachRel)); + + /* keep our lock until commit */ + heap_close(attachRel, NoLock); + + return address; +} + +/* + * ALTER TABLE DETACH PARTITION + * + * Return the address of the relation that is no longer a partition of rel. + */ +static ObjectAddress +ATExecDetachPartition(Relation rel, RangeVar *name) +{ + Relation partRel, + classRel; + HeapTuple tuple, + newtuple; + Datum new_val[Natts_pg_class]; + bool isnull, + new_null[Natts_pg_class], + new_repl[Natts_pg_class]; + ObjectAddress address; + + partRel = heap_openrv(name, AccessShareLock); + + /* All inheritance related checks are performed within the function */ + RemoveInheritance(partRel, rel); + + /* Update pg_class tuple */ + classRel = heap_open(RelationRelationId, RowExclusiveLock); + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(partRel))); + Assert(((Form_pg_class) GETSTRUCT(tuple))->relispartition); + + (void) SysCacheGetAttr(RELOID, tuple, Anum_pg_class_relpartbound, + &isnull); + Assert(!isnull); + + /* Clear relpartbound and reset relispartition */ + memset(new_val, 0, sizeof(new_val)); + memset(new_null, false, sizeof(new_null)); + memset(new_repl, false, sizeof(new_repl)); + new_val[Anum_pg_class_relpartbound - 1] = (Datum) 0; + new_null[Anum_pg_class_relpartbound - 1] = true; + new_repl[Anum_pg_class_relpartbound - 1] = true; + newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel), + new_val, new_null, new_repl); + + ((Form_pg_class) GETSTRUCT(newtuple))->relispartition = false; + simple_heap_update(classRel, &newtuple->t_self, newtuple); + CatalogUpdateIndexes(classRel, newtuple); + heap_freetuple(newtuple); + heap_close(classRel, RowExclusiveLock); + + /* + * Invalidate the relcache so that the partition is no longer included + * in our partition descriptor. + */ + CacheInvalidateRelcache(rel); + + ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel)); + + /* keep our lock until commit */ + heap_close(partRel, NoLock); + + return address; +} diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 1c264b77361..02e9693f28f 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -176,7 +176,8 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString, * Triggers must be on tables or views, and there are additional * relation-type-specific restrictions. */ - if (rel->rd_rel->relkind == RELKIND_RELATION) + if (rel->rd_rel->relkind == RELKIND_RELATION || + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { /* Tables can't have INSTEAD OF triggers */ if (stmt->timing != TRIGGER_TYPE_BEFORE && @@ -186,6 +187,13 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString, errmsg("\"%s\" is a table", RelationGetRelationName(rel)), errdetail("Tables cannot have INSTEAD OF triggers."))); + /* Disallow ROW triggers on partitioned tables */ + if (stmt->row && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a partitioned table", + RelationGetRelationName(rel)), + errdetail("Partitioned tables cannot have ROW triggers."))); } else if (rel->rd_rel->relkind == RELKIND_VIEW) { @@ -1211,7 +1219,8 @@ RemoveTriggerById(Oid trigOid) if (rel->rd_rel->relkind != RELKIND_RELATION && rel->rd_rel->relkind != RELKIND_VIEW && - rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE) + rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table, view, or foreign table", @@ -1316,7 +1325,8 @@ RangeVarCallbackForRenameTrigger(const RangeVar *rv, Oid relid, Oid oldrelid, /* only tables and views can have triggers */ if (form->relkind != RELKIND_RELATION && form->relkind != RELKIND_VIEW && - form->relkind != RELKIND_FOREIGN_TABLE) + form->relkind != RELKIND_FOREIGN_TABLE && + form->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table, view, or foreign table", diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index 056933a5845..5e3989acd22 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -2107,7 +2107,8 @@ DefineCompositeType(RangeVar *typevar, List *coldeflist) /* * Finally create the relation. This also creates the type. */ - DefineRelation(createStmt, RELKIND_COMPOSITE_TYPE, InvalidOid, &address); + DefineRelation(createStmt, RELKIND_COMPOSITE_TYPE, InvalidOid, &address, + NULL); return address; } diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 58bbf5548bc..b1be2f7ad59 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -1314,7 +1314,8 @@ vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params) */ if (onerel->rd_rel->relkind != RELKIND_RELATION && onerel->rd_rel->relkind != RELKIND_MATVIEW && - onerel->rd_rel->relkind != RELKIND_TOASTVALUE) + onerel->rd_rel->relkind != RELKIND_TOASTVALUE && + onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) { ereport(WARNING, (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables", diff --git a/src/backend/commands/view.c b/src/backend/commands/view.c index 325a81096fb..c6b0e4f2b3c 100644 --- a/src/backend/commands/view.c +++ b/src/backend/commands/view.c @@ -228,7 +228,8 @@ DefineVirtualRelation(RangeVar *relation, List *tlist, bool replace, * existing view, so we don't need more code to complain if "replace" * is false). */ - address = DefineRelation(createStmt, RELKIND_VIEW, InvalidOid, NULL); + address = DefineRelation(createStmt, RELKIND_VIEW, InvalidOid, NULL, + NULL); Assert(address.objectId != InvalidOid); return address; } diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 71c07288a19..0f47c7e0104 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -42,6 +42,7 @@ #include "access/transam.h" #include "access/xact.h" #include "catalog/namespace.h" +#include "catalog/partition.h" #include "commands/matview.h" #include "commands/trigger.h" #include "executor/execdebug.h" @@ -825,6 +826,7 @@ InitPlan(QueryDesc *queryDesc, int eflags) InitResultRelInfo(resultRelInfo, resultRelation, resultRelationIndex, + true, estate->es_instrument); resultRelInfo++; } @@ -1019,6 +1021,7 @@ CheckValidResultRel(Relation resultRel, CmdType operation) switch (resultRel->rd_rel->relkind) { case RELKIND_RELATION: + case RELKIND_PARTITIONED_TABLE: /* OK */ break; case RELKIND_SEQUENCE: @@ -1152,6 +1155,7 @@ CheckValidRowMarkRel(Relation rel, RowMarkType markType) switch (rel->rd_rel->relkind) { case RELKIND_RELATION: + case RELKIND_PARTITIONED_TABLE: /* OK */ break; case RELKIND_SEQUENCE: @@ -1212,6 +1216,7 @@ void InitResultRelInfo(ResultRelInfo *resultRelInfo, Relation resultRelationDesc, Index resultRelationIndex, + bool load_partition_check, int instrument_options) { MemSet(resultRelInfo, 0, sizeof(ResultRelInfo)); @@ -1249,6 +1254,10 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo, resultRelInfo->ri_ConstraintExprs = NULL; resultRelInfo->ri_junkFilter = NULL; resultRelInfo->ri_projectReturning = NULL; + if (load_partition_check) + resultRelInfo->ri_PartitionCheck = + RelationGetPartitionQual(resultRelationDesc, + true); } /* @@ -1311,6 +1320,7 @@ ExecGetTriggerResultRel(EState *estate, Oid relid) InitResultRelInfo(rInfo, rel, 0, /* dummy rangetable index */ + true, estate->es_instrument); estate->es_trig_target_relations = lappend(estate->es_trig_target_relations, rInfo); @@ -1691,6 +1701,46 @@ ExecRelCheck(ResultRelInfo *resultRelInfo, return NULL; } +/* + * ExecPartitionCheck --- check that tuple meets the partition constraint. + * + * Note: This is called *iff* resultRelInfo is the main target table. + */ +static bool +ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, + EState *estate) +{ + ExprContext *econtext; + + /* + * If first time through, build expression state tree for the partition + * check expression. Keep it in the per-query memory context so they'll + * survive throughout the query. + */ + if (resultRelInfo->ri_PartitionCheckExpr == NULL) + { + List *qual = resultRelInfo->ri_PartitionCheck; + + resultRelInfo->ri_PartitionCheckExpr = (List *) + ExecPrepareExpr((Expr *) qual, estate); + } + + /* + * We will use the EState's per-tuple context for evaluating constraint + * expressions (creating it if it's not already there). + */ + econtext = GetPerTupleExprContext(estate); + + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; + + /* + * As in case of the catalogued constraints, we treat a NULL result as + * success here, not a failure. + */ + return ExecQual(resultRelInfo->ri_PartitionCheckExpr, econtext, true); +} + void ExecConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, EState *estate) @@ -1702,9 +1752,9 @@ ExecConstraints(ResultRelInfo *resultRelInfo, Bitmapset *insertedCols; Bitmapset *updatedCols; - Assert(constr); + Assert(constr || resultRelInfo->ri_PartitionCheck); - if (constr->has_not_null) + if (constr && constr->has_not_null) { int natts = tupdesc->natts; int attrChk; @@ -1735,7 +1785,7 @@ ExecConstraints(ResultRelInfo *resultRelInfo, } } - if (constr->num_check > 0) + if (constr && constr->num_check > 0) { const char *failed; @@ -1759,6 +1809,26 @@ ExecConstraints(ResultRelInfo *resultRelInfo, errtableconstraint(rel, failed))); } } + + if (resultRelInfo->ri_PartitionCheck && + !ExecPartitionCheck(resultRelInfo, slot, estate)) + { + char *val_desc; + + insertedCols = GetInsertedColumns(resultRelInfo, estate); + updatedCols = GetUpdatedColumns(resultRelInfo, estate); + modifiedCols = bms_union(insertedCols, updatedCols); + val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel), + slot, + tupdesc, + modifiedCols, + 64); + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("new row for relation \"%s\" violates partition constraint", + RelationGetRelationName(rel)), + val_desc ? errdetail("Failing row contains %s.", val_desc) : 0)); + } } /* @@ -2926,3 +2996,52 @@ EvalPlanQualEnd(EPQState *epqstate) epqstate->planstate = NULL; epqstate->origslot = NULL; } + +/* + * ExecFindPartition -- Find a leaf partition in the partition tree rooted + * at parent, for the heap tuple contained in *slot + * + * estate must be non-NULL; we'll need it to compute any expressions in the + * partition key(s) + * + * If no leaf partition is found, this routine errors out with the appropriate + * error message, else it returns the leaf partition sequence number returned + * by get_partition_for_tuple() unchanged. + */ +int +ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd, + TupleTableSlot *slot, EState *estate) +{ + int result; + Oid failed_at; + ExprContext *econtext = GetPerTupleExprContext(estate); + + econtext->ecxt_scantuple = slot; + result = get_partition_for_tuple(pd, slot, estate, &failed_at); + if (result < 0) + { + Relation rel = resultRelInfo->ri_RelationDesc; + char *val_desc; + Bitmapset *insertedCols, + *updatedCols, + *modifiedCols; + TupleDesc tupDesc = RelationGetDescr(rel); + + insertedCols = GetInsertedColumns(resultRelInfo, estate); + updatedCols = GetUpdatedColumns(resultRelInfo, estate); + modifiedCols = bms_union(insertedCols, updatedCols); + val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel), + slot, + tupDesc, + modifiedCols, + 64); + Assert(OidIsValid(failed_at)); + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("no partition of relation \"%s\" found for row", + get_rel_name(failed_at)), + val_desc ? errdetail("Failing row contains %s.", val_desc) : 0)); + } + + return result; +} diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index efb0c5e8e5d..c0b58d1841c 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -258,6 +258,7 @@ ExecInsert(ModifyTableState *mtstate, { HeapTuple tuple; ResultRelInfo *resultRelInfo; + ResultRelInfo *saved_resultRelInfo = NULL; Relation resultRelationDesc; Oid newId; List *recheckIndexes = NIL; @@ -272,6 +273,56 @@ ExecInsert(ModifyTableState *mtstate, * get information on the (current) result relation */ resultRelInfo = estate->es_result_relation_info; + + /* Determine the partition to heap_insert the tuple into */ + if (mtstate->mt_partition_dispatch_info) + { + int leaf_part_index; + TupleConversionMap *map; + + /* + * Away we go ... If we end up not finding a partition after all, + * ExecFindPartition() does not return and errors out instead. + * Otherwise, the returned value is to be used as an index into + * arrays mt_partitions[] and mt_partition_tupconv_maps[] that + * will get us the ResultRelInfo and TupleConversionMap for the + * partition, respectively. + */ + leaf_part_index = ExecFindPartition(resultRelInfo, + mtstate->mt_partition_dispatch_info, + slot, + estate); + Assert(leaf_part_index >= 0 && + leaf_part_index < mtstate->mt_num_partitions); + + /* + * Save the old ResultRelInfo and switch to the one corresponding to + * the selected partition. + */ + saved_resultRelInfo = resultRelInfo; + resultRelInfo = mtstate->mt_partitions + leaf_part_index; + + /* We do not yet have a way to insert into a foreign partition */ + if (resultRelInfo->ri_FdwRoutine) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot route inserted tuples to a foreign table"))); + + /* For ExecInsertIndexTuples() to work on the partition's indexes */ + estate->es_result_relation_info = resultRelInfo; + + /* + * We might need to convert from the parent rowtype to the partition + * rowtype. + */ + map = mtstate->mt_partition_tupconv_maps[leaf_part_index]; + if (map) + { + tuple = do_convert_tuple(tuple, map); + ExecStoreTuple(tuple, slot, InvalidBuffer, true); + } + } + resultRelationDesc = resultRelInfo->ri_RelationDesc; /* @@ -369,7 +420,7 @@ ExecInsert(ModifyTableState *mtstate, /* * Check the constraints of the tuple */ - if (resultRelationDesc->rd_att->constr) + if (resultRelationDesc->rd_att->constr || resultRelInfo->ri_PartitionCheck) ExecConstraints(resultRelInfo, slot, estate); if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0) @@ -511,6 +562,12 @@ ExecInsert(ModifyTableState *mtstate, list_free(recheckIndexes); + if (saved_resultRelInfo) + { + resultRelInfo = saved_resultRelInfo; + estate->es_result_relation_info = resultRelInfo; + } + /* * Check any WITH CHECK OPTION constraints from parent views. We are * required to do this after testing all constraints and uniqueness @@ -922,7 +979,7 @@ lreplace:; /* * Check the constraints of the tuple */ - if (resultRelationDesc->rd_att->constr) + if (resultRelationDesc->rd_att->constr || resultRelInfo->ri_PartitionCheck) ExecConstraints(resultRelInfo, slot, estate); /* @@ -1565,6 +1622,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) Plan *subplan; ListCell *l; int i; + Relation rel; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); @@ -1655,6 +1713,75 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) estate->es_result_relation_info = saved_resultRelInfo; + /* Build state for INSERT tuple routing */ + rel = mtstate->resultRelInfo->ri_RelationDesc; + if (operation == CMD_INSERT && + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + PartitionDispatch *pd; + int i, + j, + num_parted, + num_leaf_parts; + List *leaf_parts; + ListCell *cell; + ResultRelInfo *leaf_part_rri; + + /* Form the partition node tree and lock partitions */ + pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock, + &num_parted, &leaf_parts); + mtstate->mt_partition_dispatch_info = pd; + mtstate->mt_num_dispatch = num_parted; + num_leaf_parts = list_length(leaf_parts); + mtstate->mt_num_partitions = num_leaf_parts; + mtstate->mt_partitions = (ResultRelInfo *) + palloc0(num_leaf_parts * sizeof(ResultRelInfo)); + mtstate->mt_partition_tupconv_maps = (TupleConversionMap **) + palloc0(num_leaf_parts * sizeof(TupleConversionMap *)); + + leaf_part_rri = mtstate->mt_partitions; + i = j = 0; + foreach(cell, leaf_parts) + { + Oid partrelid = lfirst_oid(cell); + Relation partrel; + + /* + * We locked all the partitions above including the leaf + * partitions. Note that each of the relations in + * mtstate->mt_partitions will be closed by ExecEndModifyTable(). + */ + partrel = heap_open(partrelid, NoLock); + + /* + * Verify result relation is a valid target for the current + * operation + */ + CheckValidResultRel(partrel, CMD_INSERT); + + InitResultRelInfo(leaf_part_rri, + partrel, + 1, /* dummy */ + false, /* no partition constraint checks */ + eflags); + + /* Open partition indices (note: ON CONFLICT unsupported)*/ + if (partrel->rd_rel->relhasindex && operation != CMD_DELETE && + leaf_part_rri->ri_IndexRelationDescs == NULL) + ExecOpenIndices(leaf_part_rri, false); + + if (!equalTupleDescs(RelationGetDescr(rel), + RelationGetDescr(partrel))) + mtstate->mt_partition_tupconv_maps[i] = + convert_tuples_by_name(RelationGetDescr(rel), + RelationGetDescr(partrel), + gettext_noop("could not convert row type")); + + leaf_part_rri++; + i++; + } + } + /* * Initialize any WITH CHECK OPTION constraints if needed. */ @@ -1886,7 +2013,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind; if (relkind == RELKIND_RELATION || - relkind == RELKIND_MATVIEW) + relkind == RELKIND_MATVIEW || + relkind == RELKIND_PARTITIONED_TABLE) { j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid"); if (!AttributeNumberIsValid(j->jf_junkAttNo)) @@ -1971,6 +2099,26 @@ ExecEndModifyTable(ModifyTableState *node) resultRelInfo); } + /* Close all the partitioned tables, leaf partitions, and their indices + * + * Remember node->mt_partition_dispatch_info[0] corresponds to the root + * partitioned table, which we must not try to close, because it is the + * main target table of the query that will be closed by ExecEndPlan(). + */ + for (i = 1; i < node->mt_num_dispatch; i++) + { + PartitionDispatch pd = node->mt_partition_dispatch_info[i]; + + heap_close(pd->reldesc, NoLock); + } + for (i = 0; i < node->mt_num_partitions; i++) + { + ResultRelInfo *resultRelInfo = node->mt_partitions + i; + + ExecCloseIndices(resultRelInfo); + heap_close(resultRelInfo->ri_RelationDesc, NoLock); + } + /* * Free the exprcontext */ diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index dd66adb0b24..e30c57e86be 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -3030,6 +3030,8 @@ CopyCreateStmtFields(const CreateStmt *from, CreateStmt *newnode) COPY_NODE_FIELD(relation); COPY_NODE_FIELD(tableElts); COPY_NODE_FIELD(inhRelations); + COPY_NODE_FIELD(partspec); + COPY_NODE_FIELD(partbound); COPY_NODE_FIELD(ofTypename); COPY_NODE_FIELD(constraints); COPY_NODE_FIELD(options); @@ -4188,6 +4190,70 @@ _copyAlterPolicyStmt(const AlterPolicyStmt *from) return newnode; } +static PartitionSpec * +_copyPartitionSpec(const PartitionSpec *from) +{ + + PartitionSpec *newnode = makeNode(PartitionSpec); + + COPY_STRING_FIELD(strategy); + COPY_NODE_FIELD(partParams); + COPY_LOCATION_FIELD(location); + + return newnode; +} + +static PartitionElem * +_copyPartitionElem(const PartitionElem *from) +{ + PartitionElem *newnode = makeNode(PartitionElem); + + COPY_STRING_FIELD(name); + COPY_NODE_FIELD(expr); + COPY_NODE_FIELD(collation); + COPY_NODE_FIELD(opclass); + COPY_LOCATION_FIELD(location); + + return newnode; +} + +static PartitionBoundSpec * +_copyPartitionBoundSpec(const PartitionBoundSpec *from) +{ + PartitionBoundSpec *newnode = makeNode(PartitionBoundSpec); + + COPY_SCALAR_FIELD(strategy); + COPY_NODE_FIELD(listdatums); + COPY_NODE_FIELD(lowerdatums); + COPY_NODE_FIELD(upperdatums); + COPY_LOCATION_FIELD(location); + + return newnode; +} + +static PartitionRangeDatum * +_copyPartitionRangeDatum(const PartitionRangeDatum *from) +{ + PartitionRangeDatum *newnode = makeNode(PartitionRangeDatum); + + COPY_SCALAR_FIELD(infinite); + COPY_NODE_FIELD(value); + COPY_LOCATION_FIELD(location); + + return newnode; +} + +static PartitionCmd * +_copyPartitionCmd(const PartitionCmd *from) +{ + PartitionCmd *newnode = makeNode(PartitionCmd); + + COPY_NODE_FIELD(name); + COPY_NODE_FIELD(bound); + + return newnode; +} + /* **************************************************************** * pg_list.h copy functions * **************************************************************** @@ -5105,6 +5171,21 @@ copyObject(const void *from) case T_TriggerTransition: retval = _copyTriggerTransition(from); break; + case T_PartitionSpec: + retval = _copyPartitionSpec(from); + break; + case T_PartitionElem: + retval = _copyPartitionElem(from); + break; + case T_PartitionBoundSpec: + retval = _copyPartitionBoundSpec(from); + break; + case T_PartitionRangeDatum: + retval = _copyPartitionRangeDatum(from); + break; + case T_PartitionCmd: + retval = _copyPartitionCmd(from); + break; /* * MISCELLANEOUS NODES diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index cad3aebecd5..b7a109cfb04 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -1168,6 +1168,8 @@ _equalCreateStmt(const CreateStmt *a, const CreateStmt *b) COMPARE_NODE_FIELD(relation); COMPARE_NODE_FIELD(tableElts); COMPARE_NODE_FIELD(inhRelations); + COMPARE_NODE_FIELD(partspec); + COMPARE_NODE_FIELD(partbound); COMPARE_NODE_FIELD(ofTypename); COMPARE_NODE_FIELD(constraints); COMPARE_NODE_FIELD(options); @@ -2646,6 +2648,59 @@ _equalTriggerTransition(const TriggerTransition *a, const TriggerTransition *b) return true; } +static bool +_equalPartitionSpec(const PartitionSpec *a, const PartitionSpec *b) +{ + COMPARE_STRING_FIELD(strategy); + COMPARE_NODE_FIELD(partParams); + COMPARE_LOCATION_FIELD(location); + + return true; +} + +static bool +_equalPartitionElem(const PartitionElem *a, const PartitionElem *b) +{ + COMPARE_STRING_FIELD(name); + COMPARE_NODE_FIELD(expr); + COMPARE_NODE_FIELD(collation); + COMPARE_NODE_FIELD(opclass); + COMPARE_LOCATION_FIELD(location); + + return true; +} + +static bool +_equalPartitionBoundSpec(const PartitionBoundSpec *a, const PartitionBoundSpec *b) +{ + COMPARE_SCALAR_FIELD(strategy); + COMPARE_NODE_FIELD(listdatums); + COMPARE_NODE_FIELD(lowerdatums); + COMPARE_NODE_FIELD(upperdatums); + COMPARE_LOCATION_FIELD(location); + + return true; +} + +static bool +_equalPartitionRangeDatum(const PartitionRangeDatum *a, const PartitionRangeDatum *b) +{ + COMPARE_SCALAR_FIELD(infinite); + COMPARE_NODE_FIELD(value); + COMPARE_LOCATION_FIELD(location); + + return true; +} + +static bool +_equalPartitionCmd(const PartitionCmd *a, const PartitionCmd *b) +{ + COMPARE_NODE_FIELD(name); + COMPARE_NODE_FIELD(bound); + + return true; +} + /* * Stuff from pg_list.h */ @@ -3402,6 +3457,21 @@ equal(const void *a, const void *b) case T_TriggerTransition: retval = _equalTriggerTransition(a, b); break; + case T_PartitionSpec: + retval = _equalPartitionSpec(a, b); + break; + case T_PartitionElem: + retval = _equalPartitionElem(a, b); + break; + case T_PartitionBoundSpec: + retval = _equalPartitionBoundSpec(a, b); + break; + case T_PartitionRangeDatum: + retval = _equalPartitionRangeDatum(a, b); + break; + case T_PartitionCmd: + retval = _equalPartitionCmd(a, b); + break; default: elog(ERROR, "unrecognized node type: %d", diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index 399744193c0..973fb152c19 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -1552,6 +1552,12 @@ exprLocation(const Node *expr) /* just use nested expr's location */ loc = exprLocation((Node *) ((const InferenceElem *) expr)->expr); break; + case T_PartitionBoundSpec: + loc = ((const PartitionBoundSpec *) expr)->location; + break; + case T_PartitionRangeDatum: + loc = ((const PartitionRangeDatum *) expr)->location; + break; default: /* for any other node type it's just unknown... */ loc = -1; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 748b6879292..0d858f59206 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2392,6 +2392,8 @@ _outCreateStmtInfo(StringInfo str, const CreateStmt *node) WRITE_NODE_FIELD(relation); WRITE_NODE_FIELD(tableElts); WRITE_NODE_FIELD(inhRelations); + WRITE_NODE_FIELD(partspec); + WRITE_NODE_FIELD(partbound); WRITE_NODE_FIELD(ofTypename); WRITE_NODE_FIELD(constraints); WRITE_NODE_FIELD(options); @@ -3277,6 +3279,47 @@ _outForeignKeyCacheInfo(StringInfo str, const ForeignKeyCacheInfo *node) appendStringInfo(str, " %u", node->conpfeqop[i]); } +static void +_outPartitionSpec(StringInfo str, const PartitionSpec *node) +{ + WRITE_NODE_TYPE("PARTITIONBY"); + + WRITE_STRING_FIELD(strategy); + WRITE_NODE_FIELD(partParams); + WRITE_LOCATION_FIELD(location); +} + +static void +_outPartitionElem(StringInfo str, const PartitionElem *node) +{ + WRITE_NODE_TYPE("PARTITIONELEM"); + + WRITE_STRING_FIELD(name); + WRITE_NODE_FIELD(expr); + WRITE_NODE_FIELD(collation); + WRITE_NODE_FIELD(opclass); + WRITE_LOCATION_FIELD(location); +} + +static void +_outPartitionBoundSpec(StringInfo str, const PartitionBoundSpec *node) +{ + WRITE_NODE_TYPE("PARTITIONBOUND"); + + WRITE_CHAR_FIELD(strategy); + WRITE_NODE_FIELD(listdatums); + WRITE_NODE_FIELD(lowerdatums); + WRITE_NODE_FIELD(upperdatums); +} + +static void +_outPartitionRangeDatum(StringInfo str, const PartitionRangeDatum *node) +{ + WRITE_NODE_TYPE("PARTRANGEDATUM"); + + WRITE_BOOL_FIELD(infinite); + WRITE_NODE_FIELD(value); +} /* * outNode - @@ -3865,6 +3908,18 @@ outNode(StringInfo str, const void *obj) case T_TriggerTransition: _outTriggerTransition(str, obj); break; + case T_PartitionSpec: + _outPartitionSpec(str, obj); + break; + case T_PartitionElem: + _outPartitionElem(str, obj); + break; + case T_PartitionBoundSpec: + _outPartitionBoundSpec(str, obj); + break; + case T_PartitionRangeDatum: + _outPartitionRangeDatum(str, obj); + break; default: diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 917e6c8a65e..c587d4e1d72 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -2266,6 +2266,36 @@ _readExtensibleNode(void) } /* + * _readPartitionBoundSpec + */ +static PartitionBoundSpec * +_readPartitionBoundSpec(void) +{ + READ_LOCALS(PartitionBoundSpec); + + READ_CHAR_FIELD(strategy); + READ_NODE_FIELD(listdatums); + READ_NODE_FIELD(lowerdatums); + READ_NODE_FIELD(upperdatums); + + READ_DONE(); +} + +/* + * _readPartitionRangeDatum + */ +static PartitionRangeDatum * +_readPartitionRangeDatum(void) +{ + READ_LOCALS(PartitionRangeDatum); + + READ_BOOL_FIELD(infinite); + READ_NODE_FIELD(value); + + READ_DONE(); +} + +/* * parseNodeString * * Given a character string representing a node tree, parseNodeString creates @@ -2497,6 +2527,10 @@ parseNodeString(void) return_value = _readAlternativeSubPlan(); else if (MATCH("EXTENSIBLENODE", 14)) return_value = _readExtensibleNode(); + else if (MATCH("PARTITIONBOUND", 14)) + return_value = _readPartitionBoundSpec(); + else if (MATCH("PARTRANGEDATUM", 14)) + return_value = _readPartitionRangeDatum(); else { elog(ERROR, "badly formatted node string \"%.32s\"...", token); diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index bb16c59028d..72272d9bb7b 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -27,6 +27,7 @@ #include "catalog/catalog.h" #include "catalog/dependency.h" #include "catalog/heap.h" +#include "catalog/partition.h" #include "catalog/pg_am.h" #include "foreign/fdwapi.h" #include "miscadmin.h" @@ -1140,6 +1141,7 @@ get_relation_constraints(PlannerInfo *root, Index varno = rel->relid; Relation relation; TupleConstr *constr; + List *pcqual; /* * We assume the relation has already been safely locked. @@ -1225,6 +1227,24 @@ get_relation_constraints(PlannerInfo *root, } } + /* Append partition predicates, if any */ + pcqual = RelationGetPartitionQual(relation, true); + if (pcqual) + { + /* + * Run each expression through const-simplification and + * canonicalization similar to check constraints. + */ + pcqual = (List *) eval_const_expressions(root, (Node *) pcqual); + pcqual = (List *) canonicalize_qual((Expr *) pcqual); + + /* Fix Vars to have the desired varno */ + if (varno != 1) + ChangeVarNodes((Node *) pcqual, 1, varno, 0); + + result = list_concat(result, pcqual); + } + heap_close(relation, NoLock); return result; diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 1a541788eb1..73643461672 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -806,8 +806,16 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) /* Process ON CONFLICT, if any. */ if (stmt->onConflictClause) + { + /* Bail out if target relation is partitioned table */ + if (pstate->p_target_rangetblentry->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ON CONFLICT clause is not supported with partitioned tables"))); + qry->onConflict = transformOnConflictClause(pstate, stmt->onConflictClause); + } /* * If we have a RETURNING clause, we need to add the target relation to diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 414348b95b4..2ed7b5259d0 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -229,6 +229,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); struct ImportQual *importqual; InsertStmt *istmt; VariableSetStmt *vsetstmt; + PartitionElem *partelem; + PartitionSpec *partspec; + PartitionRangeDatum *partrange_datum; } %type <node> stmt schema_stmt @@ -276,7 +279,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <ival> add_drop opt_asc_desc opt_nulls_order %type <node> alter_table_cmd alter_type_cmd opt_collate_clause - replica_identity + replica_identity partition_cmd %type <list> alter_table_cmds alter_type_cmds %type <dbehavior> opt_drop_behavior @@ -545,6 +548,17 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); opt_frame_clause frame_extent frame_bound %type <str> opt_existing_window_name %type <boolean> opt_if_not_exists +%type <partspec> PartitionSpec OptPartitionSpec +%type <str> part_strategy +%type <partelem> part_elem +%type <list> part_params +%type <list> OptPartitionElementList PartitionElementList +%type <node> PartitionElement +%type <node> ForValues +%type <node> partbound_datum +%type <list> partbound_datum_list +%type <partrange_datum> PartitionRangeDatum +%type <list> range_datum_list /* * Non-keyword token types. These are hard-wired into the "flex" lexer. @@ -570,7 +584,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); /* ordinary key words in alphabetical order */ %token <keyword> ABORT_P ABSOLUTE_P ACCESS ACTION ADD_P ADMIN AFTER AGGREGATE ALL ALSO ALTER ALWAYS ANALYSE ANALYZE AND ANY ARRAY AS ASC - ASSERTION ASSIGNMENT ASYMMETRIC AT ATTRIBUTE AUTHORIZATION + ASSERTION ASSIGNMENT ASYMMETRIC AT ATTACH ATTRIBUTE AUTHORIZATION BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT BOOLEAN_P BOTH BY @@ -586,7 +600,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); DATA_P DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS DEPENDS DESC - DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP + DETACH DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P + DOUBLE_P DROP EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENUM_P ESCAPE EVENT EXCEPT EXCLUDE EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN @@ -1787,6 +1802,24 @@ AlterTableStmt: n->missing_ok = true; $$ = (Node *)n; } + | ALTER TABLE relation_expr partition_cmd + { + AlterTableStmt *n = makeNode(AlterTableStmt); + n->relation = $3; + n->cmds = list_make1($4); + n->relkind = OBJECT_TABLE; + n->missing_ok = false; + $$ = (Node *)n; + } + | ALTER TABLE IF_P EXISTS relation_expr partition_cmd + { + AlterTableStmt *n = makeNode(AlterTableStmt); + n->relation = $5; + n->cmds = list_make1($6); + n->relkind = OBJECT_TABLE; + n->missing_ok = true; + $$ = (Node *)n; + } | ALTER TABLE ALL IN_P TABLESPACE name SET TABLESPACE name opt_nowait { AlterTableMoveAllStmt *n = @@ -1932,6 +1965,34 @@ alter_table_cmds: | alter_table_cmds ',' alter_table_cmd { $$ = lappend($1, $3); } ; +partition_cmd: + /* ALTER TABLE <name> ATTACH PARTITION <table_name> FOR VALUES */ + ATTACH PARTITION qualified_name ForValues + { + AlterTableCmd *n = makeNode(AlterTableCmd); + PartitionCmd *cmd = makeNode(PartitionCmd); + + n->subtype = AT_AttachPartition; + cmd->name = $3; + cmd->bound = (Node *) $4; + n->def = (Node *) cmd; + + $$ = (Node *) n; + } + /* ALTER TABLE <name> DETACH PARTITION <partition_name> */ + | DETACH PARTITION qualified_name + { + AlterTableCmd *n = makeNode(AlterTableCmd); + PartitionCmd *cmd = makeNode(PartitionCmd); + + n->subtype = AT_DetachPartition; + cmd->name = $3; + n->def = (Node *) cmd; + + $$ = (Node *) n; + } + ; + alter_table_cmd: /* ALTER TABLE <name> ADD <coldef> */ ADD_P columnDef @@ -2467,6 +2528,73 @@ reloption_elem: } ; +ForValues: + /* a LIST partition */ + FOR VALUES IN_P '(' partbound_datum_list ')' + { + PartitionBoundSpec *n = makeNode(PartitionBoundSpec); + + n->strategy = PARTITION_STRATEGY_LIST; + n->listdatums = $5; + n->location = @3; + + $$ = (Node *) n; + } + + /* a RANGE partition */ + | FOR VALUES FROM '(' range_datum_list ')' TO '(' range_datum_list ')' + { + PartitionBoundSpec *n = makeNode(PartitionBoundSpec); + + n->strategy = PARTITION_STRATEGY_RANGE; + n->lowerdatums = $5; + n->upperdatums = $9; + n->location = @3; + + $$ = (Node *) n; + } + ; + +partbound_datum: + Sconst { $$ = makeStringConst($1, @1); } + | NumericOnly { $$ = makeAConst($1, @1); } + | NULL_P { $$ = makeNullAConst(@1); } + ; + +partbound_datum_list: + partbound_datum { $$ = list_make1($1); } + | partbound_datum_list ',' partbound_datum + { $$ = lappend($1, $3); } + ; + +range_datum_list: + PartitionRangeDatum { $$ = list_make1($1); } + | range_datum_list ',' PartitionRangeDatum + { $$ = lappend($1, $3); } + ; + +PartitionRangeDatum: + UNBOUNDED + { + PartitionRangeDatum *n = makeNode(PartitionRangeDatum); + + n->infinite = true; + n->value = NULL; + n->location = @1; + + $$ = n; + } + | partbound_datum + { + PartitionRangeDatum *n = makeNode(PartitionRangeDatum); + + n->infinite = false; + n->value = $1; + n->location = @1; + + $$ = n; + } + ; /***************************************************************************** * @@ -2812,69 +2940,113 @@ copy_generic_opt_arg_list_item: *****************************************************************************/ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')' - OptInherit OptWith OnCommitOption OptTableSpace + OptInherit OptPartitionSpec OptWith OnCommitOption OptTableSpace { CreateStmt *n = makeNode(CreateStmt); $4->relpersistence = $2; n->relation = $4; n->tableElts = $6; n->inhRelations = $8; + n->partspec = $9; n->ofTypename = NULL; n->constraints = NIL; - n->options = $9; - n->oncommit = $10; - n->tablespacename = $11; + n->options = $10; + n->oncommit = $11; + n->tablespacename = $12; n->if_not_exists = false; $$ = (Node *)n; } | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name '(' - OptTableElementList ')' OptInherit OptWith OnCommitOption - OptTableSpace + OptTableElementList ')' OptInherit OptPartitionSpec OptWith + OnCommitOption OptTableSpace { CreateStmt *n = makeNode(CreateStmt); $7->relpersistence = $2; n->relation = $7; n->tableElts = $9; n->inhRelations = $11; + n->partspec = $12; n->ofTypename = NULL; n->constraints = NIL; - n->options = $12; - n->oncommit = $13; - n->tablespacename = $14; + n->options = $13; + n->oncommit = $14; + n->tablespacename = $15; n->if_not_exists = true; $$ = (Node *)n; } | CREATE OptTemp TABLE qualified_name OF any_name - OptTypedTableElementList OptWith OnCommitOption OptTableSpace + OptTypedTableElementList OptPartitionSpec OptWith OnCommitOption + OptTableSpace { CreateStmt *n = makeNode(CreateStmt); $4->relpersistence = $2; n->relation = $4; n->tableElts = $7; n->inhRelations = NIL; + n->partspec = $8; n->ofTypename = makeTypeNameFromNameList($6); n->ofTypename->location = @6; n->constraints = NIL; - n->options = $8; - n->oncommit = $9; - n->tablespacename = $10; + n->options = $9; + n->oncommit = $10; + n->tablespacename = $11; n->if_not_exists = false; $$ = (Node *)n; } | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name OF any_name - OptTypedTableElementList OptWith OnCommitOption OptTableSpace + OptTypedTableElementList OptPartitionSpec OptWith OnCommitOption + OptTableSpace { CreateStmt *n = makeNode(CreateStmt); $7->relpersistence = $2; n->relation = $7; n->tableElts = $10; n->inhRelations = NIL; + n->partspec = $11; n->ofTypename = makeTypeNameFromNameList($9); n->ofTypename->location = @9; n->constraints = NIL; + n->options = $12; + n->oncommit = $13; + n->tablespacename = $14; + n->if_not_exists = true; + $$ = (Node *)n; + } + | CREATE OptTemp TABLE qualified_name PARTITION OF qualified_name + OptPartitionElementList ForValues OptPartitionSpec OptWith + OnCommitOption OptTableSpace + { + CreateStmt *n = makeNode(CreateStmt); + $4->relpersistence = $2; + n->relation = $4; + n->tableElts = $8; + n->inhRelations = list_make1($7); + n->partbound = (Node *) $9; + n->partspec = $10; + n->ofTypename = NULL; + n->constraints = NIL; n->options = $11; n->oncommit = $12; n->tablespacename = $13; + n->if_not_exists = false; + $$ = (Node *)n; + } + | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name PARTITION OF + qualified_name OptPartitionElementList ForValues OptPartitionSpec + OptWith OnCommitOption OptTableSpace + { + CreateStmt *n = makeNode(CreateStmt); + $7->relpersistence = $2; + n->relation = $7; + n->tableElts = $11; + n->inhRelations = list_make1($10); + n->partbound = (Node *) $12; + n->partspec = $13; + n->ofTypename = NULL; + n->constraints = NIL; + n->options = $14; + n->oncommit = $15; + n->tablespacename = $16; n->if_not_exists = true; $$ = (Node *)n; } @@ -2923,6 +3095,11 @@ OptTypedTableElementList: | /*EMPTY*/ { $$ = NIL; } ; +OptPartitionElementList: + '(' PartitionElementList ')' { $$ = $2; } + | /*EMPTY*/ { $$ = NIL; } + ; + TableElementList: TableElement { @@ -2945,6 +3122,17 @@ TypedTableElementList: } ; +PartitionElementList: + PartitionElement + { + $$ = list_make1($1); + } + | PartitionElementList ',' PartitionElement + { + $$ = lappend($1, $3); + } + ; + TableElement: columnDef { $$ = $1; } | TableLikeClause { $$ = $1; } @@ -2956,6 +3144,28 @@ TypedTableElement: | TableConstraint { $$ = $1; } ; +PartitionElement: + TableConstraint { $$ = $1; } + | ColId ColQualList + { + ColumnDef *n = makeNode(ColumnDef); + n->colname = $1; + n->typeName = NULL; + n->inhcount = 0; + n->is_local = true; + n->is_not_null = false; + n->is_from_type = false; + n->storage = 0; + n->raw_default = NULL; + n->cooked_default = NULL; + n->collOid = InvalidOid; + SplitColQualList($2, &n->constraints, &n->collClause, + yyscanner); + n->location = @1; + $$ = (Node *) n; + } + ; + columnDef: ColId Typename create_generic_options ColQualList { ColumnDef *n = makeNode(ColumnDef); @@ -3419,6 +3629,65 @@ OptInherit: INHERITS '(' qualified_name_list ')' { $$ = $3; } | /*EMPTY*/ { $$ = NIL; } ; +/* Optional partition key specification */ +OptPartitionSpec: PartitionSpec { $$ = $1; } + | /*EMPTY*/ { $$ = NULL; } + ; + +PartitionSpec: PARTITION BY part_strategy '(' part_params ')' + { + PartitionSpec *n = makeNode(PartitionSpec); + + n->strategy = $3; + n->partParams = $5; + n->location = @1; + + $$ = n; + } + ; + +part_strategy: IDENT { $$ = $1; } + | unreserved_keyword { $$ = pstrdup($1); } + ; + +part_params: part_elem { $$ = list_make1($1); } + | part_params ',' part_elem { $$ = lappend($1, $3); } + ; + +part_elem: ColId opt_collate opt_class + { + PartitionElem *n = makeNode(PartitionElem); + + n->name = $1; + n->expr = NULL; + n->collation = $2; + n->opclass = $3; + n->location = @1; + $$ = n; + } + | func_expr_windowless opt_collate opt_class + { + PartitionElem *n = makeNode(PartitionElem); + + n->name = NULL; + n->expr = $1; + n->collation = $2; + n->opclass = $3; + n->location = @1; + $$ = n; + } + | '(' a_expr ')' opt_collate opt_class + { + PartitionElem *n = makeNode(PartitionElem); + + n->name = NULL; + n->expr = $2; + n->collation = $4; + n->opclass = $5; + n->location = @1; + $$ = n; + } + ; /* WITH (options) is preferred, WITH OIDS and WITHOUT OIDS are legacy forms */ OptWith: WITH reloptions { $$ = $2; } @@ -4484,6 +4753,48 @@ CreateForeignTableStmt: n->options = $14; $$ = (Node *) n; } + | CREATE FOREIGN TABLE qualified_name + PARTITION OF qualified_name OptPartitionElementList ForValues + SERVER name create_generic_options + { + CreateForeignTableStmt *n = makeNode(CreateForeignTableStmt); + $4->relpersistence = RELPERSISTENCE_PERMANENT; + n->base.relation = $4; + n->base.inhRelations = list_make1($7); + n->base.tableElts = $8; + n->base.partbound = (Node *) $9; + n->base.ofTypename = NULL; + n->base.constraints = NIL; + n->base.options = NIL; + n->base.oncommit = ONCOMMIT_NOOP; + n->base.tablespacename = NULL; + n->base.if_not_exists = false; + /* FDW-specific data */ + n->servername = $11; + n->options = $12; + $$ = (Node *) n; + } + | CREATE FOREIGN TABLE IF_P NOT EXISTS qualified_name + PARTITION OF qualified_name OptPartitionElementList ForValues + SERVER name create_generic_options + { + CreateForeignTableStmt *n = makeNode(CreateForeignTableStmt); + $7->relpersistence = RELPERSISTENCE_PERMANENT; + n->base.relation = $7; + n->base.inhRelations = list_make1($10); + n->base.tableElts = $11; + n->base.partbound = (Node *) $12; + n->base.ofTypename = NULL; + n->base.constraints = NIL; + n->base.options = NIL; + n->base.oncommit = ONCOMMIT_NOOP; + n->base.tablespacename = NULL; + n->base.if_not_exists = true; + /* FDW-specific data */ + n->servername = $14; + n->options = $15; + $$ = (Node *) n; + } ; /***************************************************************************** @@ -13703,6 +14014,7 @@ unreserved_keyword: | ASSERTION | ASSIGNMENT | AT + | ATTACH | ATTRIBUTE | BACKWARD | BEFORE @@ -13749,6 +14061,7 @@ unreserved_keyword: | DELIMITER | DELIMITERS | DEPENDS + | DETACH | DICTIONARY | DISABLE_P | DISCARD diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index 481a4ddc484..92d1577030c 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -501,6 +501,13 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr) err = _("grouping operations are not allowed in trigger WHEN conditions"); break; + case EXPR_KIND_PARTITION_EXPRESSION: + if (isAgg) + err = _("aggregate functions are not allowed in partition key expression"); + else + err = _("grouping operations are not allowed in partition key expression"); + + break; /* * There is intentionally no default: case here, so that the @@ -858,6 +865,9 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc, case EXPR_KIND_TRIGGER_WHEN: err = _("window functions are not allowed in trigger WHEN conditions"); break; + case EXPR_KIND_PARTITION_EXPRESSION: + err = _("window functions are not allowed in partition key expression"); + break; /* * There is intentionally no default: case here, so that the diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 17d1cbf8b32..8a2bdf06e8d 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -1843,6 +1843,9 @@ transformSubLink(ParseState *pstate, SubLink *sublink) case EXPR_KIND_TRIGGER_WHEN: err = _("cannot use subquery in trigger WHEN condition"); break; + case EXPR_KIND_PARTITION_EXPRESSION: + err = _("cannot use subquery in partition key expression"); + break; /* * There is intentionally no default: case here, so that the @@ -3446,6 +3449,8 @@ ParseExprKindName(ParseExprKind exprKind) return "EXECUTE"; case EXPR_KIND_TRIGGER_WHEN: return "WHEN"; + case EXPR_KIND_PARTITION_EXPRESSION: + return "PARTITION BY"; /* * There is intentionally no default: case here, so that the diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c index 56c9a4293df..7d9b4157d4d 100644 --- a/src/backend/parser/parse_func.c +++ b/src/backend/parser/parse_func.c @@ -2166,6 +2166,9 @@ check_srf_call_placement(ParseState *pstate, int location) case EXPR_KIND_TRIGGER_WHEN: err = _("set-returning functions are not allowed in trigger WHEN conditions"); break; + case EXPR_KIND_PARTITION_EXPRESSION: + err = _("set-returning functions are not allowed in partition key expression"); + break; /* * There is intentionally no default: case here, so that the diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 0670bc24822..cc6a961bb46 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -47,8 +47,10 @@ #include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/planner.h" #include "parser/analyze.h" #include "parser/parse_clause.h" +#include "parser/parse_coerce.h" #include "parser/parse_collate.h" #include "parser/parse_expr.h" #include "parser/parse_relation.h" @@ -62,6 +64,7 @@ #include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/rel.h" +#include "utils/ruleutils.h" #include "utils/syscache.h" #include "utils/typcache.h" @@ -87,6 +90,8 @@ typedef struct List *alist; /* "after list" of things to do after creating * the table */ IndexStmt *pkey; /* PRIMARY KEY index, if any */ + bool ispartitioned; /* true if table is partitioned */ + Node *partbound; /* transformed FOR VALUES */ } CreateStmtContext; /* State shared by transformCreateSchemaStmt and its subroutines */ @@ -129,6 +134,7 @@ static void transformConstraintAttrs(CreateStmtContext *cxt, List *constraintList); static void transformColumnType(CreateStmtContext *cxt, ColumnDef *column); static void setSchemaName(char *context_schema, char **stmt_schema_name); +static void transformAttachPartition(CreateStmtContext *cxt, PartitionCmd *cmd); /* @@ -229,6 +235,7 @@ transformCreateStmt(CreateStmt *stmt, const char *queryString) cxt.blist = NIL; cxt.alist = NIL; cxt.pkey = NULL; + cxt.ispartitioned = stmt->partspec != NULL; /* * Notice that we allow OIDs here only for plain tables, even though @@ -247,6 +254,28 @@ transformCreateStmt(CreateStmt *stmt, const char *queryString) if (stmt->ofTypename) transformOfType(&cxt, stmt->ofTypename); + if (stmt->partspec) + { + int partnatts = list_length(stmt->partspec->partParams); + + if (stmt->inhRelations && !stmt->partbound) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot create partitioned table as inheritance child"))); + + if (partnatts > PARTITION_MAX_KEYS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("cannot partition using more than %d columns", + PARTITION_MAX_KEYS))); + + if (!pg_strcasecmp(stmt->partspec->strategy, "list") && + partnatts > 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot list partition using more than one column"))); + } + /* * Run through each primary element in the table creation clause. Separate * column defs from constraints, and do preliminary analysis. We have to @@ -583,6 +612,12 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column) errmsg("primary key constraints are not supported on foreign tables"), parser_errposition(cxt->pstate, constraint->location))); + if (cxt->ispartitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("primary key constraints are not supported on partitioned tables"), + parser_errposition(cxt->pstate, + constraint->location))); /* FALL THRU */ case CONSTR_UNIQUE: @@ -592,6 +627,12 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column) errmsg("unique constraints are not supported on foreign tables"), parser_errposition(cxt->pstate, constraint->location))); + if (cxt->ispartitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unique constraints are not supported on partitioned tables"), + parser_errposition(cxt->pstate, + constraint->location))); if (constraint->keys == NIL) constraint->keys = list_make1(makeString(column->colname)); cxt->ixconstraints = lappend(cxt->ixconstraints, constraint); @@ -609,6 +650,12 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column) errmsg("foreign key constraints are not supported on foreign tables"), parser_errposition(cxt->pstate, constraint->location))); + if (cxt->ispartitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("foreign key constraints are not supported on partitioned tables"), + parser_errposition(cxt->pstate, + constraint->location))); /* * Fill in the current attribute's name and throw it into the @@ -674,6 +721,12 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint) errmsg("primary key constraints are not supported on foreign tables"), parser_errposition(cxt->pstate, constraint->location))); + if (cxt->ispartitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("primary key constraints are not supported on partitioned tables"), + parser_errposition(cxt->pstate, + constraint->location))); cxt->ixconstraints = lappend(cxt->ixconstraints, constraint); break; @@ -684,6 +737,12 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint) errmsg("unique constraints are not supported on foreign tables"), parser_errposition(cxt->pstate, constraint->location))); + if (cxt->ispartitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unique constraints are not supported on partitioned tables"), + parser_errposition(cxt->pstate, + constraint->location))); cxt->ixconstraints = lappend(cxt->ixconstraints, constraint); break; @@ -694,6 +753,12 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint) errmsg("exclusion constraints are not supported on foreign tables"), parser_errposition(cxt->pstate, constraint->location))); + if (cxt->ispartitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("exclusion constraints are not supported on partitioned tables"), + parser_errposition(cxt->pstate, + constraint->location))); cxt->ixconstraints = lappend(cxt->ixconstraints, constraint); break; @@ -708,6 +773,12 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint) errmsg("foreign key constraints are not supported on foreign tables"), parser_errposition(cxt->pstate, constraint->location))); + if (cxt->ispartitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("foreign key constraints are not supported on partitioned tables"), + parser_errposition(cxt->pstate, + constraint->location))); cxt->fkconstraints = lappend(cxt->fkconstraints, constraint); break; @@ -763,7 +834,8 @@ transformTableLikeClause(CreateStmtContext *cxt, TableLikeClause *table_like_cla relation->rd_rel->relkind != RELKIND_VIEW && relation->rd_rel->relkind != RELKIND_MATVIEW && relation->rd_rel->relkind != RELKIND_COMPOSITE_TYPE && - relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE) + relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table, view, materialized view, composite type, or foreign table", @@ -1854,7 +1926,8 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt) rel = heap_openrv(inh, AccessShareLock); /* check user requested inheritance from valid relkind */ if (rel->rd_rel->relkind != RELKIND_RELATION && - rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE) + rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("inherited relation \"%s\" is not a table or foreign table", @@ -2512,6 +2585,8 @@ transformAlterTableStmt(Oid relid, AlterTableStmt *stmt, cxt.blist = NIL; cxt.alist = NIL; cxt.pkey = NULL; + cxt.ispartitioned = (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); + cxt.partbound = NULL; /* * The only subtypes that currently require parse transformation handling @@ -2594,6 +2669,19 @@ transformAlterTableStmt(Oid relid, AlterTableStmt *stmt, break; } + case AT_AttachPartition: + { + PartitionCmd *partcmd = (PartitionCmd *) cmd->def; + + transformAttachPartition(&cxt, partcmd); + + /* assign transformed values */ + partcmd->bound = cxt.partbound; + } + + newcmds = lappend(newcmds, cmd); + break; + default: newcmds = lappend(newcmds, cmd); break; @@ -2958,3 +3046,237 @@ setSchemaName(char *context_schema, char **stmt_schema_name) "different from the one being created (%s)", *stmt_schema_name, context_schema))); } + +/* + * transformAttachPartition + * Analyze ATTACH PARTITION ... FOR VALUES ... + */ +static void +transformAttachPartition(CreateStmtContext *cxt, PartitionCmd *cmd) +{ + Relation parentRel = cxt->rel; + + /* + * We are going to try to validate the partition bound specification + * against the partition key of rel, so it better have one. + */ + if (parentRel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("\"%s\" is not partitioned", + RelationGetRelationName(parentRel)))); + + /* tranform the values */ + Assert(RelationGetPartitionKey(parentRel) != NULL); + cxt->partbound = transformPartitionBound(cxt->pstate, parentRel, + cmd->bound); +} + +/* + * transformPartitionBound + * + * Transform partition bound specification + */ +Node * +transformPartitionBound(ParseState *pstate, Relation parent, Node *bound) +{ + PartitionBoundSpec *spec = (PartitionBoundSpec *) bound, + *result_spec; + PartitionKey key = RelationGetPartitionKey(parent); + char strategy = get_partition_strategy(key); + int partnatts = get_partition_natts(key); + List *partexprs = get_partition_exprs(key); + + result_spec = copyObject(spec); + + if (strategy == PARTITION_STRATEGY_LIST) + { + ListCell *cell; + char *colname; + + /* Get the only column's name in case we need to output an error */ + if (key->partattrs[0] != 0) + colname = get_relid_attribute_name(RelationGetRelid(parent), + key->partattrs[0]); + else + colname = deparse_expression((Node *) linitial(partexprs), + deparse_context_for(RelationGetRelationName(parent), + RelationGetRelid(parent)), + false, false); + + if (spec->strategy != PARTITION_STRATEGY_LIST) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("invalid bound specification for a list partition"), + parser_errposition(pstate, exprLocation(bound)))); + + result_spec->listdatums = NIL; + foreach(cell, spec->listdatums) + { + A_Const *con = (A_Const *) lfirst(cell); + Node *value; + ListCell *cell2; + bool duplicate; + + value = (Node *) make_const(pstate, &con->val, con->location); + value = coerce_to_target_type(pstate, + value, exprType(value), + get_partition_col_typid(key, 0), + get_partition_col_typmod(key, 0), + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + + if (value == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("specified value cannot be cast to type \"%s\" of column \"%s\"", + format_type_be(get_partition_col_typid(key, 0)), + colname), + parser_errposition(pstate, + exprLocation((Node *) con)))); + + /* Simplify the expression */ + value = (Node *) expression_planner((Expr *) value); + + /* Don't add to the result if the value is a duplicate */ + duplicate = false; + foreach(cell2, result_spec->listdatums) + { + Const *value2 = (Const *) lfirst(cell2); + + if (equal(value, value2)) + { + duplicate = true; + break; + } + } + if (duplicate) + continue; + + result_spec->listdatums = lappend(result_spec->listdatums, + value); + } + } + else if (strategy == PARTITION_STRATEGY_RANGE) + { + ListCell *cell1, + *cell2; + int i, + j; + char *colname; + + if (spec->strategy != PARTITION_STRATEGY_RANGE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("invalid bound specification for a range partition"), + parser_errposition(pstate, exprLocation(bound)))); + + Assert(spec->lowerdatums != NIL && spec->upperdatums != NIL); + + if (list_length(spec->lowerdatums) != partnatts) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("FROM must specify exactly one value per partitioning column"))); + if (list_length(spec->upperdatums) != partnatts) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("TO must specify exactly one value per partitioning column"))); + + i = j = 0; + result_spec->lowerdatums = result_spec->upperdatums = NIL; + forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums) + { + PartitionRangeDatum *ldatum, + *rdatum; + Node *value; + A_Const *lcon = NULL, + *rcon = NULL; + + ldatum = (PartitionRangeDatum *) lfirst(cell1); + rdatum = (PartitionRangeDatum *) lfirst(cell2); + /* Get the column's name in case we need to output an error */ + if (key->partattrs[i] != 0) + colname = get_relid_attribute_name(RelationGetRelid(parent), + key->partattrs[i]); + else + { + colname = deparse_expression((Node *) list_nth(partexprs, j), + deparse_context_for(RelationGetRelationName(parent), + RelationGetRelid(parent)), + false, false); + ++j; + } + + if (!ldatum->infinite) + lcon = (A_Const *) ldatum->value; + if (!rdatum->infinite) + rcon = (A_Const *) rdatum->value; + + if (lcon) + { + value = (Node *) make_const(pstate, &lcon->val, lcon->location); + if (((Const *) value)->constisnull) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot specify NULL in range bound"))); + value = coerce_to_target_type(pstate, + value, exprType(value), + get_partition_col_typid(key, i), + get_partition_col_typmod(key, i), + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + if (value == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("specified value cannot be cast to type \"%s\" of column \"%s\"", + format_type_be(get_partition_col_typid(key, i)), + colname), + parser_errposition(pstate, exprLocation((Node *) ldatum)))); + + /* Simplify the expression */ + value = (Node *) expression_planner((Expr *) value); + ldatum->value = value; + } + + if (rcon) + { + value = (Node *) make_const(pstate, &rcon->val, rcon->location); + if (((Const *) value)->constisnull) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot specify NULL in range bound"))); + value = coerce_to_target_type(pstate, + value, exprType(value), + get_partition_col_typid(key, i), + get_partition_col_typmod(key, i), + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + if (value == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("specified value cannot be cast to type \"%s\" of column \"%s\"", + format_type_be(get_partition_col_typid(key, i)), + colname), + parser_errposition(pstate, exprLocation((Node *) rdatum)))); + + /* Simplify the expression */ + value = (Node *) expression_planner((Expr *) value); + rdatum->value = value; + } + + result_spec->lowerdatums = lappend(result_spec->lowerdatums, + copyObject(ldatum)); + result_spec->upperdatums = lappend(result_spec->upperdatums, + copyObject(rdatum)); + + ++i; + } + } + else + elog(ERROR, "unexpected partition strategy: %d", (int) strategy); + + return (Node *) result_spec; +} diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c index f82d891c347..32e13281497 100644 --- a/src/backend/rewrite/rewriteDefine.c +++ b/src/backend/rewrite/rewriteDefine.c @@ -261,7 +261,8 @@ DefineQueryRewrite(char *rulename, */ if (event_relation->rd_rel->relkind != RELKIND_RELATION && event_relation->rd_rel->relkind != RELKIND_MATVIEW && - event_relation->rd_rel->relkind != RELKIND_VIEW) + event_relation->rd_rel->relkind != RELKIND_VIEW && + event_relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table or view", diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 65c3d6e0814..bf4f098c153 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -1231,7 +1231,8 @@ rewriteTargetListUD(Query *parsetree, RangeTblEntry *target_rte, TargetEntry *tle; if (target_relation->rd_rel->relkind == RELKIND_RELATION || - target_relation->rd_rel->relkind == RELKIND_MATVIEW) + target_relation->rd_rel->relkind == RELKIND_MATVIEW || + target_relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { /* * Emit CTID so that executor can find the row to update or delete. diff --git a/src/backend/rewrite/rowsecurity.c b/src/backend/rewrite/rowsecurity.c index b7edefc7ddf..e38586dd80b 100644 --- a/src/backend/rewrite/rowsecurity.c +++ b/src/backend/rewrite/rowsecurity.c @@ -121,7 +121,8 @@ get_row_security_policies(Query *root, RangeTblEntry *rte, int rt_index, *hasSubLinks = false; /* If this is not a normal relation, just return immediately */ - if (rte->relkind != RELKIND_RELATION) + if (rte->relkind != RELKIND_RELATION && + rte->relkind != RELKIND_PARTITIONED_TABLE) return; /* Switch to checkAsUser if it's set */ diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index f50ce408ae6..fd4eff49074 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -987,7 +987,8 @@ ProcessUtilitySlow(ParseState *pstate, /* Create the table itself */ address = DefineRelation((CreateStmt *) stmt, RELKIND_RELATION, - InvalidOid, NULL); + InvalidOid, NULL, + queryString); EventTriggerCollectSimpleCommand(address, secondaryObject, stmt); @@ -1020,7 +1021,8 @@ ProcessUtilitySlow(ParseState *pstate, /* Create the table itself */ address = DefineRelation((CreateStmt *) stmt, RELKIND_FOREIGN_TABLE, - InvalidOid, NULL); + InvalidOid, NULL, + queryString); CreateForeignTable((CreateForeignTableStmt *) stmt, address.objectId); EventTriggerCollectSimpleCommand(address, diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index fecee85e5ba..4e2ba19d1b7 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -33,6 +33,7 @@ #include "catalog/pg_language.h" #include "catalog/pg_opclass.h" #include "catalog/pg_operator.h" +#include "catalog/pg_partitioned_table.h" #include "catalog/pg_proc.h" #include "catalog/pg_trigger.h" #include "catalog/pg_type.h" @@ -315,6 +316,7 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno, const Oid *excludeOps, bool attrsOnly, bool showTblSpc, int prettyFlags, bool missing_ok); +static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags); static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand, int prettyFlags, bool missing_ok); static text *pg_get_expr_worker(text *expr, Oid relid, const char *relname, @@ -1415,6 +1417,163 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, return buf.data; } +/* + * pg_get_partkeydef + * + * Returns the partition key specification, ie, the following: + * + * PARTITION BY { RANGE | LIST } (column opt_collation opt_opclass [, ...]) + */ +Datum +pg_get_partkeydef(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + + PG_RETURN_TEXT_P(string_to_text(pg_get_partkeydef_worker(relid, + PRETTYFLAG_INDENT))); +} + +/* + * Internal workhorse to decompile a partition key definition. + */ +static char * +pg_get_partkeydef_worker(Oid relid, int prettyFlags) +{ + Form_pg_partitioned_table form; + HeapTuple tuple; + oidvector *partclass; + oidvector *partcollation; + List *partexprs; + ListCell *partexpr_item; + List *context; + Datum datum; + bool isnull; + StringInfoData buf; + int keyno; + char *str; + char *sep; + + tuple = SearchSysCache1(PARTRELID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for partition key of %u", relid); + + form = (Form_pg_partitioned_table) GETSTRUCT(tuple); + + Assert(form->partrelid == relid); + + /* Must get partclass and partcollation the hard way */ + datum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partclass, &isnull); + Assert(!isnull); + partclass = (oidvector *) DatumGetPointer(datum); + + datum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partcollation, &isnull); + Assert(!isnull); + partcollation = (oidvector *) DatumGetPointer(datum); + + + /* + * Get the expressions, if any. (NOTE: we do not use the relcache + * versions of the expressions, because we want to display non-const-folded + * expressions.) + */ + if (!heap_attisnull(tuple, Anum_pg_partitioned_table_partexprs)) + { + Datum exprsDatum; + bool isnull; + char *exprsString; + + exprsDatum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partexprs, &isnull); + Assert(!isnull); + exprsString = TextDatumGetCString(exprsDatum); + partexprs = (List *) stringToNode(exprsString); + + if (!IsA(partexprs, List)) + elog(ERROR, "unexpected node type found in partexprs: %d", + (int) nodeTag(partexprs)); + + pfree(exprsString); + } + else + partexprs = NIL; + + partexpr_item = list_head(partexprs); + context = deparse_context_for(get_relation_name(relid), relid); + + initStringInfo(&buf); + + switch (form->partstrat) + { + case PARTITION_STRATEGY_LIST: + appendStringInfo(&buf, "LIST"); + break; + case PARTITION_STRATEGY_RANGE: + appendStringInfo(&buf, "RANGE"); + break; + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) form->partstrat); + } + + appendStringInfo(&buf, " ("); + sep = ""; + for (keyno = 0; keyno < form->partnatts; keyno++) + { + AttrNumber attnum = form->partattrs.values[keyno]; + Oid keycoltype; + Oid keycolcollation; + Oid partcoll; + + appendStringInfoString(&buf, sep); + sep = ", "; + if (attnum != 0) + { + /* Simple attribute reference */ + char *attname; + int32 keycoltypmod; + + attname = get_relid_attribute_name(relid, attnum); + appendStringInfoString(&buf, quote_identifier(attname)); + get_atttypetypmodcoll(relid, attnum, + &keycoltype, &keycoltypmod, + &keycolcollation); + } + else + { + /* Expression */ + Node *partkey; + + if (partexpr_item == NULL) + elog(ERROR, "too few entries in partexprs list"); + partkey = (Node *) lfirst(partexpr_item); + partexpr_item = lnext(partexpr_item); + /* Deparse */ + str = deparse_expression_pretty(partkey, context, false, false, + 0, 0); + + appendStringInfoString(&buf, str); + keycoltype = exprType(partkey); + keycolcollation = exprCollation(partkey); + } + + /* Add collation, if not default for column */ + partcoll = partcollation->values[keyno]; + if (OidIsValid(partcoll) && partcoll != keycolcollation) + appendStringInfo(&buf, " COLLATE %s", + generate_collation_name((partcoll))); + + /* Add the operator class name, if not default */ + get_opclass_name(partclass->values[keyno], keycoltype, &buf); + } + appendStringInfoChar(&buf, ')'); + + /* Clean up */ + ReleaseSysCache(tuple); + + return buf.data; +} /* * pg_get_constraintdef @@ -8291,6 +8450,88 @@ get_rule_expr(Node *node, deparse_context *context, } break; + case T_PartitionBoundSpec: + { + PartitionBoundSpec *spec = (PartitionBoundSpec *) node; + ListCell *cell; + char *sep; + + switch (spec->strategy) + { + case PARTITION_STRATEGY_LIST: + Assert(spec->listdatums != NIL); + + appendStringInfoString(buf, "FOR VALUES"); + appendStringInfoString(buf, " IN ("); + sep = ""; + foreach (cell, spec->listdatums) + { + Const *val = lfirst(cell); + + appendStringInfoString(buf, sep); + get_const_expr(val, context, -1); + sep = ", "; + } + + appendStringInfoString(buf, ")"); + break; + + case PARTITION_STRATEGY_RANGE: + Assert(spec->lowerdatums != NIL && + spec->upperdatums != NIL && + list_length(spec->lowerdatums) == + list_length(spec->upperdatums)); + + appendStringInfoString(buf, "FOR VALUES"); + appendStringInfoString(buf, " FROM"); + appendStringInfoString(buf, " ("); + sep = ""; + foreach (cell, spec->lowerdatums) + { + PartitionRangeDatum *datum = lfirst(cell); + Const *val; + + appendStringInfoString(buf, sep); + if (datum->infinite) + appendStringInfoString(buf, "UNBOUNDED"); + else + { + val = (Const *) datum->value; + get_const_expr(val, context, -1); + } + sep = ", "; + } + appendStringInfoString(buf, ")"); + + appendStringInfoString(buf, " TO"); + appendStringInfoString(buf, " ("); + sep = ""; + foreach (cell, spec->upperdatums) + { + PartitionRangeDatum *datum = lfirst(cell); + Const *val; + + appendStringInfoString(buf, sep); + if (datum->infinite) + appendStringInfoString(buf, "UNBOUNDED"); + else + { + val = (Const *) datum->value; + get_const_expr(val, context, -1); + } + sep = ", "; + } + appendStringInfoString(buf, ")"); + break; + + default: + elog(ERROR, "unrecognized partition strategy: %d", + (int) spec->strategy); + break; + } + } + break; + case T_List: { char *sep; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 79e0b1ff483..2a6835991c3 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -32,6 +32,7 @@ #include "access/htup_details.h" #include "access/multixact.h" +#include "access/nbtree.h" #include "access/reloptions.h" #include "access/sysattr.h" #include "access/xact.h" @@ -40,6 +41,7 @@ #include "catalog/index.h" #include "catalog/indexing.h" #include "catalog/namespace.h" +#include "catalog/partition.h" #include "catalog/pg_am.h" #include "catalog/pg_amproc.h" #include "catalog/pg_attrdef.h" @@ -49,6 +51,7 @@ #include "catalog/pg_database.h" #include "catalog/pg_namespace.h" #include "catalog/pg_opclass.h" +#include "catalog/pg_partitioned_table.h" #include "catalog/pg_proc.h" #include "catalog/pg_rewrite.h" #include "catalog/pg_shseclabel.h" @@ -258,6 +261,8 @@ static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_hi static Relation AllocateRelationDesc(Form_pg_class relp); static void RelationParseRelOptions(Relation relation, HeapTuple tuple); static void RelationBuildTupleDesc(Relation relation); +static void RelationBuildPartitionKey(Relation relation); +static PartitionKey copy_partition_key(PartitionKey fromkey); static Relation RelationBuildDesc(Oid targetRelId, bool insertIt); static void RelationInitPhysicalAddr(Relation relation); static void load_critical_index(Oid indexoid, Oid heapoid); @@ -278,6 +283,8 @@ static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid, StrategyNumber numSupport); static void RelationCacheInitFileRemoveInDir(const char *tblspcpath); static void unlink_initfile(const char *initfilename); +static bool equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1, + PartitionDesc partdesc2); /* @@ -435,6 +442,7 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple) case RELKIND_INDEX: case RELKIND_VIEW: case RELKIND_MATVIEW: + case RELKIND_PARTITIONED_TABLE: break; default: return; @@ -796,6 +804,237 @@ RelationBuildRuleLock(Relation relation) } /* + * RelationBuildPartitionKey + * Build and attach to relcache partition key data of relation + * + * Partitioning key data is stored in CacheMemoryContext to ensure it survives + * as long as the relcache. To avoid leaking memory in that context in case + * of an error partway through this function, we build the structure in the + * working context (which must be short-lived) and copy the completed + * structure into the cache memory. + * + * Also, since the structure being created here is sufficiently complex, we + * make a private child context of CacheMemoryContext for each relation that + * has associated partition key information. That means no complicated logic + * to free individual elements whenever the relcache entry is flushed - just + * delete the context. + */ +static void +RelationBuildPartitionKey(Relation relation) +{ + Form_pg_partitioned_table form; + HeapTuple tuple; + bool isnull; + int i; + PartitionKey key; + AttrNumber *attrs; + oidvector *opclass; + oidvector *collation; + ListCell *partexprs_item; + Datum datum; + MemoryContext partkeycxt, + oldcxt; + + tuple = SearchSysCache1(PARTRELID, + ObjectIdGetDatum(RelationGetRelid(relation))); + + /* + * The following happens when we have created our pg_class entry but not + * the pg_partitioned_table entry yet. + */ + if (!HeapTupleIsValid(tuple)) + return; + + key = (PartitionKey) palloc0(sizeof(PartitionKeyData)); + + /* Fixed-length attributes */ + form = (Form_pg_partitioned_table) GETSTRUCT(tuple); + key->strategy = form->partstrat; + key->partnatts = form->partnatts; + + /* + * We can rely on the first variable-length attribute being mapped to the + * relevant field of the catalog's C struct, because all previous + * attributes are non-nullable and fixed-length. + */ + attrs = form->partattrs.values; + + /* But use the hard way to retrieve further variable-length attributes */ + /* Operator class */ + datum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partclass, &isnull); + Assert(!isnull); + opclass = (oidvector *) DatumGetPointer(datum); + + /* Collation */ + datum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partcollation, &isnull); + Assert(!isnull); + collation = (oidvector *) DatumGetPointer(datum); + + /* Expressions */ + datum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partexprs, &isnull); + if (!isnull) + { + char *exprString; + Node *expr; + + exprString = TextDatumGetCString(datum); + expr = stringToNode(exprString); + pfree(exprString); + + /* + * Run the expressions through const-simplification since the planner + * will be comparing them to similarly-processed qual clause operands, + * and may fail to detect valid matches without this step. We don't + * need to bother with canonicalize_qual() though, because partition + * expressions are not full-fledged qualification clauses. + */ + expr = eval_const_expressions(NULL, (Node *) expr); + + /* May as well fix opfuncids too */ + fix_opfuncids((Node *) expr); + key->partexprs = (List *) expr; + } + + key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber)); + key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo)); + + key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + + /* Gather type and collation info as well */ + key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32)); + key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16)); + key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool)); + key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char)); + key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + + /* Copy partattrs and fill other per-attribute info */ + memcpy(key->partattrs, attrs, key->partnatts * sizeof(int16)); + partexprs_item = list_head(key->partexprs); + for (i = 0; i < key->partnatts; i++) + { + AttrNumber attno = key->partattrs[i]; + HeapTuple opclasstup; + Form_pg_opclass opclassform; + Oid funcid; + + /* Collect opfamily information */ + opclasstup = SearchSysCache1(CLAOID, + ObjectIdGetDatum(opclass->values[i])); + if (!HeapTupleIsValid(opclasstup)) + elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]); + + opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup); + key->partopfamily[i] = opclassform->opcfamily; + key->partopcintype[i] = opclassform->opcintype; + + /* + * A btree support function covers the cases of list and range methods + * currently supported. + */ + funcid = get_opfamily_proc(opclassform->opcfamily, + opclassform->opcintype, + opclassform->opcintype, + BTORDER_PROC); + + fmgr_info(funcid, &key->partsupfunc[i]); + + /* Collation */ + key->partcollation[i] = collation->values[i]; + + /* Collect type information */ + if (attno != 0) + { + key->parttypid[i] = relation->rd_att->attrs[attno - 1]->atttypid; + key->parttypmod[i] = relation->rd_att->attrs[attno - 1]->atttypmod; + key->parttypcoll[i] = relation->rd_att->attrs[attno - 1]->attcollation; + } + else + { + key->parttypid[i] = exprType(lfirst(partexprs_item)); + key->parttypmod[i] = exprTypmod(lfirst(partexprs_item)); + key->parttypcoll[i] = exprCollation(lfirst(partexprs_item)); + } + get_typlenbyvalalign(key->parttypid[i], + &key->parttyplen[i], + &key->parttypbyval[i], + &key->parttypalign[i]); + + ReleaseSysCache(opclasstup); + } + + ReleaseSysCache(tuple); + + /* Success --- now copy to the cache memory */ + partkeycxt = AllocSetContextCreate(CacheMemoryContext, + RelationGetRelationName(relation), + ALLOCSET_SMALL_SIZES); + relation->rd_partkeycxt = partkeycxt; + oldcxt = MemoryContextSwitchTo(relation->rd_partkeycxt); + relation->rd_partkey = copy_partition_key(key); + MemoryContextSwitchTo(oldcxt); +} + +/* + * copy_partition_key + * + * The copy is allocated in the current memory context. + */ +static PartitionKey +copy_partition_key(PartitionKey fromkey) +{ + PartitionKey newkey; + int n; + + newkey = (PartitionKey) palloc(sizeof(PartitionKeyData)); + + newkey->strategy = fromkey->strategy; + newkey->partnatts = n = fromkey->partnatts; + + newkey->partattrs = (AttrNumber *) palloc(n * sizeof(AttrNumber)); + memcpy(newkey->partattrs, fromkey->partattrs, n * sizeof(AttrNumber)); + + newkey->partexprs = copyObject(fromkey->partexprs); + + newkey->partopfamily = (Oid *) palloc(n * sizeof(Oid)); + memcpy(newkey->partopfamily, fromkey->partopfamily, n * sizeof(Oid)); + + newkey->partopcintype = (Oid *) palloc(n * sizeof(Oid)); + memcpy(newkey->partopcintype, fromkey->partopcintype, n * sizeof(Oid)); + + newkey->partsupfunc = (FmgrInfo *) palloc(n * sizeof(FmgrInfo)); + memcpy(newkey->partsupfunc, fromkey->partsupfunc, n * sizeof(FmgrInfo)); + + newkey->partcollation = (Oid *) palloc(n * sizeof(Oid)); + memcpy(newkey->partcollation, fromkey->partcollation, n * sizeof(Oid)); + + newkey->parttypid = (Oid *) palloc(n * sizeof(Oid)); + memcpy(newkey->parttypid, fromkey->parttypid, n * sizeof(Oid)); + + newkey->parttypmod = (int32 *) palloc(n * sizeof(int32)); + memcpy(newkey->parttypmod, fromkey->parttypmod, n * sizeof(int32)); + + newkey->parttyplen = (int16 *) palloc(n * sizeof(int16)); + memcpy(newkey->parttyplen, fromkey->parttyplen, n * sizeof(int16)); + + newkey->parttypbyval = (bool *) palloc(n * sizeof(bool)); + memcpy(newkey->parttypbyval, fromkey->parttypbyval, n * sizeof(bool)); + + newkey->parttypalign = (char *) palloc(n * sizeof(bool)); + memcpy(newkey->parttypalign, fromkey->parttypalign, n * sizeof(char)); + + newkey->parttypcoll = (Oid *) palloc(n * sizeof(Oid)); + memcpy(newkey->parttypcoll, fromkey->parttypcoll, n * sizeof(Oid)); + + return newkey; +} + +/* * equalRuleLocks * * Determine whether two RuleLocks are equivalent @@ -923,6 +1162,58 @@ equalRSDesc(RowSecurityDesc *rsdesc1, RowSecurityDesc *rsdesc2) } /* + * equalPartitionDescs + * Compare two partition descriptors for logical equality + */ +static bool +equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1, + PartitionDesc partdesc2) +{ + int i; + + if (partdesc1 != NULL) + { + if (partdesc2 == NULL) + return false; + if (partdesc1->nparts != partdesc2->nparts) + return false; + + Assert(key != NULL || partdesc1->nparts == 0); + + /* + * Same oids? If the partitioning structure did not change, that is, + * no partitions were added or removed to the relation, the oids array + * should still match element-by-element. + */ + for (i = 0; i < partdesc1->nparts; i++) + { + if (partdesc1->oids[i] != partdesc2->oids[i]) + return false; + } + + /* + * Now compare partition bound collections. The logic to iterate over + * the collections is private to partition.c. + */ + if (partdesc1->boundinfo != NULL) + { + if (partdesc2->boundinfo == NULL) + return false; + + if (!partition_bounds_equal(key, partdesc1->boundinfo, + partdesc2->boundinfo)) + return false; + } + else if (partdesc2->boundinfo != NULL) + return false; + } + else if (partdesc2 != NULL) + return false; + + return true; +} + +/* * RelationBuildDesc * * Build a relation descriptor. The caller must hold at least @@ -1050,6 +1341,20 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) relation->rd_fkeylist = NIL; relation->rd_fkeyvalid = false; + /* if a partitioned table, initialize key and partition descriptor info */ + if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + RelationBuildPartitionKey(relation); + RelationBuildPartitionDesc(relation); + } + else + { + relation->rd_partkeycxt = NULL; + relation->rd_partkey = NULL; + relation->rd_partdesc = NULL; + relation->rd_pdcxt = NULL; + } + /* * if it's an index, initialize index-related information */ @@ -2042,6 +2347,12 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc) MemoryContextDelete(relation->rd_rulescxt); if (relation->rd_rsdesc) MemoryContextDelete(relation->rd_rsdesc->rscxt); + if (relation->rd_partkeycxt) + MemoryContextDelete(relation->rd_partkeycxt); + if (relation->rd_pdcxt) + MemoryContextDelete(relation->rd_pdcxt); + if (relation->rd_partcheck) + pfree(relation->rd_partcheck); if (relation->rd_fdwroutine) pfree(relation->rd_fdwroutine); pfree(relation); @@ -2190,11 +2501,12 @@ RelationClearRelation(Relation relation, bool rebuild) * * When rebuilding an open relcache entry, we must preserve ref count, * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state. Also - * attempt to preserve the pg_class entry (rd_rel), tupledesc, and - * rewrite-rule substructures in place, because various places assume - * that these structures won't move while they are working with an - * open relcache entry. (Note: the refcount mechanism for tupledescs - * might someday allow us to remove this hack for the tupledesc.) + * attempt to preserve the pg_class entry (rd_rel), tupledesc, + * rewrite-rule, partition key, and partition descriptor substructures + * in place, because various places assume that these structures won't + * move while they are working with an open relcache entry. (Note: + * the refcount mechanism for tupledescs might someday allow us to + * remove this hack for the tupledesc.) * * Note that this process does not touch CurrentResourceOwner; which * is good because whatever ref counts the entry may have do not @@ -2205,6 +2517,8 @@ RelationClearRelation(Relation relation, bool rebuild) bool keep_tupdesc; bool keep_rules; bool keep_policies; + bool keep_partkey; + bool keep_partdesc; /* Build temporary entry, but don't link it into hashtable */ newrel = RelationBuildDesc(save_relid, false); @@ -2235,6 +2549,10 @@ RelationClearRelation(Relation relation, bool rebuild) keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att); keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules); keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc); + keep_partkey = (relation->rd_partkey != NULL); + keep_partdesc = equalPartitionDescs(relation->rd_partkey, + relation->rd_partdesc, + newrel->rd_partdesc); /* * Perform swapping of the relcache entry contents. Within this @@ -2289,6 +2607,18 @@ RelationClearRelation(Relation relation, bool rebuild) SWAPFIELD(Oid, rd_toastoid); /* pgstat_info must be preserved */ SWAPFIELD(struct PgStat_TableStatus *, pgstat_info); + /* partition key must be preserved, if we have one */ + if (keep_partkey) + { + SWAPFIELD(PartitionKey, rd_partkey); + SWAPFIELD(MemoryContext, rd_partkeycxt); + } + /* preserve old partdesc if no logical change */ + if (keep_partdesc) + { + SWAPFIELD(PartitionDesc, rd_partdesc); + SWAPFIELD(MemoryContext, rd_pdcxt); + } #undef SWAPFIELD @@ -2983,7 +3313,9 @@ RelationBuildLocalRelation(const char *relname, /* system relations and non-table objects don't have one */ if (!IsSystemNamespace(relnamespace) && - (relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW)) + (relkind == RELKIND_RELATION || + relkind == RELKIND_MATVIEW || + relkind == RELKIND_PARTITIONED_TABLE)) rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT; else rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING; @@ -3514,6 +3846,20 @@ RelationCacheInitializePhase3(void) restart = true; } + /* + * Reload partition key and descriptor for a partitioned table. + */ + if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + RelationBuildPartitionKey(relation); + Assert(relation->rd_partkey != NULL); + + RelationBuildPartitionDesc(relation); + Assert(relation->rd_partdesc != NULL); + + restart = true; + } + /* Release hold on the relation */ RelationDecrementReferenceCount(relation); @@ -4267,6 +4613,8 @@ RelationGetIndexExpressions(Relation relation) */ result = (List *) eval_const_expressions(NULL, (Node *) result); + result = (List *) canonicalize_qual((Expr *) result); + /* May as well fix opfuncids too */ fix_opfuncids((Node *) result); @@ -5035,6 +5383,10 @@ load_relcache_init_file(bool shared) rel->rd_rulescxt = NULL; rel->trigdesc = NULL; rel->rd_rsdesc = NULL; + rel->rd_partkeycxt = NULL; + rel->rd_partkey = NULL; + rel->rd_partdesc = NULL; + rel->rd_partcheck = NIL; rel->rd_indexprs = NIL; rel->rd_indpred = NIL; rel->rd_exclops = NULL; diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index 65ffe844093..a3e0517b940 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -48,6 +48,7 @@ #include "catalog/pg_opclass.h" #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" +#include "catalog/pg_partitioned_table.h" #include "catalog/pg_proc.h" #include "catalog/pg_range.h" #include "catalog/pg_rewrite.h" @@ -568,6 +569,17 @@ static const struct cachedesc cacheinfo[] = { }, 8 }, + {PartitionedRelationId, /* PARTRELID */ + PartitionedRelidIndexId, + 1, + { + Anum_pg_partitioned_table_partrelid, + 0, + 0, + 0 + }, + 32 + }, {ProcedureRelationId, /* PROCNAMEARGSNSP */ ProcedureNameArgsNspIndexId, 3, |