diff options
author | Andres Freund <andres@anarazel.de> | 2019-03-28 20:01:14 -0700 |
---|---|---|
committer | Andres Freund <andres@anarazel.de> | 2019-03-28 20:01:43 -0700 |
commit | d25f519107bff602e1ebc81853fe592d020c118d (patch) | |
tree | 00cfff63480c0d555f372ba2e1866d6622740432 /src/backend/commands/tablecmds.c | |
parent | 7e69323bf72a924fd1b04a7a91da343a0cda91cf (diff) | |
download | postgresql-d25f519107bff602e1ebc81853fe592d020c118d.tar.gz postgresql-d25f519107bff602e1ebc81853fe592d020c118d.zip |
tableam: relation creation, VACUUM FULL/CLUSTER, SET TABLESPACE.
This moves the responsibility for:
- creating the storage necessary for a relation, including creating a
new relfilenode for a relation with existing storage
- non-transactional truncation of a relation
- VACUUM FULL / CLUSTER's rewrite of a table
below tableam.
This is fairly straightforward, with a bit of complexity smattered in
to move the computation of xid / multixid horizons below the AM, as
they don't make sense for every table AM.
Author: Andres Freund
Discussion: https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
Diffstat (limited to 'src/backend/commands/tablecmds.c')
-rw-r--r-- | src/backend/commands/tablecmds.c | 180 |
1 files changed, 48 insertions, 132 deletions
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 048c1196685..06e7caa9cff 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -20,6 +20,7 @@ #include "access/multixact.h" #include "access/reloptions.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/tableam.h" #include "access/tupconvert.h" @@ -473,8 +474,7 @@ static void ATExecEnableRowSecurity(Relation rel); static void ATExecDisableRowSecurity(Relation rel); static void ATExecForceNoForceRowSecurity(Relation rel, bool force_rls); -static void copy_relation_data(SMgrRelation rel, SMgrRelation dst, - ForkNumber forkNum, char relpersistence); +static void index_copy_data(Relation rel, RelFileNode newrnode); static const char *storage_name(char c); static void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, @@ -1697,7 +1697,6 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged, { Oid heap_relid; Oid toast_relid; - MultiXactId minmulti; /* * This effectively deletes all rows in the table, and may be done @@ -1707,8 +1706,6 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged, */ CheckTableForSerializableConflictIn(rel); - minmulti = GetOldestMultiXactId(); - /* * Need the full transaction-safe pushups. * @@ -1716,10 +1713,7 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged, * as the relfilenode value. The old storage file is scheduled for * deletion at commit. 
*/ - RelationSetNewRelfilenode(rel, rel->rd_rel->relpersistence, - RecentXmin, minmulti); - if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED) - heap_create_init_fork(rel); + RelationSetNewRelfilenode(rel, rel->rd_rel->relpersistence); heap_relid = RelationGetRelid(rel); @@ -1731,12 +1725,8 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged, { Relation toastrel = relation_open(toast_relid, AccessExclusiveLock); - RelationSetNewRelfilenode(toastrel, - toastrel->rd_rel->relpersistence, - RecentXmin, minmulti); - if (toastrel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED) - heap_create_init_fork(toastrel); + toastrel->rd_rel->relpersistence); table_close(toastrel, NoLock); } @@ -4928,13 +4918,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) /* Write the tuple out to the new relation */ if (newrel) - { - HeapTuple tuple; - - tuple = ExecFetchSlotHeapTuple(newslot, true, NULL); - heap_insert(newrel, tuple, mycid, hi_options, bistate); - ItemPointerCopy(&tuple->t_self, &newslot->tts_tid); - } + table_insert(newrel, insertslot, mycid, hi_options, bistate); ResetExprContext(econtext); @@ -11492,11 +11476,9 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) Oid reltoastrelid; Oid newrelfilenode; RelFileNode newrnode; - SMgrRelation dstrel; Relation pg_class; HeapTuple tuple; Form_pg_class rd_rel; - ForkNumber forkNum; List *reltoastidxids = NIL; ListCell *lc; @@ -11581,46 +11563,19 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) newrnode = rel->rd_node; newrnode.relNode = newrelfilenode; newrnode.spcNode = newTableSpace; - dstrel = smgropen(newrnode, rel->rd_backend); - - RelationOpenSmgr(rel); - - /* - * Create and copy all forks of the relation, and schedule unlinking of - * old physical files. - * - * NOTE: any conflict in relfilenode value will be caught in - * RelationCreateStorage(). 
- */ - RelationCreateStorage(newrnode, rel->rd_rel->relpersistence); - - /* copy main fork */ - copy_relation_data(rel->rd_smgr, dstrel, MAIN_FORKNUM, - rel->rd_rel->relpersistence); - /* copy those extra forks that exist */ - for (forkNum = MAIN_FORKNUM + 1; forkNum <= MAX_FORKNUM; forkNum++) + /* hand off to AM to actually create the new filenode and copy the data */ + if (rel->rd_rel->relkind == RELKIND_INDEX) { - if (smgrexists(rel->rd_smgr, forkNum)) - { - smgrcreate(dstrel, forkNum, false); - - /* - * WAL log creation if the relation is persistent, or this is the - * init fork of an unlogged relation. - */ - if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT || - (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED && - forkNum == INIT_FORKNUM)) - log_smgrcreate(&newrnode, forkNum); - copy_relation_data(rel->rd_smgr, dstrel, forkNum, - rel->rd_rel->relpersistence); - } + index_copy_data(rel, newrnode); + } + else + { + Assert(rel->rd_rel->relkind == RELKIND_RELATION || + rel->rd_rel->relkind == RELKIND_MATVIEW || + rel->rd_rel->relkind == RELKIND_TOASTVALUE); + table_relation_copy_data(rel, newrnode); } - - /* drop old relation, and close new one */ - RelationDropStorage(rel); - smgrclose(dstrel); /* update the pg_class row */ rd_rel->reltablespace = (newTableSpace == MyDatabaseTableSpace) ? 
InvalidOid : newTableSpace; @@ -11882,90 +11837,51 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt) return new_tablespaceoid; } -/* - * Copy data, block by block - */ static void -copy_relation_data(SMgrRelation src, SMgrRelation dst, - ForkNumber forkNum, char relpersistence) +index_copy_data(Relation rel, RelFileNode newrnode) { - PGAlignedBlock buf; - Page page; - bool use_wal; - bool copying_initfork; - BlockNumber nblocks; - BlockNumber blkno; - - page = (Page) buf.data; + SMgrRelation dstrel; - /* - * The init fork for an unlogged relation in many respects has to be - * treated the same as normal relation, changes need to be WAL logged and - * it needs to be synced to disk. - */ - copying_initfork = relpersistence == RELPERSISTENCE_UNLOGGED && - forkNum == INIT_FORKNUM; + dstrel = smgropen(newrnode, rel->rd_backend); + RelationOpenSmgr(rel); /* - * We need to log the copied data in WAL iff WAL archiving/streaming is - * enabled AND it's a permanent relation. + * Create and copy all forks of the relation, and schedule unlinking of + * old physical files. + * + * NOTE: any conflict in relfilenode value will be caught in + * RelationCreateStorage(). 
*/ - use_wal = XLogIsNeeded() && - (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork); + RelationCreateStorage(newrnode, rel->rd_rel->relpersistence); - nblocks = smgrnblocks(src, forkNum); + /* copy main fork */ + RelationCopyStorage(rel->rd_smgr, dstrel, MAIN_FORKNUM, + rel->rd_rel->relpersistence); - for (blkno = 0; blkno < nblocks; blkno++) + /* copy those extra forks that exist */ + for (ForkNumber forkNum = MAIN_FORKNUM + 1; + forkNum <= MAX_FORKNUM; forkNum++) { - /* If we got a cancel signal during the copy of the data, quit */ - CHECK_FOR_INTERRUPTS(); - - smgrread(src, forkNum, blkno, buf.data); - - if (!PageIsVerified(page, blkno)) - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("invalid page in block %u of relation %s", - blkno, - relpathbackend(src->smgr_rnode.node, - src->smgr_rnode.backend, - forkNum)))); - - /* - * WAL-log the copied page. Unfortunately we don't know what kind of a - * page this is, so we have to log the full page including any unused - * space. - */ - if (use_wal) - log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false); - - PageSetChecksumInplace(page, blkno); + if (smgrexists(rel->rd_smgr, forkNum)) + { + smgrcreate(dstrel, forkNum, false); - /* - * Now write the page. We say isTemp = true even if it's not a temp - * rel, because there's no need for smgr to schedule an fsync for this - * write; we'll do it ourselves below. - */ - smgrextend(dst, forkNum, blkno, buf.data, true); + /* + * WAL log creation if the relation is persistent, or this is the + * init fork of an unlogged relation. + */ + if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT || + (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED && + forkNum == INIT_FORKNUM)) + log_smgrcreate(&newrnode, forkNum); + RelationCopyStorage(rel->rd_smgr, dstrel, forkNum, + rel->rd_rel->relpersistence); + } } - /* - * If the rel is WAL-logged, must fsync before commit. 
We use heap_sync - * to ensure that the toast table gets fsync'd too. (For a temp or - * unlogged rel we don't care since the data will be gone after a crash - * anyway.) - * - * It's obvious that we must do this when not WAL-logging the copy. It's - * less obvious that we have to do it even if we did WAL-log the copied - * pages. The reason is that since we're copying outside shared buffers, a - * CHECKPOINT occurring during the copy has no way to flush the previously - * written data to disk (indeed it won't know the new rel even exists). A - * crash later on would replay WAL from the checkpoint, therefore it - * wouldn't replay our earlier WAL entries. If we do not fsync those pages - * here, they might still not be on disk when the crash occurs. - */ - if (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork) - smgrimmedsync(dst, forkNum); + /* drop old relation, and close new one */ + RelationDropStorage(rel); + smgrclose(dstrel); } /* |