aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2010-12-09 13:03:11 -0500
committerTom Lane <tgl@sss.pgh.pa.us>2010-12-09 13:03:42 -0500
commit2ffcb0cb6a5bf97de22f0ce58f55537ce1c87653 (patch)
tree5ab04d596c0056a5a45f92e89a1c7480ec3767d6
parent87eadd7e3d6f5581d5b4cb8083212a323050e388 (diff)
downloadpostgresql-2ffcb0cb6a5bf97de22f0ce58f55537ce1c87653.tar.gz
postgresql-2ffcb0cb6a5bf97de22f0ce58f55537ce1c87653.zip
Eliminate O(N^2) behavior in parallel restore with many blobs.
With hundreds of thousands of TOC entries, the repeated searches in reduce_dependencies() become the dominant cost. Get rid of that searching by constructing reverse-dependency lists, which we can do in O(N) time during the fix_dependencies() preprocessing. I chose to store the reverse dependencies as DumpId arrays for consistency with the forward-dependency representation, and keep the previously-transient tocsByDumpId[] array around to locate actual TOC entry structs quickly from dump IDs. While this fixes the slow case reported by Vlad Arkhipov, there is still a potential for O(N^2) behavior with sufficiently many tables: fix_dependencies itself, as well as mark_create_done and inhibit_data_for_failed_table, are doing repeated searches to deal with table-to-table-data dependencies. Possibly this work could be extended to deal with that, although the latter two functions are also used in non-parallel restore where we currently don't run fix_dependencies. Another TODO is that we fail to parallelize restore of multiple blobs at all. This appears to require changes in the archive format to fix. Back-patch to 9.0 where the problem was reported. 8.4 has potential issues as well; but since it doesn't create a separate TOC entry for each blob, it's at much less risk of having enough TOC entries to cause real problems.
-rw-r--r--src/bin/pg_dump/pg_backup_archiver.c116
-rw-r--r--src/bin/pg_dump/pg_backup_archiver.h2
2 files changed, 76 insertions, 42 deletions
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index be5339e4ff6..6528f4d19c6 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -79,6 +79,10 @@ const char *progname;
static const char *modulename = gettext_noop("archiver");
+/* index array created by fix_dependencies -- only used in parallel restore */
+static TocEntry **tocsByDumpId; /* index by dumpId - 1 */
+static DumpId maxDumpId; /* length of above array */
+
static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt,
const int compression, ArchiveMode mode);
@@ -134,9 +138,7 @@ static void fix_dependencies(ArchiveHandle *AH);
static bool has_lock_conflicts(TocEntry *te1, TocEntry *te2);
static void repoint_table_dependencies(ArchiveHandle *AH,
DumpId tableId, DumpId tableDataId);
-static void identify_locking_dependencies(TocEntry *te,
- TocEntry **tocsByDumpId,
- DumpId maxDumpId);
+static void identify_locking_dependencies(TocEntry *te);
static void reduce_dependencies(ArchiveHandle *AH, TocEntry *te,
TocEntry *ready_list);
static void mark_create_done(ArchiveHandle *AH, TocEntry *te);
@@ -3737,7 +3739,12 @@ mark_work_done(ArchiveHandle *AH, TocEntry *ready_list,
/*
* Process the dependency information into a form useful for parallel restore.
*
- * We set up depCount fields that are the number of as-yet-unprocessed
+ * This function takes care of fixing up some missing or badly designed
+ * dependencies, and then prepares subsidiary data structures that will be
+ * used in the main parallel-restore logic, including:
+ * 1. We build the tocsByDumpId[] index array.
+ * 2. We build the revDeps[] arrays of incoming dependency dumpIds.
+ * 3. We set up depCount fields that are the number of as-yet-unprocessed
* dependencies for each TOC entry.
*
* We also identify locking dependencies so that we can avoid trying to
@@ -3746,22 +3753,20 @@ mark_work_done(ArchiveHandle *AH, TocEntry *ready_list,
static void
fix_dependencies(ArchiveHandle *AH)
{
- TocEntry **tocsByDumpId;
TocEntry *te;
- DumpId maxDumpId;
int i;
/*
- * For some of the steps here, it is convenient to have an array that
- * indexes the TOC entries by dump ID, rather than searching the TOC list
- * repeatedly. Entries for dump IDs not present in the TOC will be NULL.
+ * It is convenient to have an array that indexes the TOC entries by dump
+ * ID, rather than searching the TOC list repeatedly. Entries for dump
+ * IDs not present in the TOC will be NULL.
*
* NOTE: because maxDumpId is just the highest dump ID defined in the
* archive, there might be dependencies for IDs > maxDumpId. All uses of
* this array must guard against out-of-range dependency numbers.
*
- * Also, initialize the depCount fields, and make sure all the TOC items
- * are marked as not being in any parallel-processing list.
+ * Also, initialize the depCount/revDeps/nRevDeps fields, and make sure
+ * the TOC items are marked as not being in any parallel-processing list.
*/
maxDumpId = AH->maxDumpId;
tocsByDumpId = (TocEntry **) calloc(maxDumpId, sizeof(TocEntry *));
@@ -3769,6 +3774,8 @@ fix_dependencies(ArchiveHandle *AH)
{
tocsByDumpId[te->dumpId - 1] = te;
te->depCount = te->nDeps;
+ te->revDeps = NULL;
+ te->nRevDeps = 0;
te->par_prev = NULL;
te->par_next = NULL;
}
@@ -3786,6 +3793,9 @@ fix_dependencies(ArchiveHandle *AH)
* TABLE, if possible. However, if the dependency isn't in the archive
* then just assume it was a TABLE; this is to cover cases where the table
* was suppressed but we have the data and some dependent post-data items.
+ *
+ * XXX this is O(N^2) if there are a lot of tables. We ought to fix
+ * pg_dump to produce correctly-linked dependencies in the first place.
*/
for (te = AH->toc->next; te != AH->toc; te = te->next)
{
@@ -3832,8 +3842,14 @@ fix_dependencies(ArchiveHandle *AH)
}
/*
- * It is possible that the dependencies list items that are not in the
- * archive at all. Subtract such items from the depCounts.
+ * At this point we start to build the revDeps reverse-dependency arrays,
+ * so all changes of dependencies must be complete.
+ */
+
+ /*
+ * Count the incoming dependencies for each item. Also, it is possible
+ * that the dependencies list items that are not in the archive at
+ * all. Subtract such items from the depCounts.
*/
for (te = AH->toc->next; te != AH->toc; te = te->next)
{
@@ -3841,22 +3857,52 @@ fix_dependencies(ArchiveHandle *AH)
{
DumpId depid = te->dependencies[i];
- if (depid > maxDumpId || tocsByDumpId[depid - 1] == NULL)
+ if (depid <= maxDumpId && tocsByDumpId[depid - 1] != NULL)
+ tocsByDumpId[depid - 1]->nRevDeps++;
+ else
te->depCount--;
}
}
/*
+ * Allocate space for revDeps[] arrays, and reset nRevDeps so we can
+ * use it as a counter below.
+ */
+ for (te = AH->toc->next; te != AH->toc; te = te->next)
+ {
+ if (te->nRevDeps > 0)
+ te->revDeps = (DumpId *) malloc(te->nRevDeps * sizeof(DumpId));
+ te->nRevDeps = 0;
+ }
+
+ /*
+ * Build the revDeps[] arrays of incoming-dependency dumpIds. This
+ * had better agree with the loops above.
+ */
+ for (te = AH->toc->next; te != AH->toc; te = te->next)
+ {
+ for (i = 0; i < te->nDeps; i++)
+ {
+ DumpId depid = te->dependencies[i];
+
+ if (depid <= maxDumpId && tocsByDumpId[depid - 1] != NULL)
+ {
+ TocEntry *otherte = tocsByDumpId[depid - 1];
+
+ otherte->revDeps[otherte->nRevDeps++] = te->dumpId;
+ }
+ }
+ }
+
+ /*
* Lastly, work out the locking dependencies.
*/
for (te = AH->toc->next; te != AH->toc; te = te->next)
{
te->lockDeps = NULL;
te->nLockDeps = 0;
- identify_locking_dependencies(te, tocsByDumpId, maxDumpId);
+ identify_locking_dependencies(te);
}
-
- free(tocsByDumpId);
}
/*
@@ -3890,13 +3936,9 @@ repoint_table_dependencies(ArchiveHandle *AH,
* Identify which objects we'll need exclusive lock on in order to restore
* the given TOC entry (*other* than the one identified by the TOC entry
* itself). Record their dump IDs in the entry's lockDeps[] array.
- * tocsByDumpId[] is a convenience array (of size maxDumpId) to avoid
- * searching the TOC for each dependency.
*/
static void
-identify_locking_dependencies(TocEntry *te,
- TocEntry **tocsByDumpId,
- DumpId maxDumpId)
+identify_locking_dependencies(TocEntry *te)
{
DumpId *lockids;
int nlockids;
@@ -3950,31 +3992,21 @@ identify_locking_dependencies(TocEntry *te,
static void
reduce_dependencies(ArchiveHandle *AH, TocEntry *te, TocEntry *ready_list)
{
- DumpId target = te->dumpId;
int i;
- ahlog(AH, 2, "reducing dependencies for %d\n", target);
+ ahlog(AH, 2, "reducing dependencies for %d\n", te->dumpId);
- /*
- * We must examine all entries, not only the ones after the target item,
- * because if the user used a -L switch then the original dependency-
- * respecting order has been destroyed by SortTocFromFile.
- */
- for (te = AH->toc->next; te != AH->toc; te = te->next)
+ for (i = 0; i < te->nRevDeps; i++)
{
- for (i = 0; i < te->nDeps; i++)
+ TocEntry *otherte = tocsByDumpId[te->revDeps[i] - 1];
+
+ otherte->depCount--;
+ if (otherte->depCount == 0 && otherte->par_prev != NULL)
{
- if (te->dependencies[i] == target)
- {
- te->depCount--;
- if (te->depCount == 0 && te->par_prev != NULL)
- {
- /* It must be in the pending list, so remove it ... */
- par_list_remove(te);
- /* ... and add to ready_list */
- par_list_append(ready_list, te);
- }
- }
+ /* It must be in the pending list, so remove it ... */
+ par_list_remove(otherte);
+ /* ... and add to ready_list */
+ par_list_append(ready_list, otherte);
}
}
}
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 0a135ee126f..9f826b678a1 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -321,6 +321,8 @@ typedef struct _tocEntry
struct _tocEntry *par_next; /* these are NULL if not in either list */
bool created; /* set for DATA member if TABLE was created */
int depCount; /* number of dependencies not yet restored */
+ DumpId *revDeps; /* dumpIds of objects depending on this one */
+ int nRevDeps; /* number of such dependencies */
DumpId *lockDeps; /* dumpIds of objects this one needs lock on */
int nLockDeps; /* number of such dependencies */
} TocEntry;