Skip to content

Commit 820a388

Browse files
author
Commitfest Bot
committed
[CF 5692] v1 - Create DATA dump objects for partitioned tables too
This branch was automatically generated by a robot using patches from an email thread registered at: https://commitfest.postgresql.org/patch/5692
The branch will be overwritten each time a new patch version is posted to the thread, and also periodically to check for bitrot caused by changes on the master branch.
Patch(es): https://www.postgresql.org/message-id/[email protected]
Author(s): Tom Lane
2 parents c9e38a5 + 5ca55b6 commit 820a388

File tree

7 files changed

+223
-41
lines changed

7 files changed

+223
-41
lines changed

src/bin/pg_dump/pg_backup_archiver.c

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2003,12 +2003,14 @@ buildTocEntryArrays(ArchiveHandle *AH)
20032003
AH->tocsByDumpId[te->dumpId] = te;
20042004

20052005
/*
2006-
* tableDataId provides the TABLE DATA item's dump ID for each TABLE
2007-
* TOC entry that has a DATA item. We compute this by reversing the
2008-
* TABLE DATA item's dependency, knowing that a TABLE DATA item has
2009-
* just one dependency and it is the TABLE item.
2006+
* tableDataId provides the DATA item's dump ID for each TABLE TOC
2007+
* entry that has a TABLE DATA or PARTITIONED DATA item. We compute
2008+
* this by reversing the DATA item's dependency, knowing that its
2009+
* first dependency is the TABLE item.
20102010
*/
2011-
if (strcmp(te->desc, "TABLE DATA") == 0 && te->nDeps > 0)
2011+
if (te->nDeps > 0 &&
2012+
(strcmp(te->desc, "TABLE DATA") == 0 ||
2013+
strcmp(te->desc, "PARTITIONED DATA") == 0))
20122014
{
20132015
DumpId tableId = te->dependencies[0];
20142016

@@ -2018,7 +2020,7 @@ buildTocEntryArrays(ArchiveHandle *AH)
20182020
* item's dump ID, so there should be a place for it in the array.
20192021
*/
20202022
if (tableId <= 0 || tableId > maxDumpId)
2021-
pg_fatal("bad table dumpId for TABLE DATA item");
2023+
pg_fatal("bad table dumpId for %s item", te->desc);
20222024

20232025
AH->tableDataId[tableId] = te->dumpId;
20242026
}
@@ -3202,6 +3204,7 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
32023204
{
32033205
if (strcmp(te->desc, "TABLE") == 0 ||
32043206
strcmp(te->desc, "TABLE DATA") == 0 ||
3207+
strcmp(te->desc, "PARTITIONED DATA") == 0 ||
32053208
strcmp(te->desc, "VIEW") == 0 ||
32063209
strcmp(te->desc, "FOREIGN TABLE") == 0 ||
32073210
strcmp(te->desc, "MATERIALIZED VIEW") == 0 ||
@@ -3256,13 +3259,14 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
32563259
if (!te->hadDumper)
32573260
{
32583261
/*
3259-
* Special Case: If 'SEQUENCE SET' or anything to do with LOs, then it
3260-
* is considered a data entry. We don't need to check for BLOBS or
3261-
* old-style BLOB COMMENTS entries, because they will have hadDumper =
3262-
* true ... but we do need to check new-style BLOB ACLs, comments,
3263-
* etc.
3262+
* Special Case: If 'PARTITIONED DATA', 'SEQUENCE SET' or anything to
3263+
* do with LOs, then it is considered a data entry. We don't need to
3264+
* check for BLOBS or old-style BLOB COMMENTS entries, because they
3265+
* will have hadDumper = true ... but we do need to check new-style
3266+
* BLOB ACLs, comments, etc.
32643267
*/
3265-
if (strcmp(te->desc, "SEQUENCE SET") == 0 ||
3268+
if (strcmp(te->desc, "PARTITIONED DATA") == 0 ||
3269+
strcmp(te->desc, "SEQUENCE SET") == 0 ||
32663270
strcmp(te->desc, "BLOB") == 0 ||
32673271
strcmp(te->desc, "BLOB METADATA") == 0 ||
32683272
(strcmp(te->desc, "ACL") == 0 &&
@@ -5034,14 +5038,14 @@ identify_locking_dependencies(ArchiveHandle *AH, TocEntry *te)
50345038
return;
50355039

50365040
/*
5037-
* We assume the entry requires exclusive lock on each TABLE or TABLE DATA
5038-
* item listed among its dependencies. Originally all of these would have
5039-
* been TABLE items, but repoint_table_dependencies would have repointed
5040-
* them to the TABLE DATA items if those are present (which they might not
5041-
* be, eg in a schema-only dump). Note that all of the entries we are
5042-
* processing here are POST_DATA; otherwise there might be a significant
5043-
* difference between a dependency on a table and a dependency on its
5044-
* data, so that closer analysis would be needed here.
5041+
* We assume the entry requires exclusive lock on each TABLE, TABLE DATA,
5042+
* or PARTITIONED DATA item listed among its dependencies. Originally all
5043+
* of these would have been TABLE items, but repoint_table_dependencies
5044+
* would have repointed them to the DATA items if those are present (which
5045+
* they might not be, eg in a schema-only dump). Note that all of the
5046+
* entries we are processing here are POST_DATA; otherwise there might be
5047+
* a significant difference between a dependency on a table and a
5048+
* dependency on its data, so that closer analysis would be needed here.
50455049
*/
50465050
lockids = (DumpId *) pg_malloc(te->nDeps * sizeof(DumpId));
50475051
nlockids = 0;
@@ -5050,8 +5054,9 @@ identify_locking_dependencies(ArchiveHandle *AH, TocEntry *te)
50505054
DumpId depid = te->dependencies[i];
50515055

50525056
if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL &&
5053-
((strcmp(AH->tocsByDumpId[depid]->desc, "TABLE DATA") == 0) ||
5054-
strcmp(AH->tocsByDumpId[depid]->desc, "TABLE") == 0))
5057+
(strcmp(AH->tocsByDumpId[depid]->desc, "TABLE") == 0 ||
5058+
strcmp(AH->tocsByDumpId[depid]->desc, "TABLE DATA") == 0 ||
5059+
strcmp(AH->tocsByDumpId[depid]->desc, "PARTITIONED DATA") == 0))
50555060
lockids[nlockids++] = depid;
50565061
}
50575062

src/bin/pg_dump/pg_backup_archiver.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,8 @@ struct _archiveHandle
310310

311311
/* arrays created after the TOC list is complete: */
312312
struct _tocEntry **tocsByDumpId; /* TOCs indexed by dumpId */
313-
DumpId *tableDataId; /* TABLE DATA ids, indexed by table dumpId */
313+
DumpId *tableDataId; /* TABLE DATA and PARTITIONED DATA dumpIds,
314+
* indexed by table dumpId */
314315

315316
struct _tocEntry *currToc; /* Used when dumping data */
316317
pg_compress_specification compression_spec; /* Requested specification for

src/bin/pg_dump/pg_backup_custom.c

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
*/
2626
#include "postgres_fe.h"
2727

28+
#include <limits.h>
29+
2830
#include "common/file_utils.h"
2931
#include "compress_io.h"
3032
#include "pg_backup_utils.h"
@@ -826,17 +828,18 @@ _ReopenArchive(ArchiveHandle *AH)
826828
/*
827829
* Prepare for parallel restore.
828830
*
829-
* The main thing that needs to happen here is to fill in TABLE DATA and BLOBS
830-
* TOC entries' dataLength fields with appropriate values to guide the
831-
* ordering of restore jobs. The source of said data is format-dependent,
832-
* as is the exact meaning of the values.
831+
* The main thing that needs to happen here is to fill in TABLE DATA,
832+
* PARTITIONED DATA, and BLOBS TOC entries' dataLength fields with appropriate
833+
* values to guide the ordering of restore jobs. The source of said data is
834+
* format-dependent, as is the exact meaning of the values.
833835
*
834836
* A format module might also choose to do other setup here.
835837
*/
836838
static void
837839
_PrepParallelRestore(ArchiveHandle *AH)
838840
{
839841
lclContext *ctx = (lclContext *) AH->formatData;
842+
bool have_partitioned_data = false;
840843
TocEntry *prev_te = NULL;
841844
lclTocEntry *prev_tctx = NULL;
842845
TocEntry *te;
@@ -850,6 +853,10 @@ _PrepParallelRestore(ArchiveHandle *AH)
850853
{
851854
lclTocEntry *tctx = (lclTocEntry *) te->formatData;
852855

856+
/* Track whether there are any PARTITIONED DATA items */
857+
if (!have_partitioned_data && strcmp(te->desc, "PARTITIONED DATA") == 0)
858+
have_partitioned_data = true;
859+
853860
/*
854861
* Ignore entries without a known data offset; if we were unable to
855862
* seek to rewrite the TOC when creating the archive, this'll be all
@@ -880,6 +887,38 @@ _PrepParallelRestore(ArchiveHandle *AH)
880887
if (endpos > prev_tctx->dataPos)
881888
prev_te->dataLength = endpos - prev_tctx->dataPos;
882889
}
890+
891+
/*
892+
* For PARTITIONED DATA items, add up the sizes of their child objects.
893+
* (We couldn't do this earlier, since when we encounter a PARTITIONED
894+
* DATA item in the first loop we typically don't know the dataLength of
895+
* its last child yet.)
896+
*/
897+
if (have_partitioned_data)
898+
{
899+
for (te = AH->toc->next; te != AH->toc; te = te->next)
900+
{
901+
if (strcmp(te->desc, "PARTITIONED DATA") != 0)
902+
continue;
903+
for (int i = 0; i < te->nDeps; i++)
904+
{
905+
DumpId depid = te->dependencies[i];
906+
907+
if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL)
908+
{
909+
pgoff_t childLength = AH->tocsByDumpId[depid]->dataLength;
910+
911+
te->dataLength += childLength;
912+
/* handle overflow -- unlikely except with 32-bit pgoff_t */
913+
if (unlikely(te->dataLength < 0))
914+
{
915+
te->dataLength = INT_MAX;
916+
break;
917+
}
918+
}
919+
}
920+
}
921+
}
883922
}
884923

885924
/*

src/bin/pg_dump/pg_backup_directory.c

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "postgres_fe.h"
3838

3939
#include <dirent.h>
40+
#include <limits.h>
4041
#include <sys/stat.h>
4142

4243
#include "common/file_utils.h"
@@ -697,16 +698,17 @@ setFilePath(ArchiveHandle *AH, char *buf, const char *relativeFilename)
697698
/*
698699
* Prepare for parallel restore.
699700
*
700-
* The main thing that needs to happen here is to fill in TABLE DATA and BLOBS
701-
* TOC entries' dataLength fields with appropriate values to guide the
702-
* ordering of restore jobs. The source of said data is format-dependent,
703-
* as is the exact meaning of the values.
701+
* The main thing that needs to happen here is to fill in TABLE DATA,
702+
* PARTITIONED DATA, and BLOBS TOC entries' dataLength fields with appropriate
703+
* values to guide the ordering of restore jobs. The source of said data is
704+
* format-dependent, as is the exact meaning of the values.
704705
*
705706
* A format module might also choose to do other setup here.
706707
*/
707708
static void
708709
_PrepParallelRestore(ArchiveHandle *AH)
709710
{
711+
bool have_partitioned_data = false;
710712
TocEntry *te;
711713

712714
for (te = AH->toc->next; te != AH->toc; te = te->next)
@@ -715,6 +717,10 @@ _PrepParallelRestore(ArchiveHandle *AH)
715717
char fname[MAXPGPATH];
716718
struct stat st;
717719

720+
/* Track whether there are any PARTITIONED DATA items */
721+
if (!have_partitioned_data && strcmp(te->desc, "PARTITIONED DATA") == 0)
722+
have_partitioned_data = true;
723+
718724
/*
719725
* A dumpable object has set tctx->filename, any other object has not.
720726
* (see _ArchiveEntry).
@@ -759,6 +765,38 @@ _PrepParallelRestore(ArchiveHandle *AH)
759765
if (strcmp(te->desc, "BLOBS") == 0)
760766
te->dataLength *= 1024;
761767
}
768+
769+
/*
770+
* For PARTITIONED DATA items, add up the sizes of their child objects.
771+
* (Unlike pg_backup_custom.c, we could theoretically do this within the
772+
* previous loop, but it seems best to keep the logic looking the same in
773+
* both functions.)
774+
*/
775+
if (have_partitioned_data)
776+
{
777+
for (te = AH->toc->next; te != AH->toc; te = te->next)
778+
{
779+
if (strcmp(te->desc, "PARTITIONED DATA") != 0)
780+
continue;
781+
for (int i = 0; i < te->nDeps; i++)
782+
{
783+
DumpId depid = te->dependencies[i];
784+
785+
if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL)
786+
{
787+
pgoff_t childLength = AH->tocsByDumpId[depid]->dataLength;
788+
789+
te->dataLength += childLength;
790+
/* handle overflow -- unlikely except with 32-bit pgoff_t */
791+
if (unlikely(te->dataLength < 0))
792+
{
793+
te->dataLength = INT_MAX;
794+
break;
795+
}
796+
}
797+
}
798+
}
799+
}
762800
}
763801

764802
/*

0 commit comments

Comments
 (0)