Skip to content

Commit 9bb9443

Browse files
author
Commitfest Bot
committed
[CF 5467] v20 - speedup COPY TO for partitioned table
This branch was automatically generated by a robot using patches from an email thread registered at: https://commitfest.postgresql.org/patch/5467 The branch will be overwritten each time a new patch version is posted to the thread, and also periodically to check for bitrot caused by changes on the master branch. Patch(es): https://www.postgresql.org/message-id/CAD21AoDtx8zdzww8z-aQStLocB7mxM3UCzJyBohFYHaVg0-Z-w@mail.gmail.com Author(s): Jian He
2 parents e1a912c + 1de2d0e commit 9bb9443

File tree

9 files changed

+200
-36
lines changed

9 files changed

+200
-36
lines changed

contrib/postgres_fdw/expected/postgres_fdw.out

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11599,6 +11599,11 @@ SELECT * FROM result_tbl ORDER BY a;
1159911599
(3 rows)
1160011600

1160111601
DELETE FROM result_tbl;
11602+
-- Test COPY TO when a partition is a foreign table
11603+
COPY async_pt TO stdout; --error
11604+
ERROR: cannot copy from foreign table "async_p1"
11605+
DETAIL: Partition "async_p1" is a foreign table in partitioned table "async_pt"
11606+
HINT: Try the COPY (SELECT ...) TO variant.
1160211607
DROP FOREIGN TABLE async_p3;
1160311608
DROP TABLE base_tbl3;
1160411609
-- Check case where the partitioned table has local/remote partitions

contrib/postgres_fdw/sql/postgres_fdw.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3941,6 +3941,9 @@ INSERT INTO result_tbl SELECT * FROM async_pt WHERE b === 505;
39413941
SELECT * FROM result_tbl ORDER BY a;
39423942
DELETE FROM result_tbl;
39433943

3944+
-- Test COPY TO when a partition is a foreign table
3945+
COPY async_pt TO stdout; --error
3946+
39443947
DROP FOREIGN TABLE async_p3;
39453948
DROP TABLE base_tbl3;
39463949

doc/src/sgml/ref/copy.sgml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -539,13 +539,14 @@ COPY <replaceable class="parameter">count</replaceable>
539539

540540
<para>
541541
<command>COPY TO</command> can be used with plain
542-
tables and populated materialized views.
543-
For example,
544-
<literal>COPY <replaceable class="parameter">table</replaceable>
545-
TO</literal> copies the same rows as
542+
tables, populated materialized views, and partitioned tables.
543+
For non-partitioned tables, <literal>COPY <replaceable class="parameter">table</replaceable> TO</literal>
544+
copies the same rows as
546545
<literal>SELECT * FROM ONLY <replaceable class="parameter">table</replaceable></literal>.
546+
For partitioned tables, it copies the same rows as
547+
<literal>SELECT * FROM <replaceable class="parameter">table</replaceable></literal>.
547548
However it doesn't directly support other relation types,
548-
such as partitioned tables, inheritance child tables, or views.
549+
such as inheritance child tables or views.
549550
To copy all rows from such relations, use <literal>COPY (SELECT * FROM
550551
<replaceable class="parameter">table</replaceable>) TO</literal>.
551552
</para>

src/backend/commands/copy.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,11 +251,15 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
251251
* relation which we have opened and locked. Use "ONLY" so that
252252
* COPY retrieves rows from only the target table not any
253253
* inheritance children, the same as when RLS doesn't apply.
254+
*
255+
* However, when copying data from a partitioned table, we don't
256+
* use "ONLY", since we need to retrieve rows from its descendant
257+
* tables too.
254258
*/
255259
from = makeRangeVar(get_namespace_name(RelationGetNamespace(rel)),
256260
pstrdup(RelationGetRelationName(rel)),
257261
-1);
258-
from->inh = false; /* apply ONLY */
262+
from->inh = (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
259263

260264
/* Build query */
261265
select = makeNode(SelectStmt);

src/backend/commands/copyto.c

Lines changed: 123 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
#include <unistd.h>
1919
#include <sys/stat.h>
2020

21+
#include "access/table.h"
2122
#include "access/tableam.h"
23+
#include "catalog/pg_inherits.h"
2224
#include "commands/copyapi.h"
2325
#include "commands/progress.h"
2426
#include "executor/execdesc.h"
@@ -86,6 +88,7 @@ typedef struct CopyToStateData
8688

8789
CopyFormatOptions opts;
8890
Node *whereClause; /* WHERE condition (or NULL) */
91+
List *partitions; /* OID list of partitions to copy data from */
8992

9093
/*
9194
* Working state
@@ -116,6 +119,8 @@ static void CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot);
116119
static void CopyAttributeOutText(CopyToState cstate, const char *string);
117120
static void CopyAttributeOutCSV(CopyToState cstate, const char *string,
118121
bool use_quote);
122+
static void CopyRelationTo(CopyToState cstate, Relation rel, Relation root_rel,
123+
uint64 *processed);
119124

120125
/* built-in format-specific routines */
121126
static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc);
@@ -602,6 +607,10 @@ EndCopy(CopyToState cstate)
602607
pgstat_progress_end_command();
603608

604609
MemoryContextDelete(cstate->copycontext);
610+
611+
if (cstate->partitions)
612+
list_free(cstate->partitions);
613+
605614
pfree(cstate);
606615
}
607616

@@ -643,6 +652,7 @@ BeginCopyTo(ParseState *pstate,
643652
PROGRESS_COPY_COMMAND_TO,
644653
0
645654
};
655+
List *children = NIL;
646656

647657
if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION)
648658
{
@@ -673,11 +683,34 @@ BeginCopyTo(ParseState *pstate,
673683
errmsg("cannot copy from sequence \"%s\"",
674684
RelationGetRelationName(rel))));
675685
else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
676-
ereport(ERROR,
677-
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
678-
errmsg("cannot copy from partitioned table \"%s\"",
679-
RelationGetRelationName(rel)),
680-
errhint("Try the COPY (SELECT ...) TO variant.")));
686+
{
687+
/*
688+
* Collect the OIDs of the relations containing data, so that later
689+
* DoCopyTo can copy the data from them.
690+
*/
691+
children = find_all_inheritors(RelationGetRelid(rel), AccessShareLock, NULL);
692+
693+
foreach_oid(child, children)
694+
{
695+
char relkind = get_rel_relkind(child);
696+
697+
if (relkind == RELKIND_FOREIGN_TABLE)
698+
{
699+
char *relation_name = get_rel_name(child);
700+
701+
ereport(ERROR,
702+
errcode(ERRCODE_WRONG_OBJECT_TYPE),
703+
errmsg("cannot copy from foreign table \"%s\"", relation_name),
704+
errdetail("Partition \"%s\" is a foreign table in partitioned table \"%s\"",
705+
relation_name, RelationGetRelationName(rel)),
706+
errhint("Try the COPY (SELECT ...) TO variant."));
707+
}
708+
709+
/* Exclude tables with no data */
710+
if (RELKIND_HAS_PARTITIONS(relkind))
711+
children = foreach_delete_current(children, child);
712+
}
713+
}
681714
else
682715
ereport(ERROR,
683716
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -713,6 +746,7 @@ BeginCopyTo(ParseState *pstate,
713746
cstate->rel = rel;
714747

715748
tupDesc = RelationGetDescr(cstate->rel);
749+
cstate->partitions = children;
716750
}
717751
else
718752
{
@@ -722,6 +756,7 @@ BeginCopyTo(ParseState *pstate,
722756
DestReceiver *dest;
723757

724758
cstate->rel = NULL;
759+
cstate->partitions = NIL;
725760

726761
/*
727762
* Run parse analysis and rewrite. Note this also acquires sufficient
@@ -1030,7 +1065,7 @@ DoCopyTo(CopyToState cstate)
10301065
TupleDesc tupDesc;
10311066
int num_phys_attrs;
10321067
ListCell *cur;
1033-
uint64 processed;
1068+
uint64 processed = 0;
10341069

10351070
if (fe_copy)
10361071
SendCopyBegin(cstate);
@@ -1070,33 +1105,24 @@ DoCopyTo(CopyToState cstate)
10701105

10711106
if (cstate->rel)
10721107
{
1073-
TupleTableSlot *slot;
1074-
TableScanDesc scandesc;
1075-
1076-
scandesc = table_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
1077-
slot = table_slot_create(cstate->rel, NULL);
1078-
1079-
processed = 0;
1080-
while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot))
1108+
/*
1109+
* If COPY TO source table is a partitioned table, then open each
1110+
* partition and process each individual partition.
1111+
*/
1112+
if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
10811113
{
1082-
CHECK_FOR_INTERRUPTS();
1083-
1084-
/* Deconstruct the tuple ... */
1085-
slot_getallattrs(slot);
1086-
1087-
/* Format and send the data */
1088-
CopyOneRowTo(cstate, slot);
1114+
foreach_oid(child, cstate->partitions)
1115+
{
1116+
Relation scan_rel;
10891117

1090-
/*
1091-
* Increment the number of processed tuples, and report the
1092-
* progress.
1093-
*/
1094-
pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
1095-
++processed);
1118+
/* We already got the lock in BeginCopyTo */
1119+
scan_rel = table_open(child, NoLock);
1120+
CopyRelationTo(cstate, scan_rel, cstate->rel, &processed);
1121+
table_close(scan_rel, NoLock);
1122+
}
10961123
}
1097-
1098-
ExecDropSingleTupleTableSlot(slot);
1099-
table_endscan(scandesc);
1124+
else
1125+
CopyRelationTo(cstate, cstate->rel, NULL, &processed);
11001126
}
11011127
else
11021128
{
@@ -1115,6 +1141,73 @@ DoCopyTo(CopyToState cstate)
11151141
return processed;
11161142
}
11171143

1144+
/*
1145+
* Scans a single table and exports its rows to the COPY destination.
1146+
*
1147+
* root_rel can be set to the root table of rel if rel is a partition,
1148+
* so that we can send tuples in root_rel's rowtype, which might
1149+
* differ from that of the individual partitions.
1150+
*/
1151+
static void
1152+
CopyRelationTo(CopyToState cstate, Relation rel, Relation root_rel, uint64 *processed)
1153+
{
1154+
TupleTableSlot *slot;
1155+
TableScanDesc scandesc;
1156+
AttrMap *map = NULL;
1157+
TupleTableSlot *root_slot = NULL;
1158+
1159+
scandesc = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);
1160+
slot = table_slot_create(rel, NULL);
1161+
1162+
/*
1163+
* If we are exporting partition data here, check whether tuples must be
1164+
* converted to the root table's rowtype, because a partition might have
1165+
* a column order different from that of its root table.
1166+
*/
1167+
if (root_rel != NULL)
1168+
{
1169+
root_slot = table_slot_create(root_rel, NULL);
1170+
map = build_attrmap_by_name_if_req(RelationGetDescr(root_rel),
1171+
RelationGetDescr(rel),
1172+
false);
1173+
}
1174+
1175+
while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot))
1176+
{
1177+
TupleTableSlot *copyslot;
1178+
1179+
CHECK_FOR_INTERRUPTS();
1180+
1181+
if (map != NULL)
1182+
copyslot = execute_attr_map_slot(map, slot, root_slot);
1183+
else
1184+
{
1185+
/* Deconstruct the tuple */
1186+
slot_getallattrs(slot);
1187+
copyslot = slot;
1188+
}
1189+
1190+
/* Format and send the data */
1191+
CopyOneRowTo(cstate, copyslot);
1192+
1193+
/*
1194+
* Increment the number of processed tuples, and report the progress.
1195+
*/
1196+
pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
1197+
++(*processed));
1198+
}
1199+
1200+
ExecDropSingleTupleTableSlot(slot);
1201+
1202+
if (root_slot != NULL)
1203+
ExecDropSingleTupleTableSlot(root_slot);
1204+
1205+
if (map != NULL)
1206+
free_attrmap(map);
1207+
1208+
table_endscan(scandesc);
1209+
}
1210+
11181211
/*
11191212
* Emit one row during DoCopyTo().
11201213
*/

src/test/regress/expected/copy.out

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,3 +373,23 @@ COPY copytest_mv(id) TO stdout WITH (header);
373373
id
374374
1
375375
DROP MATERIALIZED VIEW copytest_mv;
376+
-- Tests for COPY TO with partitioned tables.
377+
-- The child table pp_1 has a different column order than the root table pp.
378+
-- Check that COPY TO exports tuples in the root table's column order.
379+
CREATE TABLE pp (id int,val int) PARTITION BY RANGE (id);
380+
CREATE TABLE pp_1 (val int, id int) PARTITION BY RANGE (id);
381+
CREATE TABLE pp_2 (id int, val int) PARTITION BY RANGE (id);
382+
ALTER TABLE pp ATTACH PARTITION pp_1 FOR VALUES FROM (1) TO (5);
383+
ALTER TABLE pp ATTACH PARTITION pp_2 FOR VALUES FROM (5) TO (10);
384+
CREATE TABLE pp_15 PARTITION OF pp_1 FOR VALUES FROM (1) TO (5);
385+
CREATE TABLE pp_510 PARTITION OF pp_2 FOR VALUES FROM (5) TO (10);
386+
INSERT INTO pp SELECT g, 10 + g FROM generate_series(1,6) g;
387+
COPY pp TO stdout(header);
388+
id val
389+
1 11
390+
2 12
391+
3 13
392+
4 14
393+
5 15
394+
6 16
395+
DROP TABLE PP;

src/test/regress/expected/rowsecurity.out

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -986,6 +986,11 @@ NOTICE: f_leak => my first satire
986986
9 | 11 | 1 | regress_rls_dave | awesome science fiction
987987
(4 rows)
988988

989+
COPY part_document TO stdout WITH (DELIMITER ',');
990+
1,11,1,regress_rls_bob,my first novel
991+
6,11,1,regress_rls_carol,great science fiction
992+
9,11,1,regress_rls_dave,awesome science fiction
993+
4,55,1,regress_rls_bob,my first satire
989994
EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
990995
QUERY PLAN
991996
-------------------------------------------------------------------------
@@ -1028,6 +1033,17 @@ NOTICE: f_leak => awesome technology book
10281033
10 | 99 | 2 | regress_rls_dave | awesome technology book
10291034
(10 rows)
10301035

1036+
COPY part_document TO stdout WITH (DELIMITER ',');
1037+
1,11,1,regress_rls_bob,my first novel
1038+
2,11,2,regress_rls_bob,my second novel
1039+
6,11,1,regress_rls_carol,great science fiction
1040+
9,11,1,regress_rls_dave,awesome science fiction
1041+
4,55,1,regress_rls_bob,my first satire
1042+
8,55,2,regress_rls_carol,great satire
1043+
3,99,2,regress_rls_bob,my science textbook
1044+
5,99,2,regress_rls_bob,my history book
1045+
7,99,2,regress_rls_carol,great technology book
1046+
10,99,2,regress_rls_dave,awesome technology book
10311047
EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
10321048
QUERY PLAN
10331049
-------------------------------------------------------------------------
@@ -1058,6 +1074,11 @@ NOTICE: f_leak => awesome science fiction
10581074
9 | 11 | 1 | regress_rls_dave | awesome science fiction
10591075
(4 rows)
10601076

1077+
COPY part_document TO stdout WITH (DELIMITER ',');
1078+
1,11,1,regress_rls_bob,my first novel
1079+
2,11,2,regress_rls_bob,my second novel
1080+
6,11,1,regress_rls_carol,great science fiction
1081+
9,11,1,regress_rls_dave,awesome science fiction
10611082
EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
10621083
QUERY PLAN
10631084
----------------------------------------------------------------------------------

src/test/regress/sql/copy.sql

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,3 +405,17 @@ COPY copytest_mv(id) TO stdout WITH (header);
405405
REFRESH MATERIALIZED VIEW copytest_mv;
406406
COPY copytest_mv(id) TO stdout WITH (header);
407407
DROP MATERIALIZED VIEW copytest_mv;
408+
409+
-- Tests for COPY TO with partitioned tables.
410+
-- The child table pp_1 has a different column order than the root table pp.
411+
-- Check that COPY TO exports tuples in the root table's column order.
412+
CREATE TABLE pp (id int,val int) PARTITION BY RANGE (id);
413+
CREATE TABLE pp_1 (val int, id int) PARTITION BY RANGE (id);
414+
CREATE TABLE pp_2 (id int, val int) PARTITION BY RANGE (id);
415+
ALTER TABLE pp ATTACH PARTITION pp_1 FOR VALUES FROM (1) TO (5);
416+
ALTER TABLE pp ATTACH PARTITION pp_2 FOR VALUES FROM (5) TO (10);
417+
CREATE TABLE pp_15 PARTITION OF pp_1 FOR VALUES FROM (1) TO (5);
418+
CREATE TABLE pp_510 PARTITION OF pp_2 FOR VALUES FROM (5) TO (10);
419+
INSERT INTO pp SELECT g, 10 + g FROM generate_series(1,6) g;
420+
COPY pp TO stdout(header);
421+
DROP TABLE PP;

src/test/regress/sql/rowsecurity.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,16 +362,19 @@ SELECT * FROM pg_policies WHERE schemaname = 'regress_rls_schema' AND tablename
362362
SET SESSION AUTHORIZATION regress_rls_bob;
363363
SET row_security TO ON;
364364
SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did;
365+
COPY part_document TO stdout WITH (DELIMITER ',');
365366
EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
366367

367368
-- viewpoint from regress_rls_carol
368369
SET SESSION AUTHORIZATION regress_rls_carol;
369370
SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did;
371+
COPY part_document TO stdout WITH (DELIMITER ',');
370372
EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
371373

372374
-- viewpoint from regress_rls_dave
373375
SET SESSION AUTHORIZATION regress_rls_dave;
374376
SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did;
377+
COPY part_document TO stdout WITH (DELIMITER ',');
375378
EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
376379

377380
-- pp1 ERROR

0 commit comments

Comments
 (0)