Skip to content

Commit 4bea91f

Browse files
Support COPY TO for partitioned tables.
Previously, COPY TO command didn't support directly specifying partitioned tables so users had to use COPY (SELECT ...) TO variant. This commit adds direct COPY TO support for partitioned tables, improving both usability and performance. Performance tests show it's faster than the COPY (SELECT ...) TO variant as it avoids the overheads of query processing and sending results to the COPY TO command. When used with partitioned tables, COPY TO copies the same rows as SELECT * FROM table. Row-level security policies of the partitioned table are applied in the same way as when executing COPY TO on a plain table. Author: jian he <[email protected]> Reviewed-by: vignesh C <[email protected]> Reviewed-by: David Rowley <[email protected]> Reviewed-by: Melih Mutlu <[email protected]> Reviewed-by: Kirill Reshke <[email protected]> Reviewed-by: Atsushi Torikoshi <[email protected]> Reviewed-by: Álvaro Herrera <[email protected]> Reviewed-by: Masahiko Sawada <[email protected]> Reviewed-by: Chao Li <[email protected]> Discussion: https://postgr.es/m/CACJufxEZt%2BG19Ors3bQUq-42-61__C%3Dy5k2wk%3DsHEFRusu7%3DiQ%40mail.gmail.com
1 parent d74cfe3 commit 4bea91f

File tree

9 files changed

+200
-36
lines changed

9 files changed

+200
-36
lines changed

contrib/postgres_fdw/expected/postgres_fdw.out

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11599,6 +11599,11 @@ SELECT * FROM result_tbl ORDER BY a;
1159911599
(3 rows)
1160011600

1160111601
DELETE FROM result_tbl;
11602+
-- Test COPY TO when foreign table is partition
11603+
COPY async_pt TO stdout; --error
11604+
ERROR: cannot copy from foreign table "async_p1"
11605+
DETAIL: Partition "async_p1" is a foreign table in partitioned table "async_pt"
11606+
HINT: Try the COPY (SELECT ...) TO variant.
1160211607
DROP FOREIGN TABLE async_p3;
1160311608
DROP TABLE base_tbl3;
1160411609
-- Check case where the partitioned table has local/remote partitions

contrib/postgres_fdw/sql/postgres_fdw.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3941,6 +3941,9 @@ INSERT INTO result_tbl SELECT * FROM async_pt WHERE b === 505;
39413941
SELECT * FROM result_tbl ORDER BY a;
39423942
DELETE FROM result_tbl;
39433943

3944+
-- Test COPY TO when foreign table is partition
3945+
COPY async_pt TO stdout; --error
3946+
39443947
DROP FOREIGN TABLE async_p3;
39453948
DROP TABLE base_tbl3;
39463949

doc/src/sgml/ref/copy.sgml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -539,13 +539,14 @@ COPY <replaceable class="parameter">count</replaceable>
539539

540540
<para>
541541
<command>COPY TO</command> can be used with plain
542-
tables and populated materialized views.
543-
For example,
544-
<literal>COPY <replaceable class="parameter">table</replaceable>
545-
TO</literal> copies the same rows as
542+
tables, populated materialized views, and partitioned tables.
543+
For non-partitioned tables, <literal>COPY <replaceable class="parameter">table</replaceable> TO</literal>
544+
copies the same rows as
546545
<literal>SELECT * FROM ONLY <replaceable class="parameter">table</replaceable></literal>.
546+
For partitioned tables, it copies the same rows as
547+
<literal>SELECT * FROM <replaceable class="parameter">table</replaceable></literal>.
547548
However it doesn't directly support other relation types,
548-
such as partitioned tables, inheritance child tables, or views.
549+
such as inheritance child tables or views.
549550
To copy all rows from such relations, use <literal>COPY (SELECT * FROM
550551
<replaceable class="parameter">table</replaceable>) TO</literal>.
551552
</para>

src/backend/commands/copy.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,11 +251,15 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
251251
* relation which we have opened and locked. Use "ONLY" so that
252252
* COPY retrieves rows from only the target table not any
253253
* inheritance children, the same as when RLS doesn't apply.
254+
*
255+
* However, when copying data from a partitioned table, we don't
256+
* use "ONLY", since we need to retrieve rows from its descendant
257+
* tables too.
254258
*/
255259
from = makeRangeVar(get_namespace_name(RelationGetNamespace(rel)),
256260
pstrdup(RelationGetRelationName(rel)),
257261
-1);
258-
from->inh = false; /* apply ONLY */
262+
from->inh = (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
259263

260264
/* Build query */
261265
select = makeNode(SelectStmt);

src/backend/commands/copyto.c

Lines changed: 123 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
#include <unistd.h>
1919
#include <sys/stat.h>
2020

21+
#include "access/table.h"
2122
#include "access/tableam.h"
23+
#include "catalog/pg_inherits.h"
2224
#include "commands/copyapi.h"
2325
#include "commands/progress.h"
2426
#include "executor/execdesc.h"
@@ -86,6 +88,7 @@ typedef struct CopyToStateData
8688

8789
CopyFormatOptions opts;
8890
Node *whereClause; /* WHERE condition (or NULL) */
91+
List *partitions; /* OID list of partitions to copy data from */
8992

9093
/*
9194
* Working state
@@ -116,6 +119,8 @@ static void CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot);
116119
static void CopyAttributeOutText(CopyToState cstate, const char *string);
117120
static void CopyAttributeOutCSV(CopyToState cstate, const char *string,
118121
bool use_quote);
122+
static void CopyRelationTo(CopyToState cstate, Relation rel, Relation root_rel,
123+
uint64 *processed);
119124

120125
/* built-in format-specific routines */
121126
static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc);
@@ -602,6 +607,10 @@ EndCopy(CopyToState cstate)
602607
pgstat_progress_end_command();
603608

604609
MemoryContextDelete(cstate->copycontext);
610+
611+
if (cstate->partitions)
612+
list_free(cstate->partitions);
613+
605614
pfree(cstate);
606615
}
607616

@@ -643,6 +652,7 @@ BeginCopyTo(ParseState *pstate,
643652
PROGRESS_COPY_COMMAND_TO,
644653
0
645654
};
655+
List *children = NIL;
646656

647657
if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION)
648658
{
@@ -673,11 +683,34 @@ BeginCopyTo(ParseState *pstate,
673683
errmsg("cannot copy from sequence \"%s\"",
674684
RelationGetRelationName(rel))));
675685
else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
676-
ereport(ERROR,
677-
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
678-
errmsg("cannot copy from partitioned table \"%s\"",
679-
RelationGetRelationName(rel)),
680-
errhint("Try the COPY (SELECT ...) TO variant.")));
686+
{
687+
/*
688+
* Collect OIDs of relations containing data, so that later
689+
* DoCopyTo can copy the data from them.
690+
*/
691+
children = find_all_inheritors(RelationGetRelid(rel), AccessShareLock, NULL);
692+
693+
foreach_oid(child, children)
694+
{
695+
char relkind = get_rel_relkind(child);
696+
697+
if (relkind == RELKIND_FOREIGN_TABLE)
698+
{
699+
char *relation_name = get_rel_name(child);
700+
701+
ereport(ERROR,
702+
errcode(ERRCODE_WRONG_OBJECT_TYPE),
703+
errmsg("cannot copy from foreign table \"%s\"", relation_name),
704+
errdetail("Partition \"%s\" is a foreign table in partitioned table \"%s\"",
705+
relation_name, RelationGetRelationName(rel)),
706+
errhint("Try the COPY (SELECT ...) TO variant."));
707+
}
708+
709+
/* Exclude tables with no data */
710+
if (RELKIND_HAS_PARTITIONS(relkind))
711+
children = foreach_delete_current(children, child);
712+
}
713+
}
681714
else
682715
ereport(ERROR,
683716
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -713,6 +746,7 @@ BeginCopyTo(ParseState *pstate,
713746
cstate->rel = rel;
714747

715748
tupDesc = RelationGetDescr(cstate->rel);
749+
cstate->partitions = children;
716750
}
717751
else
718752
{
@@ -722,6 +756,7 @@ BeginCopyTo(ParseState *pstate,
722756
DestReceiver *dest;
723757

724758
cstate->rel = NULL;
759+
cstate->partitions = NIL;
725760

726761
/*
727762
* Run parse analysis and rewrite. Note this also acquires sufficient
@@ -1030,7 +1065,7 @@ DoCopyTo(CopyToState cstate)
10301065
TupleDesc tupDesc;
10311066
int num_phys_attrs;
10321067
ListCell *cur;
1033-
uint64 processed;
1068+
uint64 processed = 0;
10341069

10351070
if (fe_copy)
10361071
SendCopyBegin(cstate);
@@ -1070,33 +1105,24 @@ DoCopyTo(CopyToState cstate)
10701105

10711106
if (cstate->rel)
10721107
{
1073-
TupleTableSlot *slot;
1074-
TableScanDesc scandesc;
1075-
1076-
scandesc = table_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
1077-
slot = table_slot_create(cstate->rel, NULL);
1078-
1079-
processed = 0;
1080-
while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot))
1108+
/*
1109+
* If COPY TO source table is a partitioned table, then open each
1110+
* partition and process each individual partition.
1111+
*/
1112+
if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
10811113
{
1082-
CHECK_FOR_INTERRUPTS();
1083-
1084-
/* Deconstruct the tuple ... */
1085-
slot_getallattrs(slot);
1086-
1087-
/* Format and send the data */
1088-
CopyOneRowTo(cstate, slot);
1114+
foreach_oid(child, cstate->partitions)
1115+
{
1116+
Relation scan_rel;
10891117

1090-
/*
1091-
* Increment the number of processed tuples, and report the
1092-
* progress.
1093-
*/
1094-
pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
1095-
++processed);
1118+
/* We already got the lock in BeginCopyTo */
1119+
scan_rel = table_open(child, NoLock);
1120+
CopyRelationTo(cstate, scan_rel, cstate->rel, &processed);
1121+
table_close(scan_rel, NoLock);
1122+
}
10961123
}
1097-
1098-
ExecDropSingleTupleTableSlot(slot);
1099-
table_endscan(scandesc);
1124+
else
1125+
CopyRelationTo(cstate, cstate->rel, NULL, &processed);
11001126
}
11011127
else
11021128
{
@@ -1115,6 +1141,73 @@ DoCopyTo(CopyToState cstate)
11151141
return processed;
11161142
}
11171143

1144+
/*
1145+
* Scans a single table and exports its rows to the COPY destination,
1146+
* adding the number of rows sent to *processed and reporting progress.
1147+
* If rel is a partition, root_rel can be set to its root partitioned
1148+
* table so that tuples are sent in root_rel's rowtype, which might
1149+
* differ from that of the individual partition; otherwise pass NULL.
1150+
*/
1151+
static void
1152+
CopyRelationTo(CopyToState cstate, Relation rel, Relation root_rel, uint64 *processed)
1153+
{
1154+
TupleTableSlot *slot;
1155+
TableScanDesc scandesc;
1156+
AttrMap *map = NULL;
1157+
TupleTableSlot *root_slot = NULL;
1158+
1159+
scandesc = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);
1160+
slot = table_slot_create(rel, NULL);
1161+
1162+
/*
1163+
* If we are exporting partition data here, check whether tuples need to
1164+
* be converted to the root table's rowtype, because a partition might
1165+
* have a column order different from that of its root table.
1166+
*/
1167+
if (root_rel != NULL)
1168+
{
1169+
root_slot = table_slot_create(root_rel, NULL);
1170+
map = build_attrmap_by_name_if_req(RelationGetDescr(root_rel),
1171+
RelationGetDescr(rel),
1172+
false);
1173+
}
1174+
1175+
while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot))
1176+
{
1177+
TupleTableSlot *copyslot;
1178+
1179+
CHECK_FOR_INTERRUPTS();
1180+
1181+
if (map != NULL)
1182+
copyslot = execute_attr_map_slot(map, slot, root_slot);
1183+
else
1184+
{
1185+
/* Deconstruct the tuple */
1186+
slot_getallattrs(slot);
1187+
copyslot = slot;
1188+
}
1189+
1190+
/* Format and send the data */
1191+
CopyOneRowTo(cstate, copyslot);
1192+
1193+
/*
1194+
* Increment the caller's count of processed tuples and report progress.
1195+
*/
1196+
pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
1197+
++(*processed));
1198+
}
1199+
1200+
ExecDropSingleTupleTableSlot(slot);
1201+
1202+
if (root_slot != NULL)
1203+
ExecDropSingleTupleTableSlot(root_slot);
1204+
1205+
if (map != NULL)
1206+
free_attrmap(map);
1207+
1208+
table_endscan(scandesc);
1209+
}
1210+
11181211
/*
11191212
* Emit one row during DoCopyTo().
11201213
*/

src/test/regress/expected/copy.out

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,3 +373,23 @@ COPY copytest_mv(id) TO stdout WITH (header);
373373
id
374374
1
375375
DROP MATERIALIZED VIEW copytest_mv;
376+
-- Tests for COPY TO with partitioned tables.
377+
-- The child table pp_1 has a different column order than the root table pp.
378+
-- Check if COPY TO exports tuples in the root table's column order.
379+
CREATE TABLE pp (id int,val int) PARTITION BY RANGE (id);
380+
CREATE TABLE pp_1 (val int, id int) PARTITION BY RANGE (id);
381+
CREATE TABLE pp_2 (id int, val int) PARTITION BY RANGE (id);
382+
ALTER TABLE pp ATTACH PARTITION pp_1 FOR VALUES FROM (1) TO (5);
383+
ALTER TABLE pp ATTACH PARTITION pp_2 FOR VALUES FROM (5) TO (10);
384+
CREATE TABLE pp_15 PARTITION OF pp_1 FOR VALUES FROM (1) TO (5);
385+
CREATE TABLE pp_510 PARTITION OF pp_2 FOR VALUES FROM (5) TO (10);
386+
INSERT INTO pp SELECT g, 10 + g FROM generate_series(1,6) g;
387+
COPY pp TO stdout(header);
388+
id val
389+
1 11
390+
2 12
391+
3 13
392+
4 14
393+
5 15
394+
6 16
395+
DROP TABLE PP;

src/test/regress/expected/rowsecurity.out

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -986,6 +986,11 @@ NOTICE: f_leak => my first satire
986986
9 | 11 | 1 | regress_rls_dave | awesome science fiction
987987
(4 rows)
988988

989+
COPY part_document TO stdout WITH (DELIMITER ',');
990+
1,11,1,regress_rls_bob,my first novel
991+
6,11,1,regress_rls_carol,great science fiction
992+
9,11,1,regress_rls_dave,awesome science fiction
993+
4,55,1,regress_rls_bob,my first satire
989994
EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
990995
QUERY PLAN
991996
-------------------------------------------------------------------------
@@ -1028,6 +1033,17 @@ NOTICE: f_leak => awesome technology book
10281033
10 | 99 | 2 | regress_rls_dave | awesome technology book
10291034
(10 rows)
10301035

1036+
COPY part_document TO stdout WITH (DELIMITER ',');
1037+
1,11,1,regress_rls_bob,my first novel
1038+
2,11,2,regress_rls_bob,my second novel
1039+
6,11,1,regress_rls_carol,great science fiction
1040+
9,11,1,regress_rls_dave,awesome science fiction
1041+
4,55,1,regress_rls_bob,my first satire
1042+
8,55,2,regress_rls_carol,great satire
1043+
3,99,2,regress_rls_bob,my science textbook
1044+
5,99,2,regress_rls_bob,my history book
1045+
7,99,2,regress_rls_carol,great technology book
1046+
10,99,2,regress_rls_dave,awesome technology book
10311047
EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
10321048
QUERY PLAN
10331049
-------------------------------------------------------------------------
@@ -1058,6 +1074,11 @@ NOTICE: f_leak => awesome science fiction
10581074
9 | 11 | 1 | regress_rls_dave | awesome science fiction
10591075
(4 rows)
10601076

1077+
COPY part_document TO stdout WITH (DELIMITER ',');
1078+
1,11,1,regress_rls_bob,my first novel
1079+
2,11,2,regress_rls_bob,my second novel
1080+
6,11,1,regress_rls_carol,great science fiction
1081+
9,11,1,regress_rls_dave,awesome science fiction
10611082
EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
10621083
QUERY PLAN
10631084
----------------------------------------------------------------------------------

src/test/regress/sql/copy.sql

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,3 +405,17 @@ COPY copytest_mv(id) TO stdout WITH (header);
405405
REFRESH MATERIALIZED VIEW copytest_mv;
406406
COPY copytest_mv(id) TO stdout WITH (header);
407407
DROP MATERIALIZED VIEW copytest_mv;
408+
409+
-- Tests for COPY TO with partitioned tables.
410+
-- The child table pp_1 has a different column order than the root table pp.
411+
-- Check if COPY TO exports tuples in the root table's column order.
412+
CREATE TABLE pp (id int,val int) PARTITION BY RANGE (id);
413+
CREATE TABLE pp_1 (val int, id int) PARTITION BY RANGE (id);
414+
CREATE TABLE pp_2 (id int, val int) PARTITION BY RANGE (id);
415+
ALTER TABLE pp ATTACH PARTITION pp_1 FOR VALUES FROM (1) TO (5);
416+
ALTER TABLE pp ATTACH PARTITION pp_2 FOR VALUES FROM (5) TO (10);
417+
CREATE TABLE pp_15 PARTITION OF pp_1 FOR VALUES FROM (1) TO (5);
418+
CREATE TABLE pp_510 PARTITION OF pp_2 FOR VALUES FROM (5) TO (10);
419+
INSERT INTO pp SELECT g, 10 + g FROM generate_series(1,6) g;
420+
COPY pp TO stdout(header);
421+
DROP TABLE PP;

src/test/regress/sql/rowsecurity.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,16 +362,19 @@ SELECT * FROM pg_policies WHERE schemaname = 'regress_rls_schema' AND tablename
362362
SET SESSION AUTHORIZATION regress_rls_bob;
363363
SET row_security TO ON;
364364
SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did;
365+
COPY part_document TO stdout WITH (DELIMITER ',');
365366
EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
366367

367368
-- viewpoint from regress_rls_carol
368369
SET SESSION AUTHORIZATION regress_rls_carol;
369370
SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did;
371+
COPY part_document TO stdout WITH (DELIMITER ',');
370372
EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
371373

372374
-- viewpoint from regress_rls_dave
373375
SET SESSION AUTHORIZATION regress_rls_dave;
374376
SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did;
377+
COPY part_document TO stdout WITH (DELIMITER ',');
375378
EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
376379

377380
-- pp1 ERROR

0 commit comments

Comments
 (0)