mirror of
https://github.com/postgres/postgres.git
synced 2026-03-10 02:01:23 -04:00
CopyFrom allows multi-inserts to be used for non-partitioned tables, but this was disabled for partitioned tables. The reason for this appeared to be that the tuple may not belong to the same partition as the previous tuple did. Not allowing multi-inserts here greatly slowed down imports into partitioned tables. These could take twice as long as a copy to an equivalent non-partitioned table. It seems wise to do something about this, so this change allows the multi-inserts by flushing the so-far inserted tuples to the partition when the next tuple does not belong to the same partition, or when the buffer fills. This improves performance when the next tuple in the stream commonly belongs to the same partition as the previous tuple. In cases where the target partition changes on every tuple, using multi-inserts slightly slows the performance. To get around this we track the average size of the batches that have been inserted and adaptively enable or disable multi-inserts based on the size of the batch. Some testing was done and the regression only seems to exist when the average size of the insert batch is close to 1, so let's just enable multi-inserts when the average size is at least 1.3. More performance testing might reveal a better number for this, but since the slowdown was only 1-2% it does not seem critical enough to spend too much time calculating it. In any case it may depend on other factors rather than just the size of the batch. Allowing multi-inserts for partitions required a bit of work around the per-tuple memory contexts as we must flush the tuples when the next tuple does not belong to the same partition, in which case there is no good time to reset the per-tuple context, as we've already built the new tuple by this time. In order to work around this we maintain two per-tuple contexts and just switch between them every time the partition changes and reset the old one. 
This does mean that the first of each batch of tuples is not allocated in the same memory context as the others, but that does not matter since we only reset the context once the previous batch has been inserted. Author: David Rowley <david.rowley@2ndquadrant.com> Reviewed-by: Melanie Plageman <melanieplageman@gmail.com>
165 lines
4.4 KiB
Text
165 lines
4.4 KiB
Text
--
|
|
-- COPY
|
|
--
|
|
|
|
-- CLASS POPULATION
|
|
-- (any resemblance to real life is purely coincidental)
|
|
--
|
|
-- Bulk-load the test tables from the data files in the source data directory.
COPY aggtest FROM '@abs_srcdir@/data/agg.data';
|
|
|
|
-- Round-trip onek: load the source data, dump it to the results directory,
-- empty the table, then reload it from that dump.
COPY onek FROM '@abs_srcdir@/data/onek.data';
|
|
|
|
COPY onek TO '@abs_builddir@/results/onek.data';
|
|
|
|
DELETE FROM onek;
|
|
|
|
COPY onek FROM '@abs_builddir@/results/onek.data';
|
|
|
|
COPY tenk1 FROM '@abs_srcdir@/data/tenk.data';
|
|
|
|
COPY slow_emp4000 FROM '@abs_srcdir@/data/rect.data';
|
|
|
|
COPY person FROM '@abs_srcdir@/data/person.data';
|
|
|
|
COPY emp FROM '@abs_srcdir@/data/emp.data';
|
|
|
|
COPY student FROM '@abs_srcdir@/data/student.data';
|
|
|
|
COPY stud_emp FROM '@abs_srcdir@/data/stud_emp.data';
|
|
|
|
COPY road FROM '@abs_srcdir@/data/streets.data';
|
|
|
|
COPY real_city FROM '@abs_srcdir@/data/real_city.data';
|
|
|
|
-- The four hash_*_heap tables are all loaded from the same hash.data file.
COPY hash_i4_heap FROM '@abs_srcdir@/data/hash.data';
|
|
|
|
COPY hash_name_heap FROM '@abs_srcdir@/data/hash.data';
|
|
|
|
COPY hash_txt_heap FROM '@abs_srcdir@/data/hash.data';
|
|
|
|
COPY hash_f8_heap FROM '@abs_srcdir@/data/hash.data';
|
|
|
|
COPY test_tsvector FROM '@abs_srcdir@/data/tsearch.data';
|
|
|
|
COPY testjsonb FROM '@abs_srcdir@/data/jsonb.data';
|
|
|
|
-- the data in this file has a lot of duplicates in the index key
|
|
-- fields, leading to long bucket chains and lots of table expansion.
|
|
-- this is therefore a stress test of the bucket overflow code (unlike
|
|
-- the data in hash.data, which has unique index keys).
|
|
--
|
|
-- NOTE(review): the load below is commented out, so the stress test
-- described above does not run from this script -- confirm that is intended.
-- COPY hash_ovfl_heap FROM '@abs_srcdir@/data/hashovfl.data';
|
|
|
|
-- The bt_*_heap tables alternate between desc.data and hash.data as input.
COPY bt_i4_heap FROM '@abs_srcdir@/data/desc.data';
|
|
|
|
COPY bt_name_heap FROM '@abs_srcdir@/data/hash.data';
|
|
|
|
COPY bt_txt_heap FROM '@abs_srcdir@/data/desc.data';
|
|
|
|
COPY bt_f8_heap FROM '@abs_srcdir@/data/hash.data';
|
|
|
|
-- Both array test tables are loaded from the same array.data file.
COPY array_op_test FROM '@abs_srcdir@/data/array.data';
|
|
|
|
COPY array_index_op_test FROM '@abs_srcdir@/data/array.data';
|
|
|
|
-- analyze all the data we just loaded, to ensure plan consistency
|
|
-- in later tests
|
|
|
|
-- NOTE(review): testjsonb is loaded by a COPY earlier in this script but has
-- no ANALYZE in this list -- confirm whether that omission is intentional.
ANALYZE aggtest;
|
|
ANALYZE onek;
|
|
ANALYZE tenk1;
|
|
ANALYZE slow_emp4000;
|
|
ANALYZE person;
|
|
ANALYZE emp;
|
|
ANALYZE student;
|
|
ANALYZE stud_emp;
|
|
ANALYZE road;
|
|
ANALYZE real_city;
|
|
ANALYZE hash_i4_heap;
|
|
ANALYZE hash_name_heap;
|
|
ANALYZE hash_txt_heap;
|
|
ANALYZE hash_f8_heap;
|
|
ANALYZE test_tsvector;
|
|
ANALYZE bt_i4_heap;
|
|
ANALYZE bt_name_heap;
|
|
ANALYZE bt_txt_heap;
|
|
ANALYZE bt_f8_heap;
|
|
ANALYZE array_op_test;
|
|
ANALYZE array_index_op_test;
|
|
|
|
--- test copying in CSV mode with various styles
|
|
--- of embedded line ending characters
|
|
|
|
create temp table copytest (
|
|
style text,
|
|
test text,
|
|
filler int);
|
|
|
|
-- One row per embedded line-ending style: CR+LF ('DOS'), LF ('Unix') and
-- CR ('Mac'), plus a row that mixes literal backslashes with real CR and LF
-- characters (its style value also contains a backslash).
insert into copytest values('DOS',E'abc\r\ndef',1);
|
|
insert into copytest values('Unix',E'abc\ndef',2);
|
|
insert into copytest values('Mac',E'abc\rdef',3);
|
|
insert into copytest values(E'esc\\ape',E'a\\r\\\r\\\n\\nb',4);
|
|
|
|
-- Round-trip through a CSV file: dump copytest, then load the dump into a
-- second table created with the same column definitions.
copy copytest to '@abs_builddir@/results/copytest.csv' csv;
|
|
|
|
create temp table copytest2 (like copytest);
|
|
|
|
copy copytest2 from '@abs_builddir@/results/copytest.csv' csv;
|
|
|
|
-- Lists rows of copytest that are absent from copytest2; an empty result
-- means every row came back from the CSV file unchanged.
select * from copytest except select * from copytest2;
|
|
|
|
-- Empty the target table so the round trip can be repeated with other options.
truncate copytest2;
|
|
|
|
--- same test but with an escape char different from quote char
|
|
|
|
-- Here the quote character is a single quote (written '''' in SQL) and the
-- escape character is a backslash.
copy copytest to '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\';
|
|
|
|
copy copytest2 from '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\';
|
|
|
|
-- Same comparison as before: no rows expected if the round trip is lossless.
select * from copytest except select * from copytest2;
|
|
|
|
|
|
-- test header line feature
|
|
|
|
-- Two of the column names contain a comma and a double quote respectively,
-- presumably so the header line written by COPY TO needs CSV quoting.
create temp table copytest3 (
|
|
c1 int,
|
|
"col with , comma" text,
|
|
"col with "" quote" int);
|
|
|
|
-- With the header option the first input line is skipped rather than parsed,
-- so the junk line below is discarded and only the two rows that follow it
-- are loaded.  (No comments may be added inside the inline data itself.)
copy copytest3 from stdin csv header;
|
|
this is just a line full of junk that would error out if parsed
|
|
1,a,1
|
|
2,b,2
|
|
\.
|
|
|
|
-- Write the table back out as CSV, this time with a header line generated
-- from the column names.
copy copytest3 to stdout csv header;
|
|
|
|
-- test copy from with a partitioned table
|
|
create table parted_copytest (
|
|
a int,
|
|
b int,
|
|
c text
|
|
) partition by list (b);
|
|
|
|
-- Each partition declares the same three columns as the parent but in a
-- different physical order (parent: a, b, c) -- presumably so that rows must
-- be converted to the partition's column layout; TODO confirm.
create table parted_copytest_a1 (c text, b int, a int);
|
|
create table parted_copytest_a2 (a int, c text, b int);
|
|
|
|
-- Rows with b = 1 go to parted_copytest_a1, rows with b = 2 to parted_copytest_a2.
alter table parted_copytest attach partition parted_copytest_a1 for values in(1);
|
|
alter table parted_copytest attach partition parted_copytest_a2 for values in(2);
|
|
|
|
-- We must insert enough rows to trigger multi-inserts. These are only
|
|
-- enabled adaptively when there are few enough partition changes.
|
|
-- 1000 rows for b = 1, then 10 rows for b = 2, then 10 more for b = 1: when
-- dumped in order of a, the target partition changes only twice.
insert into parted_copytest select x,1,'One' from generate_series(1,1000) x;
|
|
insert into parted_copytest select x,2,'Two' from generate_series(1001,1010) x;
|
|
insert into parted_copytest select x,1,'One' from generate_series(1011,1020) x;
|
|
|
|
-- Dump the rows ordered by a, empty the table, then reload the file through
-- the partitioned parent.
copy (select * from parted_copytest order by a) to '@abs_builddir@/results/parted_copytest.csv';
|
|
|
|
truncate parted_copytest;
|
|
|
|
copy parted_copytest from '@abs_builddir@/results/parted_copytest.csv';
|
|
|
|
-- Report the row count and sum(a) per partition, ordered by partition name,
-- to show where the reloaded rows ended up.
select tableoid::regclass,count(*),sum(a) from parted_copytest
|
|
group by tableoid order by tableoid::regclass::name;
|
|
|
|
drop table parted_copytest;
|