postgresql/src/test/regress/expected/memoize.out
Robert Haas 95dbd827f2 EXPLAIN: Always use two fractional digits for row counts.
Commit ddb17e387a attempted to avoid
confusing users by displaying digits after the decimal point only when
nloops > 1, since it's impossible to have a fraction row count after a
single iteration. However, this made the regression tests unstable since
parallal queries will have nloops>1 for all nodes below the Gather or
Gather Merge in normal cases, but if the workers don't start in time and
the leader finishes all the work, they will suddenly have nloops==1,
making it unpredictable whether the digits after the decimal point would
be displayed or not. Although 44cbba9a7f
seemed to fix the immediate failures, it may still be the case that there
are lower-probability failures elsewhere in the regression tests.

Various fixes are possible here. For example, it has previously been
proposed that we should try to display the digits after the decimal
point only if rows/nloops is an integer, but currently rows is storead
as a float so it's not theoretically an exact quantity -- precision
could be lost in extreme cases. It has also been proposed that we
should try to display the digits after the decimal point only if we're
under some sort of construct that could potentially cause looping
regardless of whether it actually does. While such ideas are not
without merit, this patch adopts the much simpler solution of always
display two decimal digits. If that approach stands up to scrutiny
from the buildfarm and human users, it spares us the trouble of doing
anything more complex; if not, we can reassess.

This commit incidentally reverts 44cbba9a7f,
which should no longer be needed.

Author: Robert Haas <robertmhaas@gmail.com>
Author: Ilia Evdokimov <ilya.evdokimov@tantorlabs.com>
Discussion: http://postgr.es/m/CA+TgmoazzVHn8sFOMFAEwoqBTDxKT45D7mvkyeHgqtoD2cn58Q@mail.gmail.com
2025-02-27 11:27:16 -05:00

481 lines
20 KiB
Text

-- Perform tests on the Memoize node.
-- The cache hits/misses/evictions from the Memoize node can vary between
-- machines. Let's just replace the number with an 'N'. In order to allow us
-- to perform validation when the measure was zero, we replace a zero value
-- with "Zero". All other numbers are replaced with 'N'.
create function explain_memoize(query text, hide_hitmiss bool) returns setof text
language plpgsql as
$$
declare
ln text;
begin
for ln in
execute format('explain (analyze, costs off, summary off, timing off, buffers off) %s',
query)
loop
if hide_hitmiss = true then
ln := regexp_replace(ln, 'Hits: 0', 'Hits: Zero');
ln := regexp_replace(ln, 'Hits: \d+', 'Hits: N');
ln := regexp_replace(ln, 'Misses: 0', 'Misses: Zero');
ln := regexp_replace(ln, 'Misses: \d+', 'Misses: N');
end if;
ln := regexp_replace(ln, 'Evictions: 0', 'Evictions: Zero');
ln := regexp_replace(ln, 'Evictions: \d+', 'Evictions: N');
ln := regexp_replace(ln, 'Memory Usage: \d+', 'Memory Usage: N');
ln := regexp_replace(ln, 'Heap Fetches: \d+', 'Heap Fetches: N');
ln := regexp_replace(ln, 'loops=\d+', 'loops=N');
return next ln;
end loop;
end;
$$;
-- Ensure we get a memoize node on the inner side of the nested loop
SET enable_hashjoin TO off;
SET enable_bitmapscan TO off;
SELECT explain_memoize('
SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1
INNER JOIN tenk1 t2 ON t1.unique1 = t2.twenty
WHERE t2.unique1 < 1000;', false);
explain_memoize
----------------------------------------------------------------------------------------------
Aggregate (actual rows=1.00 loops=N)
-> Nested Loop (actual rows=1000.00 loops=N)
-> Seq Scan on tenk1 t2 (actual rows=1000.00 loops=N)
Filter: (unique1 < 1000)
Rows Removed by Filter: 9000
-> Memoize (actual rows=1.00 loops=N)
Cache Key: t2.twenty
Cache Mode: logical
Hits: 980 Misses: 20 Evictions: Zero Overflows: 0 Memory Usage: NkB
-> Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1.00 loops=N)
Index Cond: (unique1 = t2.twenty)
Heap Fetches: N
(12 rows)
-- And check we get the expected results.
SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1
INNER JOIN tenk1 t2 ON t1.unique1 = t2.twenty
WHERE t2.unique1 < 1000;
count | avg
-------+--------------------
1000 | 9.5000000000000000
(1 row)
-- Try with LATERAL joins
SELECT explain_memoize('
SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1,
LATERAL (SELECT t2.unique1 FROM tenk1 t2
WHERE t1.twenty = t2.unique1 OFFSET 0) t2
WHERE t1.unique1 < 1000;', false);
explain_memoize
----------------------------------------------------------------------------------------------
Aggregate (actual rows=1.00 loops=N)
-> Nested Loop (actual rows=1000.00 loops=N)
-> Seq Scan on tenk1 t1 (actual rows=1000.00 loops=N)
Filter: (unique1 < 1000)
Rows Removed by Filter: 9000
-> Memoize (actual rows=1.00 loops=N)
Cache Key: t1.twenty
Cache Mode: binary
Hits: 980 Misses: 20 Evictions: Zero Overflows: 0 Memory Usage: NkB
-> Index Only Scan using tenk1_unique1 on tenk1 t2 (actual rows=1.00 loops=N)
Index Cond: (unique1 = t1.twenty)
Heap Fetches: N
(12 rows)
-- And check we get the expected results.
SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1,
LATERAL (SELECT t2.unique1 FROM tenk1 t2
WHERE t1.twenty = t2.unique1 OFFSET 0) t2
WHERE t1.unique1 < 1000;
count | avg
-------+--------------------
1000 | 9.5000000000000000
(1 row)
-- Try with LATERAL joins
SELECT explain_memoize('
SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN
LATERAL (
SELECT t1.two as t1two, * FROM tenk1 t2 WHERE t2.unique1 < 4 OFFSET 0
) t2
ON t1.two = t2.two
WHERE t1.unique1 < 10;', false);
explain_memoize
-------------------------------------------------------------------------------------------------
Aggregate (actual rows=1.00 loops=N)
-> Nested Loop Left Join (actual rows=20.00 loops=N)
-> Index Scan using tenk1_unique1 on tenk1 t1 (actual rows=10.00 loops=N)
Index Cond: (unique1 < 10)
-> Memoize (actual rows=2.00 loops=N)
Cache Key: t1.two
Cache Mode: binary
Hits: 8 Misses: 2 Evictions: Zero Overflows: 0 Memory Usage: NkB
-> Subquery Scan on t2 (actual rows=2.00 loops=N)
Filter: (t1.two = t2.two)
Rows Removed by Filter: 2
-> Index Scan using tenk1_unique1 on tenk1 t2_1 (actual rows=4.00 loops=N)
Index Cond: (unique1 < 4)
(13 rows)
-- And check we get the expected results.
SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN
LATERAL (
SELECT t1.two as t1two, * FROM tenk1 t2 WHERE t2.unique1 < 4 OFFSET 0
) t2
ON t1.two = t2.two
WHERE t1.unique1 < 10;
count | avg
-------+------------------------
20 | 0.50000000000000000000
(1 row)
-- Try with LATERAL references within PlaceHolderVars
SELECT explain_memoize('
SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
LATERAL (SELECT t1.two+1 AS c1, t2.unique1 AS c2 FROM tenk1 t2) s ON TRUE
WHERE s.c1 = s.c2 AND t1.unique1 < 1000;', false);
explain_memoize
----------------------------------------------------------------------------------------------
Aggregate (actual rows=1.00 loops=N)
-> Nested Loop (actual rows=1000.00 loops=N)
-> Seq Scan on tenk1 t1 (actual rows=1000.00 loops=N)
Filter: (unique1 < 1000)
Rows Removed by Filter: 9000
-> Memoize (actual rows=1.00 loops=N)
Cache Key: (t1.two + 1)
Cache Mode: binary
Hits: 998 Misses: 2 Evictions: Zero Overflows: 0 Memory Usage: NkB
-> Index Only Scan using tenk1_unique1 on tenk1 t2 (actual rows=1.00 loops=N)
Filter: ((t1.two + 1) = unique1)
Rows Removed by Filter: 9999
Heap Fetches: N
(13 rows)
-- And check we get the expected results.
SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
LATERAL (SELECT t1.two+1 AS c1, t2.unique1 AS c2 FROM tenk1 t2) s ON TRUE
WHERE s.c1 = s.c2 AND t1.unique1 < 1000;
count | avg
-------+--------------------
1000 | 9.5000000000000000
(1 row)
-- Ensure we do not omit the cache keys from PlaceHolderVars
SELECT explain_memoize('
SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
LATERAL (SELECT t1.twenty AS c1, t2.unique1 AS c2, t2.two FROM tenk1 t2) s
ON t1.two = s.two
WHERE s.c1 = s.c2 AND t1.unique1 < 1000;', false);
explain_memoize
---------------------------------------------------------------------------------------
Aggregate (actual rows=1.00 loops=N)
-> Nested Loop (actual rows=1000.00 loops=N)
-> Seq Scan on tenk1 t1 (actual rows=1000.00 loops=N)
Filter: (unique1 < 1000)
Rows Removed by Filter: 9000
-> Memoize (actual rows=1.00 loops=N)
Cache Key: t1.two, t1.twenty
Cache Mode: binary
Hits: 980 Misses: 20 Evictions: Zero Overflows: 0 Memory Usage: NkB
-> Seq Scan on tenk1 t2 (actual rows=1.00 loops=N)
Filter: ((t1.twenty = unique1) AND (t1.two = two))
Rows Removed by Filter: 9999
(12 rows)
-- And check we get the expected results.
SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
LATERAL (SELECT t1.twenty AS c1, t2.unique1 AS c2, t2.two FROM tenk1 t2) s
ON t1.two = s.two
WHERE s.c1 = s.c2 AND t1.unique1 < 1000;
count | avg
-------+--------------------
1000 | 9.5000000000000000
(1 row)
SET enable_mergejoin TO off;
-- Test for varlena datatype with expr evaluation
CREATE TABLE expr_key (x numeric, t text);
INSERT INTO expr_key (x, t)
SELECT d1::numeric, d1::text FROM (
SELECT round((d / pi())::numeric, 7) AS d1 FROM generate_series(1, 20) AS d
) t;
-- duplicate rows so we get some cache hits
INSERT INTO expr_key SELECT * FROM expr_key;
CREATE INDEX expr_key_idx_x_t ON expr_key (x, t);
VACUUM ANALYZE expr_key;
-- Ensure we get we get a cache miss and hit for each of the 20 distinct values
SELECT explain_memoize('
SELECT * FROM expr_key t1 INNER JOIN expr_key t2
ON t1.x = t2.t::numeric AND t1.t::numeric = t2.x;', false);
explain_memoize
----------------------------------------------------------------------------------------------
Nested Loop (actual rows=80.00 loops=N)
-> Seq Scan on expr_key t1 (actual rows=40.00 loops=N)
-> Memoize (actual rows=2.00 loops=N)
Cache Key: t1.x, (t1.t)::numeric
Cache Mode: logical
Hits: 20 Misses: 20 Evictions: Zero Overflows: 0 Memory Usage: NkB
-> Index Only Scan using expr_key_idx_x_t on expr_key t2 (actual rows=2.00 loops=N)
Index Cond: (x = (t1.t)::numeric)
Filter: (t1.x = (t)::numeric)
Heap Fetches: N
(10 rows)
DROP TABLE expr_key;
-- Reduce work_mem and hash_mem_multiplier so that we see some cache evictions
SET work_mem TO '64kB';
SET hash_mem_multiplier TO 1.0;
-- Ensure we get some evictions. We're unable to validate the hits and misses
-- here as the number of entries that fit in the cache at once will vary
-- between different machines.
SELECT explain_memoize('
SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1
INNER JOIN tenk1 t2 ON t1.unique1 = t2.thousand
WHERE t2.unique1 < 1200;', true);
explain_memoize
----------------------------------------------------------------------------------------------
Aggregate (actual rows=1.00 loops=N)
-> Nested Loop (actual rows=1200.00 loops=N)
-> Seq Scan on tenk1 t2 (actual rows=1200.00 loops=N)
Filter: (unique1 < 1200)
Rows Removed by Filter: 8800
-> Memoize (actual rows=1.00 loops=N)
Cache Key: t2.thousand
Cache Mode: logical
Hits: N Misses: N Evictions: N Overflows: 0 Memory Usage: NkB
-> Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1.00 loops=N)
Index Cond: (unique1 = t2.thousand)
Heap Fetches: N
(12 rows)
CREATE TABLE flt (f float);
CREATE INDEX flt_f_idx ON flt (f);
INSERT INTO flt VALUES('-0.0'::float),('+0.0'::float);
ANALYZE flt;
SET enable_seqscan TO off;
-- Ensure memoize operates in logical mode
SELECT explain_memoize('
SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f = f2.f;', false);
explain_memoize
----------------------------------------------------------------------------------
Nested Loop (actual rows=4.00 loops=N)
-> Index Only Scan using flt_f_idx on flt f1 (actual rows=2.00 loops=N)
Heap Fetches: N
-> Memoize (actual rows=2.00 loops=N)
Cache Key: f1.f
Cache Mode: logical
Hits: 1 Misses: 1 Evictions: Zero Overflows: 0 Memory Usage: NkB
-> Index Only Scan using flt_f_idx on flt f2 (actual rows=2.00 loops=N)
Index Cond: (f = f1.f)
Heap Fetches: N
(10 rows)
-- Ensure memoize operates in binary mode
SELECT explain_memoize('
SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f >= f2.f;', false);
explain_memoize
----------------------------------------------------------------------------------
Nested Loop (actual rows=4.00 loops=N)
-> Index Only Scan using flt_f_idx on flt f1 (actual rows=2.00 loops=N)
Heap Fetches: N
-> Memoize (actual rows=2.00 loops=N)
Cache Key: f1.f
Cache Mode: binary
Hits: 0 Misses: 2 Evictions: Zero Overflows: 0 Memory Usage: NkB
-> Index Only Scan using flt_f_idx on flt f2 (actual rows=2.00 loops=N)
Index Cond: (f <= f1.f)
Heap Fetches: N
(10 rows)
DROP TABLE flt;
-- Exercise Memoize in binary mode with a large fixed width type and a
-- varlena type.
CREATE TABLE strtest (n name, t text);
CREATE INDEX strtest_n_idx ON strtest (n);
CREATE INDEX strtest_t_idx ON strtest (t);
INSERT INTO strtest VALUES('one','one'),('two','two'),('three',repeat(fipshash('three'),100));
-- duplicate rows so we get some cache hits
INSERT INTO strtest SELECT * FROM strtest;
ANALYZE strtest;
-- Ensure we get 3 hits and 3 misses
SELECT explain_memoize('
SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.n >= s2.n;', false);
explain_memoize
-------------------------------------------------------------------------------------
Nested Loop (actual rows=24.00 loops=N)
-> Seq Scan on strtest s1 (actual rows=6.00 loops=N)
Disabled: true
-> Memoize (actual rows=4.00 loops=N)
Cache Key: s1.n
Cache Mode: binary
Hits: 3 Misses: 3 Evictions: Zero Overflows: 0 Memory Usage: NkB
-> Index Scan using strtest_n_idx on strtest s2 (actual rows=4.00 loops=N)
Index Cond: (n <= s1.n)
(9 rows)
-- Ensure we get 3 hits and 3 misses
SELECT explain_memoize('
SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.t >= s2.t;', false);
explain_memoize
-------------------------------------------------------------------------------------
Nested Loop (actual rows=24.00 loops=N)
-> Seq Scan on strtest s1 (actual rows=6.00 loops=N)
Disabled: true
-> Memoize (actual rows=4.00 loops=N)
Cache Key: s1.t
Cache Mode: binary
Hits: 3 Misses: 3 Evictions: Zero Overflows: 0 Memory Usage: NkB
-> Index Scan using strtest_t_idx on strtest s2 (actual rows=4.00 loops=N)
Index Cond: (t <= s1.t)
(9 rows)
DROP TABLE strtest;
-- Ensure memoize works with partitionwise join
SET enable_partitionwise_join TO on;
CREATE TABLE prt (a int) PARTITION BY RANGE(a);
CREATE TABLE prt_p1 PARTITION OF prt FOR VALUES FROM (0) TO (10);
CREATE TABLE prt_p2 PARTITION OF prt FOR VALUES FROM (10) TO (20);
INSERT INTO prt VALUES (0), (0), (0), (0);
INSERT INTO prt VALUES (10), (10), (10), (10);
CREATE INDEX iprt_p1_a ON prt_p1 (a);
CREATE INDEX iprt_p2_a ON prt_p2 (a);
ANALYZE prt;
SELECT explain_memoize('
SELECT * FROM prt t1 INNER JOIN prt t2 ON t1.a = t2.a;', false);
explain_memoize
---------------------------------------------------------------------------------------------
Append (actual rows=32.00 loops=N)
-> Nested Loop (actual rows=16.00 loops=N)
-> Index Only Scan using iprt_p1_a on prt_p1 t1_1 (actual rows=4.00 loops=N)
Heap Fetches: N
-> Memoize (actual rows=4.00 loops=N)
Cache Key: t1_1.a
Cache Mode: logical
Hits: 3 Misses: 1 Evictions: Zero Overflows: 0 Memory Usage: NkB
-> Index Only Scan using iprt_p1_a on prt_p1 t2_1 (actual rows=4.00 loops=N)
Index Cond: (a = t1_1.a)
Heap Fetches: N
-> Nested Loop (actual rows=16.00 loops=N)
-> Index Only Scan using iprt_p2_a on prt_p2 t1_2 (actual rows=4.00 loops=N)
Heap Fetches: N
-> Memoize (actual rows=4.00 loops=N)
Cache Key: t1_2.a
Cache Mode: logical
Hits: 3 Misses: 1 Evictions: Zero Overflows: 0 Memory Usage: NkB
-> Index Only Scan using iprt_p2_a on prt_p2 t2_2 (actual rows=4.00 loops=N)
Index Cond: (a = t1_2.a)
Heap Fetches: N
(21 rows)
-- Ensure memoize works with parameterized union-all Append path
SET enable_partitionwise_join TO off;
SELECT explain_memoize('
SELECT * FROM prt_p1 t1 INNER JOIN
(SELECT * FROM prt_p1 UNION ALL SELECT * FROM prt_p2) t2
ON t1.a = t2.a;', false);
explain_memoize
----------------------------------------------------------------------------------------
Nested Loop (actual rows=16.00 loops=N)
-> Index Only Scan using iprt_p1_a on prt_p1 t1 (actual rows=4.00 loops=N)
Heap Fetches: N
-> Memoize (actual rows=4.00 loops=N)
Cache Key: t1.a
Cache Mode: logical
Hits: 3 Misses: 1 Evictions: Zero Overflows: 0 Memory Usage: NkB
-> Append (actual rows=4.00 loops=N)
-> Index Only Scan using iprt_p1_a on prt_p1 (actual rows=4.00 loops=N)
Index Cond: (a = t1.a)
Heap Fetches: N
-> Index Only Scan using iprt_p2_a on prt_p2 (actual rows=0.00 loops=N)
Index Cond: (a = t1.a)
Heap Fetches: N
(14 rows)
DROP TABLE prt;
RESET enable_partitionwise_join;
-- Exercise Memoize code that flushes the cache when a parameter changes which
-- is not part of the cache key.
-- Ensure we get a Memoize plan
EXPLAIN (COSTS OFF)
SELECT unique1 FROM tenk1 t0
WHERE unique1 < 3
AND EXISTS (
SELECT 1 FROM tenk1 t1
INNER JOIN tenk1 t2 ON t1.unique1 = t2.hundred
WHERE t0.ten = t1.twenty AND t0.two <> t2.four OFFSET 0);
QUERY PLAN
----------------------------------------------------------------
Index Scan using tenk1_unique1 on tenk1 t0
Index Cond: (unique1 < 3)
Filter: EXISTS(SubPlan 1)
SubPlan 1
-> Nested Loop
-> Index Scan using tenk1_hundred on tenk1 t2
Filter: (t0.two <> four)
-> Memoize
Cache Key: t2.hundred
Cache Mode: logical
-> Index Scan using tenk1_unique1 on tenk1 t1
Index Cond: (unique1 = t2.hundred)
Filter: (t0.ten = twenty)
(13 rows)
-- Ensure the above query returns the correct result
SELECT unique1 FROM tenk1 t0
WHERE unique1 < 3
AND EXISTS (
SELECT 1 FROM tenk1 t1
INNER JOIN tenk1 t2 ON t1.unique1 = t2.hundred
WHERE t0.ten = t1.twenty AND t0.two <> t2.four OFFSET 0);
unique1
---------
2
(1 row)
RESET enable_seqscan;
RESET enable_mergejoin;
RESET work_mem;
RESET hash_mem_multiplier;
RESET enable_bitmapscan;
RESET enable_hashjoin;
-- Test parallel plans with Memoize
SET min_parallel_table_scan_size TO 0;
SET parallel_setup_cost TO 0;
SET parallel_tuple_cost TO 0;
SET max_parallel_workers_per_gather TO 2;
-- Ensure we get a parallel plan.
EXPLAIN (COSTS OFF)
SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1,
LATERAL (SELECT t2.unique1 FROM tenk1 t2 WHERE t1.twenty = t2.unique1) t2
WHERE t1.unique1 < 1000;
QUERY PLAN
-------------------------------------------------------------------------------
Finalize Aggregate
-> Gather
Workers Planned: 2
-> Partial Aggregate
-> Nested Loop
-> Parallel Bitmap Heap Scan on tenk1 t1
Recheck Cond: (unique1 < 1000)
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 < 1000)
-> Memoize
Cache Key: t1.twenty
Cache Mode: logical
-> Index Only Scan using tenk1_unique1 on tenk1 t2
Index Cond: (unique1 = t1.twenty)
(14 rows)
-- And ensure the parallel plan gives us the correct results.
SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1,
LATERAL (SELECT t2.unique1 FROM tenk1 t2 WHERE t1.twenty = t2.unique1) t2
WHERE t1.unique1 < 1000;
count | avg
-------+--------------------
1000 | 9.5000000000000000
(1 row)
RESET max_parallel_workers_per_gather;
RESET parallel_tuple_cost;
RESET parallel_setup_cost;
RESET min_parallel_table_scan_size;