postgresql/contrib/pg_plan_advice/expected/join_order.out
Robert Haas 5883ff30b0 Add pg_plan_advice contrib module.
Provide a facility that (1) can be used to stabilize certain plan choices
so that the planner cannot reverse course without authorization and
(2) can be used by knowledgeable users to insist on plan choices contrary
to what the planner believes best. In both cases, terrible outcomes are
possible: users should think twice and perhaps three times before
constraining the planner's ability to do as it thinks best; nevertheless,
there are problems that are much more easily solved with these facilities
than without them.

This patch takes the approach of analyzing a finished plan to produce
textual output, which we call "plan advice", that describes key
decisions made during planning; if that plan advice is provided during
future planning cycles, it will force those key decisions to be made in
the same way.  Not all planner decisions can be controlled using advice;
for example, decisions about how to perform aggregation are currently
out of scope, as is choice of sort order. Plan advice can also be edited
by the user, or even written from scratch in simple cases, making it
possible to generate outcomes that the planner would not have produced.
Partial advice can be provided to control some planner outcomes but not
others.

Currently, plan advice is focused only on specific outcomes, such as
the choice to use a sequential scan for a particular relation, and not
on estimates that might contribute to those outcomes, such as a
possibly-incorrect selectivity estimate. While it would be useful to
users to be able to provide plan advice that affects selectivity
estimates or other aspects of costing, that is out of scope for this
commit.

Reviewed-by: Lukas Fittl <lukas@fittl.com>
Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reviewed-by: Greg Burd <greg@burd.me>
Reviewed-by: Jacob Champion <jacob.champion@enterprisedb.com>
Reviewed-by: Haibo Yan <tristan.yim@gmail.com>
Reviewed-by: Dian Fay <di@nmfay.com>
Reviewed-by: Ajay Pal <ajay.pal.k@gmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Reviewed-by: Alexandra Wang <alexandra.wang.oss@gmail.com>
Discussion: http://postgr.es/m/CA+TgmoZ-Jh1T6QyWoCODMVQdhTUPYkaZjWztzP1En4=ZHoKPzw@mail.gmail.com
2026-03-12 13:00:43 -04:00

500 lines
16 KiB
Text

LOAD 'pg_plan_advice';
SET max_parallel_workers_per_gather = 0;
CREATE TABLE jo_dim1 (id integer primary key, dim1 text, val1 int)
WITH (autovacuum_enabled = false);
INSERT INTO jo_dim1 (id, dim1, val1)
SELECT g, 'some filler text ' || g, (g % 3) + 1
FROM generate_series(1,100) g;
VACUUM ANALYZE jo_dim1;
CREATE TABLE jo_dim2 (id integer primary key, dim2 text, val2 int)
WITH (autovacuum_enabled = false);
INSERT INTO jo_dim2 (id, dim2, val2)
SELECT g, 'some filler text ' || g, (g % 53) + 1
FROM generate_series(1,1000) g;
VACUUM ANALYZE jo_dim2;
CREATE TABLE jo_fact (
id int primary key,
dim1_id integer not null references jo_dim1 (id),
dim2_id integer not null references jo_dim2 (id)
) WITH (autovacuum_enabled = false);
INSERT INTO jo_fact
SELECT g, (g%100)+1, (g%100)+1 FROM generate_series(1,100000) g;
VACUUM ANALYZE jo_fact;
-- We expect to join to d2 first and then d1, since the condition on d2
-- is more selective.
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
QUERY PLAN
------------------------------------------
Hash Join
Hash Cond: (f.dim1_id = d1.id)
-> Hash Join
Hash Cond: (f.dim2_id = d2.id)
-> Seq Scan on jo_fact f
-> Hash
-> Seq Scan on jo_dim2 d2
Filter: (val2 = 1)
-> Hash
-> Seq Scan on jo_dim1 d1
Filter: (val1 = 1)
Generated Plan Advice:
JOIN_ORDER(f d2 d1)
HASH_JOIN(d2 d1)
SEQ_SCAN(f d2 d1)
NO_GATHER(f d1 d2)
(16 rows)
-- Force a few different join orders. Some of these are very inefficient,
-- but the planner considers them all viable.
BEGIN;
SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
QUERY PLAN
------------------------------------------
Hash Join
Hash Cond: (f.dim2_id = d2.id)
-> Hash Join
Hash Cond: (f.dim1_id = d1.id)
-> Seq Scan on jo_fact f
-> Hash
-> Seq Scan on jo_dim1 d1
Filter: (val1 = 1)
-> Hash
-> Seq Scan on jo_dim2 d2
Filter: (val2 = 1)
Supplied Plan Advice:
JOIN_ORDER(f d1 d2) /* matched */
Generated Plan Advice:
JOIN_ORDER(f d1 d2)
HASH_JOIN(d1 d2)
SEQ_SCAN(f d1 d2)
NO_GATHER(f d1 d2)
(18 rows)
SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
QUERY PLAN
------------------------------------------
Hash Join
Hash Cond: (f.dim1_id = d1.id)
-> Hash Join
Hash Cond: (f.dim2_id = d2.id)
-> Seq Scan on jo_fact f
-> Hash
-> Seq Scan on jo_dim2 d2
Filter: (val2 = 1)
-> Hash
-> Seq Scan on jo_dim1 d1
Filter: (val1 = 1)
Supplied Plan Advice:
JOIN_ORDER(f d2 d1) /* matched */
Generated Plan Advice:
JOIN_ORDER(f d2 d1)
HASH_JOIN(d2 d1)
SEQ_SCAN(f d2 d1)
NO_GATHER(f d1 d2)
(18 rows)
SET LOCAL pg_plan_advice.advice = 'join_order(d1 f d2)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
QUERY PLAN
-----------------------------------------
Hash Join
Hash Cond: (f.dim2_id = d2.id)
-> Hash Join
Hash Cond: (d1.id = f.dim1_id)
-> Seq Scan on jo_dim1 d1
Filter: (val1 = 1)
-> Hash
-> Seq Scan on jo_fact f
-> Hash
-> Seq Scan on jo_dim2 d2
Filter: (val2 = 1)
Supplied Plan Advice:
JOIN_ORDER(d1 f d2) /* matched */
Generated Plan Advice:
JOIN_ORDER(d1 f d2)
HASH_JOIN(f d2)
SEQ_SCAN(d1 f d2)
NO_GATHER(f d1 d2)
(18 rows)
SET LOCAL pg_plan_advice.advice = 'join_order(f (d1 d2))';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
QUERY PLAN
------------------------------------------------------------
Hash Join
Hash Cond: ((f.dim1_id = d1.id) AND (f.dim2_id = d2.id))
-> Seq Scan on jo_fact f
-> Hash
-> Nested Loop
-> Seq Scan on jo_dim1 d1
Filter: (val1 = 1)
-> Materialize
-> Seq Scan on jo_dim2 d2
Filter: (val2 = 1)
Supplied Plan Advice:
JOIN_ORDER(f (d1 d2)) /* matched */
Generated Plan Advice:
JOIN_ORDER(f (d1 d2))
NESTED_LOOP_MATERIALIZE(d2)
HASH_JOIN((d1 d2))
SEQ_SCAN(f d1 d2)
NO_GATHER(f d1 d2)
(18 rows)
SET LOCAL pg_plan_advice.advice = 'join_order(f {d1 d2})';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
QUERY PLAN
------------------------------------------------------------
Hash Join
Hash Cond: ((f.dim1_id = d1.id) AND (f.dim2_id = d2.id))
-> Seq Scan on jo_fact f
-> Hash
-> Nested Loop
-> Seq Scan on jo_dim1 d1
Filter: (val1 = 1)
-> Materialize
-> Seq Scan on jo_dim2 d2
Filter: (val2 = 1)
Supplied Plan Advice:
JOIN_ORDER(f {d1 d2}) /* matched */
Generated Plan Advice:
JOIN_ORDER(f (d1 d2))
NESTED_LOOP_MATERIALIZE(d2)
HASH_JOIN((d1 d2))
SEQ_SCAN(f d1 d2)
NO_GATHER(f d1 d2)
(18 rows)
COMMIT;
-- Force a join order by mentioning just a prefix of the join list.
BEGIN;
SET LOCAL pg_plan_advice.advice = 'join_order(d2)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
QUERY PLAN
------------------------------------------------
Hash Join
Hash Cond: (d2.id = f.dim2_id)
-> Seq Scan on jo_dim2 d2
Filter: (val2 = 1)
-> Hash
-> Hash Join
Hash Cond: (f.dim1_id = d1.id)
-> Seq Scan on jo_fact f
-> Hash
-> Seq Scan on jo_dim1 d1
Filter: (val1 = 1)
Supplied Plan Advice:
JOIN_ORDER(d2) /* matched */
Generated Plan Advice:
JOIN_ORDER(d2 (f d1))
HASH_JOIN(d1 (f d1))
SEQ_SCAN(d2 f d1)
NO_GATHER(f d1 d2)
(18 rows)
SET LOCAL pg_plan_advice.advice = 'join_order(d2 d1)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
QUERY PLAN
------------------------------------------------------------
Hash Join
Hash Cond: ((d1.id = f.dim1_id) AND (d2.id = f.dim2_id))
-> Nested Loop
-> Seq Scan on jo_dim2 d2
Filter: (val2 = 1)
-> Materialize
-> Seq Scan on jo_dim1 d1
Filter: (val1 = 1)
-> Hash
-> Seq Scan on jo_fact f
Supplied Plan Advice:
JOIN_ORDER(d2 d1) /* matched */
Generated Plan Advice:
JOIN_ORDER(d2 d1 f)
NESTED_LOOP_MATERIALIZE(d1)
HASH_JOIN(f)
SEQ_SCAN(d2 d1 f)
NO_GATHER(f d1 d2)
(18 rows)
COMMIT;
-- jo_fact is not partitioned, but let's try pretending that it is and
-- verifying that the advice does not apply.
BEGIN;
SET LOCAL pg_plan_advice.advice = 'join_order(f/d1 d1 d2)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
QUERY PLAN
-------------------------------------------------------------
Nested Loop
Disabled: true
-> Nested Loop
Disabled: true
-> Seq Scan on jo_fact f
-> Index Scan using jo_dim1_pkey on jo_dim1 d1
Index Cond: (id = f.dim1_id)
Filter: (val1 = 1)
-> Index Scan using jo_dim2_pkey on jo_dim2 d2
Index Cond: (id = f.dim2_id)
Filter: (val2 = 1)
Supplied Plan Advice:
JOIN_ORDER(f/d1 d1 d2) /* partially matched */
Generated Plan Advice:
JOIN_ORDER(f d1 d2)
NESTED_LOOP_PLAIN(d1 d2)
SEQ_SCAN(f)
INDEX_SCAN(d1 public.jo_dim1_pkey d2 public.jo_dim2_pkey)
NO_GATHER(f d1 d2)
(19 rows)
SET LOCAL pg_plan_advice.advice = 'join_order(f/d1 (d1 d2))';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
QUERY PLAN
--------------------------------------------------------------
Nested Loop
Disabled: true
Join Filter: ((d1.id = f.dim1_id) AND (d2.id = f.dim2_id))
-> Nested Loop
-> Seq Scan on jo_dim1 d1
Filter: (val1 = 1)
-> Materialize
-> Seq Scan on jo_dim2 d2
Filter: (val2 = 1)
-> Seq Scan on jo_fact f
Supplied Plan Advice:
JOIN_ORDER(f/d1 (d1 d2)) /* partially matched */
Generated Plan Advice:
JOIN_ORDER(d1 d2 f)
NESTED_LOOP_PLAIN(f)
NESTED_LOOP_MATERIALIZE(d2)
SEQ_SCAN(d1 d2 f)
NO_GATHER(f d1 d2)
(18 rows)
COMMIT;
-- The unusual formulation of this query is intended to prevent the query
-- planner from reducing the FULL JOIN to some other join type, so that we
-- can test what happens with a join type that cannot be reordered.
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_dim1 d1
INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
QUERY PLAN
-------------------------------------------------------------
Nested Loop
Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
-> Merge Full Join
Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0)))
-> Sort
Sort Key: ((d2.id + 0))
-> Seq Scan on jo_dim2 d2
-> Sort
Sort Key: ((f.dim2_id + 0))
-> Seq Scan on jo_fact f
-> Materialize
-> Seq Scan on jo_dim1 d1
Generated Plan Advice:
JOIN_ORDER(d2 f d1)
MERGE_JOIN_PLAIN(f)
NESTED_LOOP_MATERIALIZE(d1)
SEQ_SCAN(d2 f d1)
NO_GATHER(d1 f d2)
(18 rows)
-- We should not be able to force the planner to join f to d1 first, because
-- that is not a valid join order, but we should be able to force the planner
-- to make either d2 or f the driving table.
BEGIN;
SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_dim1 d1
INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
QUERY PLAN
-------------------------------------------------------------
Nested Loop
Disabled: true
Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
-> Merge Full Join
Disabled: true
Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0)))
-> Sort
Sort Key: ((d2.id + 0))
-> Seq Scan on jo_dim2 d2
-> Sort
Sort Key: ((f.dim2_id + 0))
-> Seq Scan on jo_fact f
-> Seq Scan on jo_dim1 d1
Supplied Plan Advice:
JOIN_ORDER(f d1 d2) /* partially matched */
Generated Plan Advice:
JOIN_ORDER(d2 f d1)
MERGE_JOIN_PLAIN(f)
NESTED_LOOP_PLAIN(d1)
SEQ_SCAN(d2 f d1)
NO_GATHER(d1 f d2)
(21 rows)
SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_dim1 d1
INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
QUERY PLAN
-------------------------------------------------------------
Nested Loop
Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
-> Merge Full Join
Merge Cond: (((f.dim2_id + 0)) = ((d2.id + 0)))
-> Sort
Sort Key: ((f.dim2_id + 0))
-> Seq Scan on jo_fact f
-> Sort
Sort Key: ((d2.id + 0))
-> Seq Scan on jo_dim2 d2
-> Materialize
-> Seq Scan on jo_dim1 d1
Supplied Plan Advice:
JOIN_ORDER(f d2 d1) /* matched */
Generated Plan Advice:
JOIN_ORDER(f d2 d1)
MERGE_JOIN_PLAIN(d2)
NESTED_LOOP_MATERIALIZE(d1)
SEQ_SCAN(f d2 d1)
NO_GATHER(d1 f d2)
(20 rows)
SET LOCAL pg_plan_advice.advice = 'join_order(d2 f d1)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_dim1 d1
INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
QUERY PLAN
-------------------------------------------------------------
Nested Loop
Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
-> Merge Full Join
Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0)))
-> Sort
Sort Key: ((d2.id + 0))
-> Seq Scan on jo_dim2 d2
-> Sort
Sort Key: ((f.dim2_id + 0))
-> Seq Scan on jo_fact f
-> Materialize
-> Seq Scan on jo_dim1 d1
Supplied Plan Advice:
JOIN_ORDER(d2 f d1) /* matched */
Generated Plan Advice:
JOIN_ORDER(d2 f d1)
MERGE_JOIN_PLAIN(f)
NESTED_LOOP_MATERIALIZE(d1)
SEQ_SCAN(d2 f d1)
NO_GATHER(d1 f d2)
(20 rows)
COMMIT;
-- Two incompatible join orders should conflict. In the second case,
-- the conflict is implicit: if d1 is on the inner side of a join of any
-- type, it cannot also be the driving table.
BEGIN;
SET LOCAL pg_plan_advice.advice = 'join_order(f) join_order(d1)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_dim1 d1
INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
QUERY PLAN
-------------------------------------------------------------
Nested Loop
Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
-> Merge Full Join
Merge Cond: (((f.dim2_id + 0)) = ((d2.id + 0)))
-> Sort
Sort Key: ((f.dim2_id + 0))
-> Seq Scan on jo_fact f
-> Sort
Sort Key: ((d2.id + 0))
-> Seq Scan on jo_dim2 d2
-> Materialize
-> Seq Scan on jo_dim1 d1
Supplied Plan Advice:
JOIN_ORDER(f) /* matched, conflicting */
JOIN_ORDER(d1) /* matched, conflicting, failed */
Generated Plan Advice:
JOIN_ORDER(f d2 d1)
MERGE_JOIN_PLAIN(d2)
NESTED_LOOP_MATERIALIZE(d1)
SEQ_SCAN(f d2 d1)
NO_GATHER(d1 f d2)
(21 rows)
SET LOCAL pg_plan_advice.advice = 'join_order(d1) hash_join(d1)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_dim1 d1
INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
QUERY PLAN
---------------------------------------------------------------
Nested Loop
Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
-> Seq Scan on jo_dim1 d1
-> Materialize
-> Merge Full Join
Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0)))
-> Sort
Sort Key: ((d2.id + 0))
-> Seq Scan on jo_dim2 d2
-> Sort
Sort Key: ((f.dim2_id + 0))
-> Seq Scan on jo_fact f
Supplied Plan Advice:
JOIN_ORDER(d1) /* matched, conflicting */
HASH_JOIN(d1) /* matched, conflicting, failed */
Generated Plan Advice:
JOIN_ORDER(d1 (d2 f))
MERGE_JOIN_PLAIN(f)
NESTED_LOOP_MATERIALIZE((f d2))
SEQ_SCAN(d1 d2 f)
NO_GATHER(d1 f d2)
(21 rows)
COMMIT;