postgresql/src/test/regress/sql/btree_index.sql
Peter Geoghegan 5bf748b86b Enhance nbtree ScalarArrayOp execution.
Commit 9e8da0f7 taught nbtree to handle ScalarArrayOpExpr quals
natively.  This works by pushing down the full context (the array keys)
to the nbtree index AM, enabling it to execute multiple primitive index
scans that the planner treats as one continuous index scan/index path.
This earlier enhancement enabled nbtree ScalarArrayOp index-only scans.
It also allowed scans with ScalarArrayOp quals to return ordered results
(with some notable restrictions, described further down).

Take this general approach a lot further: teach nbtree SAOP index scans
to decide how to execute ScalarArrayOp scans (when and where to start
the next primitive index scan) based on physical index characteristics.
This can be far more efficient.  All SAOP scans will now reliably avoid
duplicative leaf page accesses (just like any other nbtree index scan).
SAOP scans whose array keys are naturally clustered together now require
far fewer index descents, since we'll reliably avoid starting a new
primitive scan just to get to a later offset from the same leaf page.

The scan's arrays now advance using binary searches for the array
element that best matches the next tuple's attribute value.  Required
scan key arrays (i.e. arrays from scan keys that can terminate the scan)
ratchet forward in lockstep with the index scan.  Non-required arrays
(i.e. arrays from scan keys that can only exclude non-matching tuples)
"advance" without the process ever rolling over to a higher-order array.

Naturally, only required SAOP scan keys trigger skipping over leaf pages
(non-required arrays cannot safely end or start primitive index scans).
Consequently, even index scans of a composite index with a high-order
inequality scan key (which we'll mark required) and a low-order SAOP
scan key (which we won't mark required) now avoid repeating leaf page
accesses -- that benefit isn't limited to simpler equality-only cases.
In general, all nbtree index scans now output tuples as if they were one
continuous index scan -- even scans that mix a high-order inequality
with lower-order SAOP equalities reliably output tuples in index order.
This allows us to remove a couple of special cases that were applied
when building index paths with SAOP clauses during planning.

Bugfix commit 807a40c5 taught the planner to avoid generating unsafe
path keys: path keys on a multicolumn index path, with a SAOP clause on
any attribute beyond the first/most significant attribute.  These cases
are now all safe, so we go back to generating path keys without regard
for the presence of SAOP clauses (just like with any other clause type).
Affected queries can now exploit scan output order in all the usual ways
(e.g., certain "ORDER BY ... LIMIT n" queries can now terminate early).

Also undo changes from follow-up bugfix commit a4523c5a, which taught
the planner to produce alternative index paths, with path keys, but
without low-order SAOP index quals (filter quals were used instead).
We'll no longer generate these alternative paths, since they can no
longer offer any meaningful advantages over standard index qual paths.
Affected queries thereby avoid all of the disadvantages that come from
using filter quals within index scan nodes.  They can avoid extra heap
page accesses from using filter quals to exclude non-matching tuples
(index quals will never have that problem).  They can also skip over
irrelevant sections of the index in more cases (though only when nbtree
determines that starting another primitive scan actually makes sense).

There is a theoretical risk that removing restrictions on SAOP index
paths from the planner will break compatibility with amcanorder-based
index AMs maintained as extensions.  Such an index AM could have the
same limitations around ordered SAOP scans as nbtree had up until now.
Adding a pro forma incompatibility item about the issue to the Postgres
17 release notes seems like a good idea.

Author: Peter Geoghegan <pg@bowt.ie>
Author: Matthias van de Meent <boekewurm+postgres@gmail.com>
Reviewed-By: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-By: Matthias van de Meent <boekewurm+postgres@gmail.com>
Reviewed-By: Tomas Vondra <tomas.vondra@enterprisedb.com>
Discussion: https://postgr.es/m/CAH2-Wz=ksvN_sjcnD1+Bt-WtifRA5ok48aDYnq3pkKhxgMQpcw@mail.gmail.com
2024-04-06 11:47:10 -04:00

284 lines
9.4 KiB
SQL

--
-- BTREE_INDEX
--
-- directory paths are passed to us in environment variables
\getenv abs_srcdir PG_ABS_SRCDIR
CREATE TABLE bt_i4_heap (
seqno int4,
random int4
);
CREATE TABLE bt_name_heap (
seqno name,
random int4
);
CREATE TABLE bt_txt_heap (
seqno text,
random int4
);
CREATE TABLE bt_f8_heap (
seqno float8,
random int4
);
\set filename :abs_srcdir '/data/desc.data'
COPY bt_i4_heap FROM :'filename';
\set filename :abs_srcdir '/data/hash.data'
COPY bt_name_heap FROM :'filename';
\set filename :abs_srcdir '/data/desc.data'
COPY bt_txt_heap FROM :'filename';
\set filename :abs_srcdir '/data/hash.data'
COPY bt_f8_heap FROM :'filename';
ANALYZE bt_i4_heap;
ANALYZE bt_name_heap;
ANALYZE bt_txt_heap;
ANALYZE bt_f8_heap;
--
-- BTREE ascending/descending cases
--
-- we load int4/text from pure descending data (each key is a new
-- low key) and name/f8 from pure ascending data (each key is a new
-- high key). we had a bug where new low keys would sometimes be
-- "lost".
--
CREATE INDEX bt_i4_index ON bt_i4_heap USING btree (seqno int4_ops);
CREATE INDEX bt_name_index ON bt_name_heap USING btree (seqno name_ops);
CREATE INDEX bt_txt_index ON bt_txt_heap USING btree (seqno text_ops);
CREATE INDEX bt_f8_index ON bt_f8_heap USING btree (seqno float8_ops);
--
-- test retrieval of min/max keys for each index
--
SELECT b.*
FROM bt_i4_heap b
WHERE b.seqno < 1;
SELECT b.*
FROM bt_i4_heap b
WHERE b.seqno >= 9999;
SELECT b.*
FROM bt_i4_heap b
WHERE b.seqno = 4500;
SELECT b.*
FROM bt_name_heap b
WHERE b.seqno < '1'::name;
SELECT b.*
FROM bt_name_heap b
WHERE b.seqno >= '9999'::name;
SELECT b.*
FROM bt_name_heap b
WHERE b.seqno = '4500'::name;
SELECT b.*
FROM bt_txt_heap b
WHERE b.seqno < '1'::text;
SELECT b.*
FROM bt_txt_heap b
WHERE b.seqno >= '9999'::text;
SELECT b.*
FROM bt_txt_heap b
WHERE b.seqno = '4500'::text;
SELECT b.*
FROM bt_f8_heap b
WHERE b.seqno < '1'::float8;
SELECT b.*
FROM bt_f8_heap b
WHERE b.seqno >= '9999'::float8;
SELECT b.*
FROM bt_f8_heap b
WHERE b.seqno = '4500'::float8;
--
-- Add coverage for optimization of backwards scan index descents
--
-- Here we expect _bt_search to descend straight to a leaf page containing a
-- non-pivot tuple with the value '47', which comes last (after 11 similar
-- non-pivot tuples). Query execution should only need to visit a single
-- leaf page here.
--
-- Test case relies on tenk1_hundred index having a leaf page whose high key
-- is '(48, -inf)'. We use a low cardinality index to make our test case less
-- sensitive to implementation details that may change in the future.
set enable_seqscan to false;
set enable_indexscan to true;
set enable_bitmapscan to false;
explain (costs off)
select hundred, twenty from tenk1 where hundred < 48 order by hundred desc limit 1;
select hundred, twenty from tenk1 where hundred < 48 order by hundred desc limit 1;
-- This variant of the query need only return a single tuple located to the immediate
-- right of the '(48, -inf)' high key. It also only needs to scan one single
-- leaf page (the right sibling of the page scanned by the last test case):
explain (costs off)
select hundred, twenty from tenk1 where hundred <= 48 order by hundred desc limit 1;
select hundred, twenty from tenk1 where hundred <= 48 order by hundred desc limit 1;
--
-- Add coverage for ScalarArrayOp btree quals with pivot tuple constants
--
explain (costs off)
select distinct hundred from tenk1 where hundred in (47, 48, 72, 82);
select distinct hundred from tenk1 where hundred in (47, 48, 72, 82);
explain (costs off)
select distinct hundred from tenk1 where hundred in (47, 48, 72, 82) order by hundred desc;
select distinct hundred from tenk1 where hundred in (47, 48, 72, 82) order by hundred desc;
explain (costs off)
select thousand from tenk1 where thousand in (364, 366,380) and tenthous = 200000;
select thousand from tenk1 where thousand in (364, 366,380) and tenthous = 200000;
--
-- Check correct optimization of LIKE (special index operator support)
-- for both indexscan and bitmapscan cases
--
set enable_seqscan to false;
set enable_indexscan to true;
set enable_bitmapscan to false;
explain (costs off)
select proname from pg_proc where proname like E'RI\\_FKey%del' order by 1;
select proname from pg_proc where proname like E'RI\\_FKey%del' order by 1;
explain (costs off)
select proname from pg_proc where proname ilike '00%foo' order by 1;
select proname from pg_proc where proname ilike '00%foo' order by 1;
explain (costs off)
select proname from pg_proc where proname ilike 'ri%foo' order by 1;
set enable_indexscan to false;
set enable_bitmapscan to true;
explain (costs off)
select proname from pg_proc where proname like E'RI\\_FKey%del' order by 1;
select proname from pg_proc where proname like E'RI\\_FKey%del' order by 1;
explain (costs off)
select proname from pg_proc where proname ilike '00%foo' order by 1;
select proname from pg_proc where proname ilike '00%foo' order by 1;
explain (costs off)
select proname from pg_proc where proname ilike 'ri%foo' order by 1;
reset enable_seqscan;
reset enable_indexscan;
reset enable_bitmapscan;
-- Also check LIKE optimization with binary-compatible cases
create temp table btree_bpchar (f1 text collate "C");
create index on btree_bpchar(f1 bpchar_ops) WITH (deduplicate_items=on);
insert into btree_bpchar values ('foo'), ('fool'), ('bar'), ('quux');
-- doesn't match index:
explain (costs off)
select * from btree_bpchar where f1 like 'foo';
select * from btree_bpchar where f1 like 'foo';
explain (costs off)
select * from btree_bpchar where f1 like 'foo%';
select * from btree_bpchar where f1 like 'foo%';
-- these do match the index:
explain (costs off)
select * from btree_bpchar where f1::bpchar like 'foo';
select * from btree_bpchar where f1::bpchar like 'foo';
explain (costs off)
select * from btree_bpchar where f1::bpchar like 'foo%';
select * from btree_bpchar where f1::bpchar like 'foo%';
-- get test coverage for "single value" deduplication strategy:
insert into btree_bpchar select 'foo' from generate_series(1,1500);
--
-- Perform unique checking, with and without the use of deduplication
--
CREATE TABLE dedup_unique_test_table (a int) WITH (autovacuum_enabled=false);
CREATE UNIQUE INDEX dedup_unique ON dedup_unique_test_table (a) WITH (deduplicate_items=on);
CREATE UNIQUE INDEX plain_unique ON dedup_unique_test_table (a) WITH (deduplicate_items=off);
-- Generate enough garbage tuples in index to ensure that even the unique index
-- with deduplication enabled has to check multiple leaf pages during unique
-- checking (at least with a BLCKSZ of 8192 or less)
DO $$
BEGIN
FOR r IN 1..1350 LOOP
DELETE FROM dedup_unique_test_table;
INSERT INTO dedup_unique_test_table SELECT 1;
END LOOP;
END$$;
-- Exercise the LP_DEAD-bit-set tuple deletion code with a posting list tuple.
-- The implementation prefers deleting existing items to merging any duplicate
-- tuples into a posting list, so we need an explicit test to make sure we get
-- coverage (note that this test also assumes BLCKSZ is 8192 or less):
DROP INDEX plain_unique;
DELETE FROM dedup_unique_test_table WHERE a = 1;
INSERT INTO dedup_unique_test_table SELECT i FROM generate_series(0,450) i;
--
-- Test B-tree fast path (cache rightmost leaf page) optimization.
--
-- First create a tree that's at least three levels deep (i.e. has one level
-- between the root and leaf levels). The text inserted is long. It won't be
-- TOAST compressed because we use plain storage in the table. Only a few
-- index tuples fit on each internal page, allowing us to get a tall tree with
-- few pages. (A tall tree is required to trigger caching.)
--
-- The text column must be the leading column in the index, since suffix
-- truncation would otherwise truncate tuples on internal pages, leaving us
-- with a short tree.
create table btree_tall_tbl(id int4, t text);
alter table btree_tall_tbl alter COLUMN t set storage plain;
create index btree_tall_idx on btree_tall_tbl (t, id) with (fillfactor = 10);
insert into btree_tall_tbl select g, repeat('x', 250)
from generate_series(1, 130) g;
--
-- Test for multilevel page deletion
--
CREATE TABLE delete_test_table (a bigint, b bigint, c bigint, d bigint);
INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,80000) i;
ALTER TABLE delete_test_table ADD PRIMARY KEY (a,b,c,d);
-- Delete most entries, and vacuum, deleting internal pages and creating "fast
-- root"
DELETE FROM delete_test_table WHERE a < 79990;
VACUUM delete_test_table;
--
-- Test B-tree insertion with a metapage update (XLOG_BTREE_INSERT_META
-- WAL record type). This happens when a "fast root" page is split. This
-- also creates coverage for nbtree FSM page recycling.
--
-- The vacuum above should've turned the leaf page into a fast root. We just
-- need to insert some rows to cause the fast root page to split.
INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,1000) i;
-- Test unsupported btree opclass parameters
create index on btree_tall_tbl (id int4_ops(foo=1));
-- Test case of ALTER INDEX with abuse of column names for indexes.
-- This grammar is not officially supported, but the parser allows it.
CREATE INDEX btree_tall_idx2 ON btree_tall_tbl (id);
ALTER INDEX btree_tall_idx2 ALTER COLUMN id SET (n_distinct=100);
DROP INDEX btree_tall_idx2;
-- Partitioned index
CREATE TABLE btree_part (id int4) PARTITION BY RANGE (id);
CREATE INDEX btree_part_idx ON btree_part(id);
ALTER INDEX btree_part_idx ALTER COLUMN id SET (n_distinct=100);
DROP TABLE btree_part;