postgresql/src/include/executor/execdebug.h
Tomas Vondra d2d8a229bc Implement Incremental Sort
Incremental Sort is an optimized variant of multikey sort for cases when
the input is already sorted by a prefix of the requested sort keys. For
example when the relation is already sorted by (key1, key2) and we need
to sort it by (key1, key2, key3) we can simply split the input rows into
groups having equal values in (key1, key2), and only sort/compare the
remaining column key3.

This has a number of benefits:

- Reduced memory consumption, because only a single group (determined by
  values in the sorted prefix) needs to be kept in memory. This may also
  eliminate the need to spill to disk.

- Lower startup cost, because Incremental Sort produce results after each
  prefix group, which is beneficial for plans where startup cost matters
  (like for example queries with LIMIT clause).

We consider both Sort and Incremental Sort, and decide based on costing.

The implemented algorithm operates in two different modes:

- Fetching a minimum number of tuples without check of equality on the
  prefix keys, and sorting on all columns when safe.

- Fetching all tuples for a single prefix group and then sorting by
  comparing only the remaining (non-prefix) keys.

We always start in the first mode, and employ a heuristic to switch into
the second mode if we believe it's beneficial - the goal is to minimize
the number of unnecessary comparions while keeping memory consumption
below work_mem.

This is a very old patch series. The idea was originally proposed by
Alexander Korotkov back in 2013, and then revived in 2017. In 2018 the
patch was taken over by James Coleman, who wrote and rewrote most of the
current code.

There were many reviewers/contributors since 2013 - I've done my best to
pick the most active ones, and listed them in this commit message.

Author: James Coleman, Alexander Korotkov
Reviewed-by: Tomas Vondra, Andreas Karlsson, Marti Raudsepp, Peter Geoghegan, Robert Haas, Thomas Munro, Antonin Houska, Andres Freund, Alexander Kuzmenkov
Discussion: https://postgr.es/m/CAPpHfdscOX5an71nHd8WSUH6GNOCf=V7wgDaTXdDd9=goN-gfA@mail.gmail.com
Discussion: https://postgr.es/m/CAPpHfds1waRZ=NOmueYq0sx1ZSCnt+5QJvizT8ndT2=etZEeAQ@mail.gmail.com
2020-04-06 21:35:10 +02:00

130 lines
3.9 KiB
C

/*-------------------------------------------------------------------------
*
* execdebug.h
* #defines governing debugging behaviour in the executor
*
* XXX this is all pretty old and crufty. Newer code tends to use elog()
* for debug printouts, because that's more flexible than printf().
*
*
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/executor/execdebug.h
*
*-------------------------------------------------------------------------
*/
#ifndef EXECDEBUG_H
#define EXECDEBUG_H
#include "executor/executor.h"
#include "nodes/print.h"
/* ----------------------------------------------------------------
* debugging defines.
*
* If you want certain debugging behaviour, then #define
* the variable to 1. No need to explicitly #undef by default,
* since we can use -D compiler options to enable features.
* - thomas 1999-02-20
* ----------------------------------------------------------------
*/
/* ----------------
* EXEC_NESTLOOPDEBUG is a flag which turns on debugging of the
* nest loop node by NL_printf() and ENL_printf() in nodeNestloop.c
* ----------------
#undef EXEC_NESTLOOPDEBUG
*/
/* ----------------
* EXEC_SORTDEBUG is a flag which turns on debugging of
* the ExecSort() stuff by SO_printf() in nodeSort.c
* ----------------
#undef EXEC_SORTDEBUG
*/
/* ----------------
* EXEC_MERGEJOINDEBUG is a flag which turns on debugging of
* the ExecMergeJoin() stuff by MJ_printf() in nodeMergejoin.c
* ----------------
#undef EXEC_MERGEJOINDEBUG
*/
/* ----------------------------------------------------------------
* #defines controlled by above definitions
*
* Note: most of these are "incomplete" because I didn't
* need the ones not defined. More should be added
* only as necessary -cim 10/26/89
* ----------------------------------------------------------------
*/
#define T_OR_F(b) ((b) ? "true" : "false")
#define NULL_OR_TUPLE(slot) (TupIsNull(slot) ? "null" : "a tuple")
/* ----------------
* nest loop debugging defines
* ----------------
*/
#ifdef EXEC_NESTLOOPDEBUG
#define NL_nodeDisplay(l) nodeDisplay(l)
#define NL_printf(s) printf(s)
#define NL1_printf(s, a) printf(s, a)
#define ENL1_printf(message) printf("ExecNestLoop: %s\n", message)
#else
#define NL_nodeDisplay(l)
#define NL_printf(s)
#define NL1_printf(s, a)
#define ENL1_printf(message)
#endif /* EXEC_NESTLOOPDEBUG */
/* ----------------
* sort node debugging defines
* ----------------
*/
#ifdef EXEC_SORTDEBUG
#define SO_nodeDisplay(l) nodeDisplay(l)
#define SO_printf(s) printf(s)
#define SO1_printf(s, p) printf(s, p)
#define SO2_printf(s, p1, p2) printf(s, p1, p2)
#else
#define SO_nodeDisplay(l)
#define SO_printf(s)
#define SO1_printf(s, p)
#define SO2_printf(s, p1, p2)
#endif /* EXEC_SORTDEBUG */
/* ----------------
* merge join debugging defines
* ----------------
*/
#ifdef EXEC_MERGEJOINDEBUG
#define MJ_nodeDisplay(l) nodeDisplay(l)
#define MJ_printf(s) printf(s)
#define MJ1_printf(s, p) printf(s, p)
#define MJ2_printf(s, p1, p2) printf(s, p1, p2)
#define MJ_debugtup(slot) debugtup(slot, NULL)
#define MJ_dump(state) ExecMergeTupleDump(state)
#define MJ_DEBUG_COMPARE(res) \
MJ1_printf(" MJCompare() returns %d\n", (res))
#define MJ_DEBUG_QUAL(clause, res) \
MJ2_printf(" ExecQual(%s, econtext) returns %s\n", \
CppAsString(clause), T_OR_F(res))
#define MJ_DEBUG_PROC_NODE(slot) \
MJ2_printf(" %s = ExecProcNode(...) returns %s\n", \
CppAsString(slot), NULL_OR_TUPLE(slot))
#else
#define MJ_nodeDisplay(l)
#define MJ_printf(s)
#define MJ1_printf(s, p)
#define MJ2_printf(s, p1, p2)
#define MJ_debugtup(slot)
#define MJ_dump(state)
#define MJ_DEBUG_COMPARE(res)
#define MJ_DEBUG_QUAL(clause, res)
#define MJ_DEBUG_PROC_NODE(slot)
#endif /* EXEC_MERGEJOINDEBUG */
#endif /* EXECDEBUG_H */