mirror of
https://github.com/postgres/postgres.git
synced 2026-04-15 22:10:45 -04:00
clauses in which one side or the other references both sides of the join cannot be removed as redundant, because that expression won't have been constrained below the join. Per report from Sergey Burladyan.
503 lines
15 KiB
C
503 lines
15 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* restrictinfo.c
|
|
* RestrictInfo node manipulation routines.
|
|
*
|
|
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.31.4.1 2007/07/31 19:54:10 tgl Exp $
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "optimizer/clauses.h"
|
|
#include "optimizer/cost.h"
|
|
#include "optimizer/paths.h"
|
|
#include "optimizer/restrictinfo.h"
|
|
#include "optimizer/var.h"
|
|
|
|
|
|
static RestrictInfo *make_restrictinfo_internal(Expr *clause,
|
|
Expr *orclause,
|
|
bool is_pushed_down,
|
|
bool valid_everywhere);
|
|
static Expr *make_sub_restrictinfos(Expr *clause,
|
|
bool is_pushed_down,
|
|
bool valid_everywhere);
|
|
static RestrictInfo *join_clause_is_redundant(Query *root,
|
|
RestrictInfo *rinfo,
|
|
List *reference_list,
|
|
Relids outer_relids,
|
|
Relids inner_relids,
|
|
JoinType jointype);
|
|
|
|
|
|
/*
|
|
* make_restrictinfo
|
|
*
|
|
* Build a RestrictInfo node containing the given subexpression.
|
|
*
|
|
* The is_pushed_down and valid_everywhere flags must be supplied by the
|
|
* caller.
|
|
*
|
|
* We initialize fields that depend only on the given subexpression, leaving
|
|
* others that depend on context (or may never be needed at all) to be filled
|
|
* later.
|
|
*/
|
|
RestrictInfo *
|
|
make_restrictinfo(Expr *clause, bool is_pushed_down, bool valid_everywhere)
|
|
{
|
|
Expr *orclause;
|
|
|
|
/*
|
|
* If it's an OR clause, build a modified copy with RestrictInfos
|
|
* inserted above each subclause of the top-level AND/OR structure.
|
|
*/
|
|
if (or_clause((Node *) clause))
|
|
{
|
|
orclause = make_sub_restrictinfos(clause,
|
|
is_pushed_down,
|
|
valid_everywhere);
|
|
}
|
|
else
|
|
{
|
|
/* Shouldn't be an AND clause, else flatten_andors messed up */
|
|
Assert(!and_clause((Node *) clause));
|
|
|
|
orclause = NULL;
|
|
}
|
|
|
|
return make_restrictinfo_internal(clause, orclause,
|
|
is_pushed_down, valid_everywhere);
|
|
}
|
|
|
|
/*
|
|
* make_restrictinfo_from_indexclauses
|
|
*
|
|
* Given an indexclauses structure, convert to ordinary expression format
|
|
* and build RestrictInfo node(s).
|
|
*
|
|
* The result is a List since we might need to return multiple RestrictInfos.
|
|
*
|
|
* This could be done as make_restrictinfo(make_expr_from_indexclauses()),
|
|
* but if we did it that way then we would strip the original RestrictInfo
|
|
* nodes from the index clauses and be forced to build new ones. It's better
|
|
* to have a specialized routine that allows sharing of RestrictInfos.
|
|
*/
|
|
List *
|
|
make_restrictinfo_from_indexclauses(List *indexclauses,
|
|
bool is_pushed_down,
|
|
bool valid_everywhere)
|
|
{
|
|
List *withris = NIL;
|
|
List *withoutris = NIL;
|
|
ListCell *orlist;
|
|
|
|
/* Empty list probably can't happen, but here's what to do */
|
|
if (indexclauses == NIL)
|
|
return NIL;
|
|
/* If single indexscan, just return the ANDed clauses */
|
|
if (list_length(indexclauses) == 1)
|
|
return (List *) linitial(indexclauses);
|
|
/* Else we need an OR RestrictInfo structure */
|
|
foreach(orlist, indexclauses)
|
|
{
|
|
List *andlist = (List *) lfirst(orlist);
|
|
|
|
/* Create AND subclause with RestrictInfos */
|
|
withris = lappend(withris, make_ands_explicit(andlist));
|
|
/* And one without */
|
|
andlist = get_actual_clauses(andlist);
|
|
withoutris = lappend(withoutris, make_ands_explicit(andlist));
|
|
}
|
|
return list_make1(make_restrictinfo_internal(make_orclause(withoutris),
|
|
make_orclause(withris),
|
|
is_pushed_down,
|
|
valid_everywhere));
|
|
}
|
|
|
|
/*
|
|
* make_restrictinfo_internal
|
|
*
|
|
* Common code for the above two entry points.
|
|
*/
|
|
static RestrictInfo *
|
|
make_restrictinfo_internal(Expr *clause, Expr *orclause,
|
|
bool is_pushed_down, bool valid_everywhere)
|
|
{
|
|
RestrictInfo *restrictinfo = makeNode(RestrictInfo);
|
|
|
|
restrictinfo->clause = clause;
|
|
restrictinfo->orclause = orclause;
|
|
restrictinfo->is_pushed_down = is_pushed_down;
|
|
restrictinfo->valid_everywhere = valid_everywhere;
|
|
restrictinfo->can_join = false; /* may get set below */
|
|
|
|
/*
|
|
* If it's a binary opclause, set up left/right relids info. In any
|
|
* case set up the total clause relids info.
|
|
*/
|
|
if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
|
|
{
|
|
restrictinfo->left_relids = pull_varnos(get_leftop(clause));
|
|
restrictinfo->right_relids = pull_varnos(get_rightop(clause));
|
|
|
|
restrictinfo->clause_relids = bms_union(restrictinfo->left_relids,
|
|
restrictinfo->right_relids);
|
|
|
|
/*
|
|
* Does it look like a normal join clause, i.e., a binary operator
|
|
* relating expressions that come from distinct relations? If so
|
|
* we might be able to use it in a join algorithm. Note that this
|
|
* is a purely syntactic test that is made regardless of context.
|
|
*/
|
|
if (!bms_is_empty(restrictinfo->left_relids) &&
|
|
!bms_is_empty(restrictinfo->right_relids) &&
|
|
!bms_overlap(restrictinfo->left_relids,
|
|
restrictinfo->right_relids))
|
|
restrictinfo->can_join = true;
|
|
}
|
|
else
|
|
{
|
|
/* Not a binary opclause, so mark left/right relid sets as empty */
|
|
restrictinfo->left_relids = NULL;
|
|
restrictinfo->right_relids = NULL;
|
|
/* and get the total relid set the hard way */
|
|
restrictinfo->clause_relids = pull_varnos((Node *) clause);
|
|
}
|
|
|
|
/*
|
|
* Fill in all the cacheable fields with "not yet set" markers. None
|
|
* of these will be computed until/unless needed. Note in particular
|
|
* that we don't mark a binary opclause as mergejoinable or
|
|
* hashjoinable here; that happens only if it appears in the right
|
|
* context (top level of a joinclause list).
|
|
*/
|
|
restrictinfo->eval_cost.startup = -1;
|
|
restrictinfo->this_selec = -1;
|
|
|
|
restrictinfo->mergejoinoperator = InvalidOid;
|
|
restrictinfo->left_sortop = InvalidOid;
|
|
restrictinfo->right_sortop = InvalidOid;
|
|
|
|
restrictinfo->left_pathkey = NIL;
|
|
restrictinfo->right_pathkey = NIL;
|
|
|
|
restrictinfo->left_mergescansel = -1;
|
|
restrictinfo->right_mergescansel = -1;
|
|
|
|
restrictinfo->hashjoinoperator = InvalidOid;
|
|
|
|
restrictinfo->left_bucketsize = -1;
|
|
restrictinfo->right_bucketsize = -1;
|
|
|
|
return restrictinfo;
|
|
}
|
|
|
|
/*
|
|
* Recursively insert sub-RestrictInfo nodes into a boolean expression.
|
|
*/
|
|
static Expr *
|
|
make_sub_restrictinfos(Expr *clause, bool is_pushed_down,
|
|
bool valid_everywhere)
|
|
{
|
|
if (or_clause((Node *) clause))
|
|
{
|
|
List *orlist = NIL;
|
|
ListCell *temp;
|
|
|
|
foreach(temp, ((BoolExpr *) clause)->args)
|
|
orlist = lappend(orlist,
|
|
make_sub_restrictinfos(lfirst(temp),
|
|
is_pushed_down,
|
|
valid_everywhere));
|
|
return make_orclause(orlist);
|
|
}
|
|
else if (and_clause((Node *) clause))
|
|
{
|
|
List *andlist = NIL;
|
|
ListCell *temp;
|
|
|
|
foreach(temp, ((BoolExpr *) clause)->args)
|
|
andlist = lappend(andlist,
|
|
make_sub_restrictinfos(lfirst(temp),
|
|
is_pushed_down,
|
|
valid_everywhere));
|
|
return make_andclause(andlist);
|
|
}
|
|
else
|
|
return (Expr *) make_restrictinfo_internal(clause,
|
|
NULL,
|
|
is_pushed_down,
|
|
valid_everywhere);
|
|
}
|
|
|
|
/*
|
|
* restriction_is_or_clause
|
|
*
|
|
* Returns t iff the restrictinfo node contains an 'or' clause.
|
|
*/
|
|
bool
|
|
restriction_is_or_clause(RestrictInfo *restrictinfo)
|
|
{
|
|
if (restrictinfo->orclause != NULL)
|
|
return true;
|
|
else
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* get_actual_clauses
|
|
*
|
|
* Returns a list containing the bare clauses from 'restrictinfo_list'.
|
|
*/
|
|
List *
|
|
get_actual_clauses(List *restrictinfo_list)
|
|
{
|
|
List *result = NIL;
|
|
ListCell *temp;
|
|
|
|
foreach(temp, restrictinfo_list)
|
|
{
|
|
RestrictInfo *rinfo = (RestrictInfo *) lfirst(temp);
|
|
|
|
Assert(IsA(rinfo, RestrictInfo));
|
|
|
|
result = lappend(result, rinfo->clause);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* get_actual_join_clauses
|
|
*
|
|
* Extract clauses from 'restrictinfo_list', separating those that
|
|
* syntactically match the join level from those that were pushed down.
|
|
*/
|
|
void
|
|
get_actual_join_clauses(List *restrictinfo_list,
|
|
List **joinquals, List **otherquals)
|
|
{
|
|
ListCell *temp;
|
|
|
|
*joinquals = NIL;
|
|
*otherquals = NIL;
|
|
|
|
foreach(temp, restrictinfo_list)
|
|
{
|
|
RestrictInfo *clause = (RestrictInfo *) lfirst(temp);
|
|
|
|
if (clause->is_pushed_down)
|
|
*otherquals = lappend(*otherquals, clause->clause);
|
|
else
|
|
*joinquals = lappend(*joinquals, clause->clause);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* remove_redundant_join_clauses
|
|
*
|
|
* Given a list of RestrictInfo clauses that are to be applied in a join,
|
|
* remove any duplicate or redundant clauses.
|
|
*
|
|
* We must eliminate duplicates when forming the restrictlist for a joinrel,
|
|
* since we will see many of the same clauses arriving from both input
|
|
* relations. Also, if a clause is a mergejoinable clause, it's possible that
|
|
* it is redundant with previous clauses (see optimizer/README for
|
|
* discussion). We detect that case and omit the redundant clause from the
|
|
* result list.
|
|
*
|
|
* The result is a fresh List, but it points to the same member nodes
|
|
* as were in the input.
|
|
*/
|
|
List *
|
|
remove_redundant_join_clauses(Query *root, List *restrictinfo_list,
|
|
Relids outer_relids,
|
|
Relids inner_relids,
|
|
JoinType jointype)
|
|
{
|
|
List *result = NIL;
|
|
ListCell *item;
|
|
QualCost cost;
|
|
|
|
/*
|
|
* If there are any redundant clauses, we want to eliminate the ones
|
|
* that are more expensive in favor of the ones that are less so. Run
|
|
* cost_qual_eval() to ensure the eval_cost fields are set up.
|
|
*/
|
|
cost_qual_eval(&cost, restrictinfo_list);
|
|
|
|
/*
|
|
* We don't have enough knowledge yet to be able to estimate the
|
|
* number of times a clause might be evaluated, so it's hard to weight
|
|
* the startup and per-tuple costs appropriately. For now just weight
|
|
* 'em the same.
|
|
*/
|
|
#define CLAUSECOST(r) ((r)->eval_cost.startup + (r)->eval_cost.per_tuple)
|
|
|
|
foreach(item, restrictinfo_list)
|
|
{
|
|
RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
|
|
RestrictInfo *prevrinfo;
|
|
|
|
/* is it redundant with any prior clause? */
|
|
prevrinfo = join_clause_is_redundant(root, rinfo, result,
|
|
outer_relids, inner_relids,
|
|
jointype);
|
|
if (prevrinfo == NULL)
|
|
{
|
|
/* no, so add it to result list */
|
|
result = lappend(result, rinfo);
|
|
}
|
|
else if (CLAUSECOST(rinfo) < CLAUSECOST(prevrinfo))
|
|
{
|
|
/* keep this one, drop the previous one */
|
|
result = list_delete_ptr(result, prevrinfo);
|
|
result = lappend(result, rinfo);
|
|
}
|
|
/* else, drop this one */
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* select_nonredundant_join_clauses
|
|
*
|
|
* Given a list of RestrictInfo clauses that are to be applied in a join,
|
|
* select the ones that are not redundant with any clause in the
|
|
* reference_list.
|
|
*
|
|
* This is similar to remove_redundant_join_clauses, but we are looking for
|
|
* redundancies with a separate list of clauses (i.e., clauses that have
|
|
* already been applied below the join itself).
|
|
*
|
|
* Note that we assume the given restrictinfo_list has already been checked
|
|
* for local redundancies, so we don't check again.
|
|
*/
|
|
List *
|
|
select_nonredundant_join_clauses(Query *root,
|
|
List *restrictinfo_list,
|
|
List *reference_list,
|
|
Relids outer_relids,
|
|
Relids inner_relids,
|
|
JoinType jointype)
|
|
{
|
|
List *result = NIL;
|
|
ListCell *item;
|
|
|
|
foreach(item, restrictinfo_list)
|
|
{
|
|
RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
|
|
|
|
/* drop it if redundant with any reference clause */
|
|
if (join_clause_is_redundant(root, rinfo, reference_list,
|
|
outer_relids, inner_relids,
|
|
jointype) != NULL)
|
|
continue;
|
|
|
|
/* otherwise, add it to result list */
|
|
result = lappend(result, rinfo);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* join_clause_is_redundant
|
|
* If rinfo is redundant with any clause in reference_list,
|
|
* return one such clause; otherwise return NULL.
|
|
*
|
|
* This is the guts of both remove_redundant_join_clauses and
|
|
* select_nonredundant_join_clauses. See the docs above for motivation.
|
|
*
|
|
* We can detect redundant mergejoinable clauses very cheaply by using their
|
|
* left and right pathkeys, which uniquely identify the sets of equijoined
|
|
* variables in question. All the members of a pathkey set that are in the
|
|
* left relation have already been forced to be equal; likewise for those in
|
|
* the right relation. So, we need to have only one clause that checks
|
|
* equality between any set member on the left and any member on the right;
|
|
* by transitivity, all the rest are then equal.
|
|
*
|
|
* However, clauses that are of the form "var expr = const expr" cannot be
|
|
* eliminated as redundant. This is because when there are const expressions
|
|
* in a pathkey set, generate_implied_equalities() suppresses "var = var"
|
|
* clauses in favor of "var = const" clauses. We cannot afford to drop any
|
|
* of the latter, even though they might seem redundant by the pathkey
|
|
* membership test.
|
|
*
|
|
* Also, we cannot eliminate clauses wherein one side mentions vars from
|
|
* both relations, as in "WHERE t1.f1 = t2.f1 AND t1.f1 = t1.f2 - t2.f2".
|
|
* In this example, "t1.f2 - t2.f2" could not have been computed at all
|
|
* before forming the join of t1 and t2, so it certainly wasn't constrained
|
|
* earlier.
|
|
*
|
|
* Weird special case: if we have two clauses that seem redundant
|
|
* except one is pushed down into an outer join and the other isn't,
|
|
* then they're not really redundant, because one constrains the
|
|
* joined rows after addition of null fill rows, and the other doesn't.
|
|
*/
|
|
static RestrictInfo *
|
|
join_clause_is_redundant(Query *root,
|
|
RestrictInfo *rinfo,
|
|
List *reference_list,
|
|
Relids outer_relids,
|
|
Relids inner_relids,
|
|
JoinType jointype)
|
|
{
|
|
ListCell *refitem;
|
|
|
|
/* always consider exact duplicates redundant */
|
|
foreach(refitem, reference_list)
|
|
{
|
|
RestrictInfo *refrinfo = (RestrictInfo *) lfirst(refitem);
|
|
|
|
if (equal(rinfo, refrinfo))
|
|
return refrinfo;
|
|
}
|
|
|
|
/* check for redundant merge clauses */
|
|
if (rinfo->mergejoinoperator != InvalidOid)
|
|
{
|
|
/* do the cheap test first: is it a "var = const" clause? */
|
|
if (bms_is_empty(rinfo->left_relids) ||
|
|
bms_is_empty(rinfo->right_relids))
|
|
return NULL; /* var = const, so not redundant */
|
|
|
|
/* check for either side mentioning both rels */
|
|
if (bms_overlap(rinfo->left_relids, outer_relids) &&
|
|
bms_overlap(rinfo->left_relids, inner_relids))
|
|
return NULL; /* clause LHS uses both, so not redundant */
|
|
if (bms_overlap(rinfo->right_relids, outer_relids) &&
|
|
bms_overlap(rinfo->right_relids, inner_relids))
|
|
return NULL; /* clause RHS uses both, so not redundant */
|
|
|
|
cache_mergeclause_pathkeys(root, rinfo);
|
|
|
|
foreach(refitem, reference_list)
|
|
{
|
|
RestrictInfo *refrinfo = (RestrictInfo *) lfirst(refitem);
|
|
|
|
if (refrinfo->mergejoinoperator != InvalidOid)
|
|
{
|
|
cache_mergeclause_pathkeys(root, refrinfo);
|
|
|
|
if (rinfo->left_pathkey == refrinfo->left_pathkey &&
|
|
rinfo->right_pathkey == refrinfo->right_pathkey &&
|
|
(rinfo->is_pushed_down == refrinfo->is_pushed_down ||
|
|
!IS_OUTER_JOIN(jointype)))
|
|
{
|
|
/* Yup, it's redundant */
|
|
return refrinfo;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* otherwise, not redundant */
|
|
return NULL;
|
|
}
|