aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/src/sgml/ref/select.sgml109
-rw-r--r--src/backend/parser/parse_clause.c89
-rw-r--r--src/test/regress/expected/numerology.out12
-rw-r--r--src/test/regress/sql/numerology.sql5
4 files changed, 139 insertions, 76 deletions
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml
index 674234d8e33..c88d9b54f92 100644
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -1,5 +1,5 @@
<!--
-$Header: /cvsroot/pgsql/doc/src/sgml/ref/select.sgml,v 1.25 2000/02/21 01:13:52 tgl Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/select.sgml,v 1.26 2000/03/15 23:31:19 tgl Exp $
Postgres documentation
-->
@@ -37,7 +37,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
<refsect2 id="R2-SQL-SELECT-1">
<refsect2info>
- <date>1998-09-24</date>
+ <date>2000-03-15</date>
</refsect2info>
<title>
Inputs
@@ -59,10 +59,12 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
<listitem>
<para>
Specifies another name for a column or an expression using
- the AS clause. This name is primarily used to label the output
- column. The <replaceable class="PARAMETER">name</replaceable>
- cannot be used in the WHERE, GROUP BY, or HAVING clauses.
- It can, however, be referenced in ORDER BY clauses.
+ the AS clause. This name is primarily used to label the column
+ for display. It can also be used to refer to the column's value in
+ ORDER BY and GROUP BY clauses. But the
+ <replaceable class="PARAMETER">name</replaceable>
+ cannot be used in the WHERE or HAVING clauses; write out the
+ expression instead.
</para>
</listitem>
</varlistentry>
@@ -72,7 +74,8 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
<term>TEMP</term>
<listitem>
<para>
- The table is created unique to this session, and is
+ If TEMPORARY or TEMP is specified,
+ the table is created unique to this session, and is
automatically dropped on session exit.
</para>
</listitem>
@@ -83,10 +86,10 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
<listitem>
<para>
If the INTO TABLE clause is specified, the result of the
- query will be stored in another table with the indicated
+ query will be stored in a new table with the indicated
name.
The target table (<replaceable class="PARAMETER">new_table</replaceable>) will
- be created automatically and should not exist before this command.
+ be created automatically and must not exist before this command.
Refer to <command>SELECT INTO</command> for more information.
<note>
@@ -143,7 +146,8 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
<term><replaceable class="PARAMETER">select</replaceable></term>
<listitem>
<para>
- A select statement with all features except the ORDER BY clause.
+ A select statement with all features except the ORDER BY and
+ LIMIT clauses.
</para>
</listitem>
</varlistentry>
@@ -188,7 +192,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
<refsect1 id="R1-SQL-SELECT-1">
<refsect1info>
- <date>1998-09-24</date>
+ <date>2000-03-15</date>
</refsect1info>
<title>
Description
@@ -210,7 +214,9 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
<para>
<command>DISTINCT ON</command> eliminates rows that match on all the
specified expressions, keeping only the first row of each set of
- duplicates. Note that "the first row" of each set is unpredictable
+ duplicates. The DISTINCT ON expressions are interpreted using the
+ same rules as for ORDER BY items; see below.
+ Note that "the first row" of each set is unpredictable
unless <command>ORDER BY</command> is used to ensure that the desired
row appears first. For example,
<programlisting>
@@ -226,21 +232,20 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
<para>
The GROUP BY clause allows a user to divide a table
- conceptually into groups.
+ into groups of rows that match on one or more values.
(See <xref linkend="sql-groupby" endterm="sql-groupby-title">.)
</para>
<para>
- The HAVING clause specifies a grouped table derived by the
- elimination of groups from the result of the previously
- specified clause.
+ The HAVING clause allows selection of only those groups of rows
+ meeting the specified condition.
(See <xref linkend="sql-having" endterm="sql-having-title">.)
</para>
<para>
- The ORDER BY clause allows a user to specify that he/she
- wishes the rows sorted according to the ASCending or
- DESCending mode operator.
+ The ORDER BY clause causes the returned rows to be sorted in a specified
+ order. If ORDER BY is not given, the rows are returned in whatever order
+ the system finds cheapest to produce.
(See <xref linkend="sql-orderby-title" endterm="sql-orderby-title">.)
</para>
@@ -279,7 +284,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
<refsect2 id="SQL-WHERE">
<refsect2info>
- <date>1998-09-24</date>
+ <date>2000-03-15</date>
</refsect2info>
<title id="sql-where-title">
WHERE Clause
@@ -312,15 +317,14 @@ WHERE <replaceable class="PARAMETER">boolean_expr</replaceable>
locally-defined operator,
and <replaceable class="PARAMETER">log_op</replaceable> can be one
of: AND, OR, NOT.
- The comparison returns either TRUE or FALSE and all
- instances will be discarded
- if the expression evaluates to FALSE.
+ SELECT will ignore all rows for which the WHERE condition does not return
+ TRUE.
</para>
</refsect2>
<refsect2 id="SQL-GROUPBY">
<refsect2info>
- <date>1998-09-24</date>
+ <date>2000-03-15</date>
</refsect2info>
<title id="sql-groupby-title">
GROUP BY Clause
@@ -334,20 +338,28 @@ GROUP BY <replaceable class="PARAMETER">column</replaceable> [, ...]
</para>
<para>
- GROUP BY will condense into a single row all rows that share the
+ GROUP BY will condense into a single row all selected rows that share the
same values for the grouped columns. Aggregate functions, if any,
are computed across all rows making up each group, producing a
separate value for each group (whereas without GROUP BY, an
aggregate produces a single value computed across all the selected
- rows). When GROUP BY is present, it is not valid to refer to
+ rows). When GROUP BY is present, it is not valid for the SELECT
+ output expression(s) to refer to
ungrouped columns except within aggregate functions, since there
would be more than one possible value to return for an ungrouped column.
</para>
+
+ <para>
+ An item in GROUP BY can also be the name or ordinal number of an output
+ column (SELECT expression), or it can be an arbitrary expression formed
+ from input-column values. In case of ambiguity, a GROUP BY name will
+ be interpreted as an input-column name rather than an output column name.
+ </para>
</refsect2>
<refsect2 id="SQL-HAVING">
<refsect2info>
- <date>1998-09-24</date>
+ <date>2000-03-15</date>
</refsect2info>
<title id="sql-having-title">
HAVING Clause
@@ -365,8 +377,12 @@ HAVING <replaceable class="PARAMETER">cond_expr</replaceable>
<para>
HAVING specifies a grouped table derived by the elimination
- of groups from the result of the previously specified clause
- that do not meet the <replaceable class="PARAMETER">cond_expr</replaceable>.</para>
+ of group rows that do not satisfy the
+ <replaceable class="PARAMETER">cond_expr</replaceable>.
+ HAVING is different from WHERE:
+ WHERE filters individual rows before application of GROUP BY,
+ while HAVING filters group rows created by GROUP BY.
+ </para>
<para>
Each column referenced in
@@ -378,7 +394,7 @@ HAVING <replaceable class="PARAMETER">cond_expr</replaceable>
<refsect2 id="SQL-ORDERBY">
<refsect2info>
- <date>1998-09-24</date>
+ <date>2000-03-15</date>
</refsect2info>
<title id="sql-orderby-title">
ORDER BY Clause
@@ -389,15 +405,15 @@ ORDER BY <replaceable class="PARAMETER">column</replaceable> [ ASC | DESC ] [, .
</synopsis></para>
<para>
- <replaceable class="PARAMETER">column</replaceable> can be either a column
- name or an ordinal number.
+ <replaceable class="PARAMETER">column</replaceable> can be either a
+ result column name or an ordinal number.
</para>
<para>
The ordinal numbers refers to the ordinal (left-to-right) position
- of the column. This feature makes it possible to define an ordering
+ of the result column. This feature makes it possible to define an ordering
on the basis of a column that does not have a proper name.
This is never absolutely necessary because it is always possible
- to assign a name to a calculated column using the AS clause, e.g.:
+ to assign a name to a result column using the AS clause, e.g.:
<programlisting>
SELECT title, date_prod + 1 AS newlen FROM films ORDER BY newlen;
</programlisting></para>
@@ -410,6 +426,11 @@ SELECT title, date_prod + 1 AS newlen FROM films ORDER BY newlen;
<programlisting>
SELECT name FROM distributors ORDER BY code;
</programlisting>
+ Note that if an ORDER BY item is a simple name that matches both
+ a result column name and an input column name, ORDER BY will interpret
+ it as the result column name. This is the opposite of the choice that
+ GROUP BY will make in the same situation. This inconsistency is
+ mandated by the SQL92 standard.
</para>
<para>
@@ -436,7 +457,7 @@ SELECT name FROM distributors ORDER BY code;
where
<replaceable class="PARAMETER">table_query</replaceable>
- specifies any select expression without an ORDER BY clause.
+ specifies any select expression without an ORDER BY or LIMIT clause.
</para>
<para>
@@ -476,7 +497,7 @@ SELECT name FROM distributors ORDER BY code;
where
<replaceable class="PARAMETER">table_query</replaceable>
- specifies any select expression without an ORDER BY clause.
+ specifies any select expression without an ORDER BY or LIMIT clause.
</para>
<para>
@@ -507,7 +528,7 @@ SELECT name FROM distributors ORDER BY code;
where
<replaceable class="PARAMETER">table_query</replaceable>
- specifies any select expression without an ORDER BY clause.
+ specifies any select expression without an ORDER BY or LIMIT clause.
</para>
<para>
@@ -560,7 +581,7 @@ SELECT name FROM distributors ORDER BY code;
</para>
<para>
- As of Postgres 7.0, the
+ As of PostgreSQL 7.0, the
query optimizer takes LIMIT into account when generating a query plan,
so you are very likely to get different plans (yielding different row
orders) depending on what you give for LIMIT and OFFSET. Thus, using
@@ -765,6 +786,18 @@ SELECT distributors.* WHERE name = 'Westwood';
The DISTINCT ON phrase is not part of <acronym>SQL92</acronym>.
Nor are LIMIT and OFFSET.
</para>
+
+ <para>
+ In <acronym>SQL92</acronym>, an ORDER BY clause may only use result
+ column names or numbers, while a GROUP BY clause may only use input
+ column names.
+ <productname>Postgres</productname> extends each of these clauses to
+ allow the other choice as well (but it uses the standard's interpretation
+ if there is ambiguity).
+ <productname>Postgres</productname> also allows both clauses to specify
+ arbitrary expressions. Note that names appearing in an expression will
+ always be taken as input-column names, not as result-column names.
+ </para>
</refsect3>
<refsect3 id="R3-SQL-UNION-1">
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 53d9b25f11b..96a005ff0d9 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/parser/parse_clause.c,v 1.56 2000/03/14 23:06:32 thomas Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/parse_clause.c,v 1.57 2000/03/15 23:31:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -719,9 +719,9 @@ parseFromClause(ParseState *pstate, List *frmList)
* list as a "resjunk" node.
*
* node the ORDER BY, GROUP BY, or DISTINCT ON expression to be matched
- * tlist the existing target list (NB: this cannot be NIL, which is a
- * good thing since we'd be unable to append to it...)
- * clause identifies clause type (mainly for error messages).
+ * tlist the existing target list (NB: this will never be NIL, which is a
+ * good thing since we'd be unable to append to it if it were...)
+ * clause identifies clause type being processed.
*/
static TargetEntry *
findTargetlistEntry(ParseState *pstate, Node *node, List *tlist, int clause)
@@ -733,7 +733,7 @@ findTargetlistEntry(ParseState *pstate, Node *node, List *tlist, int clause)
/*----------
* Handle two special cases as mandated by the SQL92 spec:
*
- * 1. ORDER BY ColumnName
+ * 1. Bare ColumnName (no qualifier or subscripts)
* For a bare identifier, we search for a matching column name
* in the existing target list. Multiple matches are an error
* unless they refer to identical values; for example,
@@ -741,49 +741,76 @@ findTargetlistEntry(ParseState *pstate, Node *node, List *tlist, int clause)
* but not SELECT a AS b, b FROM table ORDER BY b
* If no match is found, we fall through and treat the identifier
* as an expression.
- * We do NOT attempt this match for GROUP BY, since it is clearly
- * contrary to the spec to use an output column name in preference
- * to an underlying column name in GROUP BY. DISTINCT ON isn't in
- * the standard, so we can do what we like there; we choose to make
- * it work like GROUP BY.
+ * For GROUP BY, it is incorrect to match the grouping item against
+ * targetlist entries: according to SQL92, an identifier in GROUP BY
+ * is a reference to a column name exposed by FROM, not to a target
+ * list column. However, many implementations (including pre-7.0
+ * PostgreSQL) accept this anyway. So for GROUP BY, we look first
+ * to see if the identifier matches any FROM column name, and only
+ * try for a targetlist name if it doesn't. This ensures that we
+ * adhere to the spec in the case where the name could be both.
+ * DISTINCT ON isn't in the standard, so we can do what we like there;
+ * we choose to make it work like ORDER BY, on the rather flimsy
+ * grounds that ordinary DISTINCT works on targetlist entries.
*
- * 2. ORDER BY/GROUP BY/DISTINCT ON IntegerConstant
+ * 2. IntegerConstant
* This means to use the n'th item in the existing target list.
* Note that it would make no sense to order/group/distinct by an
* actual constant, so this does not create a conflict with our
* extension to order/group by an expression.
- * I believe that GROUP BY column-number is not sanctioned by SQL92,
- * but since the standard has no other behavior defined for this
- * syntax, we may as well continue to support our past behavior.
+ * GROUP BY column-number is not allowed by SQL92, but since
+ * the standard has no other behavior defined for this syntax,
+ * we may as well accept this common extension.
*
- * Note that pre-existing resjunk targets must not be used in either case.
+ * Note that pre-existing resjunk targets must not be used in either case,
+ * since the user didn't write them in his SELECT list.
+ *
+ * If neither special case applies, fall through to treat the item as
+ * an expression.
*----------
*/
- if (clause == ORDER_CLAUSE &&
- IsA(node, Ident) && ((Ident *) node)->indirection == NIL)
+ if (IsA(node, Ident) && ((Ident *) node)->indirection == NIL)
{
char *name = ((Ident *) node)->name;
- foreach(tl, tlist)
+
+ if (clause == GROUP_CLAUSE)
{
- TargetEntry *tle = (TargetEntry *) lfirst(tl);
- Resdom *resnode = tle->resdom;
+ /*
+ * In GROUP BY, we must prefer a match against a FROM-clause
+ * column to one against the targetlist. Look to see if there is
+ * a matching column. If so, fall through to let transformExpr()
+ * do the rest. NOTE: if name could refer ambiguously to more
+ * than one column name exposed by FROM, colnameRangeTableEntry
+ * will elog(ERROR). That's just what we want here.
+ */
+ if (colnameRangeTableEntry(pstate, name) != NULL)
+ name = NULL;
+ }
- if (!resnode->resjunk &&
- strcmp(resnode->resname, name) == 0)
+ if (name != NULL)
+ {
+ foreach(tl, tlist)
{
- if (target_result != NULL)
+ TargetEntry *tle = (TargetEntry *) lfirst(tl);
+ Resdom *resnode = tle->resdom;
+
+ if (!resnode->resjunk &&
+ strcmp(resnode->resname, name) == 0)
{
- if (! equal(target_result->expr, tle->expr))
- elog(ERROR, "%s '%s' is ambiguous",
- clauseText[clause], name);
+ if (target_result != NULL)
+ {
+ if (! equal(target_result->expr, tle->expr))
+ elog(ERROR, "%s '%s' is ambiguous",
+ clauseText[clause], name);
+ }
+ else
+ target_result = tle;
+ /* Stay in loop to check for ambiguity */
}
- else
- target_result = tle;
- /* Stay in loop to check for ambiguity */
}
+ if (target_result != NULL)
+ return target_result; /* return the first match */
}
- if (target_result != NULL)
- return target_result; /* return the first match */
}
if (IsA(node, A_Const))
{
diff --git a/src/test/regress/expected/numerology.out b/src/test/regress/expected/numerology.out
index 8e13a9e6acb..c5ad36fdd32 100644
--- a/src/test/regress/expected/numerology.out
+++ b/src/test/regress/expected/numerology.out
@@ -96,14 +96,18 @@ SELECT f1 AS two, max(f3) AS max_float, min(f3) as min_float
2 | 0 | -1.2345678901234e+200
(2 rows)
--- Postgres used to accept this, but it is clearly against SQL92 to
--- interpret GROUP BY arguments as result column names; they should
--- be source column names *only*. An error is expected.
+-- GROUP BY a result column name is not legal per SQL92, but we accept it
+-- anyway (if the name is not the name of any column exposed by FROM).
SELECT f1 AS two, max(f3) AS max_float, min(f3) AS min_float
FROM TEMP_GROUP
GROUP BY two
ORDER BY two, max_float, min_float;
-ERROR: Attribute 'two' not found
+ two | max_float | min_float
+-----+----------------------+-----------------------
+ 1 | 1.2345678901234e+200 | 0
+ 2 | 0 | -1.2345678901234e+200
+(2 rows)
+
SELECT f1 AS two, (max(f3) + 1) AS max_plus_1, (min(f3) - 1) AS min_minus_1
FROM TEMP_GROUP
GROUP BY f1
diff --git a/src/test/regress/sql/numerology.sql b/src/test/regress/sql/numerology.sql
index b30f008bffb..2220fdba385 100644
--- a/src/test/regress/sql/numerology.sql
+++ b/src/test/regress/sql/numerology.sql
@@ -70,9 +70,8 @@ SELECT f1 AS two, max(f3) AS max_float, min(f3) as min_float
GROUP BY f1
ORDER BY two, max_float, min_float;
--- Postgres used to accept this, but it is clearly against SQL92 to
--- interpret GROUP BY arguments as result column names; they should
--- be source column names *only*. An error is expected.
+-- GROUP BY a result column name is not legal per SQL92, but we accept it
+-- anyway (if the name is not the name of any column exposed by FROM).
SELECT f1 AS two, max(f3) AS max_float, min(f3) AS min_float
FROM TEMP_GROUP
GROUP BY two