4 files changed, 139 insertions, 76 deletions
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml
index 674234d8e33..c88d9b54f92 100644
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/ref/select.sgml,v 1.25 2000/02/21 01:13:52 tgl Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/select.sgml,v 1.26 2000/03/15 23:31:19 tgl Exp $
 Postgres documentation
 -->
 
@@ -37,7 +37,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
   
   <refsect2 id="R2-SQL-SELECT-1">
    <refsect2info>
-    <date>1998-09-24</date>
+    <date>2000-03-15</date>
    </refsect2info>
    <title>
     Inputs
@@ -59,10 +59,12 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
       <listitem>
        <para>
 	Specifies another name for a column or an expression using
-	the AS clause.  This name is primarily used to label the output
-	column.  The <replaceable class="PARAMETER">name</replaceable>
-	cannot be used in the WHERE, GROUP BY, or HAVING clauses.
-	It can, however, be referenced in ORDER BY clauses.
+	the AS clause.  This name is primarily used to label the column
+	for display.  It can also be used to refer to the column's value in
+	ORDER BY and GROUP BY clauses.  But the
+	<replaceable class="PARAMETER">name</replaceable>
+	cannot be used in the WHERE or HAVING clauses; write out the
+	expression instead.
        </para>
       </listitem>
      </varlistentry>
@@ -72,7 +74,8 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
      <term>TEMP</term>
      <listitem>
       <para>
-	The table is created unique to this session, and is
+	If TEMPORARY or TEMP is specified,
+	the table is created unique to this session, and is
 	automatically dropped on session exit.
       </para>
      </listitem>
@@ -83,10 +86,10 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
       <listitem>
        <para>
 	If the INTO TABLE clause is specified, the result of the
-	query will be stored in another table with the indicated
+	query will be stored in a new table with the indicated
 	name.
 	The target table (<replaceable class="PARAMETER">new_table</replaceable>) will
-	be created automatically and should not exist before this command.
+	be created automatically and must not exist before this command.
         Refer to <command>SELECT INTO</command> for more information.
 
 	<note>
@@ -143,7 +146,8 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
       <term><replaceable class="PARAMETER">select</replaceable></term>
       <listitem>
        <para>
-	A select statement with all features except the ORDER BY clause.
+	A select statement with all features except the ORDER BY and
+	LIMIT clauses.
        </para>
       </listitem>
      </varlistentry>
@@ -188,7 +192,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
 
  <refsect1 id="R1-SQL-SELECT-1">
   <refsect1info>
-   <date>1998-09-24</date>
+   <date>2000-03-15</date>
   </refsect1info>
   <title>
    Description
@@ -210,7 +214,9 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
   <para>
    <command>DISTINCT ON</command> eliminates rows that match on all the
    specified expressions, keeping only the first row of each set of
-   duplicates.  Note that "the first row" of each set is unpredictable
+   duplicates.  The DISTINCT ON expressions are interpreted using the
+   same rules as for ORDER BY items; see below.
+   Note that "the first row" of each set is unpredictable
    unless <command>ORDER BY</command> is used to ensure that the desired
    row appears first.  For example,
    <programlisting>
@@ -226,21 +232,20 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
 
   <para>
    The GROUP BY clause allows a user to divide a table
-   conceptually into groups.
+   into groups of rows that match on one or more values.
    (See <xref linkend="sql-groupby" endterm="sql-groupby-title">.)
   </para>
 
   <para>
-   The HAVING clause specifies a grouped table derived by the
-   elimination of groups from the result of the previously
-   specified clause.
+   The HAVING clause allows selection of only those groups of rows
+   meeting the specified condition.
    (See <xref linkend="sql-having" endterm="sql-having-title">.)
   </para>
    
   <para>
-   The ORDER BY clause allows a user to specify that he/she
-   wishes the rows sorted according to the ASCending or 
-   DESCending mode operator.
+   The ORDER BY clause causes the returned rows to be sorted in a specified
+   order.  If ORDER BY is not given, the rows are returned in whatever order
+   the system finds cheapest to produce.
    (See <xref linkend="sql-orderby-title" endterm="sql-orderby-title">.)
   </para>
    
@@ -279,7 +284,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
    
   <refsect2 id="SQL-WHERE">
    <refsect2info>
-    <date>1998-09-24</date>
+    <date>2000-03-15</date>
    </refsect2info>
    <title id="sql-where-title">
     WHERE Clause
@@ -312,15 +317,14 @@ WHERE <replaceable class="PARAMETER">boolean_expr</replaceable>
     locally-defined operator, 
     and <replaceable class="PARAMETER">log_op</replaceable> can be one 
     of: AND, OR, NOT.
-    The comparison returns either TRUE or FALSE and all
-    instances will be discarded
-    if the expression evaluates to FALSE.
+    SELECT will ignore all rows for which the WHERE condition does not return
+    TRUE.
    </para>
   </refsect2>
   
   <refsect2 id="SQL-GROUPBY">
    <refsect2info>
-    <date>1998-09-24</date>
+    <date>2000-03-15</date>
    </refsect2info>
    <title id="sql-groupby-title">
     GROUP BY Clause
@@ -334,20 +338,28 @@ GROUP BY <replaceable class="PARAMETER">column</replaceable> [, ...]
    </para>
 
    <para>
-    GROUP BY will condense into a single row all rows that share the
+    GROUP BY will condense into a single row all selected rows that share the
     same values for the grouped columns.  Aggregate functions, if any,
     are computed across all rows making up each group, producing a
     separate value for each group (whereas without GROUP BY, an
     aggregate produces a single value computed across all the selected
-    rows).  When GROUP BY is present, it is not valid to refer to
+    rows).  When GROUP BY is present, it is not valid for the SELECT
+    output expression(s) to refer to
     ungrouped columns except within aggregate functions, since there
     would be more than one possible value to return for an ungrouped column.
    </para>
+
+   <para>
+    An item in GROUP BY can also be the name or ordinal number of an output
+    column (SELECT expression), or it can be an arbitrary expression formed
+    from input-column values.  In case of ambiguity, a GROUP BY name will
+    be interpreted as an input-column name rather than an output column name.
+   </para>
   </refsect2>
 
   <refsect2 id="SQL-HAVING">
    <refsect2info>
-    <date>1998-09-24</date>
+    <date>2000-03-15</date>
    </refsect2info>
    <title id="sql-having-title">
     HAVING Clause
@@ -365,8 +377,12 @@ HAVING <replaceable class="PARAMETER">cond_expr</replaceable>
     
    <para>
     HAVING specifies a grouped table derived by the elimination
-    of groups from the result of the previously specified clause
-    that do not meet the <replaceable class="PARAMETER">cond_expr</replaceable>.</para>
+    of group rows that do not satisfy the
+    <replaceable class="PARAMETER">cond_expr</replaceable>.
+    HAVING is different from WHERE:
+    WHERE filters individual rows before application of GROUP BY,
+    while HAVING filters group rows created by GROUP BY.
+   </para>
 
    <para>
     Each column referenced in 
@@ -378,7 +394,7 @@ HAVING <replaceable class="PARAMETER">cond_expr</replaceable>
   
   <refsect2 id="SQL-ORDERBY">
    <refsect2info>
-    <date>1998-09-24</date>
+    <date>2000-03-15</date>
    </refsect2info>
    <title id="sql-orderby-title">
     ORDER BY Clause
@@ -389,15 +405,15 @@ ORDER BY <replaceable class="PARAMETER">column</replaceable> [ ASC | DESC ] [, .
     </synopsis></para>
     
    <para>
-    <replaceable class="PARAMETER">column</replaceable> can be either a column
-    name or an ordinal number.
+    <replaceable class="PARAMETER">column</replaceable> can be either a
+    result column name or an ordinal number.
    </para>
    <para>
     The ordinal numbers refers to the ordinal (left-to-right) position
-    of the column. This feature makes it possible to define an ordering
+    of the result column. This feature makes it possible to define an ordering
     on the basis of a column that does not have a proper name.
     This is never absolutely necessary because it is always possible
-    to assign a name to a calculated column using the AS clause, e.g.:
+    to assign a name to a result column using the AS clause, e.g.:
     <programlisting>
 SELECT title, date_prod + 1 AS newlen FROM films ORDER BY newlen;
     </programlisting></para>
@@ -410,6 +426,11 @@ SELECT title, date_prod + 1 AS newlen FROM films ORDER BY newlen;
     <programlisting>
 SELECT name FROM distributors ORDER BY code;
     </programlisting>
+    Note that if an ORDER BY item is a simple name that matches both
+    a result column name and an input column name, ORDER BY will interpret
+    it as the result column name.  This is the opposite of the choice that
+    GROUP BY will make in the same situation.  This inconsistency is
+    mandated by the SQL92 standard.
    </para>
     
    <para>
@@ -436,7 +457,7 @@ SELECT name FROM distributors ORDER BY code;
 
     where
     <replaceable class="PARAMETER">table_query</replaceable>
-    specifies any select expression without an ORDER BY clause.
+    specifies any select expression without an ORDER BY or LIMIT clause.
    </para>
     
    <para>
@@ -476,7 +497,7 @@ SELECT name FROM distributors ORDER BY code;
     
     where
     <replaceable class="PARAMETER">table_query</replaceable>
-    specifies any select expression without an ORDER BY clause.
+    specifies any select expression without an ORDER BY or LIMIT clause.
    </para>
 
    <para>
@@ -507,7 +528,7 @@ SELECT name FROM distributors ORDER BY code;
     
     where
     <replaceable class="PARAMETER">table_query</replaceable>
-    specifies any select expression without an ORDER BY clause.
+    specifies any select expression without an ORDER BY or LIMIT clause.
    </para>
 
    <para>
@@ -560,7 +581,7 @@ SELECT name FROM distributors ORDER BY code;
    </para>
 
    <para>
-    As of Postgres 7.0, the
+    As of PostgreSQL 7.0, the
     query optimizer takes LIMIT into account when generating a query plan,
     so you are very likely to get different plans (yielding different row
     orders) depending on what you give for LIMIT and OFFSET.  Thus, using
@@ -765,6 +786,18 @@ SELECT distributors.* WHERE name = 'Westwood';
      The DISTINCT ON phrase is not part of <acronym>SQL92</acronym>.
      Nor are LIMIT and OFFSET.
     </para>
+     
+    <para>
+     In <acronym>SQL92</acronym>, an ORDER BY clause may only use result
+     column names or numbers, while a GROUP BY clause may only use input
+     column names.
+     <productname>Postgres</productname> extends each of these clauses to
+     allow the other choice as well (but it uses the standard's interpretation
+     if there is ambiguity).
+     <productname>Postgres</productname> also allows both clauses to specify
+     arbitrary expressions.  Note that names appearing in an expression will
+     always be taken as input-column names, not as result-column names.
+    </para>
    </refsect3>
 
    <refsect3 id="R3-SQL-UNION-1">
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 53d9b25f11b..96a005ff0d9 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/parser/parse_clause.c,v 1.56 2000/03/14 23:06:32 thomas Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/parser/parse_clause.c,v 1.57 2000/03/15 23:31:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -719,9 +719,9 @@ parseFromClause(ParseState *pstate, List *frmList)
  *	  list as a "resjunk" node.
  *
  * node		the ORDER BY, GROUP BY, or DISTINCT ON expression to be matched
- * tlist	the existing target list (NB: this cannot be NIL, which is a
- *			good thing since we'd be unable to append to it...)
- * clause	identifies clause type (mainly for error messages).
+ * tlist	the existing target list (NB: this will never be NIL, which is a
+ *			good thing since we'd be unable to append to it if it were...)
+ * clause	identifies clause type being processed.
  */
 static TargetEntry *
 findTargetlistEntry(ParseState *pstate, Node *node, List *tlist, int clause)
@@ -733,7 +733,7 @@ findTargetlistEntry(ParseState *pstate, Node *node, List *tlist, int clause)
 	/*----------
 	 * Handle two special cases as mandated by the SQL92 spec:
 	 *
-	 * 1. ORDER BY ColumnName
+	 * 1. Bare ColumnName (no qualifier or subscripts)
 	 *    For a bare identifier, we search for a matching column name
 	 *	  in the existing target list.  Multiple matches are an error
 	 *	  unless they refer to identical values; for example,
@@ -741,49 +741,76 @@ findTargetlistEntry(ParseState *pstate, Node *node, List *tlist, int clause)
 	 *	  but not   SELECT a AS b, b FROM table ORDER BY b
 	 *	  If no match is found, we fall through and treat the identifier
 	 *	  as an expression.
-	 *	  We do NOT attempt this match for GROUP BY, since it is clearly
-	 *	  contrary to the spec to use an output column name in preference
-	 *	  to an underlying column name in GROUP BY.  DISTINCT ON isn't in
-	 *	  the standard, so we can do what we like there; we choose to make
-	 *	  it work like GROUP BY.
+	 *	  For GROUP BY, it is incorrect to match the grouping item against
+	 *	  targetlist entries: according to SQL92, an identifier in GROUP BY
+	 *	  is a reference to a column name exposed by FROM, not to a target
+	 *	  list column.  However, many implementations (including pre-7.0
+	 *	  PostgreSQL) accept this anyway.  So for GROUP BY, we look first
+	 *	  to see if the identifier matches any FROM column name, and only
+	 *	  try for a targetlist name if it doesn't.  This ensures that we
+	 *	  adhere to the spec in the case where the name could be both.
+	 *	  DISTINCT ON isn't in the standard, so we can do what we like there;
+	 *	  we choose to make it work like ORDER BY, on the rather flimsy
+	 *	  grounds that ordinary DISTINCT works on targetlist entries.
 	 *
-	 * 2. ORDER BY/GROUP BY/DISTINCT ON IntegerConstant
+	 * 2. IntegerConstant
 	 *	  This means to use the n'th item in the existing target list.
 	 *	  Note that it would make no sense to order/group/distinct by an
 	 *	  actual constant, so this does not create a conflict with our
 	 *	  extension to order/group by an expression.
-	 *	  I believe that GROUP BY column-number is not sanctioned by SQL92,
-	 *	  but since the standard has no other behavior defined for this
-	 *	  syntax, we may as well continue to support our past behavior.
+	 *	  GROUP BY column-number is not allowed by SQL92, but since
+	 *	  the standard has no other behavior defined for this syntax,
+	 *	  we may as well accept this common extension.
 	 *
-	 * Note that pre-existing resjunk targets must not be used in either case.
+	 * Note that pre-existing resjunk targets must not be used in either case,
+	 * since the user didn't write them in his SELECT list.
+	 *
+	 * If neither special case applies, fall through to treat the item as
+	 * an expression.
 	 *----------
 	 */
-	if (clause == ORDER_CLAUSE &&
-		IsA(node, Ident) && ((Ident *) node)->indirection == NIL)
+	if (IsA(node, Ident) && ((Ident *) node)->indirection == NIL)
 	{
 		char	   *name = ((Ident *) node)->name;
-		foreach(tl, tlist)
+
+		if (clause == GROUP_CLAUSE)
 		{
-			TargetEntry *tle = (TargetEntry *) lfirst(tl);
-			Resdom	   *resnode = tle->resdom;
+			/*
+			 * In GROUP BY, we must prefer a match against a FROM-clause
+			 * column to one against the targetlist.  Look to see if there is
+			 * a matching column.  If so, fall through to let transformExpr()
+			 * do the rest.  NOTE: if name could refer ambiguously to more
+			 * than one column name exposed by FROM, colnameRangeTableEntry
+			 * will elog(ERROR).  That's just what we want here.
+			 */
+			if (colnameRangeTableEntry(pstate, name) != NULL)
+				name = NULL;
+		}
 
-			if (!resnode->resjunk &&
-				strcmp(resnode->resname, name) == 0)
+		if (name != NULL)
+		{
+			foreach(tl, tlist)
 			{
-				if (target_result != NULL)
+				TargetEntry *tle = (TargetEntry *) lfirst(tl);
+				Resdom	   *resnode = tle->resdom;
+
+				if (!resnode->resjunk &&
+					strcmp(resnode->resname, name) == 0)
 				{
-					if (! equal(target_result->expr, tle->expr))
-						elog(ERROR, "%s '%s' is ambiguous",
-							 clauseText[clause], name);
+					if (target_result != NULL)
+					{
+						if (! equal(target_result->expr, tle->expr))
+							elog(ERROR, "%s '%s' is ambiguous",
+								 clauseText[clause], name);
+					}
+					else
+						target_result = tle;
+					/* Stay in loop to check for ambiguity */
 				}
-				else
-					target_result = tle;
-				/* Stay in loop to check for ambiguity */
 			}
+			if (target_result != NULL)
+				return target_result; /* return the first match */
 		}
-		if (target_result != NULL)
-			return target_result; /* return the first match */
 	}
 	if (IsA(node, A_Const))
 	{
diff --git a/src/test/regress/expected/numerology.out b/src/test/regress/expected/numerology.out
index 8e13a9e6acb..c5ad36fdd32 100644
--- a/src/test/regress/expected/numerology.out
+++ b/src/test/regress/expected/numerology.out
@@ -96,14 +96,18 @@ SELECT f1 AS two, max(f3) AS max_float, min(f3) as min_float
    2 |                    0 | -1.2345678901234e+200
 (2 rows)
 
--- Postgres used to accept this, but it is clearly against SQL92 to
--- interpret GROUP BY arguments as result column names; they should
--- be source column names *only*.  An error is expected.
+-- GROUP BY a result column name is not legal per SQL92, but we accept it
+-- anyway (if the name is not the name of any column exposed by FROM).
 SELECT f1 AS two, max(f3) AS max_float, min(f3) AS min_float
   FROM TEMP_GROUP
   GROUP BY two
   ORDER BY two, max_float, min_float;
-ERROR:  Attribute 'two' not found
+ two |      max_float       |       min_float       
+-----+----------------------+-----------------------
+   1 | 1.2345678901234e+200 |                     0
+   2 |                    0 | -1.2345678901234e+200
+(2 rows)
+
 SELECT f1 AS two, (max(f3) + 1) AS max_plus_1, (min(f3) - 1) AS min_minus_1
   FROM TEMP_GROUP
   GROUP BY f1
diff --git a/src/test/regress/sql/numerology.sql b/src/test/regress/sql/numerology.sql
index b30f008bffb..2220fdba385 100644
--- a/src/test/regress/sql/numerology.sql
+++ b/src/test/regress/sql/numerology.sql
@@ -70,9 +70,8 @@ SELECT f1 AS two, max(f3) AS max_float, min(f3) as min_float
   GROUP BY f1
   ORDER BY two, max_float, min_float;
 
--- Postgres used to accept this, but it is clearly against SQL92 to
--- interpret GROUP BY arguments as result column names; they should
--- be source column names *only*.  An error is expected.
+-- GROUP BY a result column name is not legal per SQL92, but we accept it
+-- anyway (if the name is not the name of any column exposed by FROM).
 SELECT f1 AS two, max(f3) AS max_float, min(f3) AS min_float
   FROM TEMP_GROUP
   GROUP BY two