Fix bug in the tsvector stats collection function, which caused a crash if

the sample contains just one tsvector, containing only one lexeme.
author: Heikki Linnakangas <heikki.linnakangas@iki.fi> 2008-11-27 21:17:39 +0000
committer: Heikki Linnakangas <heikki.linnakangas@iki.fi> 2008-11-27 21:17:39 +0000
commit: a93b3b98cd0ebcec0ec3bc880063581b67f50982 (patch)
tree: 6877b991f59c106bde072b4e46dc4466a5414d5d /src/backend
parent: fb645f6426a933faefbe43a61646b2ec005333dd (diff)
download: postgresql-a93b3b98cd0ebcec0ec3bc880063581b67f50982.tar.gz
postgresql-a93b3b98cd0ebcec0ec3bc880063581b67f50982.zip
1 files changed, 22 insertions, 21 deletions
diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c
index a284360a922..199432097bc 100644
--- a/src/backend/tsearch/ts_typanalyze.c
+++ b/src/backend/tsearch/ts_typanalyze.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.2 2008/09/19 19:03:40 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.3 2008/11/27 21:17:39 heikki Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -290,26 +290,6 @@ compute_tsvector_stats(VacAttrStats *stats,
 		if (num_mcelem > track_len)
 			num_mcelem = track_len;
 
-		/* Grab the minimal and maximal frequencies that will get stored */
-		minfreq = sort_table[num_mcelem - 1]->frequency;
-		maxfreq = sort_table[0]->frequency;
-
-		/*
-		 * We want to store statistics sorted on the lexeme value using first
-		 * length, then byte-for-byte comparison. The reason for doing length
-		 * comparison first is that we don't care about the ordering so long
-		 * as it's consistent, and comparing lengths first gives us a chance
-		 * to avoid a strncmp() call.
-		 *
-		 * This is different from what we do with scalar statistics -- they get
-		 * sorted on frequencies. The rationale is that we usually search
-		 * through most common elements looking for a specific value, so we can
-		 * grab its frequency.  When values are presorted we can employ binary
-		 * search for that.  See ts_selfuncs.c for a real usage scenario.
-		 */
-		qsort(sort_table, num_mcelem, sizeof(TrackItem *),
-			  trackitem_compare_lexemes);
-
 		/* Generate MCELEM slot entry */
 		if (num_mcelem > 0)
 		{
@@ -317,6 +297,27 @@ compute_tsvector_stats(VacAttrStats *stats,
 			Datum			*mcelem_values;
 			float4			*mcelem_freqs;
 
+			/* Grab the minimal and maximal frequencies that will get stored */
+			minfreq = sort_table[num_mcelem - 1]->frequency;
+			maxfreq = sort_table[0]->frequency;
+
+			/*
+			 * We want to store statistics sorted on the lexeme value using
+			 * first length, then byte-for-byte comparison. The reason for
+			 * doing length comparison first is that we don't care about the
+			 * ordering so long as it's consistent, and comparing lengths first
+			 * gives us a chance to avoid a strncmp() call.
+			 *
+			 * This is different from what we do with scalar statistics -- they
+			 * get sorted on frequencies. The rationale is that we usually
+			 * search through most common elements looking for a specific
+			 * value, so we can grab its frequency.  When values are presorted
+			 * we can employ binary search for that.  See ts_selfuncs.c for a
+			 * real usage scenario.
+			 */
+			qsort(sort_table, num_mcelem, sizeof(TrackItem *),
+				  trackitem_compare_lexemes);
+
 			/* Must copy the target values into anl_context */
 			old_context = MemoryContextSwitchTo(stats->anl_context);
author	Heikki Linnakangas <heikki.linnakangas@iki.fi>	2008-11-27 21:17:39 +0000
committer	Heikki Linnakangas <heikki.linnakangas@iki.fi>	2008-11-27 21:17:39 +0000
commit	a93b3b98cd0ebcec0ec3bc880063581b67f50982 (patch)
tree	6877b991f59c106bde072b4e46dc4466a5414d5d /src/backend
parent	fb645f6426a933faefbe43a61646b2ec005333dd (diff)
download	postgresql-a93b3b98cd0ebcec0ec3bc880063581b67f50982.tar.gz postgresql-a93b3b98cd0ebcec0ec3bc880063581b67f50982.zip