diff options
author | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2008-11-27 21:17:39 +0000 |
---|---|---|
committer | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2008-11-27 21:17:39 +0000 |
commit | a93b3b98cd0ebcec0ec3bc880063581b67f50982 (patch) | |
tree | 6877b991f59c106bde072b4e46dc4466a5414d5d /src/backend | |
parent | fb645f6426a933faefbe43a61646b2ec005333dd (diff) | |
download | postgresql-a93b3b98cd0ebcec0ec3bc880063581b67f50982.tar.gz postgresql-a93b3b98cd0ebcec0ec3bc880063581b67f50982.zip |
Fix bug in the tsvector stats collection function, which caused a crash if
the sample contains just one tsvector, containing only one lexeme.
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/tsearch/ts_typanalyze.c | 43 |
1 files changed, 22 insertions, 21 deletions
diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c index a284360a922..199432097bc 100644 --- a/src/backend/tsearch/ts_typanalyze.c +++ b/src/backend/tsearch/ts_typanalyze.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.2 2008/09/19 19:03:40 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.3 2008/11/27 21:17:39 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -290,26 +290,6 @@ compute_tsvector_stats(VacAttrStats *stats, if (num_mcelem > track_len) num_mcelem = track_len; - /* Grab the minimal and maximal frequencies that will get stored */ - minfreq = sort_table[num_mcelem - 1]->frequency; - maxfreq = sort_table[0]->frequency; - - /* - * We want to store statistics sorted on the lexeme value using first - * length, then byte-for-byte comparison. The reason for doing length - * comparison first is that we don't care about the ordering so long - * as it's consistent, and comparing lengths first gives us a chance - * to avoid a strncmp() call. - * - * This is different from what we do with scalar statistics -- they get - * sorted on frequencies. The rationale is that we usually search - * through most common elements looking for a specific value, so we can - * grab its frequency. When values are presorted we can employ binary - * search for that. See ts_selfuncs.c for a real usage scenario. 
- */ - qsort(sort_table, num_mcelem, sizeof(TrackItem *), - trackitem_compare_lexemes); - /* Generate MCELEM slot entry */ if (num_mcelem > 0) { @@ -317,6 +297,27 @@ compute_tsvector_stats(VacAttrStats *stats, Datum *mcelem_values; float4 *mcelem_freqs; + /* Grab the minimal and maximal frequencies that will get stored */ + minfreq = sort_table[num_mcelem - 1]->frequency; + maxfreq = sort_table[0]->frequency; + + /* + * We want to store statistics sorted on the lexeme value using + * first length, then byte-for-byte comparison. The reason for + * doing length comparison first is that we don't care about the + * ordering so long as it's consistent, and comparing lengths first + * gives us a chance to avoid a strncmp() call. + * + * This is different from what we do with scalar statistics -- they + * get sorted on frequencies. The rationale is that we usually + * search through most common elements looking for a specific + * value, so we can grab its frequency. When values are presorted + * we can employ binary search for that. See ts_selfuncs.c for a + * real usage scenario. + */ + qsort(sort_table, num_mcelem, sizeof(TrackItem *), + trackitem_compare_lexemes); + /* Must copy the target values into anl_context */ old_context = MemoryContextSwitchTo(stats->anl_context); |