Diffstat (limited to 'src/backend/access/heap/tuptoaster.c')
-rw-r--r--   src/backend/access/heap/tuptoaster.c | 92
1 file changed, 66 insertions(+), 26 deletions(-)
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index 3ace2042493..40f5d72056e 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.83 2008/02/29 17:47:41 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.84 2008/03/07 23:20:21 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -576,7 +576,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
     /* ----------
      * Compress and/or save external until data fits into target length
      *
-     * 1: Inline compress attributes with attstorage 'x'
+     * 1: Inline compress attributes with attstorage 'x', and store very
+     *    large attributes with attstorage 'x' or 'e' external immediately
      * 2: Store attributes with attstorage 'x' or 'e' external
      * 3: Inline compress attributes with attstorage 'm'
      * 4: Store attributes with attstorage 'm' external
@@ -595,7 +596,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
     maxDataLen = TOAST_TUPLE_TARGET - hoff;
 
     /*
-     * Look for attributes with attstorage 'x' to compress
+     * Look for attributes with attstorage 'x' to compress.  Also find large
+     * attributes with attstorage 'x' or 'e', and store them external.
      */
     while (heap_compute_data_size(tupleDesc,
                                   toast_values, toast_isnull) > maxDataLen)
@@ -606,7 +608,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
         Datum       new_value;
 
         /*
-         * Search for the biggest yet uncompressed internal attribute
+         * Search for the biggest yet unprocessed internal attribute
          */
         for (i = 0; i < numAttrs; i++)
         {
@@ -616,7 +618,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
                 continue;       /* can't happen, toast_action would be 'p' */
             if (VARATT_IS_COMPRESSED(toast_values[i]))
                 continue;
-            if (att[i]->attstorage != 'x')
+            if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
                 continue;
             if (toast_sizes[i] > biggest_size)
             {
@@ -629,30 +631,58 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
             break;
 
         /*
-         * Attempt to compress it inline
+         * Attempt to compress it inline, if it has attstorage 'x'
          */
         i = biggest_attno;
-        old_value = toast_values[i];
-        new_value = toast_compress_datum(old_value);
+        if (att[i]->attstorage == 'x')
+        {
+            old_value = toast_values[i];
+            new_value = toast_compress_datum(old_value);
 
-        if (DatumGetPointer(new_value) != NULL)
+            if (DatumGetPointer(new_value) != NULL)
+            {
+                /* successful compression */
+                if (toast_free[i])
+                    pfree(DatumGetPointer(old_value));
+                toast_values[i] = new_value;
+                toast_free[i] = true;
+                toast_sizes[i] = VARSIZE(toast_values[i]);
+                need_change = true;
+                need_free = true;
+            }
+            else
+            {
+                /* incompressible, ignore on subsequent compression passes */
+                toast_action[i] = 'x';
+            }
+        }
+        else
         {
-            /* successful compression */
+            /* has attstorage 'e', ignore on subsequent compression passes */
+            toast_action[i] = 'x';
+        }
+
+        /*
+         * If this value is by itself more than maxDataLen (after compression
+         * if any), push it out to the toast table immediately, if possible.
+         * This avoids uselessly compressing other fields in the common case
+         * where we have one long field and several short ones.
+         *
+         * XXX maybe the threshold should be less than maxDataLen?
+         */
+        if (toast_sizes[i] > maxDataLen &&
+            rel->rd_rel->reltoastrelid != InvalidOid)
+        {
+            old_value = toast_values[i];
+            toast_action[i] = 'p';
+            toast_values[i] = toast_save_datum(rel, toast_values[i],
+                                               use_wal, use_fsm);
             if (toast_free[i])
                 pfree(DatumGetPointer(old_value));
-            toast_values[i] = new_value;
             toast_free[i] = true;
-            toast_sizes[i] = VARSIZE(toast_values[i]);
             need_change = true;
             need_free = true;
         }
-        else
-        {
-            /*
-             * incompressible data, ignore on subsequent compression passes
-             */
-            toast_action[i] = 'x';
-        }
     }
 
     /*
@@ -761,9 +791,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
         }
         else
         {
-            /*
-             * incompressible data, ignore on subsequent compression passes
-             */
+            /* incompressible, ignore on subsequent compression passes */
             toast_action[i] = 'x';
         }
     }
@@ -1047,16 +1075,28 @@ toast_compress_datum(Datum value)
     Assert(!VARATT_IS_COMPRESSED(value));
 
     /*
-     * No point in wasting a palloc cycle if value is too short for
-     * compression
+     * No point in wasting a palloc cycle if value size is out of the
+     * allowed range for compression
      */
-    if (valsize < PGLZ_strategy_default->min_input_size)
+    if (valsize < PGLZ_strategy_default->min_input_size ||
+        valsize > PGLZ_strategy_default->max_input_size)
         return PointerGetDatum(NULL);
 
     tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize));
+
+    /*
+     * We recheck the actual size even if pglz_compress() reports success,
+     * because it might be satisfied with having saved as little as one byte
+     * in the compressed data --- which could turn into a net loss once you
+     * consider header and alignment padding.  Worst case, the compressed
+     * format might require three padding bytes (plus header, which is
+     * included in VARSIZE(tmp)), whereas the uncompressed format would take
+     * only one header byte and no padding if the value is short enough.  So
+     * we insist on a savings of more than 2 bytes to ensure we have a gain.
+     */
     if (pglz_compress(VARDATA_ANY(value), valsize,
                       (PGLZ_Header *) tmp, PGLZ_strategy_default) &&
-        VARSIZE(tmp) < VARSIZE_ANY(value))
+        VARSIZE(tmp) < valsize - 2)
     {
         /* successful compression */
         return PointerGetDatum(tmp);
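
The largest behavioral change is in pass 1 of toast_insert_or_update(): an attribute that is still bigger than maxDataLen after any inline compression is now pushed out to the toast table right away, instead of waiting for pass 2 while the loop pointlessly compresses the remaining short fields. The standalone C sketch below models that per-attribute decision; it is not PostgreSQL code, the names (ToastAttr, try_compress, pass_one) are invented for illustration, and it collapses the real code's repeated pick-the-biggest-attribute loop into a single linear pass.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct
{
    char    storage;    /* 'x' = compress then external, 'e' = external only */
    size_t  size;       /* current inline size of the value */
    bool    processed;  /* already handled by this pass */
    bool    external;   /* pushed out to the (hypothetical) toast table */
} ToastAttr;

/* stand-in for toast_compress_datum(): returns the new size, or 0 if no gain */
static size_t
try_compress(size_t size)
{
    size_t  compressed = size / 3;  /* pretend pglz shrinks data to ~1/3 */

    return (compressed < size) ? compressed : 0;
}

static void
pass_one(ToastAttr *att, int natts, size_t max_data_len, bool have_toast_table)
{
    for (int i = 0; i < natts; i++)
    {
        if (att[i].processed ||
            (att[i].storage != 'x' && att[i].storage != 'e'))
            continue;

        /* inline compression is attempted only for attstorage 'x' */
        if (att[i].storage == 'x')
        {
            size_t  compressed = try_compress(att[i].size);

            if (compressed != 0)
                att[i].size = compressed;
        }
        att[i].processed = true;

        /*
         * New in this patch: a value that is by itself still bigger than the
         * target goes external immediately, so the other (short) fields are
         * not compressed for nothing.
         */
        if (att[i].size > max_data_len && have_toast_table)
        {
            att[i].external = true;
            att[i].size = 18;   /* pretend size of an external toast pointer */
        }
    }
}

int
main(void)
{
    ToastAttr   att[] = {
        {'x', 200000, false, false},    /* one very long compressible field */
        {'x', 40, false, false},        /* ...and a couple of short ones */
        {'e', 60, false, false},
    };

    pass_one(att, 3, 2000, true);

    for (int i = 0; i < 3; i++)
        printf("attr %d: size=%zu external=%d\n", i, att[i].size, (int) att[i].external);
    return 0;
}

Run against the sample tuple above, only the 200000-byte field goes external; the short fields are left alone, which is exactly the common case the XXX comment in the patch is worried about.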
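The other change is to toast_compress_datum() itself: the input size must now fall inside the strategy's allowed window, and the compressed copy is kept only if it beats the original by more than 2 bytes, so the compressed datum's header and alignment padding cannot turn a 1- or 2-byte saving into a net loss. A small sketch of that acceptance test follows; the limits are made up and stand in for PGLZ_strategy_default's min_input_size and max_input_size.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* invented limits standing in for PGLZ_strategy_default->{min,max}_input_size */
#define MIN_INPUT_SIZE  32
#define MAX_INPUT_SIZE  (1024 * 1024)

/* the early-exit check: is the value worth handing to the compressor at all? */
static bool
compression_worth_trying(size_t valsize)
{
    return valsize >= MIN_INPUT_SIZE && valsize <= MAX_INPUT_SIZE;
}

/* keep the compressed copy only if it saves more than 2 bytes */
static bool
keep_compressed(size_t valsize, size_t compressed_size)
{
    return compressed_size < valsize - 2;
}

int
main(void)
{
    printf("%d\n", compression_worth_trying(16));   /* 0: below the minimum */
    printf("%d\n", keep_compressed(1000, 998));     /* 0: saves only 2 bytes */
    printf("%d\n", keep_compressed(1000, 600));     /* 1: a real gain */
    return 0;
}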