Marginal hacking to improve the speed of COPY OUT.

I had found in a bit of profiling that CopyAttributeOutText was taking an unreasonable fraction of the backend run time (like 66%!) on the following trivial test case:

$ time psql -c "copy (select repeat('xyzzy',50) from generate_series(1,10000000)) to stdout" regression >/dev/null

The time is all being spent on scanning the string for characters to be escaped, and most of the time there aren't any. Some tweaking to take as many tests as possible out of the inner loop reduced the runtime of this example by more than 10%. In a real-world case it wouldn't be as useful a speedup, but it still seems worth adding a few lines here.
author: Tom Lane <tgl@sss.pgh.pa.us> 2007-06-17 23:39:28 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2007-06-17 23:39:28 +0000
commit: 011b51cb7e912af2674ee6685a51651182ceab4f (patch)
tree: a20466763833d3b4db93a2264e3c0900af67bb36 /src/backend/commands/copy.c
parent: 6775c0108039a13458004d893e4a17b4ae3fae9d (diff)
download: postgresql-011b51cb7e912af2674ee6685a51651182ceab4f.tar.gz
postgresql-011b51cb7e912af2674ee6685a51651182ceab4f.zip
1 files changed, 74 insertions, 54 deletions
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 885411cf2c0..493d2944f1a 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.283 2007/04/27 22:05:46 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.284 2007/06/17 23:39:28 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3075,68 +3075,88 @@ CopyAttributeOutText(CopyState cstate, char *string)
 	 * We have to grovel through the string searching for control characters
 	 * and instances of the delimiter character.  In most cases, though, these
 	 * are infrequent.	To avoid overhead from calling CopySendData once per
-	 * character, we dump out all characters between replaceable characters in
+	 * character, we dump out all characters between escaped characters in
 	 * a single call.  The loop invariant is that the data from "start" to
 	 * "ptr" can be sent literally, but hasn't yet been.
+	 *
+	 * We can skip pg_encoding_mblen() overhead when encoding is safe, because
+	 * in valid backend encodings, extra bytes of a multibyte character never
+	 * look like ASCII.  This loop is sufficiently performance-critical that
+	 * it's worth making two copies of it to get the IS_HIGHBIT_SET() test
+	 * out of the normal safe-encoding path.
 	 */
-	start = ptr;
-	while ((c = *ptr) != '\0')
+	if (cstate->encoding_embeds_ascii)
 	{
-		switch (c)
+		start = ptr;
+		while ((c = *ptr) != '\0')
 		{
-			case '\b':
-				DUMPSOFAR();
-				CopySendString(cstate, "\\b");
-				start = ++ptr;
-				break;
-			case '\f':
-				DUMPSOFAR();
-				CopySendString(cstate, "\\f");
-				start = ++ptr;
-				break;
-			case '\n':
-				DUMPSOFAR();
-				CopySendString(cstate, "\\n");
-				start = ++ptr;
-				break;
-			case '\r':
-				DUMPSOFAR();
-				CopySendString(cstate, "\\r");
-				start = ++ptr;
-				break;
-			case '\t':
-				DUMPSOFAR();
-				CopySendString(cstate, "\\t");
-				start = ++ptr;
-				break;
-			case '\v':
+			if (c == '\\' || c == delimc)
+			{
 				DUMPSOFAR();
-				CopySendString(cstate, "\\v");
-				start = ++ptr;
-				break;
-			case '\\':
+				CopySendChar(cstate, '\\');
+				start = ptr++;		/* we include char in next run */
+			}
+			else if ((unsigned char) c < (unsigned char) 0x20)
+			{
+				switch (c)
+				{
+					/* \r and \n must be escaped, the others are traditional */
+					case '\b':
+					case '\f':
+					case '\n':
+					case '\r':
+					case '\t':
+					case '\v':
+						DUMPSOFAR();
+						CopySendChar(cstate, '\\');
+						start = ptr++;	/* we include char in next run */
+						break;
+					default:
+						/* All ASCII control chars are length 1 */
+						ptr++;
+						break;
+				}
+			}
+			else if (IS_HIGHBIT_SET(c))
+				ptr += pg_encoding_mblen(cstate->client_encoding, ptr);
+			else
+				ptr++;
+		}
+	}
+	else
+	{
+		start = ptr;
+		while ((c = *ptr) != '\0')
+		{
+			if (c == '\\' || c == delimc)
+			{
 				DUMPSOFAR();
-				CopySendString(cstate, "\\\\");
-				start = ++ptr;
-				break;
-			default:
-				if (c == delimc)
+				CopySendChar(cstate, '\\');
+				start = ptr++;		/* we include char in next run */
+			}
+			else if ((unsigned char) c < (unsigned char) 0x20)
+			{
+				switch (c)
 				{
-					DUMPSOFAR();
-					CopySendChar(cstate, '\\');
-					start = ptr;	/* we include char in next run */
+					/* \r and \n must be escaped, the others are traditional */
+					case '\b':
+					case '\f':
+					case '\n':
+					case '\r':
+					case '\t':
+					case '\v':
+						DUMPSOFAR();
+						CopySendChar(cstate, '\\');
+						start = ptr++;	/* we include char in next run */
+						break;
+					default:
+						/* All ASCII control chars are length 1 */
+						ptr++;
+						break;
 				}
-
-				/*
-				 * We can skip pg_encoding_mblen() overhead when encoding is
-				 * safe, because in valid backend encodings, extra bytes of a
-				 * multibyte character never look like ASCII.
-				 */
-				if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
-					ptr += pg_encoding_mblen(cstate->client_encoding, ptr);
-				else
-					ptr++;
-				break;
+			}
+			else
+				ptr++;
 		}
 	}
author	Tom Lane <tgl@sss.pgh.pa.us>	2007-06-17 23:39:28 +0000
committer	Tom Lane <tgl@sss.pgh.pa.us>	2007-06-17 23:39:28 +0000
commit	011b51cb7e912af2674ee6685a51651182ceab4f (patch)
tree	a20466763833d3b4db93a2264e3c0900af67bb36 /src/backend/commands/copy.c
parent	6775c0108039a13458004d893e4a17b4ae3fae9d (diff)
download	postgresql-011b51cb7e912af2674ee6685a51651182ceab4f.tar.gz postgresql-011b51cb7e912af2674ee6685a51651182ceab4f.zip