diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2007-06-17 23:39:28 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2007-06-17 23:39:28 +0000 |
commit | 011b51cb7e912af2674ee6685a51651182ceab4f (patch) | |
tree | a20466763833d3b4db93a2264e3c0900af67bb36 /src/backend/commands/copy.c | |
parent | 6775c0108039a13458004d893e4a17b4ae3fae9d (diff) | |
download | postgresql-011b51cb7e912af2674ee6685a51651182ceab4f.tar.gz postgresql-011b51cb7e912af2674ee6685a51651182ceab4f.zip |
Marginal hacking to improve the speed of COPY OUT. I had found in a bit of
profiling that CopyAttributeOutText was taking an unreasonable fraction of
the backend run time (like 66%!) on the following trivial test case:
$ time psql -c "copy (select repeat('xyzzy',50) from generate_series(1,10000000)) to stdout" regression >/dev/null
All of that time is spent scanning the string for characters that need to be
escaped, even though most of the time there are none. Some tweaking to take
as many tests as possible out of the inner loop reduced the runtime of this
example by more than 10%. In a real-world case the speedup would be
smaller, but it still seems worth adding a few lines here.
Diffstat (limited to 'src/backend/commands/copy.c')
-rw-r--r-- | src/backend/commands/copy.c | 128 |
1 file changed, 74 insertions, 54 deletions
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 885411cf2c0..493d2944f1a 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.283 2007/04/27 22:05:46 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.284 2007/06/17 23:39:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3075,68 +3075,88 @@ CopyAttributeOutText(CopyState cstate, char *string) * We have to grovel through the string searching for control characters * and instances of the delimiter character. In most cases, though, these * are infrequent. To avoid overhead from calling CopySendData once per - * character, we dump out all characters between replaceable characters in + * character, we dump out all characters between escaped characters in * a single call. The loop invariant is that the data from "start" to * "ptr" can be sent literally, but hasn't yet been. + * + * We can skip pg_encoding_mblen() overhead when encoding is safe, because + * in valid backend encodings, extra bytes of a multibyte character never + * look like ASCII. This loop is sufficiently performance-critical that + * it's worth making two copies of it to get the IS_HIGHBIT_SET() test + * out of the normal safe-encoding path. 
*/ - start = ptr; - while ((c = *ptr) != '\0') + if (cstate->encoding_embeds_ascii) { - switch (c) + start = ptr; + while ((c = *ptr) != '\0') { - case '\b': - DUMPSOFAR(); - CopySendString(cstate, "\\b"); - start = ++ptr; - break; - case '\f': - DUMPSOFAR(); - CopySendString(cstate, "\\f"); - start = ++ptr; - break; - case '\n': - DUMPSOFAR(); - CopySendString(cstate, "\\n"); - start = ++ptr; - break; - case '\r': - DUMPSOFAR(); - CopySendString(cstate, "\\r"); - start = ++ptr; - break; - case '\t': - DUMPSOFAR(); - CopySendString(cstate, "\\t"); - start = ++ptr; - break; - case '\v': + if (c == '\\' || c == delimc) + { DUMPSOFAR(); - CopySendString(cstate, "\\v"); - start = ++ptr; - break; - case '\\': + CopySendChar(cstate, '\\'); + start = ptr++; /* we include char in next run */ + } + else if ((unsigned char) c < (unsigned char) 0x20) + { + switch (c) + { + /* \r and \n must be escaped, the others are traditional */ + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + case '\v': + DUMPSOFAR(); + CopySendChar(cstate, '\\'); + start = ptr++; /* we include char in next run */ + break; + default: + /* All ASCII control chars are length 1 */ + ptr++; + break; + } + } + else if (IS_HIGHBIT_SET(c)) + ptr += pg_encoding_mblen(cstate->client_encoding, ptr); + else + ptr++; + } + } + else + { + start = ptr; + while ((c = *ptr) != '\0') + { + if (c == '\\' || c == delimc) + { DUMPSOFAR(); - CopySendString(cstate, "\\\\"); - start = ++ptr; - break; - default: - if (c == delimc) + CopySendChar(cstate, '\\'); + start = ptr++; /* we include char in next run */ + } + else if ((unsigned char) c < (unsigned char) 0x20) + { + switch (c) { - DUMPSOFAR(); - CopySendChar(cstate, '\\'); - start = ptr; /* we include char in next run */ + /* \r and \n must be escaped, the others are traditional */ + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + case '\v': + DUMPSOFAR(); + CopySendChar(cstate, '\\'); + start = ptr++; /* we include char in next run */ + 
break; + default: + /* All ASCII control chars are length 1 */ + ptr++; + break; } - - /* - * We can skip pg_encoding_mblen() overhead when encoding is - * safe, because in valid backend encodings, extra bytes of a - * multibyte character never look like ASCII. - */ - if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii) - ptr += pg_encoding_mblen(cstate->client_encoding, ptr); - else - ptr++; - break; + } + else + ptr++; } } |