diff options
author | Michael Paquier <michael@paquier.xyz> | 2021-08-19 09:20:13 +0900 |
---|---|---|
committer | Michael Paquier <michael@paquier.xyz> | 2021-08-19 09:20:13 +0900 |
commit | 2576dcfb76aa71e4222bac5a3a43f71875bfa9e8 (patch) | |
tree | e035f807a855e56aee90d73bf63798d19133b1a1 /src/backend/utils/adt/encode.c | |
parent | 2313dda9d493d3685ac7328b49dc6f5a87c1c295 (diff) | |
download | postgresql-2576dcfb76aa71e4222bac5a3a43f71875bfa9e8.tar.gz postgresql-2576dcfb76aa71e4222bac5a3a43f71875bfa9e8.zip |
Revert refactoring of hex code to src/common/
This is a combined revert of the following commits:
- c3826f8, a refactoring piece that moved the hex decoding code to
src/common/. This code was cleaned up by aef8948, as it originally
included no overflow checks in the same way as the base64 routines in
src/common/ used by SCRAM, making it unsafe for its purpose.
- aef8948, a more advanced refactoring of the hex encoding/decoding code
to src/common/ that added sanity checks on the result buffer for hex
decoding and encoding. As reported by Hans Buschmann, those overflow
checks are expensive, and the longer a bytea or LO is, the larger the
performance drop that can be seen in its decoding/encoding. Simple SQL
queries working on large bytea values show a clear difference in perf profiles.
- ccf4e27, a cleanup made possible by aef8948.
The reverts of all those commits bring the performance of hex
decoding and encoding back to what it was in ~13. For now and
post-beta3, this is the simplest option.
Reported-by: Hans Buschmann
Discussion: https://postgr.es/m/1629039545467.80333@nidsa.net
Backpatch-through: 14
Diffstat (limited to 'src/backend/utils/adt/encode.c')
-rw-r--r-- | src/backend/utils/adt/encode.c | 158 |
1 files changed, 102 insertions, 56 deletions
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c index 8449aaac56a..6dd93f9a322 100644 --- a/src/backend/utils/adt/encode.c +++ b/src/backend/utils/adt/encode.c @@ -15,7 +15,6 @@ #include <ctype.h> -#include "common/hex.h" #include "mb/pg_wchar.h" #include "utils/builtins.h" #include "utils/memutils.h" @@ -32,12 +31,10 @@ */ struct pg_encoding { - uint64 (*encode_len) (const char *src, size_t srclen); - uint64 (*decode_len) (const char *src, size_t srclen); - uint64 (*encode) (const char *src, size_t srclen, - char *dst, size_t dstlen); - uint64 (*decode) (const char *src, size_t srclen, - char *dst, size_t dstlen); + uint64 (*encode_len) (const char *data, size_t dlen); + uint64 (*decode_len) (const char *data, size_t dlen); + uint64 (*encode) (const char *data, size_t dlen, char *res); + uint64 (*decode) (const char *data, size_t dlen, char *res); }; static const struct pg_encoding *pg_find_encoding(const char *name); @@ -83,7 +80,11 @@ binary_encode(PG_FUNCTION_ARGS) result = palloc(VARHDRSZ + resultlen); - res = enc->encode(dataptr, datalen, VARDATA(result), resultlen); + res = enc->encode(dataptr, datalen, VARDATA(result)); + + /* Make this FATAL 'cause we've trodden on memory ... */ + if (res > resultlen) + elog(FATAL, "overflow - encode estimate too small"); SET_VARSIZE(result, VARHDRSZ + res); @@ -127,7 +128,11 @@ binary_decode(PG_FUNCTION_ARGS) result = palloc(VARHDRSZ + resultlen); - res = enc->decode(dataptr, datalen, VARDATA(result), resultlen); + res = enc->decode(dataptr, datalen, VARDATA(result)); + + /* Make this FATAL 'cause we've trodden on memory ... */ + if (res > resultlen) + elog(FATAL, "overflow - decode estimate too small"); SET_VARSIZE(result, VARHDRSZ + res); @@ -139,20 +144,95 @@ binary_decode(PG_FUNCTION_ARGS) * HEX */ -/* - * Those two wrappers are still needed to match with the layer of - * src/common/. 
- */ +static const char hextbl[] = "0123456789abcdef"; + +static const int8 hexlookup[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +uint64 +hex_encode(const char *src, size_t len, char *dst) +{ + const char *end = src + len; + + while (src < end) + { + *dst++ = hextbl[(*src >> 4) & 0xF]; + *dst++ = hextbl[*src & 0xF]; + src++; + } + return (uint64) len * 2; +} + +static inline char +get_hex(const char *cp) +{ + unsigned char c = (unsigned char) *cp; + int res = -1; + + if (c < 127) + res = hexlookup[c]; + + if (res < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid hexadecimal digit: \"%.*s\"", + pg_mblen(cp), cp))); + + return (char) res; +} + +uint64 +hex_decode(const char *src, size_t len, char *dst) +{ + const char *s, + *srcend; + char v1, + v2, + *p; + + srcend = src + len; + s = src; + p = dst; + while (s < srcend) + { + if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r') + { + s++; + continue; + } + v1 = get_hex(s) << 4; + s++; + if (s >= srcend) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid hexadecimal data: odd number of digits"))); + + v2 = get_hex(s); + s++; + *p++ = v1 | v2; + } + + return p - dst; +} + static uint64 hex_enc_len(const char *src, size_t srclen) { - return pg_hex_enc_len(srclen); + return (uint64) srclen << 1; } static uint64 hex_dec_len(const char *src, size_t srclen) { - return pg_hex_dec_len(srclen); + return (uint64) srclen >> 1; } /* @@ -174,12 +254,12 @@ static const int8 
b64lookup[128] = { }; static uint64 -pg_base64_encode(const char *src, size_t srclen, char *dst, size_t dstlen) +pg_base64_encode(const char *src, size_t len, char *dst) { char *p, *lend = dst + 76; const char *s, - *end = src + srclen; + *end = src + len; int pos = 2; uint32 buf = 0; @@ -195,8 +275,6 @@ pg_base64_encode(const char *src, size_t srclen, char *dst, size_t dstlen) /* write it out */ if (pos < 0) { - if ((p - dst + 4) > dstlen) - elog(ERROR, "overflow of destination buffer in base64 encoding"); *p++ = _base64[(buf >> 18) & 0x3f]; *p++ = _base64[(buf >> 12) & 0x3f]; *p++ = _base64[(buf >> 6) & 0x3f]; @@ -207,30 +285,25 @@ pg_base64_encode(const char *src, size_t srclen, char *dst, size_t dstlen) } if (p >= lend) { - if ((p - dst + 1) > dstlen) - elog(ERROR, "overflow of destination buffer in base64 encoding"); *p++ = '\n'; lend = p + 76; } } if (pos != 2) { - if ((p - dst + 4) > dstlen) - elog(ERROR, "overflow of destination buffer in base64 encoding"); *p++ = _base64[(buf >> 18) & 0x3f]; *p++ = _base64[(buf >> 12) & 0x3f]; *p++ = (pos == 0) ? 
_base64[(buf >> 6) & 0x3f] : '='; *p++ = '='; } - Assert((p - dst) <= dstlen); return p - dst; } static uint64 -pg_base64_decode(const char *src, size_t srclen, char *dst, size_t dstlen) +pg_base64_decode(const char *src, size_t len, char *dst) { - const char *srcend = src + srclen, + const char *srcend = src + len, *s = src; char *p = dst; char c; @@ -278,21 +351,11 @@ pg_base64_decode(const char *src, size_t srclen, char *dst, size_t dstlen) pos++; if (pos == 4) { - if ((p - dst + 1) > dstlen) - elog(ERROR, "overflow of destination buffer in base64 decoding"); *p++ = (buf >> 16) & 255; if (end == 0 || end > 1) - { - if ((p - dst + 1) > dstlen) - elog(ERROR, "overflow of destination buffer in base64 decoding"); *p++ = (buf >> 8) & 255; - } if (end == 0 || end > 2) - { - if ((p - dst + 1) > dstlen) - elog(ERROR, "overflow of destination buffer in base64 decoding"); *p++ = buf & 255; - } buf = 0; pos = 0; } @@ -304,7 +367,6 @@ pg_base64_decode(const char *src, size_t srclen, char *dst, size_t dstlen) errmsg("invalid base64 end sequence"), errhint("Input data is missing padding, is truncated, or is otherwise corrupted."))); - Assert((p - dst) <= dstlen); return p - dst; } @@ -340,7 +402,7 @@ pg_base64_dec_len(const char *src, size_t srclen) #define DIG(VAL) ((VAL) + '0') static uint64 -esc_encode(const char *src, size_t srclen, char *dst, size_t dstlen) +esc_encode(const char *src, size_t srclen, char *dst) { const char *end = src + srclen; char *rp = dst; @@ -352,8 +414,6 @@ esc_encode(const char *src, size_t srclen, char *dst, size_t dstlen) if (c == '\0' || IS_HIGHBIT_SET(c)) { - if ((rp - dst + 4) > dstlen) - elog(ERROR, "overflow of destination buffer in escape encoding"); rp[0] = '\\'; rp[1] = DIG(c >> 6); rp[2] = DIG((c >> 3) & 7); @@ -363,8 +423,6 @@ esc_encode(const char *src, size_t srclen, char *dst, size_t dstlen) } else if (c == '\\') { - if ((rp - dst + 2) > dstlen) - elog(ERROR, "overflow of destination buffer in escape encoding"); rp[0] = '\\'; rp[1] 
= '\\'; rp += 2; @@ -372,8 +430,6 @@ esc_encode(const char *src, size_t srclen, char *dst, size_t dstlen) } else { - if ((rp - dst + 1) > dstlen) - elog(ERROR, "overflow of destination buffer in escape encoding"); *rp++ = c; len++; } @@ -381,12 +437,11 @@ esc_encode(const char *src, size_t srclen, char *dst, size_t dstlen) src++; } - Assert((rp - dst) <= dstlen); return len; } static uint64 -esc_decode(const char *src, size_t srclen, char *dst, size_t dstlen) +esc_decode(const char *src, size_t srclen, char *dst) { const char *end = src + srclen; char *rp = dst; @@ -395,11 +450,7 @@ esc_decode(const char *src, size_t srclen, char *dst, size_t dstlen) while (src < end) { if (src[0] != '\\') - { - if ((rp - dst + 1) > dstlen) - elog(ERROR, "overflow of destination buffer in escape decoding"); *rp++ = *src++; - } else if (src + 3 < end && (src[1] >= '0' && src[1] <= '3') && (src[2] >= '0' && src[2] <= '7') && @@ -411,16 +462,12 @@ esc_decode(const char *src, size_t srclen, char *dst, size_t dstlen) val <<= 3; val += VAL(src[2]); val <<= 3; - if ((rp - dst + 1) > dstlen) - elog(ERROR, "overflow of destination buffer in escape decoding"); *rp++ = val + VAL(src[3]); src += 4; } else if (src + 1 < end && (src[1] == '\\')) { - if ((rp - dst + 1) > dstlen) - elog(ERROR, "overflow of destination buffer in escape decoding"); *rp++ = '\\'; src += 2; } @@ -438,7 +485,6 @@ esc_decode(const char *src, size_t srclen, char *dst, size_t dstlen) len++; } - Assert((rp - dst) <= dstlen); return len; } @@ -520,7 +566,7 @@ static const struct { "hex", { - hex_enc_len, hex_dec_len, pg_hex_encode, pg_hex_decode + hex_enc_len, hex_dec_len, hex_encode, hex_decode } }, { |