diff options
author | Dean Rasheed <dean.a.rasheed@gmail.com> | 2023-02-04 09:48:51 +0000 |
---|---|---|
committer | Dean Rasheed <dean.a.rasheed@gmail.com> | 2023-02-04 09:48:51 +0000 |
commit | faff8f8e47f18c7d589453e2e0d841d2bd96c1ac (patch) | |
tree | 84c64f4f9cb6e7713d955f8b3193ff84b42c8cee /src/backend | |
parent | 1b6f632a35f8715f8c64e7930adebc7f1d292074 (diff) | |
download | postgresql-faff8f8e47f18c7d589453e2e0d841d2bd96c1ac.tar.gz postgresql-faff8f8e47f18c7d589453e2e0d841d2bd96c1ac.zip |
Allow underscores in integer and numeric constants.
This allows underscores to be used in integer and numeric literals,
and their corresponding type input functions, for visual grouping.
For example:
1_500_000_000
3.14159_26535_89793
0xffff_ffff
0b_1001_0001
A single underscore is allowed between any 2 digits, or immediately
after the base prefix indicator of non-decimal integers, per SQL:202x
draft.
Peter Eisentraut and Dean Rasheed
Discussion: https://postgr.es/m/84aae844-dc55-a4be-86d9-4f0fa405cc97%40enterprisedb.com
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/catalog/sql_features.txt | 1 |
-rw-r--r-- | src/backend/parser/parse_node.c | 43 |
-rw-r--r-- | src/backend/parser/scan.l | 27 |
-rw-r--r-- | src/backend/utils/adt/numeric.c | 106 |
-rw-r--r-- | src/backend/utils/adt/numutils.c | 273 |
5 files changed, 315 insertions, 135 deletions
diff --git a/src/backend/catalog/sql_features.txt b/src/backend/catalog/sql_features.txt index abad216b7ee..3766762ae36 100644 --- a/src/backend/catalog/sql_features.txt +++ b/src/backend/catalog/sql_features.txt @@ -528,6 +528,7 @@ T653 SQL-schema statements in external routines YES T654 SQL-dynamic statements in external routines NO T655 Cyclically dependent routines YES T661 Non-decimal integer literals YES SQL:202x draft +T662 Underscores in integer literals YES SQL:202x draft T811 Basic SQL/JSON constructor functions NO T812 SQL/JSON: JSON_OBJECTAGG NO T813 SQL/JSON: JSON_ARRAYAGG with ORDER BY NO diff --git a/src/backend/parser/parse_node.c b/src/backend/parser/parse_node.c index f1967a33bc0..5020b9f0810 100644 --- a/src/backend/parser/parse_node.c +++ b/src/backend/parser/parse_node.c @@ -19,6 +19,7 @@ #include "catalog/pg_type.h" #include "mb/pg_wchar.h" #include "nodes/makefuncs.h" +#include "nodes/miscnodes.h" #include "nodes/nodeFuncs.h" #include "nodes/subscripting.h" #include "parser/parse_coerce.h" @@ -385,47 +386,11 @@ make_const(ParseState *pstate, A_Const *aconst) { /* could be an oversize integer as well as a float ... 
*/ - int base = 10; - char *startptr; - int sign; - char *testvalue; + ErrorSaveContext escontext = {T_ErrorSaveContext}; int64 val64; - char *endptr; - startptr = aconst->val.fval.fval; - if (startptr[0] == '-') - { - sign = -1; - startptr++; - } - else - sign = +1; - if (startptr[0] == '0') - { - if (startptr[1] == 'b' || startptr[1] == 'B') - { - base = 2; - startptr += 2; - } - else if (startptr[1] == 'o' || startptr[1] == 'O') - { - base = 8; - startptr += 2; - } - else if (startptr[1] == 'x' || startptr[1] == 'X') - { - base = 16; - startptr += 2; - } - } - - if (sign == +1) - testvalue = startptr; - else - testvalue = psprintf("-%s", startptr); - errno = 0; - val64 = strtoi64(testvalue, &endptr, base); - if (errno == 0 && *endptr == '\0') + val64 = pg_strtoint64_safe(aconst->val.fval.fval, (Node *) &escontext); + if (!escontext.error_occurred) { /* * It might actually fit in int32. Probably only INT_MIN diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 1e821d4c9e2..b2216a9eacd 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -37,10 +37,12 @@ #include "common/string.h" #include "gramparse.h" +#include "nodes/miscnodes.h" #include "parser/parser.h" /* only needed for GUC variables */ #include "parser/scansup.h" #include "port/pg_bitutils.h" #include "mb/pg_wchar.h" +#include "utils/builtins.h" } %{ @@ -395,19 +397,19 @@ hexdigit [0-9A-Fa-f] octdigit [0-7] bindigit [0-1] -decinteger {decdigit}+ -hexinteger 0[xX]{hexdigit}+ -octinteger 0[oO]{octdigit}+ -bininteger 0[bB]{bindigit}+ +decinteger {decdigit}(_?{decdigit})* +hexinteger 0[xX](_?{hexdigit})+ +octinteger 0[oO](_?{octdigit})+ +bininteger 0[bB](_?{bindigit})+ -hexfail 0[xX] -octfail 0[oO] -binfail 0[bB] +hexfail 0[xX]_? +octfail 0[oO]_? +binfail 0[bB]_? numeric (({decinteger}\.{decinteger}?)|(\.{decinteger})) numericfail {decdigit}+\.\. 
-real ({decinteger}|{numeric})[Ee][-+]?{decdigit}+ +real ({decinteger}|{numeric})[Ee][-+]?{decinteger} realfail ({decinteger}|{numeric})[Ee][-+] decinteger_junk {decinteger}{ident_start} @@ -1364,12 +1366,11 @@ litbufdup(core_yyscan_t yyscanner) static int process_integer_literal(const char *token, YYSTYPE *lval, int base) { - int val; - char *endptr; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + int32 val; - errno = 0; - val = strtoint(base == 10 ? token : token + 2, &endptr, base); - if (*endptr != '\0' || errno == ERANGE) + val = pg_strtoint32_safe(token, (Node *) &escontext); + if (escontext.error_occurred) { /* integer too large (or contains decimal pt), treat it as a float */ lval->str = pstrdup(token); diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index 6bf6db6e27b..a83feea3967 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -6968,10 +6968,7 @@ set_var_from_str(const char *str, const char *cp, } if (!isdigit((unsigned char) *cp)) - ereturn(escontext, false, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s: \"%s\"", - "numeric", str))); + goto invalid_syntax; decdigits = (unsigned char *) palloc(strlen(cp) + DEC_DIGITS * 2); @@ -6992,12 +6989,19 @@ set_var_from_str(const char *str, const char *cp, else if (*cp == '.') { if (have_dp) - ereturn(escontext, false, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s: \"%s\"", - "numeric", str))); + goto invalid_syntax; have_dp = true; cp++; + /* decimal point must not be followed by underscore */ + if (*cp == '_') + goto invalid_syntax; + } + else if (*cp == '_') + { + /* underscore must be followed by more digits */ + cp++; + if (!isdigit((unsigned char) *cp)) + goto invalid_syntax; } else break; @@ -7010,17 +7014,8 @@ set_var_from_str(const char *str, const char *cp, /* Handle exponent, if any */ if (*cp == 'e' || *cp == 'E') { - long exponent; - char 
*endptr; - - cp++; - exponent = strtol(cp, &endptr, 10); - if (endptr == cp) - ereturn(escontext, false, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s: \"%s\"", - "numeric", str))); - cp = endptr; + int64 exponent = 0; + bool neg = false; /* * At this point, dweight and dscale can't be more than about @@ -7030,10 +7025,43 @@ set_var_from_str(const char *str, const char *cp, * fit in storage format, make_result() will complain about it later; * for consistency use the same ereport errcode/text as make_result(). */ - if (exponent >= INT_MAX / 2 || exponent <= -(INT_MAX / 2)) - ereturn(escontext, false, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("value overflows numeric format"))); + + /* exponent sign */ + cp++; + if (*cp == '+') + cp++; + else if (*cp == '-') + { + neg = true; + cp++; + } + + /* exponent digits */ + if (!isdigit((unsigned char) *cp)) + goto invalid_syntax; + + while (*cp) + { + if (isdigit((unsigned char) *cp)) + { + exponent = exponent * 10 + (*cp++ - '0'); + if (exponent > PG_INT32_MAX / 2) + goto out_of_range; + } + else if (*cp == '_') + { + /* underscore must be followed by more digits */ + cp++; + if (!isdigit((unsigned char) *cp)) + goto invalid_syntax; + } + else + break; + } + + if (neg) + exponent = -exponent; + dweight += (int) exponent; dscale -= (int) exponent; if (dscale < 0) @@ -7085,6 +7113,17 @@ set_var_from_str(const char *str, const char *cp, *endptr = cp; return true; + +out_of_range: + ereturn(escontext, false, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value overflows numeric format"))); + +invalid_syntax: + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", + "numeric", str))); } @@ -7167,6 +7206,13 @@ set_var_from_non_decimal_integer_str(const char *str, const char *cp, int sign, tmp = tmp * 16 + xdigit_value(*cp++); mul = mul * 16; } + else if (*cp == '_') + { + /* Underscore 
must be followed by more digits */ + cp++; + if (!isxdigit((unsigned char) *cp)) + goto invalid_syntax; + } else break; } @@ -7197,6 +7243,13 @@ set_var_from_non_decimal_integer_str(const char *str, const char *cp, int sign, tmp = tmp * 8 + (*cp++ - '0'); mul = mul * 8; } + else if (*cp == '_') + { + /* Underscore must be followed by more digits */ + cp++; + if (*cp < '0' || *cp > '7') + goto invalid_syntax; + } else break; } @@ -7227,6 +7280,13 @@ set_var_from_non_decimal_integer_str(const char *str, const char *cp, int sign, tmp = tmp * 2 + (*cp++ - '0'); mul = mul * 2; } + else if (*cp == '_') + { + /* Underscore must be followed by more digits */ + cp++; + if (*cp < '0' || *cp > '1') + goto invalid_syntax; + } else break; } diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index b0e412e7c67..471fbb7ee63 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -141,48 +141,99 @@ pg_strtoint16_safe(const char *s, Node *escontext) { firstdigit = ptr += 2; - while (*ptr && isxdigit((unsigned char) *ptr)) + while (*ptr) { - if (unlikely(tmp > -(PG_INT16_MIN / 16))) - goto out_of_range; - - tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + if (isxdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT16_MIN / 16))) + goto out_of_range; + + tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isxdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; } } else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O')) { firstdigit = ptr += 2; - while (*ptr && (*ptr >= '0' && *ptr <= '7')) + while (*ptr) { - if (unlikely(tmp > -(PG_INT16_MIN / 8))) - goto out_of_range; - - tmp = tmp * 8 + (*ptr++ - '0'); + if (*ptr >= '0' && *ptr <= '7') + { + if (unlikely(tmp > -(PG_INT16_MIN / 8))) + goto out_of_range; + + tmp = tmp * 8 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + 
/* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '7') + goto invalid_syntax; + } + else + break; } } else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) { firstdigit = ptr += 2; - while (*ptr && (*ptr >= '0' && *ptr <= '1')) + while (*ptr) { - if (unlikely(tmp > -(PG_INT16_MIN / 2))) - goto out_of_range; - - tmp = tmp * 2 + (*ptr++ - '0'); + if (*ptr >= '0' && *ptr <= '1') + { + if (unlikely(tmp > -(PG_INT16_MIN / 2))) + goto out_of_range; + + tmp = tmp * 2 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '1') + goto invalid_syntax; + } + else + break; } } else { firstdigit = ptr; - while (*ptr && isdigit((unsigned char) *ptr)) + while (*ptr) { - if (unlikely(tmp > -(PG_INT16_MIN / 10))) - goto out_of_range; - - tmp = tmp * 10 + (*ptr++ - '0'); + if (isdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT16_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore may not be first */ + if (unlikely(ptr == firstdigit)) + goto invalid_syntax; + /* and it must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; } } @@ -268,48 +319,99 @@ pg_strtoint32_safe(const char *s, Node *escontext) { firstdigit = ptr += 2; - while (*ptr && isxdigit((unsigned char) *ptr)) + while (*ptr) { - if (unlikely(tmp > -(PG_INT32_MIN / 16))) - goto out_of_range; - - tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + if (isxdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT32_MIN / 16))) + goto out_of_range; + + tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isxdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; } } else if 
(ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O')) { firstdigit = ptr += 2; - while (*ptr && (*ptr >= '0' && *ptr <= '7')) + while (*ptr) { - if (unlikely(tmp > -(PG_INT32_MIN / 8))) - goto out_of_range; - - tmp = tmp * 8 + (*ptr++ - '0'); + if (*ptr >= '0' && *ptr <= '7') + { + if (unlikely(tmp > -(PG_INT32_MIN / 8))) + goto out_of_range; + + tmp = tmp * 8 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '7') + goto invalid_syntax; + } + else + break; } } else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) { firstdigit = ptr += 2; - while (*ptr && (*ptr >= '0' && *ptr <= '1')) + while (*ptr) { - if (unlikely(tmp > -(PG_INT32_MIN / 2))) - goto out_of_range; - - tmp = tmp * 2 + (*ptr++ - '0'); + if (*ptr >= '0' && *ptr <= '1') + { + if (unlikely(tmp > -(PG_INT32_MIN / 2))) + goto out_of_range; + + tmp = tmp * 2 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '1') + goto invalid_syntax; + } + else + break; } } else { firstdigit = ptr; - while (*ptr && isdigit((unsigned char) *ptr)) + while (*ptr) { - if (unlikely(tmp > -(PG_INT32_MIN / 10))) - goto out_of_range; - - tmp = tmp * 10 + (*ptr++ - '0'); + if (isdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT32_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore may not be first */ + if (unlikely(ptr == firstdigit)) + goto invalid_syntax; + /* and it must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; } } @@ -395,48 +497,99 @@ pg_strtoint64_safe(const char *s, Node *escontext) { firstdigit = ptr += 2; - while (*ptr && isxdigit((unsigned char) *ptr)) + while (*ptr) { - if (unlikely(tmp > -(PG_INT64_MIN / 16))) - goto out_of_range; - - tmp = 
tmp * 16 + hexlookup[(unsigned char) *ptr++]; + if (isxdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT64_MIN / 16))) + goto out_of_range; + + tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isxdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; } } else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O')) { firstdigit = ptr += 2; - while (*ptr && (*ptr >= '0' && *ptr <= '7')) + while (*ptr) { - if (unlikely(tmp > -(PG_INT64_MIN / 8))) - goto out_of_range; - - tmp = tmp * 8 + (*ptr++ - '0'); + if (*ptr >= '0' && *ptr <= '7') + { + if (unlikely(tmp > -(PG_INT64_MIN / 8))) + goto out_of_range; + + tmp = tmp * 8 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '7') + goto invalid_syntax; + } + else + break; } } else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) { firstdigit = ptr += 2; - while (*ptr && (*ptr >= '0' && *ptr <= '1')) + while (*ptr) { - if (unlikely(tmp > -(PG_INT64_MIN / 2))) - goto out_of_range; - - tmp = tmp * 2 + (*ptr++ - '0'); + if (*ptr >= '0' && *ptr <= '1') + { + if (unlikely(tmp > -(PG_INT64_MIN / 2))) + goto out_of_range; + + tmp = tmp * 2 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '1') + goto invalid_syntax; + } + else + break; } } else { firstdigit = ptr; - while (*ptr && isdigit((unsigned char) *ptr)) + while (*ptr) { - if (unlikely(tmp > -(PG_INT64_MIN / 10))) - goto out_of_range; - - tmp = tmp * 10 + (*ptr++ - '0'); + if (isdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT64_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore may not be first */ + if (unlikely(ptr == firstdigit)) + goto 
invalid_syntax; + /* and it must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; } } |