aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorDean Rasheed <dean.a.rasheed@gmail.com>2023-02-04 09:48:51 +0000
committerDean Rasheed <dean.a.rasheed@gmail.com>2023-02-04 09:48:51 +0000
commitfaff8f8e47f18c7d589453e2e0d841d2bd96c1ac (patch)
tree84c64f4f9cb6e7713d955f8b3193ff84b42c8cee /src/backend
parent1b6f632a35f8715f8c64e7930adebc7f1d292074 (diff)
downloadpostgresql-faff8f8e47f18c7d589453e2e0d841d2bd96c1ac.tar.gz
postgresql-faff8f8e47f18c7d589453e2e0d841d2bd96c1ac.zip
Allow underscores in integer and numeric constants.
This allows underscores to be used in integer and numeric literals, and their corresponding type input functions, for visual grouping. For example: 1_500_000_000 3.14159_26535_89793 0xffff_ffff 0b_1001_0001 A single underscore is allowed between any 2 digits, or immediately after the base prefix indicator of non-decimal integers, per SQL:202x draft. Peter Eisentraut and Dean Rasheed Discussion: https://postgr.es/m/84aae844-dc55-a4be-86d9-4f0fa405cc97%40enterprisedb.com
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/catalog/sql_features.txt1
-rw-r--r--src/backend/parser/parse_node.c43
-rw-r--r--src/backend/parser/scan.l27
-rw-r--r--src/backend/utils/adt/numeric.c106
-rw-r--r--src/backend/utils/adt/numutils.c273
5 files changed, 315 insertions, 135 deletions
diff --git a/src/backend/catalog/sql_features.txt b/src/backend/catalog/sql_features.txt
index abad216b7ee..3766762ae36 100644
--- a/src/backend/catalog/sql_features.txt
+++ b/src/backend/catalog/sql_features.txt
@@ -528,6 +528,7 @@ T653 SQL-schema statements in external routines YES
T654 SQL-dynamic statements in external routines NO
T655 Cyclically dependent routines YES
T661 Non-decimal integer literals YES SQL:202x draft
+T662 Underscores in integer literals YES SQL:202x draft
T811 Basic SQL/JSON constructor functions NO
T812 SQL/JSON: JSON_OBJECTAGG NO
T813 SQL/JSON: JSON_ARRAYAGG with ORDER BY NO
diff --git a/src/backend/parser/parse_node.c b/src/backend/parser/parse_node.c
index f1967a33bc0..5020b9f0810 100644
--- a/src/backend/parser/parse_node.c
+++ b/src/backend/parser/parse_node.c
@@ -19,6 +19,7 @@
#include "catalog/pg_type.h"
#include "mb/pg_wchar.h"
#include "nodes/makefuncs.h"
+#include "nodes/miscnodes.h"
#include "nodes/nodeFuncs.h"
#include "nodes/subscripting.h"
#include "parser/parse_coerce.h"
@@ -385,47 +386,11 @@ make_const(ParseState *pstate, A_Const *aconst)
{
/* could be an oversize integer as well as a float ... */
- int base = 10;
- char *startptr;
- int sign;
- char *testvalue;
+ ErrorSaveContext escontext = {T_ErrorSaveContext};
int64 val64;
- char *endptr;
- startptr = aconst->val.fval.fval;
- if (startptr[0] == '-')
- {
- sign = -1;
- startptr++;
- }
- else
- sign = +1;
- if (startptr[0] == '0')
- {
- if (startptr[1] == 'b' || startptr[1] == 'B')
- {
- base = 2;
- startptr += 2;
- }
- else if (startptr[1] == 'o' || startptr[1] == 'O')
- {
- base = 8;
- startptr += 2;
- }
- else if (startptr[1] == 'x' || startptr[1] == 'X')
- {
- base = 16;
- startptr += 2;
- }
- }
-
- if (sign == +1)
- testvalue = startptr;
- else
- testvalue = psprintf("-%s", startptr);
- errno = 0;
- val64 = strtoi64(testvalue, &endptr, base);
- if (errno == 0 && *endptr == '\0')
+ val64 = pg_strtoint64_safe(aconst->val.fval.fval, (Node *) &escontext);
+ if (!escontext.error_occurred)
{
/*
* It might actually fit in int32. Probably only INT_MIN
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 1e821d4c9e2..b2216a9eacd 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -37,10 +37,12 @@
#include "common/string.h"
#include "gramparse.h"
+#include "nodes/miscnodes.h"
#include "parser/parser.h" /* only needed for GUC variables */
#include "parser/scansup.h"
#include "port/pg_bitutils.h"
#include "mb/pg_wchar.h"
+#include "utils/builtins.h"
}
%{
@@ -395,19 +397,19 @@ hexdigit [0-9A-Fa-f]
octdigit [0-7]
bindigit [0-1]
-decinteger {decdigit}+
-hexinteger 0[xX]{hexdigit}+
-octinteger 0[oO]{octdigit}+
-bininteger 0[bB]{bindigit}+
+decinteger {decdigit}(_?{decdigit})*
+hexinteger 0[xX](_?{hexdigit})+
+octinteger 0[oO](_?{octdigit})+
+bininteger 0[bB](_?{bindigit})+
-hexfail 0[xX]
-octfail 0[oO]
-binfail 0[bB]
+hexfail 0[xX]_?
+octfail 0[oO]_?
+binfail 0[bB]_?
numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
numericfail {decdigit}+\.\.
-real ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+real ({decinteger}|{numeric})[Ee][-+]?{decinteger}
realfail ({decinteger}|{numeric})[Ee][-+]
decinteger_junk {decinteger}{ident_start}
@@ -1364,12 +1366,11 @@ litbufdup(core_yyscan_t yyscanner)
static int
process_integer_literal(const char *token, YYSTYPE *lval, int base)
{
- int val;
- char *endptr;
+ ErrorSaveContext escontext = {T_ErrorSaveContext};
+ int32 val;
- errno = 0;
- val = strtoint(base == 10 ? token : token + 2, &endptr, base);
- if (*endptr != '\0' || errno == ERANGE)
+ val = pg_strtoint32_safe(token, (Node *) &escontext);
+ if (escontext.error_occurred)
{
/* integer too large (or contains decimal pt), treat it as a float */
lval->str = pstrdup(token);
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 6bf6db6e27b..a83feea3967 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -6968,10 +6968,7 @@ set_var_from_str(const char *str, const char *cp,
}
if (!isdigit((unsigned char) *cp))
- ereturn(escontext, false,
- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type %s: \"%s\"",
- "numeric", str)));
+ goto invalid_syntax;
decdigits = (unsigned char *) palloc(strlen(cp) + DEC_DIGITS * 2);
@@ -6992,12 +6989,19 @@ set_var_from_str(const char *str, const char *cp,
else if (*cp == '.')
{
if (have_dp)
- ereturn(escontext, false,
- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type %s: \"%s\"",
- "numeric", str)));
+ goto invalid_syntax;
have_dp = true;
cp++;
+ /* decimal point must not be followed by underscore */
+ if (*cp == '_')
+ goto invalid_syntax;
+ }
+ else if (*cp == '_')
+ {
+ /* underscore must be followed by more digits */
+ cp++;
+ if (!isdigit((unsigned char) *cp))
+ goto invalid_syntax;
}
else
break;
@@ -7010,17 +7014,8 @@ set_var_from_str(const char *str, const char *cp,
/* Handle exponent, if any */
if (*cp == 'e' || *cp == 'E')
{
- long exponent;
- char *endptr;
-
- cp++;
- exponent = strtol(cp, &endptr, 10);
- if (endptr == cp)
- ereturn(escontext, false,
- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type %s: \"%s\"",
- "numeric", str)));
- cp = endptr;
+ int64 exponent = 0;
+ bool neg = false;
/*
* At this point, dweight and dscale can't be more than about
@@ -7030,10 +7025,43 @@ set_var_from_str(const char *str, const char *cp,
* fit in storage format, make_result() will complain about it later;
* for consistency use the same ereport errcode/text as make_result().
*/
- if (exponent >= INT_MAX / 2 || exponent <= -(INT_MAX / 2))
- ereturn(escontext, false,
- (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("value overflows numeric format")));
+
+ /* exponent sign */
+ cp++;
+ if (*cp == '+')
+ cp++;
+ else if (*cp == '-')
+ {
+ neg = true;
+ cp++;
+ }
+
+ /* exponent digits */
+ if (!isdigit((unsigned char) *cp))
+ goto invalid_syntax;
+
+ while (*cp)
+ {
+ if (isdigit((unsigned char) *cp))
+ {
+ exponent = exponent * 10 + (*cp++ - '0');
+ if (exponent > PG_INT32_MAX / 2)
+ goto out_of_range;
+ }
+ else if (*cp == '_')
+ {
+ /* underscore must be followed by more digits */
+ cp++;
+ if (!isdigit((unsigned char) *cp))
+ goto invalid_syntax;
+ }
+ else
+ break;
+ }
+
+ if (neg)
+ exponent = -exponent;
+
dweight += (int) exponent;
dscale -= (int) exponent;
if (dscale < 0)
@@ -7085,6 +7113,17 @@ set_var_from_str(const char *str, const char *cp,
*endptr = cp;
return true;
+
+out_of_range:
+ ereturn(escontext, false,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value overflows numeric format")));
+
+invalid_syntax:
+ ereturn(escontext, false,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "numeric", str)));
}
@@ -7167,6 +7206,13 @@ set_var_from_non_decimal_integer_str(const char *str, const char *cp, int sign,
tmp = tmp * 16 + xdigit_value(*cp++);
mul = mul * 16;
}
+ else if (*cp == '_')
+ {
+ /* Underscore must be followed by more digits */
+ cp++;
+ if (!isxdigit((unsigned char) *cp))
+ goto invalid_syntax;
+ }
else
break;
}
@@ -7197,6 +7243,13 @@ set_var_from_non_decimal_integer_str(const char *str, const char *cp, int sign,
tmp = tmp * 8 + (*cp++ - '0');
mul = mul * 8;
}
+ else if (*cp == '_')
+ {
+ /* Underscore must be followed by more digits */
+ cp++;
+ if (*cp < '0' || *cp > '7')
+ goto invalid_syntax;
+ }
else
break;
}
@@ -7227,6 +7280,13 @@ set_var_from_non_decimal_integer_str(const char *str, const char *cp, int sign,
tmp = tmp * 2 + (*cp++ - '0');
mul = mul * 2;
}
+ else if (*cp == '_')
+ {
+ /* Underscore must be followed by more digits */
+ cp++;
+ if (*cp < '0' || *cp > '1')
+ goto invalid_syntax;
+ }
else
break;
}
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index b0e412e7c67..471fbb7ee63 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -141,48 +141,99 @@ pg_strtoint16_safe(const char *s, Node *escontext)
{
firstdigit = ptr += 2;
- while (*ptr && isxdigit((unsigned char) *ptr))
+ while (*ptr)
{
- if (unlikely(tmp > -(PG_INT16_MIN / 16)))
- goto out_of_range;
-
- tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
+ if (isxdigit((unsigned char) *ptr))
+ {
+ if (unlikely(tmp > -(PG_INT16_MIN / 16)))
+ goto out_of_range;
+
+ tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
+ }
+ else if (*ptr == '_')
+ {
+ /* underscore must be followed by more digits */
+ ptr++;
+ if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
+ goto invalid_syntax;
+ }
+ else
+ break;
}
}
else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
{
firstdigit = ptr += 2;
- while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+ while (*ptr)
{
- if (unlikely(tmp > -(PG_INT16_MIN / 8)))
- goto out_of_range;
-
- tmp = tmp * 8 + (*ptr++ - '0');
+ if (*ptr >= '0' && *ptr <= '7')
+ {
+ if (unlikely(tmp > -(PG_INT16_MIN / 8)))
+ goto out_of_range;
+
+ tmp = tmp * 8 + (*ptr++ - '0');
+ }
+ else if (*ptr == '_')
+ {
+ /* underscore must be followed by more digits */
+ ptr++;
+ if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
+ goto invalid_syntax;
+ }
+ else
+ break;
}
}
else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
{
firstdigit = ptr += 2;
- while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+ while (*ptr)
{
- if (unlikely(tmp > -(PG_INT16_MIN / 2)))
- goto out_of_range;
-
- tmp = tmp * 2 + (*ptr++ - '0');
+ if (*ptr >= '0' && *ptr <= '1')
+ {
+ if (unlikely(tmp > -(PG_INT16_MIN / 2)))
+ goto out_of_range;
+
+ tmp = tmp * 2 + (*ptr++ - '0');
+ }
+ else if (*ptr == '_')
+ {
+ /* underscore must be followed by more digits */
+ ptr++;
+ if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
+ goto invalid_syntax;
+ }
+ else
+ break;
}
}
else
{
firstdigit = ptr;
- while (*ptr && isdigit((unsigned char) *ptr))
+ while (*ptr)
{
- if (unlikely(tmp > -(PG_INT16_MIN / 10)))
- goto out_of_range;
-
- tmp = tmp * 10 + (*ptr++ - '0');
+ if (isdigit((unsigned char) *ptr))
+ {
+ if (unlikely(tmp > -(PG_INT16_MIN / 10)))
+ goto out_of_range;
+
+ tmp = tmp * 10 + (*ptr++ - '0');
+ }
+ else if (*ptr == '_')
+ {
+ /* underscore may not be first */
+ if (unlikely(ptr == firstdigit))
+ goto invalid_syntax;
+ /* and it must be followed by more digits */
+ ptr++;
+ if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
+ goto invalid_syntax;
+ }
+ else
+ break;
}
}
@@ -268,48 +319,99 @@ pg_strtoint32_safe(const char *s, Node *escontext)
{
firstdigit = ptr += 2;
- while (*ptr && isxdigit((unsigned char) *ptr))
+ while (*ptr)
{
- if (unlikely(tmp > -(PG_INT32_MIN / 16)))
- goto out_of_range;
-
- tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
+ if (isxdigit((unsigned char) *ptr))
+ {
+ if (unlikely(tmp > -(PG_INT32_MIN / 16)))
+ goto out_of_range;
+
+ tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
+ }
+ else if (*ptr == '_')
+ {
+ /* underscore must be followed by more digits */
+ ptr++;
+ if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
+ goto invalid_syntax;
+ }
+ else
+ break;
}
}
else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
{
firstdigit = ptr += 2;
- while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+ while (*ptr)
{
- if (unlikely(tmp > -(PG_INT32_MIN / 8)))
- goto out_of_range;
-
- tmp = tmp * 8 + (*ptr++ - '0');
+ if (*ptr >= '0' && *ptr <= '7')
+ {
+ if (unlikely(tmp > -(PG_INT32_MIN / 8)))
+ goto out_of_range;
+
+ tmp = tmp * 8 + (*ptr++ - '0');
+ }
+ else if (*ptr == '_')
+ {
+ /* underscore must be followed by more digits */
+ ptr++;
+ if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
+ goto invalid_syntax;
+ }
+ else
+ break;
}
}
else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
{
firstdigit = ptr += 2;
- while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+ while (*ptr)
{
- if (unlikely(tmp > -(PG_INT32_MIN / 2)))
- goto out_of_range;
-
- tmp = tmp * 2 + (*ptr++ - '0');
+ if (*ptr >= '0' && *ptr <= '1')
+ {
+ if (unlikely(tmp > -(PG_INT32_MIN / 2)))
+ goto out_of_range;
+
+ tmp = tmp * 2 + (*ptr++ - '0');
+ }
+ else if (*ptr == '_')
+ {
+ /* underscore must be followed by more digits */
+ ptr++;
+ if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
+ goto invalid_syntax;
+ }
+ else
+ break;
}
}
else
{
firstdigit = ptr;
- while (*ptr && isdigit((unsigned char) *ptr))
+ while (*ptr)
{
- if (unlikely(tmp > -(PG_INT32_MIN / 10)))
- goto out_of_range;
-
- tmp = tmp * 10 + (*ptr++ - '0');
+ if (isdigit((unsigned char) *ptr))
+ {
+ if (unlikely(tmp > -(PG_INT32_MIN / 10)))
+ goto out_of_range;
+
+ tmp = tmp * 10 + (*ptr++ - '0');
+ }
+ else if (*ptr == '_')
+ {
+ /* underscore may not be first */
+ if (unlikely(ptr == firstdigit))
+ goto invalid_syntax;
+ /* and it must be followed by more digits */
+ ptr++;
+ if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
+ goto invalid_syntax;
+ }
+ else
+ break;
}
}
@@ -395,48 +497,99 @@ pg_strtoint64_safe(const char *s, Node *escontext)
{
firstdigit = ptr += 2;
- while (*ptr && isxdigit((unsigned char) *ptr))
+ while (*ptr)
{
- if (unlikely(tmp > -(PG_INT64_MIN / 16)))
- goto out_of_range;
-
- tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
+ if (isxdigit((unsigned char) *ptr))
+ {
+ if (unlikely(tmp > -(PG_INT64_MIN / 16)))
+ goto out_of_range;
+
+ tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
+ }
+ else if (*ptr == '_')
+ {
+ /* underscore must be followed by more digits */
+ ptr++;
+ if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
+ goto invalid_syntax;
+ }
+ else
+ break;
}
}
else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
{
firstdigit = ptr += 2;
- while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+ while (*ptr)
{
- if (unlikely(tmp > -(PG_INT64_MIN / 8)))
- goto out_of_range;
-
- tmp = tmp * 8 + (*ptr++ - '0');
+ if (*ptr >= '0' && *ptr <= '7')
+ {
+ if (unlikely(tmp > -(PG_INT64_MIN / 8)))
+ goto out_of_range;
+
+ tmp = tmp * 8 + (*ptr++ - '0');
+ }
+ else if (*ptr == '_')
+ {
+ /* underscore must be followed by more digits */
+ ptr++;
+ if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
+ goto invalid_syntax;
+ }
+ else
+ break;
}
}
else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
{
firstdigit = ptr += 2;
- while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+ while (*ptr)
{
- if (unlikely(tmp > -(PG_INT64_MIN / 2)))
- goto out_of_range;
-
- tmp = tmp * 2 + (*ptr++ - '0');
+ if (*ptr >= '0' && *ptr <= '1')
+ {
+ if (unlikely(tmp > -(PG_INT64_MIN / 2)))
+ goto out_of_range;
+
+ tmp = tmp * 2 + (*ptr++ - '0');
+ }
+ else if (*ptr == '_')
+ {
+ /* underscore must be followed by more digits */
+ ptr++;
+ if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
+ goto invalid_syntax;
+ }
+ else
+ break;
}
}
else
{
firstdigit = ptr;
- while (*ptr && isdigit((unsigned char) *ptr))
+ while (*ptr)
{
- if (unlikely(tmp > -(PG_INT64_MIN / 10)))
- goto out_of_range;
-
- tmp = tmp * 10 + (*ptr++ - '0');
+ if (isdigit((unsigned char) *ptr))
+ {
+ if (unlikely(tmp > -(PG_INT64_MIN / 10)))
+ goto out_of_range;
+
+ tmp = tmp * 10 + (*ptr++ - '0');
+ }
+ else if (*ptr == '_')
+ {
+ /* underscore may not be first */
+ if (unlikely(ptr == firstdigit))
+ goto invalid_syntax;
+ /* and it must be followed by more digits */
+ ptr++;
+ if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
+ goto invalid_syntax;
+ }
+ else
+ break;
}
}