diff options
author | Alexander Korotkov <akorotkov@postgresql.org> | 2018-09-09 21:19:51 +0300 |
---|---|---|
committer | Alexander Korotkov <akorotkov@postgresql.org> | 2018-09-09 21:19:51 +0300 |
commit | cf984672427ebc4446213fe8f71d8899452802b6 (patch) | |
tree | 034f104000ecda901d2ab7a9f459d250d1a7406f /src | |
parent | 5f08accdad2b03e0107bdc73d48783a01fe51c8c (diff) | |
download | postgresql-cf984672427ebc4446213fe8f71d8899452802b6.tar.gz postgresql-cf984672427ebc4446213fe8f71d8899452802b6.zip |
Improve behavior of to_timestamp()/to_date() functions
to_timestamp()/to_date() functions were introduced mainly for Oracle
compatibility, and became very popular among PostgreSQL users. However, some
behavior of to_timestamp()/to_date() functions are both incompatible with Oracle
and confusing for our users. This behavior is related to handling of spaces and
separators in non FX (fixed format) mode. This commit reworks this behavior
making less confusing, better documented and more compatible with Oracle.
Nevertheless, there are still following incompatibilities with Oracle.
1) We don't insist that there are no format string patterns unmatched to
input string.
2) In FX mode we don't insist space and separators in format string to exactly
match input string.
3) When format string patterns are divided by mix of spaces and separators, we
don't distinguish them, while Oracle takes into account only last group of
spaces/separators.
Discussion: https://postgr.es/m/1873520224.1784572.1465833145330.JavaMail.yahoo%40mail.yahoo.com
Author: Artur Zakirov, Alexander Korotkov, Liudmila Mantrova
Review: Amul Sul, Robert Haas, Tom Lane, Dmitry Dolgov, David G. Johnston
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/utils/adt/formatting.c | 112 | ||||
-rw-r--r-- | src/test/regress/expected/horology.out | 105 | ||||
-rw-r--r-- | src/test/regress/sql/horology.sql | 27 |
3 files changed, 222 insertions, 22 deletions
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 30696e3575d..2ed8ca675bd 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -165,6 +165,8 @@ typedef struct #define NODE_TYPE_END 1 #define NODE_TYPE_ACTION 2 #define NODE_TYPE_CHAR 3 +#define NODE_TYPE_SEPARATOR 4 +#define NODE_TYPE_SPACE 5 #define SUFFTYPE_PREFIX 1 #define SUFFTYPE_POSTFIX 2 @@ -955,6 +957,7 @@ typedef struct NUMProc static const KeyWord *index_seq_search(const char *str, const KeyWord *kw, const int *index); static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type); +static bool is_separator_char(const char *str); static void NUMDesc_prepare(NUMDesc *num, FormatNode *n); static void parse_format(FormatNode *node, const char *str, const KeyWord *kw, const KeySuffix *suf, const int *index, int ver, NUMDesc *Num); @@ -1044,6 +1047,16 @@ suff_search(const char *str, const KeySuffix *suf, int type) return NULL; } +static bool +is_separator_char(const char *str) +{ + /* ASCII printable character, but not letter or digit */ + return (*str > 0x20 && *str < 0x7F && + !(*str >= 'A' && *str <= 'Z') && + !(*str >= 'a' && *str <= 'z') && + !(*str >= '0' && *str <= '9')); +} + /* ---------- * Prepare NUMDesc (number description struct) via FormatNode struct * ---------- @@ -1319,7 +1332,14 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, if (*str == '\\' && *(str + 1) == '"') str++; chlen = pg_mblen(str); - n->type = NODE_TYPE_CHAR; + + if (ver == DCH_TYPE && is_separator_char(str)) + n->type = NODE_TYPE_SEPARATOR; + else if (isspace((unsigned char) *str)) + n->type = NODE_TYPE_SPACE; + else + n->type = NODE_TYPE_CHAR; + memcpy(n->character, str, chlen); n->character[chlen] = '\0'; n->key = NULL; @@ -2987,27 +3007,66 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) int len, value; bool fx_mode = false; + /* number of extra skipped characters (more than given in format string) */ + int extra_skip = 0; for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++) { - if (n->type != NODE_TYPE_ACTION) + /* + * Ignore spaces at the beginning of the string and before fields when + * not in FX (fixed width) mode. + */ + if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) && + (n->type == NODE_TYPE_ACTION || n == node)) + { + while (*s != '\0' && isspace((unsigned char) *s)) + { + s++; + extra_skip++; + } + } + + if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR) + { + if (!fx_mode) + { + /* + * In non FX (fixed format) mode one format string space or + * separator match to one space or separator in input string. + * Or match nothing if there is no space or separator in + * the current position of input string. + */ + extra_skip--; + if (isspace((unsigned char) *s) || is_separator_char(s)) + { + s++; + extra_skip++; + } + } + else + { + /* + * In FX mode, on format string space or separator we consume + * exactly one character from input string. Notice we don't + * insist that the consumed character match the format's + * character. + */ + s += pg_mblen(s); + } + continue; + } + else if (n->type != NODE_TYPE_ACTION) { /* - * Separator, so consume one character from input string. Notice - * we don't insist that the consumed character match the format's - * character. + * Text character, so consume one character from input string. + * Notice we don't insist that the consumed character match the + * format's character. + * Text field ignores FX mode. */ s += pg_mblen(s); continue; } - /* Ignore spaces before fields when not in FX (fixed width) mode */ - if (!fx_mode && n->key->id != DCH_FX) - { - while (*s != '\0' && isspace((unsigned char) *s)) - s++; - } - from_char_set_mode(out, n->key->date_mode); switch (n->key->id) @@ -3086,10 +3145,24 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) n->key->name))); break; case DCH_TZH: - out->tzsign = *s == '-' ? -1 : +1; - + /* + * Value of TZH might be negative. And the issue is that we + * might swallow minus sign as the separator. So, if we have + * skipped more characters than specified in the format string, + * then we consider prepending last skipped minus to TZH. + */ if (*s == '+' || *s == '-' || *s == ' ') + { + out->tzsign = *s == '-' ? -1 : +1; s++; + } + else + { + if (extra_skip > 0 && *(s - 1) == '-') + out->tzsign = -1; + else + out->tzsign = +1; + } from_char_parse_int_len(&out->tzh, &s, 2, n); break; @@ -3261,6 +3334,17 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) SKIP_THth(s, n->suffix); break; } + + /* Ignore all spaces after fields */ + if (!fx_mode) + { + extra_skip = 0; + while (*s != '\0' && isspace((unsigned char) *s)) + { + s++; + extra_skip++; + } + } } } diff --git a/src/test/regress/expected/horology.out b/src/test/regress/expected/horology.out index 63e39198e68..7d11f251584 100644 --- a/src/test/regress/expected/horology.out +++ b/src/test/regress/expected/horology.out @@ -2769,14 +2769,32 @@ SELECT to_timestamp('97/2/16 8:14:30', 'FMYYYY/FMMM/FMDD FMHH:FMMI:FMSS'); Sat Feb 16 08:14:30 0097 PST (1 row) +SELECT to_timestamp('2011$03!18 23_38_15', 'YYYY-MM-DD HH24:MI:SS'); + to_timestamp +------------------------------ + Fri Mar 18 23:38:15 2011 PDT +(1 row) + SELECT to_timestamp('1985 January 12', 'YYYY FMMonth DD'); to_timestamp ------------------------------ Sat Jan 12 00:00:00 1985 PST (1 row) +SELECT to_timestamp('1985 FMMonth 12', 'YYYY "FMMonth" DD'); + to_timestamp +------------------------------ + Sat Jan 12 00:00:00 1985 PST +(1 row) + +SELECT to_timestamp('1985 \ 12', 'YYYY \\ DD'); + to_timestamp +------------------------------ + Sat Jan 12 00:00:00 1985 PST +(1 row) + SELECT to_timestamp('My birthday-> Year: 1976, Month: May, Day: 16', - '"My birthday-> Year" YYYY, "Month:" FMMonth, "Day:" DD'); + '"My birthday-> Year:" YYYY, "Month:" FMMonth, "Day:" DD'); to_timestamp ------------------------------ Sun May 16 00:00:00 1976 PDT @@ -2789,7 +2807,7 @@ SELECT to_timestamp('1,582nd VIII 21', 'Y,YYYth FMRM DD'); (1 row) SELECT to_timestamp('15 "text between quote marks" 98 54 45', - E'HH24 "\\text between quote marks\\"" YY MI SS'); + E'HH24 "\\"text between quote marks\\"" YY MI SS'); to_timestamp ------------------------------ Thu Jan 01 15:54:45 1998 PST @@ -2810,6 +2828,24 @@ SELECT to_timestamp('2000January09Sunday', 'YYYYFMMonthDDFMDay'); SELECT to_timestamp('97/Feb/16', 'YYMonDD'); ERROR: invalid value "/Fe" for "Mon" DETAIL: The given value did not match any of the allowed values for this field. +SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD'); + to_timestamp +------------------------------ + Sun Feb 16 00:00:00 1997 PST +(1 row) + +SELECT to_timestamp('97/Feb/16', 'FXYY:Mon:DD'); + to_timestamp +------------------------------ + Sun Feb 16 00:00:00 1997 PST +(1 row) + +SELECT to_timestamp('97/Feb/16', 'FXYY/Mon/DD'); + to_timestamp +------------------------------ + Sun Feb 16 00:00:00 1997 PST +(1 row) + SELECT to_timestamp('19971116', 'YYYYMMDD'); to_timestamp ------------------------------ @@ -2966,7 +3002,7 @@ SELECT to_timestamp('2011-12-18 11:38 20', 'YYYY-MM-DD HH12:MI TZM'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); to_timestamp ------------------------------ - Sun Dec 18 03:38:15 2011 PST + Sun Dec 18 23:38:15 2011 PST (1 row) SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); @@ -2996,7 +3032,64 @@ SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); to_timestamp ------------------------------ - Sun Dec 18 03:38:15 2011 PST + Sun Dec 18 23:38:15 2011 PST +(1 row) + +SELECT to_timestamp('2000+ JUN', 'YYYY/MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp(' 2000 +JUN', 'YYYY/MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp(' 2000 +JUN', 'YYYY//MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000 +JUN', 'YYYY//MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000 + JUN', 'YYYY MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000 ++ JUN', 'YYYY MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); +ERROR: invalid value "+ J" for "MON" +DETAIL: The given value did not match any of the allowed values for this field. +SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000 -10', 'YYYY TZH'); + to_timestamp +------------------------------ + Sat Jan 01 02:00:00 2000 PST +(1 row) + +SELECT to_timestamp('2000 -10', 'YYYY TZH'); + to_timestamp +------------------------------ + Fri Dec 31 06:00:00 1999 PST (1 row) SELECT to_date('2011 12 18', 'YYYY MM DD'); @@ -3014,13 +3107,13 @@ SELECT to_date('2011 12 18', 'YYYY MM DD'); SELECT to_date('2011 12 18', 'YYYY MM DD'); to_date ------------ - 12-08-2011 + 12-18-2011 (1 row) SELECT to_date('2011 12 18', 'YYYY MM DD'); to_date ------------ - 02-18-2011 + 12-18-2011 (1 row) SELECT to_date('2011 12 18', 'YYYY MM DD'); diff --git a/src/test/regress/sql/horology.sql b/src/test/regress/sql/horology.sql index ebb196a1cfc..807037be76e 100644 --- a/src/test/regress/sql/horology.sql +++ b/src/test/regress/sql/horology.sql @@ -392,15 +392,21 @@ SELECT to_timestamp('0097/Feb/16 --> 08:14:30', 'YYYY/Mon/DD --> HH:MI:SS'); SELECT to_timestamp('97/2/16 8:14:30', 'FMYYYY/FMMM/FMDD FMHH:FMMI:FMSS'); +SELECT to_timestamp('2011$03!18 23_38_15', 'YYYY-MM-DD HH24:MI:SS'); + SELECT to_timestamp('1985 January 12', 'YYYY FMMonth DD'); +SELECT to_timestamp('1985 FMMonth 12', 'YYYY "FMMonth" DD'); + +SELECT to_timestamp('1985 \ 12', 'YYYY \\ DD'); + SELECT to_timestamp('My birthday-> Year: 1976, Month: May, Day: 16', - '"My birthday-> Year" YYYY, "Month:" FMMonth, "Day:" DD'); + '"My birthday-> Year:" YYYY, "Month:" FMMonth, "Day:" DD'); SELECT to_timestamp('1,582nd VIII 21', 'Y,YYYth FMRM DD'); SELECT to_timestamp('15 "text between quote marks" 98 54 45', - E'HH24 "\\text between quote marks\\"" YY MI SS'); + E'HH24 "\\"text between quote marks\\"" YY MI SS'); SELECT to_timestamp('05121445482000', 'MMDDHH24MISSYYYY'); @@ -408,6 +414,12 @@ SELECT to_timestamp('2000January09Sunday', 'YYYYFMMonthDDFMDay'); SELECT to_timestamp('97/Feb/16', 'YYMonDD'); +SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD'); + +SELECT to_timestamp('97/Feb/16', 'FXYY:Mon:DD'); + +SELECT to_timestamp('97/Feb/16', 'FXYY/Mon/DD'); + SELECT to_timestamp('19971116', 'YYYYMMDD'); SELECT to_timestamp('20000-1116', 'YYYY-MMDD'); @@ -464,6 +476,17 @@ SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); +SELECT to_timestamp('2000+ JUN', 'YYYY/MON'); +SELECT to_timestamp(' 2000 +JUN', 'YYYY/MON'); +SELECT to_timestamp(' 2000 +JUN', 'YYYY//MON'); +SELECT to_timestamp('2000 +JUN', 'YYYY//MON'); +SELECT to_timestamp('2000 + JUN', 'YYYY MON'); +SELECT to_timestamp('2000 ++ JUN', 'YYYY MON'); +SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); +SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); +SELECT to_timestamp('2000 -10', 'YYYY TZH'); +SELECT to_timestamp('2000 -10', 'YYYY TZH'); + SELECT to_date('2011 12 18', 'YYYY MM DD'); SELECT to_date('2011 12 18', 'YYYY MM DD'); SELECT to_date('2011 12 18', 'YYYY MM DD'); |