diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2014-10-16 15:22:10 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2014-10-16 15:22:10 -0400 |
commit | b2cbced9eef20692b51a84d68d469627f4fc43ac (patch) | |
tree | 21774c6c010312abd2045c5d7b73f63a6828ec2c /src/backend/utils | |
parent | 90063a7612e2730f7757c2a80ba384bbe7e35c4b (diff) | |
download | postgresql-b2cbced9eef20692b51a84d68d469627f4fc43ac.tar.gz postgresql-b2cbced9eef20692b51a84d68d469627f4fc43ac.zip |
Support timezone abbreviations that sometimes change.
Up to now, PG has assumed that any given timezone abbreviation (such as
"EDT") represents a constant GMT offset in the usage of any particular
region; we had a way to configure what that offset was, but not for it
to be changeable over time. But, as with most things horological, this
view of the world is too simplistic: there are numerous regions that have
at one time or another switched to a different GMT offset but kept using
the same timezone abbreviation. Almost the entire Russian Federation did
that a few years ago, and later this month they're going to do it again.
And there are similar examples all over the world.
To cope with this, invent the notion of a "dynamic timezone abbreviation",
which is one that is referenced to a particular underlying timezone
(as defined in the IANA timezone database) and means whatever it currently
means in that zone. For zones that use or have used daylight-savings time,
the standard and DST abbreviations continue to have the property that you
can specify standard or DST time and get that time offset whether or not
DST was theoretically in effect at the time. However, the abbreviations
mean what they meant at the time in question (or most recently before that
time) rather than being absolutely fixed.
The standard abbreviation-list files have been changed to use this behavior
for abbreviations that have actually varied in meaning since 1970. The
old simple-numeric definitions are kept for abbreviations that have not
changed, since they are a bit faster to resolve.
While this is clearly a new feature, it seems necessary to back-patch it
into all active branches, because otherwise use of Russian zone
abbreviations is going to become even more problematic than it already was.
This change supersedes the changes in commit 513d06ded et al to modify the
fixed meanings of the Russian abbreviations; since we've not shipped that
yet, this will avoid an undesirably incompatible (not to mention incorrect)
change in behavior for timestamps between 2011 and 2014.
This patch makes some cosmetic changes in ecpglib to keep its usage of
datetime lookup tables as similar as possible to the backend code, but
doesn't do anything about the increasingly obsolete set of timezone
abbreviation definitions that are hard-wired into ecpglib. Whatever we
do about that will likely not be appropriate material for back-patching.
Also, a potential free() of a garbage pointer after an out-of-memory
failure in ecpglib has been fixed.
This patch also fixes pre-existing bugs in DetermineTimeZoneOffset() that
caused it to produce unexpected results near a timezone transition, if
both the "before" and "after" states are marked as standard time. We'd
only ever thought about or tested transitions between standard and DST
time, but that's not what's happening when a zone simply redefines their
base GMT offset.
In passing, update the SGML documentation to refer to the Olson/zoneinfo/
zic timezone database as the "IANA" database, since it's now being
maintained under the auspices of IANA.
Diffstat (limited to 'src/backend/utils')
-rw-r--r-- | src/backend/utils/adt/date.c | 31 | ||||
-rw-r--r-- | src/backend/utils/adt/datetime.c | 588 | ||||
-rw-r--r-- | src/backend/utils/adt/timestamp.c | 96 | ||||
-rw-r--r-- | src/backend/utils/misc/tzparser.c | 77 |
4 files changed, 608 insertions, 184 deletions
diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c index 073104d4bac..bb23b12c171 100644 --- a/src/backend/utils/adt/date.c +++ b/src/backend/utils/adt/date.c @@ -2695,24 +2695,39 @@ timetz_zone(PG_FUNCTION_ARGS) pg_tz *tzp; /* - * Look up the requested timezone. First we look in the date token table - * (to handle cases like "EST"), and if that fails, we look in the - * timezone database (to handle cases like "America/New_York"). (This - * matches the order in which timestamp input checks the cases; it's - * important because the timezone database unwisely uses a few zone names - * that are identical to offset abbreviations.) + * Look up the requested timezone. First we look in the timezone + * abbreviation table (to handle cases like "EST"), and if that fails, we + * look in the timezone database (to handle cases like + * "America/New_York"). (This matches the order in which timestamp input + * checks the cases; it's important because the timezone database unwisely + * uses a few zone names that are identical to offset abbreviations.) */ text_to_cstring_buffer(zone, tzname, sizeof(tzname)); + + /* DecodeTimezoneAbbrev requires lowercase input */ lowzone = downcase_truncate_identifier(tzname, strlen(tzname), false); - type = DecodeSpecial(0, lowzone, &val); + type = DecodeTimezoneAbbrev(0, lowzone, &val, &tzp); if (type == TZ || type == DTZ) - tz = val * MINS_PER_HOUR; + { + /* fixed-offset abbreviation */ + tz = -val; + } + else if (type == DYNTZ) + { + /* dynamic-offset abbreviation, resolve using current time */ + pg_time_t now = (pg_time_t) time(NULL); + struct pg_tm *tm; + + tm = pg_localtime(&now, tzp); + tz = DetermineTimeZoneAbbrevOffset(tm, tzname, tzp); + } else { + /* try it as a full zone name */ tzp = pg_tzset(tzname); if (tzp) { diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c index 7632d1177e6..4381a5ae437 100644 --- a/src/backend/utils/adt/datetime.c +++ b/src/backend/utils/adt/datetime.c @@ -50,6 +50,11 @@ static void AdjustFractSeconds(double frac, struct pg_tm * tm, fsec_t *fsec, int scale); static void AdjustFractDays(double frac, struct pg_tm * tm, fsec_t *fsec, int scale); +static int DetermineTimeZoneOffsetInternal(struct pg_tm * tm, pg_tz *tzp, + pg_time_t *tp); +static int DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t, const char *abbr, + pg_tz *tzp, int *isdst); +static pg_tz *FetchDynamicTimeZone(TimeZoneAbbrevTable *tbl, const datetkn *tp); const int day_tab[2][13] = @@ -70,41 +75,18 @@ const char *const days[] = {"Sunday", "Monday", "Tuesday", "Wednesday", *****************************************************************************/ /* - * Definitions for squeezing values into "value" - * We set aside a high bit for a sign, and scale the timezone offsets - * in minutes by a factor of 15 (so can represent quarter-hour increments). - */ -#define ABS_SIGNBIT ((char) 0200) -#define VALMASK ((char) 0177) -#define POS(n) (n) -#define NEG(n) ((n)|ABS_SIGNBIT) -#define SIGNEDCHAR(c) ((c)&ABS_SIGNBIT? -((c)&VALMASK): (c)) -#define FROMVAL(tp) (-SIGNEDCHAR((tp)->value) * 15) /* uncompress */ -#define TOVAL(tp, v) ((tp)->value = ((v) < 0? NEG((-(v))/15): POS(v)/15)) - -/* * datetktbl holds date/time keywords. * * Note that this table must be strictly alphabetically ordered to allow an * O(ln(N)) search algorithm to be used. * - * The token field is NOT guaranteed to be NULL-terminated. - * - * To keep this table reasonably small, we divide the value for TZ and DTZ - * entries by 15 (so they are on 15 minute boundaries) and truncate the token - * field at TOKMAXLEN characters. - * Formerly, we divided by 10 rather than 15 but there are a few time zones - * which are 30 or 45 minutes away from an even hour, most are on an hour - * boundary, and none on other boundaries. + * The token field must be NUL-terminated; we truncate entries to TOKMAXLEN + * characters to fit. * - * The static table contains no TZ or DTZ entries, rather those are loaded - * from configuration files and stored in timezonetktbl, which has the same - * format as the static datetktbl. + * The static table contains no TZ, DTZ, or DYNTZ entries; rather those + * are loaded from configuration files and stored in zoneabbrevtbl, whose + * abbrevs[] field has the same format as the static datetktbl. */ -static datetkn *timezonetktbl = NULL; - -static int sztimezonetktbl = 0; - static const datetkn datetktbl[] = { /* token, type, value */ {EARLY, RESERV, DTK_EARLY}, /* "-infinity" reserved for "early time" */ @@ -123,7 +105,7 @@ static const datetkn datetktbl[] = { {"december", MONTH, 12}, {"dow", RESERV, DTK_DOW}, /* day of week */ {"doy", RESERV, DTK_DOY}, /* day of year */ - {"dst", DTZMOD, 6}, + {"dst", DTZMOD, SECS_PER_HOUR}, {EPOCH, RESERV, DTK_EPOCH}, /* "epoch" reserved for system epoch time */ {"feb", MONTH, 2}, {"february", MONTH, 2}, @@ -185,6 +167,10 @@ static const datetkn datetktbl[] = { static int szdatetktbl = sizeof datetktbl / sizeof datetktbl[0]; +/* + * deltatktbl: same format as datetktbl, but holds keywords used to represent + * time units (eg, for intervals, and for EXTRACT). + */ static const datetkn deltatktbl[] = { /* token, type, value */ {"@", IGNORE_DTF, 0}, /* postgres relative prefix */ @@ -254,10 +240,16 @@ static const datetkn deltatktbl[] = { static int szdeltatktbl = sizeof deltatktbl / sizeof deltatktbl[0]; +static TimeZoneAbbrevTable *zoneabbrevtbl = NULL; + +/* Caches of recent lookup results in the above tables */ + static const datetkn *datecache[MAXDATEFIELDS] = {NULL}; static const datetkn *deltacache[MAXDATEFIELDS] = {NULL}; +static const datetkn *abbrevcache[MAXDATEFIELDS] = {NULL}; + /* * strtoi --- just like strtol, but returns int not long @@ -798,6 +790,9 @@ DecodeDateTime(char **field, int *ftype, int nf, bool is2digits = FALSE; bool bc = FALSE; pg_tz *namedTz = NULL; + pg_tz *abbrevTz = NULL; + pg_tz *valtz; + char *abbrev = NULL; struct pg_tm cur_tm; /* @@ -1194,7 +1189,10 @@ DecodeDateTime(char **field, int *ftype, int nf, case DTK_STRING: case DTK_SPECIAL: - type = DecodeSpecial(i, field[i], &val); + /* timezone abbrevs take precedence over built-in tokens */ + type = DecodeTimezoneAbbrev(i, field[i], &val, &valtz); + if (type == UNKNOWN_FIELD) + type = DecodeSpecial(i, field[i], &val); if (type == IGNORE_DTF) continue; @@ -1286,7 +1284,7 @@ DecodeDateTime(char **field, int *ftype, int nf, tm->tm_isdst = 1; if (tzp == NULL) return DTERR_BAD_FORMAT; - *tzp += val * MINS_PER_HOUR; + *tzp -= val; break; case DTZ: @@ -1299,17 +1297,23 @@ DecodeDateTime(char **field, int *ftype, int nf, tm->tm_isdst = 1; if (tzp == NULL) return DTERR_BAD_FORMAT; - *tzp = val * MINS_PER_HOUR; + *tzp = -val; break; case TZ: tm->tm_isdst = 0; if (tzp == NULL) return DTERR_BAD_FORMAT; - *tzp = val * MINS_PER_HOUR; + *tzp = -val; break; - case IGNORE_DTF: + case DYNTZ: + tmask |= DTK_M(TZ); + if (tzp == NULL) + return DTERR_BAD_FORMAT; + /* we'll determine the actual offset later */ + abbrevTz = valtz; + abbrev = field[i]; break; case AMPM: @@ -1419,7 +1423,20 @@ DecodeDateTime(char **field, int *ftype, int nf, *tzp = DetermineTimeZoneOffset(tm, namedTz); } - /* timezone not specified? then find local timezone if possible */ + /* + * Likewise, if we had a dynamic timezone abbreviation, resolve it + * now. + */ + if (abbrevTz != NULL) + { + /* daylight savings time modifier disallowed with dynamic TZ */ + if (fmask & DTK_M(DTZMOD)) + return DTERR_BAD_FORMAT; + + *tzp = DetermineTimeZoneAbbrevOffset(tm, abbrev, abbrevTz); + } + + /* timezone not specified? then use session timezone */ if (tzp != NULL && !(fmask & DTK_M(TZ))) { /* @@ -1439,17 +1456,40 @@ DecodeDateTime(char **field, int *ftype, int nf, /* DetermineTimeZoneOffset() * - * Given a struct pg_tm in which tm_year, tm_mon, tm_mday, tm_hour, tm_min, and - * tm_sec fields are set, attempt to determine the applicable time zone - * (ie, regular or daylight-savings time) at that time. Set the struct pg_tm's - * tm_isdst field accordingly, and return the actual timezone offset. + * Given a struct pg_tm in which tm_year, tm_mon, tm_mday, tm_hour, tm_min, + * and tm_sec fields are set, and a zic-style time zone definition, determine + * the applicable GMT offset and daylight-savings status at that time. + * Set the struct pg_tm's tm_isdst field accordingly, and return the GMT + * offset as the function result. + * + * Note: if the date is out of the range we can deal with, we return zero + * as the GMT offset and set tm_isdst = 0. We don't throw an error here, + * though probably some higher-level code will. + */ +int +DetermineTimeZoneOffset(struct pg_tm * tm, pg_tz *tzp) +{ + pg_time_t t; + + return DetermineTimeZoneOffsetInternal(tm, tzp, &t); +} + + +/* DetermineTimeZoneOffsetInternal() + * + * As above, but also return the actual UTC time imputed to the date/time + * into *tp. + * + * In event of an out-of-range date, we punt by returning zero into *tp. + * This is okay for the immediate callers but is a good reason for not + * exposing this worker function globally. * * Note: it might seem that we should use mktime() for this, but bitter * experience teaches otherwise. This code is much faster than most versions * of mktime(), anyway. */ -int -DetermineTimeZoneOffset(struct pg_tm * tm, pg_tz *tzp) +static int +DetermineTimeZoneOffsetInternal(struct pg_tm * tm, pg_tz *tzp, pg_time_t *tp) { int date, sec; @@ -1468,8 +1508,8 @@ DetermineTimeZoneOffset(struct pg_tm * tm, pg_tz *tzp) /* * First, generate the pg_time_t value corresponding to the given * y/m/d/h/m/s taken as GMT time. If this overflows, punt and decide the - * timezone is GMT. (We only need to worry about overflow on machines - * where pg_time_t is 32 bits.) + * timezone is GMT. (For a valid Julian date, integer overflow should be + * impossible with 64-bit pg_time_t, but let's check for safety.) */ if (!IS_VALID_JULIAN(tm->tm_year, tm->tm_mon, tm->tm_mday)) goto overflow; @@ -1506,6 +1546,7 @@ DetermineTimeZoneOffset(struct pg_tm * tm, pg_tz *tzp) { /* Non-DST zone, life is simple */ tm->tm_isdst = before_isdst; + *tp = mytime - before_gmtoff; return -(int) before_gmtoff; } @@ -1526,38 +1567,124 @@ DetermineTimeZoneOffset(struct pg_tm * tm, pg_tz *tzp) goto overflow; /* - * If both before or both after the boundary time, we know what to do + * If both before or both after the boundary time, we know what to do. The + * boundary time itself is considered to be after the transition, which + * means we can accept aftertime == boundary in the second case. */ - if (beforetime <= boundary && aftertime < boundary) + if (beforetime < boundary && aftertime < boundary) { tm->tm_isdst = before_isdst; + *tp = beforetime; return -(int) before_gmtoff; } if (beforetime > boundary && aftertime >= boundary) { tm->tm_isdst = after_isdst; + *tp = aftertime; return -(int) after_gmtoff; } /* - * It's an invalid or ambiguous time due to timezone transition. Prefer - * the standard-time interpretation. + * It's an invalid or ambiguous time due to timezone transition. In a + * spring-forward transition, prefer the "before" interpretation; in a + * fall-back transition, prefer "after". (We used to define and implement + * this test as "prefer the standard-time interpretation", but that rule + * does not help to resolve the behavior when both times are reported as + * standard time; which does happen, eg Europe/Moscow in Oct 2014.) */ - if (after_isdst == 0) + if (beforetime > aftertime) { - tm->tm_isdst = after_isdst; - return -(int) after_gmtoff; + tm->tm_isdst = before_isdst; + *tp = beforetime; + return -(int) before_gmtoff; } - tm->tm_isdst = before_isdst; - return -(int) before_gmtoff; + tm->tm_isdst = after_isdst; + *tp = aftertime; + return -(int) after_gmtoff; overflow: /* Given date is out of range, so assume UTC */ tm->tm_isdst = 0; + *tp = 0; return 0; } +/* DetermineTimeZoneAbbrevOffset() + * + * Determine the GMT offset and DST flag to be attributed to a dynamic + * time zone abbreviation, that is one whose meaning has changed over time. + * *tm contains the local time at which the meaning should be determined, + * and tm->tm_isdst receives the DST flag. + * + * This differs from the behavior of DetermineTimeZoneOffset() in that a + * standard-time or daylight-time abbreviation forces use of the corresponding + * GMT offset even when the zone was then in DS or standard time respectively. + */ +int +DetermineTimeZoneAbbrevOffset(struct pg_tm * tm, const char *abbr, pg_tz *tzp) +{ + pg_time_t t; + + /* + * Compute the UTC time we want to probe at. (In event of overflow, we'll + * probe at the epoch, which is a bit random but probably doesn't matter.) + */ + (void) DetermineTimeZoneOffsetInternal(tm, tzp, &t); + + return DetermineTimeZoneAbbrevOffsetInternal(t, abbr, tzp, &tm->tm_isdst); +} + + +/* DetermineTimeZoneAbbrevOffsetTS() + * + * As above but the probe time is specified as a TimestampTz (hence, UTC time), + * and DST status is returned into *isdst rather than into tm->tm_isdst. + */ +int +DetermineTimeZoneAbbrevOffsetTS(TimestampTz ts, const char *abbr, + pg_tz *tzp, int *isdst) +{ + pg_time_t t = timestamptz_to_time_t(ts); + + return DetermineTimeZoneAbbrevOffsetInternal(t, abbr, tzp, isdst); +} + + +/* DetermineTimeZoneAbbrevOffsetInternal() + * + * Workhorse for above two functions: work from a pg_time_t probe instant. + * DST status is returned into *isdst. + */ +static int +DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t, const char *abbr, + pg_tz *tzp, int *isdst) +{ + char upabbr[TZ_STRLEN_MAX + 1]; + unsigned char *p; + long int gmtoff; + + /* We need to force the abbrev to upper case */ + strlcpy(upabbr, abbr, sizeof(upabbr)); + for (p = (unsigned char *) upabbr; *p; p++) + *p = pg_toupper(*p); + + /* Look up the abbrev's meaning at this time in this zone */ + if (!pg_interpret_timezone_abbrev(upabbr, + &t, + &gmtoff, + isdst, + tzp)) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("time zone abbreviation \"%s\" is not used in time zone \"%s\"", + abbr, pg_get_timezone_name(tzp)))); + + /* Change sign to agree with DetermineTimeZoneOffset() */ + return (int) -gmtoff; +} + + /* DecodeTimeOnly() * Interpret parsed string as time fields only. * Returns 0 if successful, DTERR code if bogus input detected. @@ -1586,6 +1713,9 @@ DecodeTimeOnly(char **field, int *ftype, int nf, bool bc = FALSE; int mer = HR24; pg_tz *namedTz = NULL; + pg_tz *abbrevTz = NULL; + char *abbrev = NULL; + pg_tz *valtz; *dtype = DTK_TIME; tm->tm_hour = 0; @@ -1930,7 +2060,10 @@ DecodeTimeOnly(char **field, int *ftype, int nf, case DTK_STRING: case DTK_SPECIAL: - type = DecodeSpecial(i, field[i], &val); + /* timezone abbrevs take precedence over built-in tokens */ + type = DecodeTimezoneAbbrev(i, field[i], &val, &valtz); + if (type == UNKNOWN_FIELD) + type = DecodeSpecial(i, field[i], &val); if (type == IGNORE_DTF) continue; @@ -1978,7 +2111,7 @@ DecodeTimeOnly(char **field, int *ftype, int nf, tm->tm_isdst = 1; if (tzp == NULL) return DTERR_BAD_FORMAT; - *tzp += val * MINS_PER_HOUR; + *tzp -= val; break; case DTZ: @@ -1991,7 +2124,7 @@ DecodeTimeOnly(char **field, int *ftype, int nf, tm->tm_isdst = 1; if (tzp == NULL) return DTERR_BAD_FORMAT; - *tzp = val * MINS_PER_HOUR; + *tzp = -val; ftype[i] = DTK_TZ; break; @@ -1999,11 +2132,18 @@ DecodeTimeOnly(char **field, int *ftype, int nf, tm->tm_isdst = 0; if (tzp == NULL) return DTERR_BAD_FORMAT; - *tzp = val * MINS_PER_HOUR; + *tzp = -val; ftype[i] = DTK_TZ; break; - case IGNORE_DTF: + case DYNTZ: + tmask |= DTK_M(TZ); + if (tzp == NULL) + return DTERR_BAD_FORMAT; + /* we'll determine the actual offset later */ + abbrevTz = valtz; + abbrev = field[i]; + ftype[i] = DTK_TZ; break; case AMPM: @@ -2123,7 +2263,36 @@ DecodeTimeOnly(char **field, int *ftype, int nf, } } - /* timezone not specified? then find local timezone if possible */ + /* + * Likewise, if we had a dynamic timezone abbreviation, resolve it now. + */ + if (abbrevTz != NULL) + { + struct pg_tm tt, + *tmp = &tt; + + /* + * daylight savings time modifier but no standard timezone? then error + */ + if (fmask & DTK_M(DTZMOD)) + return DTERR_BAD_FORMAT; + + if ((fmask & DTK_DATE_M) == 0) + GetCurrentDateTime(tmp); + else + { + tmp->tm_year = tm->tm_year; + tmp->tm_mon = tm->tm_mon; + tmp->tm_mday = tm->tm_mday; + } + tmp->tm_hour = tm->tm_hour; + tmp->tm_min = tm->tm_min; + tmp->tm_sec = tm->tm_sec; + *tzp = DetermineTimeZoneAbbrevOffset(tmp, abbrev, abbrevTz); + tm->tm_isdst = tmp->tm_isdst; + } + + /* timezone not specified? then use session timezone */ if (tzp != NULL && !(fmask & DTK_M(TZ))) { struct pg_tm tt, @@ -2710,8 +2879,6 @@ DecodeNumberField(int len, char *str, int fmask, * Interpret string as a numeric timezone. * * Return 0 if okay (and set *tzp), a DTERR code if not okay. - * - * NB: this must *not* ereport on failure; see commands/variable.c. */ int DecodeTimezone(char *str, int *tzp) @@ -2776,14 +2943,75 @@ DecodeTimezone(char *str, int *tzp) return 0; } + +/* DecodeTimezoneAbbrev() + * Interpret string as a timezone abbreviation, if possible. + * + * Returns an abbreviation type (TZ, DTZ, or DYNTZ), or UNKNOWN_FIELD if + * string is not any known abbreviation. On success, set *offset and *tz to + * represent the UTC offset (for TZ or DTZ) or underlying zone (for DYNTZ). + * Note that full timezone names (such as America/New_York) are not handled + * here, mostly for historical reasons. + * + * Given string must be lowercased already. + * + * Implement a cache lookup since it is likely that dates + * will be related in format. + */ +int +DecodeTimezoneAbbrev(int field, char *lowtoken, + int *offset, pg_tz **tz) +{ + int type; + const datetkn *tp; + + tp = abbrevcache[field]; + /* use strncmp so that we match truncated tokens */ + if (tp == NULL || strncmp(lowtoken, tp->token, TOKMAXLEN) != 0) + { + if (zoneabbrevtbl) + tp = datebsearch(lowtoken, zoneabbrevtbl->abbrevs, + zoneabbrevtbl->numabbrevs); + else + tp = NULL; + } + if (tp == NULL) + { + type = UNKNOWN_FIELD; + *offset = 0; + *tz = NULL; + } + else + { + abbrevcache[field] = tp; + type = tp->type; + if (type == DYNTZ) + { + *offset = 0; + *tz = FetchDynamicTimeZone(zoneabbrevtbl, tp); + } + else + { + *offset = tp->value; + *tz = NULL; + } + } + + return type; +} + + /* DecodeSpecial() * Decode text string using lookup table. * + * Recognizes the keywords listed in datetktbl. + * Note: at one time this would also recognize timezone abbreviations, + * but no more; use DecodeTimezoneAbbrev for that. + * + * Given string must be lowercased already. + * * Implement a cache lookup since it is likely that dates * will be related in format. - * - * NB: this must *not* ereport on failure; - * see commands/variable.c. */ int DecodeSpecial(int field, char *lowtoken, int *val) @@ -2792,11 +3020,10 @@ DecodeSpecial(int field, char *lowtoken, int *val) const datetkn *tp; tp = datecache[field]; + /* use strncmp so that we match truncated tokens */ if (tp == NULL || strncmp(lowtoken, tp->token, TOKMAXLEN) != 0) { - tp = datebsearch(lowtoken, timezonetktbl, sztimezonetktbl); - if (tp == NULL) - tp = datebsearch(lowtoken, datetktbl, szdatetktbl); + tp = datebsearch(lowtoken, datetktbl, szdatetktbl); } if (tp == NULL) { @@ -2807,18 +3034,7 @@ DecodeSpecial(int field, char *lowtoken, int *val) { datecache[field] = tp; type = tp->type; - switch (type) - { - case TZ: - case DTZ: - case DTZMOD: - *val = FROMVAL(tp); - break; - - default: - *val = tp->value; - break; - } + *val = tp->value; } return type; @@ -3494,8 +3710,13 @@ DecodeISO8601Interval(char *str, /* DecodeUnits() * Decode text string using lookup table. - * This routine supports time interval decoding - * (hence, it need not recognize timezone names). + * + * This routine recognizes keywords associated with time interval units. + * + * Given string must be lowercased already. + * + * Implement a cache lookup since it is likely that dates + * will be related in format. */ int DecodeUnits(int field, char *lowtoken, int *val) @@ -3504,6 +3725,7 @@ DecodeUnits(int field, char *lowtoken, int *val) const datetkn *tp; tp = deltacache[field]; + /* use strncmp so that we match truncated tokens */ if (tp == NULL || strncmp(lowtoken, tp->token, TOKMAXLEN) != 0) { tp = datebsearch(lowtoken, deltatktbl, szdeltatktbl); @@ -3517,10 +3739,7 @@ DecodeUnits(int field, char *lowtoken, int *val) { deltacache[field] = tp; type = tp->type; - if (type == TZ || type == DTZ) - *val = FROMVAL(tp); - else - *val = tp->value; + *val = tp->value; } return type; @@ -3593,9 +3812,11 @@ datebsearch(const char *key, const datetkn *base, int nel) while (last >= base) { position = base + ((last - base) >> 1); - result = key[0] - position->token[0]; + /* precheck the first character for a bit of extra speed */ + result = (int) key[0] - (int) position->token[0]; if (result == 0) { + /* use strncmp so that we match truncated tokens */ result = strncmp(key, position->token, TOKMAXLEN); if (result == 0) return position; @@ -4142,15 +4363,26 @@ CheckDateTokenTable(const char *tablename, const datetkn *base, int nel) bool ok = true; int i; - for (i = 1; i < nel; i++) + for (i = 0; i < nel; i++) { - if (strncmp(base[i - 1].token, base[i].token, TOKMAXLEN) >= 0) + /* check for token strings that don't fit */ + if (strlen(base[i].token) > TOKMAXLEN) { /* %.*s is safe since all our tokens are ASCII */ - elog(LOG, "ordering error in %s table: \"%.*s\" >= \"%.*s\"", + elog(LOG, "token too long in %s table: \"%.*s\"", tablename, - TOKMAXLEN, base[i - 1].token, - TOKMAXLEN, base[i].token); + TOKMAXLEN + 1, base[i].token); + ok = false; + break; /* don't risk applying strcmp */ + } + /* check for out of order */ + if (i > 0 && + strcmp(base[i - 1].token, base[i].token) >= 0) + { + elog(LOG, "ordering error in %s table: \"%s\" >= \"%s\"", + tablename, + base[i - 1].token, + base[i].token); ok = false; } } @@ -4208,27 +4440,88 @@ TemporalTransform(int32 max_precis, Node *node) /* * This function gets called during timezone config file load or reload * to create the final array of timezone tokens. The argument array - * is already sorted in name order. The data is converted to datetkn - * format and installed in *tbl, which must be allocated by the caller. + * is already sorted in name order. + * + * The result is a TimeZoneAbbrevTable (which must be a single malloc'd chunk) + * or NULL on malloc failure. No other error conditions are defined. */ -void -ConvertTimeZoneAbbrevs(TimeZoneAbbrevTable *tbl, - struct tzEntry *abbrevs, int n) +TimeZoneAbbrevTable * +ConvertTimeZoneAbbrevs(struct tzEntry *abbrevs, int n) { - datetkn *newtbl = tbl->abbrevs; + TimeZoneAbbrevTable *tbl; + Size tbl_size; int i; + /* Space for fixed fields and datetkn array */ + tbl_size = offsetof(TimeZoneAbbrevTable, abbrevs) + + n * sizeof(datetkn); + tbl_size = MAXALIGN(tbl_size); + /* Count up space for dynamic abbreviations */ + for (i = 0; i < n; i++) + { + struct tzEntry *abbr = abbrevs + i; + + if (abbr->zone != NULL) + { + Size dsize; + + dsize = offsetof(DynamicZoneAbbrev, zone) + + strlen(abbr->zone) + 1; + tbl_size += MAXALIGN(dsize); + } + } + + /* Alloc the result ... */ + tbl = malloc(tbl_size); + if (!tbl) + return NULL; + + /* ... and fill it in */ + tbl->tblsize = tbl_size; tbl->numabbrevs = n; + /* in this loop, tbl_size reprises the space calculation above */ + tbl_size = offsetof(TimeZoneAbbrevTable, abbrevs) + + n * sizeof(datetkn); + tbl_size = MAXALIGN(tbl_size); for (i = 0; i < n; i++) { - /* do NOT use strlcpy here; token field need not be null-terminated */ - strncpy(newtbl[i].token, abbrevs[i].abbrev, TOKMAXLEN); - newtbl[i].type = abbrevs[i].is_dst ? DTZ : TZ; - TOVAL(&newtbl[i], abbrevs[i].offset / MINS_PER_HOUR); + struct tzEntry *abbr = abbrevs + i; + datetkn *dtoken = tbl->abbrevs + i; + + /* use strlcpy to truncate name if necessary */ + strlcpy(dtoken->token, abbr->abbrev, TOKMAXLEN + 1); + if (abbr->zone != NULL) + { + /* Allocate a DynamicZoneAbbrev for this abbreviation */ + DynamicZoneAbbrev *dtza; + Size dsize; + + dtza = (DynamicZoneAbbrev *) ((char *) tbl + tbl_size); + dtza->tz = NULL; + strcpy(dtza->zone, abbr->zone); + + dtoken->type = DYNTZ; + /* value is offset from table start to DynamicZoneAbbrev */ + dtoken->value = (int32) tbl_size; + + dsize = offsetof(DynamicZoneAbbrev, zone) + + strlen(abbr->zone) + 1; + tbl_size += MAXALIGN(dsize); + } + else + { + dtoken->type = abbr->is_dst ? DTZ : TZ; + dtoken->value = abbr->offset; + } } + /* Assert the two loops above agreed on size calculations */ + Assert(tbl->tblsize == tbl_size); + /* Check the ordering, if testing */ - Assert(CheckDateTokenTable("timezone offset", newtbl, n)); + Assert(CheckDateTokenTable("timezone abbreviations", tbl->abbrevs, n)); + + return tbl; } /* @@ -4239,16 +4532,46 @@ ConvertTimeZoneAbbrevs(TimeZoneAbbrevTable *tbl, void InstallTimeZoneAbbrevs(TimeZoneAbbrevTable *tbl) { - int i; + zoneabbrevtbl = tbl; + /* reset abbrevcache, which may contain pointers into old table */ + memset(abbrevcache, 0, sizeof(abbrevcache)); +} - timezonetktbl = tbl->abbrevs; - sztimezonetktbl = tbl->numabbrevs; +/* + * Helper subroutine to locate pg_tz timezone for a dynamic abbreviation. + */ +static pg_tz * +FetchDynamicTimeZone(TimeZoneAbbrevTable *tbl, const datetkn *tp) +{ + DynamicZoneAbbrev *dtza; - /* clear date cache in case it contains any stale timezone names */ - for (i = 0; i < MAXDATEFIELDS; i++) - datecache[i] = NULL; + /* Just some sanity checks to prevent indexing off into nowhere */ + Assert(tp->type == DYNTZ); + Assert(tp->value > 0 && tp->value < tbl->tblsize); + + dtza = (DynamicZoneAbbrev *) ((char *) tbl + tp->value); + + /* Look up the underlying zone if we haven't already */ + if (dtza->tz == NULL) + { + dtza->tz = pg_tzset(dtza->zone); + + /* + * Ideally we'd let the caller ereport instead of doing it here, but + * then there is no way to report the bad time zone name. + */ + if (dtza->tz == NULL) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("time zone \"%s\" not recognized", + dtza->zone), + errdetail("This time zone name appears in the configuration file for time zone abbreviation \"%s\".", + tp->token))); + } + return dtza->tz; } + /* * This set-returning function reads all the available time zone abbreviations * and returns a set of (abbrev, utc_offset, is_dst). @@ -4262,7 +4585,10 @@ pg_timezone_abbrevs(PG_FUNCTION_ARGS) HeapTuple tuple; Datum values[3]; bool nulls[3]; + const datetkn *tp; char buffer[TOKMAXLEN + 1]; + int gmtoffset; + bool is_dst; unsigned char *p; struct pg_tm tm; Interval *resInterval; @@ -4306,31 +4632,65 @@ pg_timezone_abbrevs(PG_FUNCTION_ARGS) funcctx = SRF_PERCALL_SETUP(); pindex = (int *) funcctx->user_fctx; - if (*pindex >= sztimezonetktbl) + if (zoneabbrevtbl == NULL || + *pindex >= zoneabbrevtbl->numabbrevs) SRF_RETURN_DONE(funcctx); + tp = zoneabbrevtbl->abbrevs + *pindex; + + switch (tp->type) + { + case TZ: + gmtoffset = tp->value; + is_dst = false; + break; + case DTZ: + gmtoffset = tp->value; + is_dst = true; + break; + case DYNTZ: + { + /* Determine the current meaning of the abbrev */ + pg_tz *tzp; + TimestampTz now; + int isdst; + + tzp = FetchDynamicTimeZone(zoneabbrevtbl, tp); + now = GetCurrentTransactionStartTimestamp(); + gmtoffset = -DetermineTimeZoneAbbrevOffsetTS(now, + tp->token, + tzp, + &isdst); + is_dst = (bool) isdst; + break; + } + default: + elog(ERROR, "unrecognized timezone type %d", (int) tp->type); + gmtoffset = 0; /* keep compiler quiet */ + is_dst = false; + break; + } + MemSet(nulls, 0, sizeof(nulls)); /* * Convert name to text, using upcasing conversion that is the inverse of * what ParseDateTime() uses. */ - strncpy(buffer, timezonetktbl[*pindex].token, TOKMAXLEN); - buffer[TOKMAXLEN] = '\0'; /* may not be null-terminated */ + strlcpy(buffer, tp->token, sizeof(buffer)); for (p = (unsigned char *) buffer; *p; p++) *p = pg_toupper(*p); values[0] = CStringGetTextDatum(buffer); + /* Convert offset (in seconds) to an interval */ MemSet(&tm, 0, sizeof(struct pg_tm)); - tm.tm_min = (-1) * FROMVAL(&timezonetktbl[*pindex]); + tm.tm_sec = gmtoffset; resInterval = (Interval *) palloc(sizeof(Interval)); tm2interval(&tm, 0, resInterval); values[1] = IntervalPGetDatum(resInterval); - Assert(timezonetktbl[*pindex].type == DTZ || - timezonetktbl[*pindex].type == TZ); - values[2] = BoolGetDatum(timezonetktbl[*pindex].type == DTZ); + values[2] = BoolGetDatum(is_dst); (*pindex)++; diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 11007c6d894..410bf47483e 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -486,6 +486,9 @@ timestamptz_in(PG_FUNCTION_ARGS) /* * Try to parse a timezone specification, and return its timezone offset value * if it's acceptable. Otherwise, an error is thrown. + * + * Note: some code paths update tm->tm_isdst, and some don't; current callers + * don't care, so we don't bother being consistent. */ static int parse_sane_timezone(struct pg_tm * tm, text *zone) @@ -499,12 +502,12 @@ parse_sane_timezone(struct pg_tm * tm, text *zone) /* * Look up the requested timezone. First we try to interpret it as a * numeric timezone specification; if DecodeTimezone decides it doesn't - * like the format, we look in the date token table (to handle cases like - * "EST"), and if that also fails, we look in the timezone database (to - * handle cases like "America/New_York"). (This matches the order in - * which timestamp input checks the cases; it's important because the - * timezone database unwisely uses a few zone names that are identical to - * offset abbreviations.) + * like the format, we look in the timezone abbreviation table (to handle + * cases like "EST"), and if that also fails, we look in the timezone + * database (to handle cases like "America/New_York"). (This matches the + * order in which timestamp input checks the cases; it's important because + * the timezone database unwisely uses a few zone names that are identical + * to offset abbreviations.) * * Note pg_tzset happily parses numeric input that DecodeTimezone would * reject. To avoid having it accept input that would otherwise be seen @@ -524,6 +527,7 @@ parse_sane_timezone(struct pg_tm * tm, text *zone) char *lowzone; int type, val; + pg_tz *tzp; if (rt == DTERR_TZDISP_OVERFLOW) ereport(ERROR, @@ -534,19 +538,26 @@ parse_sane_timezone(struct pg_tm * tm, text *zone) (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("time zone \"%s\" not recognized", tzname))); + /* DecodeTimezoneAbbrev requires lowercase input */ lowzone = downcase_truncate_identifier(tzname, strlen(tzname), false); - type = DecodeSpecial(0, lowzone, &val); + type = DecodeTimezoneAbbrev(0, lowzone, &val, &tzp); if (type == TZ || type == DTZ) - tz = val * MINS_PER_HOUR; + { + /* fixed-offset abbreviation */ + tz = -val; + } + else if (type == DYNTZ) + { + /* dynamic-offset abbreviation, resolve using specified time */ + tz = DetermineTimeZoneAbbrevOffset(tm, tzname, tzp); + } else { - pg_tz *tzp; - + /* try it as a full zone name */ tzp = pg_tzset(tzname); - if (tzp) tz = DetermineTimeZoneOffset(tm, tzp); else @@ -4883,39 +4894,52 @@ timestamp_zone(PG_FUNCTION_ARGS) int type, val; pg_tz *tzp; + struct pg_tm tm; + fsec_t fsec; if (TIMESTAMP_NOT_FINITE(timestamp)) PG_RETURN_TIMESTAMPTZ(timestamp); /* - * Look up the requested timezone. First we look in the date token table - * (to handle cases like "EST"), and if that fails, we look in the - * timezone database (to handle cases like "America/New_York"). (This - * matches the order in which timestamp input checks the cases; it's - * important because the timezone database unwisely uses a few zone names - * that are identical to offset abbreviations.) + * Look up the requested timezone. First we look in the timezone + * abbreviation table (to handle cases like "EST"), and if that fails, we + * look in the timezone database (to handle cases like + * "America/New_York"). (This matches the order in which timestamp input + * checks the cases; it's important because the timezone database unwisely + * uses a few zone names that are identical to offset abbreviations.) */ text_to_cstring_buffer(zone, tzname, sizeof(tzname)); + + /* DecodeTimezoneAbbrev requires lowercase input */ lowzone = downcase_truncate_identifier(tzname, strlen(tzname), false); - type = DecodeSpecial(0, lowzone, &val); + type = DecodeTimezoneAbbrev(0, lowzone, &val, &tzp); if (type == TZ || type == DTZ) { - tz = -(val * MINS_PER_HOUR); + /* fixed-offset abbreviation */ + tz = val; + result = dt2local(timestamp, tz); + } + else if (type == DYNTZ) + { + /* dynamic-offset abbreviation, resolve using specified time */ + if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, tzp) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + tz = -DetermineTimeZoneAbbrevOffset(&tm, tzname, tzp); result = dt2local(timestamp, tz); } else { + /* try it as a full zone name */ tzp = pg_tzset(tzname); if (tzp) { /* Apply the timezone change */ - struct pg_tm tm; - fsec_t fsec; - if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, tzp) != 0) ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), @@ -5061,27 +5085,39 @@ timestamptz_zone(PG_FUNCTION_ARGS) PG_RETURN_TIMESTAMP(timestamp); /* - * Look up the requested timezone. First we look in the date token table - * (to handle cases like "EST"), and if that fails, we look in the - * timezone database (to handle cases like "America/New_York"). (This - * matches the order in which timestamp input checks the cases; it's - * important because the timezone database unwisely uses a few zone names - * that are identical to offset abbreviations.) + * Look up the requested timezone. First we look in the timezone + * abbreviation table (to handle cases like "EST"), and if that fails, we + * look in the timezone database (to handle cases like + * "America/New_York"). (This matches the order in which timestamp input + * checks the cases; it's important because the timezone database unwisely + * uses a few zone names that are identical to offset abbreviations.) */ text_to_cstring_buffer(zone, tzname, sizeof(tzname)); + + /* DecodeTimezoneAbbrev requires lowercase input */ lowzone = downcase_truncate_identifier(tzname, strlen(tzname), false); - type = DecodeSpecial(0, lowzone, &val); + type = DecodeTimezoneAbbrev(0, lowzone, &val, &tzp); if (type == TZ || type == DTZ) { - tz = val * MINS_PER_HOUR; + /* fixed-offset abbreviation */ + tz = -val; + result = dt2local(timestamp, tz); + } + else if (type == DYNTZ) + { + /* dynamic-offset abbreviation, resolve using specified time */ + int isdst; + + tz = DetermineTimeZoneAbbrevOffsetTS(timestamp, tzname, tzp, &isdst); result = dt2local(timestamp, tz); } else { + /* try it as a full zone name */ tzp = pg_tzset(tzname); if (tzp) { diff --git a/src/backend/utils/misc/tzparser.c b/src/backend/utils/misc/tzparser.c index 6a5a7b39abf..a6a12ff06e3 100644 --- a/src/backend/utils/misc/tzparser.c +++ b/src/backend/utils/misc/tzparser.c @@ -63,13 +63,6 @@ validateTzEntry(tzEntry *tzentry) tzentry->filename, tzentry->lineno); return false; } - if (tzentry->offset % 900 != 0) - { - GUC_check_errmsg("time zone offset %d is not a multiple of 900 sec (15 min) in time zone file \"%s\", line %d", - tzentry->offset, - tzentry->filename, tzentry->lineno); - return false; - } /* * Sanity-check the offset: shouldn't exceed 14 hours @@ -93,7 +86,11 @@ validateTzEntry(tzEntry *tzentry) } /* - * Attempt to parse the line as a timezone abbrev spec (name, offset, dst) + * Attempt to parse the line as a timezone abbrev spec + * + * Valid formats are: + * name zone + * name offset dst * * Returns TRUE if OK, else false; data is stored in *tzentry */ @@ -116,7 +113,7 @@ splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry) filename, lineno); return false; } - tzentry->abbrev = abbrev; + tzentry->abbrev = pstrdup(abbrev); offset = strtok(NULL, WHITESPACE); if (!offset) @@ -125,25 +122,43 @@ splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry) filename, lineno); return false; } - tzentry->offset = strtol(offset, &offset_endptr, 10); - if (offset_endptr == offset || *offset_endptr != '\0') - { - GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d", - filename, lineno); - return false; - } - is_dst = strtok(NULL, WHITESPACE); - if (is_dst && pg_strcasecmp(is_dst, "D") == 0) + /* We assume zone names don't begin with a digit or sign */ + if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-') { - tzentry->is_dst = true; - remain = strtok(NULL, WHITESPACE); + tzentry->zone = NULL; + tzentry->offset = strtol(offset, &offset_endptr, 10); + if (offset_endptr == offset || *offset_endptr != '\0') + { + GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d", + filename, lineno); + return false; + } + + is_dst = strtok(NULL, WHITESPACE); + if (is_dst && pg_strcasecmp(is_dst, "D") == 0) + { + tzentry->is_dst = true; + remain = strtok(NULL, WHITESPACE); + } + else + { + /* there was no 'D' dst specifier */ + tzentry->is_dst = false; + remain = is_dst; + } } else { - /* there was no 'D' dst specifier */ + /* + * Assume entry is a zone name. We do not try to validate it by + * looking up the zone, because that would force loading of a lot of + * zones that probably will never be used in the current session. + */ + tzentry->zone = pstrdup(offset); + tzentry->offset = 0; tzentry->is_dst = false; - remain = is_dst; + remain = strtok(NULL, WHITESPACE); } if (!remain) /* no more non-whitespace chars */ @@ -201,8 +216,11 @@ addToArray(tzEntry **base, int *arraysize, int n, /* * Found a duplicate entry; complain unless it's the same. */ - if (midptr->offset == entry->offset && - midptr->is_dst == entry->is_dst) + if ((midptr->zone == NULL && entry->zone == NULL && + midptr->offset == entry->offset && + midptr->is_dst == entry->is_dst) || + (midptr->zone != NULL && entry->zone != NULL && + strcmp(midptr->zone, entry->zone) == 0)) { /* return unchanged array */ return n; @@ -210,6 +228,7 @@ addToArray(tzEntry **base, int *arraysize, int n, if (override) { /* same abbrev but something is different, override */ + midptr->zone = entry->zone; midptr->offset = entry->offset; midptr->is_dst = entry->is_dst; return n; @@ -239,9 +258,6 @@ addToArray(tzEntry **base, int *arraysize, int n, memcpy(arrayptr, entry, sizeof(tzEntry)); - /* Must dup the abbrev to ensure it survives */ - arrayptr->abbrev = pstrdup(entry->abbrev); - return n + 1; } @@ -446,15 +462,12 @@ load_tzoffsets(const char *filename) /* Parse the file(s) */ n = ParseTzFile(filename, 0, &array, &arraysize, 0); - /* If no errors so far, allocate result and let datetime.c convert data */ + /* If no errors so far, let datetime.c allocate memory & convert format */ if (n >= 0) { - result = malloc(offsetof(TimeZoneAbbrevTable, abbrevs) + - n * sizeof(datetkn)); + result = ConvertTimeZoneAbbrevs(array, n); if (!result) GUC_check_errmsg("out of memory"); - else - ConvertTimeZoneAbbrevs(result, array, n); } /* Clean up */ |