Remove hard-wired lists of timezone abbreviations in favor of providing configuration files that can be altered by a DBA.

The australian_timezones GUC setting disappears, replaced by a timezone_abbreviations setting (set this to 'Australia' to get the effect of australian_timezones). The list of zone names defined by default has undergone a bit of cleanup, too. Documentation still needs some work --- in particular, should we fix Table B-4, or just get rid of it? Joachim Wieland, with some editorializing by moi.
author: Tom Lane <tgl@sss.pgh.pa.us> 2006-07-25 03:51:23 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2006-07-25 03:51:23 +0000
commit: d8b5c95ca8a2ce7cd4062d4a12710ca977bac87c (patch)
tree: a22dd25471942e3a24edcc2319e03319bed0482b /src/backend/utils/misc/tzparser.c
parent: 631ea6188366ee1b9818fed88d8fadd495246fd7 (diff)
download: postgresql-d8b5c95ca8a2ce7cd4062d4a12710ca977bac87c.tar.gz
postgresql-d8b5c95ca8a2ce7cd4062d4a12710ca977bac87c.zip
1 files changed, 461 insertions, 0 deletions
diff --git a/src/backend/utils/misc/tzparser.c b/src/backend/utils/misc/tzparser.c
new file mode 100644
index 00000000000..1a092ca2422
--- /dev/null
+++ b/src/backend/utils/misc/tzparser.c
@@ -0,0 +1,461 @@
+/*-------------------------------------------------------------------------
+ *
+ * tzparser.c
+ *	  Functions for parsing timezone offset files
+ *
+ * Note: we generally should not throw any errors in this file, but instead
+ * try to return an error code.  This is not completely bulletproof at
+ * present --- in particular out-of-memory will throw an error.  Could
+ * probably fix with PG_TRY if necessary.
+ *
+ *
+ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/utils/misc/tzparser.c,v 1.1 2006/07/25 03:51:21 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "miscadmin.h"
+#include "storage/fd.h"
+#include "utils/datetime.h"
+#include "utils/memutils.h"
+#include "utils/tzparser.h"
+
+
+#define WHITESPACE " \t\n\r"	/* delimiter set handed to strtok() */
+
+static int	tz_elevel;			/* ereport level for this file; set by load_tzoffsets() */
+
+static bool validateTzEntry(tzEntry *tzentry);	/* range checks, lowercasing */
+static bool splitTzLine(const char *filename, int lineno,
+						char *line, tzEntry *tzentry);	/* tokenize one data line */
+static int	addToArray(tzEntry **base, int *arraysize, int n,
+					   tzEntry *entry, bool override);	/* sorted insert */
+static int	ParseTzFile(const char *filename, int depth,
+						tzEntry **base, int *arraysize, int n);	/* one file; recurses */
+
+
+/*
+ * Run extra sanity checks on a freshly parsed tzEntry
+ *
+ * Reports a message at tz_elevel and returns false on the first failed
+ * check; otherwise folds the abbreviation to lower case and returns true.
+ */
+static bool
+validateTzEntry(tzEntry *tzentry)
+{
+	const int	max_offset = 14 * 60 * 60;	/* 14 hours, in seconds */
+	unsigned char *cp = (unsigned char *) tzentry->abbrev;
+
+	/* The datetkntbl storage format (see datetime.c) caps the abbrev length */
+	if (strlen(tzentry->abbrev) > TOKMAXLEN)
+	{
+		ereport(tz_elevel,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
+						tzentry->abbrev, TOKMAXLEN,
+						tzentry->filename, tzentry->lineno)));
+		return false;
+	}
+
+	/* ... and it requires the offset to be a whole number of quarter hours */
+	if ((tzentry->offset % 900) != 0)
+	{
+		ereport(tz_elevel,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("time zone offset %d is not a multiple of 900 sec (15 min) in time zone file \"%s\", line %d",
+						tzentry->offset,
+						tzentry->filename, tzentry->lineno)));
+		return false;
+	}
+
+	/* Plausibility check: reject anything more than 14 hours either way */
+	if (!(tzentry->offset >= -max_offset && tzentry->offset <= max_offset))
+	{
+		ereport(tz_elevel,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
+						tzentry->offset,
+						tzentry->filename, tzentry->lineno)));
+		return false;
+	}
+
+	/* Lower-case the abbrev in place (must match datetime.c's conversion) */
+	while (*cp != '\0')
+	{
+		*cp = pg_tolower(*cp);
+		cp++;
+	}
+
+	return true;
+}
+
+/*
+ * Attempt to parse the line as a timezone abbrev spec (name, offset, dst)
+ *
+ * Returns TRUE if OK, else false; data is stored in *tzentry.  The line is
+ * split in place by strtok(), so tzentry->abbrev points into it.
+ */
+static bool
+splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
+{
+	char	*abbrev;
+	char	*offset;
+	char	*offset_endptr;
+	char	*remain;
+	long	offset_val;
+
+	tzentry->lineno = lineno;
+	tzentry->filename = filename;
+
+	abbrev = strtok(line, WHITESPACE);
+	if (!abbrev)
+	{
+		ereport(tz_elevel,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
+						filename, lineno)));
+		return false;
+	}
+	tzentry->abbrev = abbrev;
+
+	offset = strtok(NULL, WHITESPACE);
+	if (!offset)
+	{
+		ereport(tz_elevel,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("missing time zone offset in time zone file \"%s\", line %d",
+						filename, lineno)));
+		return false;
+	}
+
+	/*
+	 * Reject numbers that do not survive the narrowing store into
+	 * tzentry->offset, so they cannot wrap to a plausible-looking offset.
+	 */
+	errno = 0;
+	offset_val = strtol(offset, &offset_endptr, 10);
+	tzentry->offset = offset_val;
+	if (offset_endptr == offset || *offset_endptr != '\0' ||
+		errno == ERANGE || tzentry->offset != offset_val)
+	{
+		ereport(tz_elevel,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
+						filename, lineno)));
+		return false;
+	}
+
+	/* An optional "D" (any case) flags a daylight-savings abbreviation */
+	remain = strtok(NULL, WHITESPACE);
+	tzentry->is_dst = (remain != NULL && pg_strcasecmp(remain, "D") == 0);
+	if (tzentry->is_dst)
+		remain = strtok(NULL, WHITESPACE);
+
+	/* Anything still left on the line must be a comment */
+	if (remain != NULL && remain[0] != '#')
+	{
+		ereport(tz_elevel,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid syntax in time zone file \"%s\", line %d",
+						filename, lineno)));
+		return false;
+	}
+	return true;
+}
+
+/*
+ * Add an entry to the array, keeping the array sorted by abbreviation
+ *
+ * *base: start of the array (updated if the array has to be enlarged)
+ * *arraysize: allocated capacity (updated if the array has to be enlarged)
+ * n: number of entries currently in use
+ * entry: the item to add
+ * override: TRUE if a conflicting duplicate may replace the existing entry
+ *
+ * An entry whose abbrev, offset and is_dst all match an existing one is
+ * silently ignored.
+ *
+ * Returns the resulting number of entries in use, or -1 if error
+ */
+static int
+addToArray(tzEntry **base, int *arraysize, int n,
+		   tzEntry *entry, bool override)
+{
+	tzEntry    *slot;
+	int			lo = 0;
+	int			hi = n - 1;
+
+	/*
+	 * Binary-search for an existing entry with this abbreviation.  When the
+	 * search fails, "lo" is the position that keeps the array sorted.
+	 * strcmp() is used so the ordering matches what datetime.c expects.
+	 */
+	while (lo <= hi)
+	{
+		int			probe = (lo + hi) >> 1;
+		tzEntry    *found = *base + probe;
+		int			order = strcmp(entry->abbrev, found->abbrev);
+
+		if (order > 0)
+		{
+			lo = probe + 1;
+			continue;
+		}
+		if (order < 0)
+		{
+			hi = probe - 1;
+			continue;
+		}
+
+		/* Same abbreviation: an identical definition is harmless */
+		if (found->is_dst == entry->is_dst &&
+			found->offset == entry->offset)
+			return n;
+
+		/* Definitions differ: report a conflict unless overriding is OK */
+		if (!override)
+		{
+			ereport(tz_elevel,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("time zone abbreviation \"%s\" is multiply defined",
+							entry->abbrev),
+					 errdetail("Time zone file \"%s\", line %d conflicts with file \"%s\", line %d.",
+							   found->filename, found->lineno,
+							   entry->filename, entry->lineno)));
+			return -1;
+		}
+
+		/* Overwrite the stored definition; the array length is unchanged */
+		found->offset = entry->offset;
+		found->is_dst = entry->is_dst;
+		return n;
+	}
+
+	/*
+	 * No existing entry: insert at position "lo", doubling the allocation
+	 * first if every slot is already in use.
+	 */
+	if (n >= *arraysize)
+	{
+		*arraysize *= 2;
+		*base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
+	}
+
+	/* Shift the tail up by one and drop the new entry into the gap */
+	slot = *base + lo;
+	memmove(slot + 1, slot, (n - lo) * sizeof(tzEntry));
+	memcpy(slot, entry, sizeof(tzEntry));
+
+	/* Must dup the abbrev to ensure it survives */
+	slot->abbrev = pstrdup(entry->abbrev);
+
+	return n + 1;
+}
+
+/*
+ * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
+ *
+ * filename: user-specified file name (does not include path)
+ * depth: current recursion depth
+ * *base: array for results (changeable if must enlarge array)
+ * *arraysize: allocated length of array (changeable if must enlarge array)
+ * n: current number of valid elements in array
+ *
+ * Returns the new array length (new value for n), or -1 if error.  Problems
+ * are reported at tz_elevel and then we return rather than throw, so once
+ * the file is open every exit path has to close it explicitly.
+ */
+static int
+ParseTzFile(const char *filename, int depth,
+			tzEntry **base, int *arraysize, int n)
+{
+	char			share_path[MAXPGPATH];
+	char			file_path[MAXPGPATH];
+	FILE		   *tzFile;
+	char			tzbuf[1024];
+	char		   *line;
+	tzEntry			tzentry;
+	int				lineno = 0;
+	bool			override = false;
+	const char	   *p;
+
+	/*
+	 * We enforce that the filename is all alpha characters.  This may be
+	 * overly restrictive, but nothing outside the timezonesets directory
+	 * may be reachable, so for instance '/' *must* be rejected.
+	 */
+	for (p = filename; *p; p++)
+	{
+		if (!isalpha((unsigned char) *p))
+		{
+			/* at level 0, we need no ereport since guc.c will say enough */
+			if (depth > 0)
+				ereport(tz_elevel,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("invalid time zone file name \"%s\"",
+								filename)));
+			return -1;
+		}
+	}
+
+	/* The maximum @INCLUDE nesting depth is an arbitrary, conservative pick */
+	if (depth > 3)
+	{
+		ereport(tz_elevel,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("time zone file recursion limit exceeded in file \"%s\"",
+						filename)));
+		return -1;
+	}
+
+	get_share_path(my_exec_path, share_path);
+	snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
+			 share_path, filename);
+	tzFile = AllocateFile(file_path, "r");
+	if (!tzFile)
+	{
+		/* at level 0, if file doesn't exist, guc.c's complaint is enough */
+		if (errno != ENOENT || depth > 0)
+			ereport(tz_elevel,
+					(errcode_for_file_access(),
+					 errmsg("could not read time zone file \"%s\": %m",
+							filename)));
+		return -1;
+	}
+
+	/* From here on, failures must "goto failed" so the file gets closed */
+	while (!feof(tzFile))
+	{
+		lineno++;
+		if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
+		{
+			if (ferror(tzFile))
+			{
+				ereport(tz_elevel,
+						(errcode_for_file_access(),
+						 errmsg("could not read time zone file \"%s\": %m",
+								filename)));
+				goto failed;
+			}
+			/* else we're at EOF after all */
+			break;
+		}
+		if (strlen(tzbuf) == sizeof(tzbuf)-1)
+		{
+			/* the line is too long for tzbuf */
+			ereport(tz_elevel,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("line is too long in time zone file \"%s\", line %d",
+							filename, lineno)));
+			goto failed;
+		}
+
+		/* skip over whitespace */
+		line = tzbuf;
+		while (*line && isspace((unsigned char) *line))
+			line++;
+
+		if (*line == '\0')				/* empty line */
+			continue;
+		if (*line == '#')				/* comment line */
+			continue;
+
+		if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
+		{
+			/* pstrdup so we can use filename in result data structure */
+			char	   *includeFile = pstrdup(line + strlen("@INCLUDE"));
+
+			includeFile = strtok(includeFile, WHITESPACE);
+			if (!includeFile || !*includeFile)
+			{
+				ereport(tz_elevel,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("@INCLUDE without filename in time zone file \"%s\", line %d",
+								filename, lineno)));
+				goto failed;
+			}
+			n = ParseTzFile(includeFile, depth + 1,
+							base, arraysize, n);
+			if (n < 0)
+				goto failed;
+			continue;
+		}
+
+		if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
+		{
+			override = true;
+			continue;
+		}
+
+		if (!splitTzLine(filename, lineno, line, &tzentry) ||
+			!validateTzEntry(&tzentry))
+			goto failed;
+		n = addToArray(base, arraysize, n, &tzentry, override);
+		if (n < 0)
+			goto failed;
+	}
+
+	FreeFile(tzFile);
+	return n;
+
+failed:
+	FreeFile(tzFile);
+	return -1;
+}
+
+/*
+ * load_tzoffsets --- parse a timezone offset file and optionally install it
+ *
+ * filename: file name as given by the user
+ * doit: if TRUE, hand the parsed entries to datetime.c; if FALSE, only check
+ * elevel: level at which problems are reported (expected to be below ERROR)
+ *
+ * Returns TRUE when the file parsed cleanly, FALSE otherwise
+ */
+bool
+load_tzoffsets(const char *filename, bool doit, int elevel)
+{
+	MemoryContext scratch;
+	MemoryContext caller_cxt;
+	tzEntry	   *entries;
+	int			capacity = 128;		/* initial guess; grows on demand */
+	int			count;
+	bool		ok;
+
+	/* Every helper in this file reports at this level */
+	tz_elevel = elevel;
+
+	/*
+	 * Do all the work in a private memory context, so that everything
+	 * allocated along the way can be released in one step at the end.
+	 */
+	scratch = AllocSetContextCreate(CurrentMemoryContext,
+									"TZParserMemory",
+									ALLOCSET_SMALL_MINSIZE,
+									ALLOCSET_SMALL_INITSIZE,
+									ALLOCSET_SMALL_MAXSIZE);
+	caller_cxt = MemoryContextSwitchTo(scratch);
+
+	/* Parse the file, plus anything it pulls in */
+	entries = (tzEntry *) palloc(capacity * sizeof(tzEntry));
+	count = ParseTzFile(filename, 0, &entries, &capacity, 0);
+	ok = (count >= 0);
+
+	/* Pass the result to datetime.c only if requested and error-free */
+	if (ok && doit)
+		InstallTimeZoneAbbrevs(entries, count);
+
+	/* Return to the caller's context and discard the scratch space */
+	MemoryContextSwitchTo(caller_cxt);
+	MemoryContextDelete(scratch);
+
+	return ok;
+}
author	Tom Lane <tgl@sss.pgh.pa.us>	2006-07-25 03:51:23 +0000
committer	Tom Lane <tgl@sss.pgh.pa.us>	2006-07-25 03:51:23 +0000
commit	d8b5c95ca8a2ce7cd4062d4a12710ca977bac87c (patch)
tree	a22dd25471942e3a24edcc2319e03319bed0482b /src/backend/utils/misc/tzparser.c
parent	631ea6188366ee1b9818fed88d8fadd495246fd7 (diff)
download	postgresql-d8b5c95ca8a2ce7cd4062d4a12710ca977bac87c.tar.gz postgresql-d8b5c95ca8a2ce7cd4062d4a12710ca977bac87c.zip