diff options
author | Andrew Dunstan <andrew@dunslane.net> | 2014-03-04 17:31:59 -0500 |
---|---|---|
committer | Andrew Dunstan <andrew@dunslane.net> | 2014-03-04 17:31:59 -0500 |
commit | 3b5e03dca2afea7a2c12dbc8605175d0568b5555 (patch) | |
tree | 4fadb752688feee0a0c08273f0bbb857e5fce429 /src | |
parent | e2a0fc5363e293d29053d0582a1009bc9fef0276 (diff) | |
download | postgresql-3b5e03dca2afea7a2c12dbc8605175d0568b5555.tar.gz postgresql-3b5e03dca2afea7a2c12dbc8605175d0568b5555.zip |
Provide a FORCE NULL option to COPY in CSV mode.
This forces an input field containing the quoted null string to be
returned as a NULL. Without this option, only unquoted null strings
behave this way. This helps where some CSV producers insist on quoting
every field, whether or not it is needed. The option takes a list of
fields, and only applies to those columns. There is an equivalent
column-level option added to file_fdw.
Ian Barwick, with some tweaking by Andrew Dunstan, reviewed by Payal
Singh.
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/commands/copy.c | 74 | ||||
-rw-r--r-- | src/backend/parser/gram.y | 4 | ||||
-rw-r--r-- | src/test/regress/expected/copy2.out | 48 | ||||
-rw-r--r-- | src/test/regress/sql/copy2.sql | 39 |
4 files changed, 161 insertions, 4 deletions
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 7c4039cb7ff..70ee7e50486 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -125,6 +125,8 @@ typedef struct CopyStateData bool *force_quote_flags; /* per-column CSV FQ flags */ List *force_notnull; /* list of column names */ bool *force_notnull_flags; /* per-column CSV FNN flags */ + List *force_null; /* list of column names */ + bool *force_null_flags; /* per-column CSV FN flags */ bool convert_selectively; /* do selective binary conversion? */ List *convert_select; /* list of column names (can be NIL) */ bool *convert_select_flags; /* per-column CSV/TEXT CS flags */ @@ -1019,6 +1021,20 @@ ProcessCopyOptions(CopyState cstate, errmsg("argument to option \"%s\" must be a list of column names", defel->defname))); } + else if (strcmp(defel->defname, "force_null") == 0) + { + if (cstate->force_null) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + if (defel->arg && IsA(defel->arg, List)) + cstate->force_null = (List *) defel->arg; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be a list of column names", + defel->defname))); + } else if (strcmp(defel->defname, "convert_selectively") == 0) { /* @@ -1178,6 +1194,17 @@ ProcessCopyOptions(CopyState cstate, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("COPY force not null only available using COPY FROM"))); + /* Check force_null */ + if (!cstate->csv_mode && cstate->force_null != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force null available only in CSV mode"))); + + if (cstate->force_null != NIL && !is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force null only available using COPY FROM"))); + /* Don't allow the delimiter to appear in the null string. */ if (strchr(cstate->null_print, cstate->delim[0]) != NULL) ereport(ERROR, @@ -1385,6 +1412,28 @@ BeginCopy(bool is_from, } } + /* Convert FORCE NULL name list to per-column flags, check validity */ + cstate->force_null_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); + if (cstate->force_null) + { + List *attnums; + ListCell *cur; + + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_null); + + foreach(cur, attnums) + { + int attnum = lfirst_int(cur); + + if (!list_member_int(cstate->attnumlist, attnum)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("FORCE NULL column \"%s\" not referenced by COPY", + NameStr(tupDesc->attrs[attnum - 1]->attname)))); + cstate->force_null_flags[attnum - 1] = true; + } + } + /* Convert convert_selectively name list to per-column flags */ if (cstate->convert_selectively) { @@ -2810,11 +2859,28 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext, continue; } - if (cstate->csv_mode && string == NULL && - cstate->force_notnull_flags[m]) + if (cstate->csv_mode) { - /* Go ahead and read the NULL string */ - string = cstate->null_print; + if(string == NULL && + cstate->force_notnull_flags[m]) + { + /* + * FORCE_NOT_NULL option is set and column is NULL - + * convert it to the NULL string. + */ + string = cstate->null_print; + } + else if(string != NULL && cstate->force_null_flags[m] + && strcmp(string,cstate->null_print) == 0 ) + { + /* + * FORCE_NULL option is set and column matches the NULL string. + * It must have been quoted, or otherwise the string would already + * have been set to NULL. + * Convert it to NULL as specified. + */ + string = NULL; + } } cstate->cur_attname = NameStr(attr[m]->attname); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 81169a4896d..e3060a4dffd 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -2548,6 +2548,10 @@ copy_opt_item: { $$ = makeDefElem("force_not_null", (Node *)$4); } + | FORCE NULL_P columnList + { + $$ = makeDefElem("force_null", (Node *)$3); + } | ENCODING Sconst { $$ = makeDefElem("encoding", (Node *)makeString($2)); diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 34fa131c52b..76dea287e28 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -382,6 +382,54 @@ SELECT * FROM vistest; e (2 rows) +-- Test FORCE_NOT_NULL and FORCE_NULL options +-- should succeed with "b" set to an empty string and "c" set to NULL +CREATE TEMP TABLE forcetest ( + a INT NOT NULL, + b TEXT NOT NULL, + c TEXT, + d TEXT, + e TEXT +); +\pset null NULL +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b), FORCE_NULL(c)); +COMMIT; +SELECT b, c FROM forcetest WHERE a = 1; + b | c +---+------ + | NULL +(1 row) + +-- should succeed with no effect ("b" remains an empty string, "c" remains NULL) +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b), FORCE_NULL(c)); +COMMIT; +SELECT b, c FROM forcetest WHERE a = 2; + b | c +---+------ + | NULL +(1 row) + +-- should fail with not-null constraint violation +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NULL(b), FORCE_NOT_NULL(c)); +ERROR: null value in column "b" violates not-null constraint +DETAIL: Failing row contains (3, null, , null, null). +CONTEXT: COPY forcetest, line 1: "3,,""" +ROLLBACK; +-- should fail with "not referenced by COPY" error +BEGIN; +COPY forcetest (d, e) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b)); +ERROR: FORCE NOT NULL column "b" not referenced by COPY +ROLLBACK; +-- should fail with "not referenced by COPY" error +BEGIN; +COPY forcetest (d, e) FROM STDIN WITH (FORMAT csv, FORCE_NULL(b)); +ERROR: FORCE NULL column "b" not referenced by COPY +ROLLBACK; +\pset null '' +DROP TABLE forcetest; DROP TABLE vistest; DROP FUNCTION truncate_in_subxact(); DROP TABLE x, y; diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index c46128b38a1..e2be21f5269 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -270,6 +270,45 @@ e SELECT * FROM vistest; COMMIT; SELECT * FROM vistest; +-- Test FORCE_NOT_NULL and FORCE_NULL options +-- should succeed with "b" set to an empty string and "c" set to NULL +CREATE TEMP TABLE forcetest ( + a INT NOT NULL, + b TEXT NOT NULL, + c TEXT, + d TEXT, + e TEXT +); +\pset null NULL +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b), FORCE_NULL(c)); +1,,"" +\. +COMMIT; +SELECT b, c FROM forcetest WHERE a = 1; +-- should succeed with no effect ("b" remains an empty string, "c" remains NULL) +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b), FORCE_NULL(c)); +2,,"" +\. +COMMIT; +SELECT b, c FROM forcetest WHERE a = 2; +-- should fail with not-null constraint violation +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NULL(b), FORCE_NOT_NULL(c)); +3,,"" +\. +ROLLBACK; +-- should fail with "not referenced by COPY" error +BEGIN; +COPY forcetest (d, e) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b)); +ROLLBACK; +-- should fail with "not referenced by COPY" error +BEGIN; +COPY forcetest (d, e) FROM STDIN WITH (FORMAT csv, FORCE_NULL(b)); +ROLLBACK; +\pset null '' +DROP TABLE forcetest; DROP TABLE vistest; DROP FUNCTION truncate_in_subxact(); DROP TABLE x, y; |