aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Fujii Masao <fujii@postgresql.org> 2025-07-03 15:27:26 +0900
committer: Fujii Masao <fujii@postgresql.org> 2025-07-03 15:27:26 +0900
commit: bc2f348e87c02de63647dbe290d64ff088880dbe (patch)
tree: 0e9e65cdaa0177cefc20c5f4666504e4a6ac63e1 /src
parent: fd7d7b719137b5c427681a50c0a0ac2d745b68bd (diff)
downloadpostgresql-bc2f348e87c02de63647dbe290d64ff088880dbe.tar.gz
postgresql-bc2f348e87c02de63647dbe290d64ff088880dbe.zip
Support multi-line headers in COPY FROM command.
The COPY FROM command now accepts a non-negative integer for the HEADER
option, allowing multiple header lines to be skipped. This is useful when
the input contains multi-line headers that should be ignored during data
import.

Author: Shinya Kato <shinya11.kato@gmail.com>
Co-authored-by: Fujii Masao <masao.fujii@gmail.com>
Reviewed-by: Yugo Nagata <nagata@sraoss.co.jp>
Discussion: https://postgr.es/m/CAOzEurRPxfzbxqeOPF_AGnAUOYf=Wk0we+1LQomPNUNtyZGBZw@mail.gmail.com
Diffstat (limited to 'src')
-rw-r--r-- src/backend/commands/copy.c 42
-rw-r--r-- src/backend/commands/copyfromparse.c 17
-rw-r--r-- src/backend/commands/copyto.c 2
-rw-r--r-- src/include/commands/copy.h 16
-rw-r--r-- src/test/regress/expected/copy.out 25
-rw-r--r-- src/test/regress/expected/copy2.out 6
-rw-r--r-- src/test/regress/sql/copy.sql 30
-rw-r--r-- src/test/regress/sql/copy2.sql 3
-rw-r--r-- src/tools/pgindent/typedefs.list 1
9 files changed, 110 insertions, 32 deletions
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 74ae42b19a7..fae9c41db65 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -322,11 +322,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
}
/*
- * Extract a CopyHeaderChoice value from a DefElem. This is like
- * defGetBoolean() but also accepts the special value "match".
+ * Extract the CopyFormatOptions.header_line value from a DefElem.
+ *
+ * Parses the HEADER option for COPY, which can be a boolean, a non-negative
+ * integer (number of lines to skip), or the special value "match".
*/
-static CopyHeaderChoice
-defGetCopyHeaderChoice(DefElem *def, bool is_from)
+static int
+defGetCopyHeaderOption(DefElem *def, bool is_from)
{
/*
* If no parameter value given, assume "true" is meant.
@@ -335,20 +337,27 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
return COPY_HEADER_TRUE;
/*
- * Allow 0, 1, "true", "false", "on", "off", or "match".
+ * Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or
+ * "match".
*/
switch (nodeTag(def->arg))
{
case T_Integer:
- switch (intVal(def->arg))
{
- case 0:
- return COPY_HEADER_FALSE;
- case 1:
- return COPY_HEADER_TRUE;
- default:
- /* otherwise, error out below */
- break;
+ int ival = intVal(def->arg);
+
+ if (ival < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("a negative integer value cannot be "
+ "specified for %s", def->defname)));
+
+ if (!is_from && ival > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot use multi-line header in COPY TO")));
+
+ return ival;
}
break;
default:
@@ -381,7 +390,8 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
}
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("%s requires a Boolean value or \"match\"",
+ errmsg("%s requires a Boolean value, a non-negative integer, "
+ "or the string \"match\"",
def->defname)));
return COPY_HEADER_FALSE; /* keep compiler quiet */
}
@@ -566,7 +576,7 @@ ProcessCopyOptions(ParseState *pstate,
if (header_specified)
errorConflictingDefElem(defel, pstate);
header_specified = true;
- opts_out->header_line = defGetCopyHeaderChoice(defel, is_from);
+ opts_out->header_line = defGetCopyHeaderOption(defel, is_from);
}
else if (strcmp(defel->defname, "quote") == 0)
{
@@ -769,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate,
errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
/* Check header */
- if (opts_out->binary && opts_out->header_line)
+ if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index f52f2477df1..b1ae97b833d 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -771,21 +771,30 @@ static pg_attribute_always_inline bool
NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
{
int fldct;
- bool done;
+ bool done = false;
/* only available for text or csv input */
Assert(!cstate->opts.binary);
/* on input check that the header line is correct if needed */
- if (cstate->cur_lineno == 0 && cstate->opts.header_line)
+ if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
{
ListCell *cur;
TupleDesc tupDesc;
+ int lines_to_skip = cstate->opts.header_line;
+
+ /* If set to "match", one header line is skipped */
+ if (cstate->opts.header_line == COPY_HEADER_MATCH)
+ lines_to_skip = 1;
tupDesc = RelationGetDescr(cstate->rel);
- cstate->cur_lineno++;
- done = CopyReadLine(cstate, is_csv);
+ for (int i = 0; i < lines_to_skip; i++)
+ {
+ cstate->cur_lineno++;
+ if ((done = CopyReadLine(cstate, is_csv)))
+ break;
+ }
if (cstate->opts.header_line == COPY_HEADER_MATCH)
{
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index ea6f18f2c80..67b94b91cae 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
cstate->file_encoding);
/* if a header has been requested send the line */
- if (cstate->opts.header_line)
+ if (cstate->opts.header_line == COPY_HEADER_TRUE)
{
ListCell *cur;
bool hdr_delim = false;
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 06dfdfef721..541176e1980 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -20,15 +20,12 @@
#include "tcop/dest.h"
/*
- * Represents whether a header line should be present, and whether it must
- * match the actual names (which implies "true").
+ * Represents whether a header line must match the actual names
+ * (which implies "true"), and whether it should be present.
*/
-typedef enum CopyHeaderChoice
-{
- COPY_HEADER_FALSE = 0,
- COPY_HEADER_TRUE,
- COPY_HEADER_MATCH,
-} CopyHeaderChoice;
+#define COPY_HEADER_MATCH -1
+#define COPY_HEADER_FALSE 0
+#define COPY_HEADER_TRUE 1
/*
* Represents where to save input processing errors. More values to be added
@@ -64,7 +61,8 @@ typedef struct CopyFormatOptions
bool binary; /* binary format? */
bool freeze; /* freeze rows on loading? */
bool csv_mode; /* Comma Separated Value format? */
- CopyHeaderChoice header_line; /* header line? */
+ int header_line; /* number of lines to skip or COPY_HEADER_XXX
+ * value (see the above) */
char *null_print; /* NULL marker string (server encoding!) */
int null_print_len; /* length of same */
char *null_print_client; /* same converted to file encoding */
diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out
index 8d5a06563c4..ac66eb55aee 100644
--- a/src/test/regress/expected/copy.out
+++ b/src/test/regress/expected/copy.out
@@ -81,6 +81,29 @@ copy copytest4 to stdout (header);
c1 colname with tab: \t
1 a
2 b
+-- test multi-line header line feature
+create temp table copytest5 (c1 int);
+copy copytest5 from stdin (format csv, header 2);
+copy copytest5 to stdout (header);
+c1
+1
+2
+truncate copytest5;
+copy copytest5 from stdin (format csv, header 4);
+select count(*) from copytest5;
+ count
+-------
+ 0
+(1 row)
+
+truncate copytest5;
+copy copytest5 from stdin (format csv, header 5);
+select count(*) from copytest5;
+ count
+-------
+ 0
+(1 row)
+
-- test copy from with a partitioned table
create table parted_copytest (
a int,
@@ -224,7 +247,7 @@ alter table header_copytest add column c text;
copy header_copytest to stdout with (header match);
ERROR: cannot use "match" with HEADER in COPY TO
copy header_copytest from stdin with (header wrong_choice);
-ERROR: header requires a Boolean value or "match"
+ERROR: header requires a Boolean value, a non-negative integer, or the string "match"
-- works
copy header_copytest from stdin with (header match);
copy header_copytest (c, a, b) from stdin with (header match);
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 64ea33aeae8..caa3c44f0d0 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -132,6 +132,12 @@ COPY x from stdin with (reject_limit 1);
ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
COPY x from stdin with (on_error ignore, reject_limit 0);
ERROR: REJECT_LIMIT (0) must be greater than zero
+COPY x from stdin with (header -1);
+ERROR: a negative integer value cannot be specified for header
+COPY x from stdin with (header 2.5);
+ERROR: header requires a Boolean value, a non-negative integer, or the string "match"
+COPY x to stdout with (header 2);
+ERROR: cannot use multi-line header in COPY TO
-- too many columns in column list: should fail
COPY x (a, b, c, d, e, d, c) from stdin;
ERROR: column "d" specified more than once
diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql
index f0b88a23db8..a1316c73bac 100644
--- a/src/test/regress/sql/copy.sql
+++ b/src/test/regress/sql/copy.sql
@@ -94,6 +94,36 @@ this is just a line full of junk that would error out if parsed
copy copytest4 to stdout (header);
+-- test multi-line header line feature
+
+create temp table copytest5 (c1 int);
+
+copy copytest5 from stdin (format csv, header 2);
+this is a first header line.
+this is a second header line.
+1
+2
+\.
+copy copytest5 to stdout (header);
+
+truncate copytest5;
+copy copytest5 from stdin (format csv, header 4);
+this is a first header line.
+this is a second header line.
+1
+2
+\.
+select count(*) from copytest5;
+
+truncate copytest5;
+copy copytest5 from stdin (format csv, header 5);
+this is a first header line.
+this is a second header line.
+1
+2
+\.
+select count(*) from copytest5;
+
-- test copy from with a partitioned table
create table parted_copytest (
a int,
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 45273557ce0..cef45868db5 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -90,6 +90,9 @@ COPY x to stdout (format BINARY, on_error unsupported);
COPY x from stdin (log_verbosity unsupported);
COPY x from stdin with (reject_limit 1);
COPY x from stdin with (on_error ignore, reject_limit 0);
+COPY x from stdin with (header -1);
+COPY x from stdin with (header 2.5);
+COPY x to stdout with (header 2);
-- too many columns in column list: should fail
COPY x (a, b, c, d, e, d, c) from stdin;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 66c5782688a..e7d1c48e1f2 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -521,7 +521,6 @@ CopyFormatOptions
CopyFromRoutine
CopyFromState
CopyFromStateData
-CopyHeaderChoice
CopyInsertMethod
CopyLogVerbosityChoice
CopyMethod