diff options
Diffstat (limited to 'src/include/commands')
-rw-r--r-- | src/include/commands/copy.h | 61 | ||||
-rw-r--r-- | src/include/commands/copyfrom_internal.h | 164 |
2 files changed, 217 insertions, 8 deletions
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index c639833565d..127a3c61e28 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -19,26 +19,71 @@ #include "parser/parse_node.h" #include "tcop/dest.h" -/* CopyStateData is private in commands/copy.c */ -typedef struct CopyStateData *CopyState; +/* + * A struct to hold COPY options, in a parsed form. All of these are related + * to formatting, except for 'freeze', which doesn't really belong here, but + * it's expedient to parse it along with all the other options. + */ +typedef struct CopyFormatOptions +{ + /* parameters from the COPY command */ + int file_encoding; /* file or remote side's character encoding, + * -1 if not specified */ + bool binary; /* binary format? */ + bool freeze; /* freeze rows on loading? */ + bool csv_mode; /* Comma Separated Value format? */ + bool header_line; /* CSV header line? */ + char *null_print; /* NULL marker string (server encoding!) */ + int null_print_len; /* length of same */ + char *null_print_client; /* same converted to file encoding */ + char *delim; /* column delimiter (must be 1 byte) */ + char *quote; /* CSV quote char (must be 1 byte) */ + char *escape; /* CSV escape char (must be 1 byte) */ + List *force_quote; /* list of column names */ + bool force_quote_all; /* FORCE_QUOTE *? */ + bool *force_quote_flags; /* per-column CSV FQ flags */ + List *force_notnull; /* list of column names */ + bool *force_notnull_flags; /* per-column CSV FNN flags */ + List *force_null; /* list of column names */ + bool *force_null_flags; /* per-column CSV FN flags */ + bool convert_selectively; /* do selective binary conversion? */ + List *convert_select; /* list of column names (can be NIL) */ +} CopyFormatOptions; + +/* These are private in commands/copy[from|to].c */ +typedef struct CopyFromStateData *CopyFromState; +typedef struct CopyToStateData *CopyToState; + typedef int (*copy_data_source_cb) (void *outbuf, int minread, int maxread); extern void DoCopy(ParseState *state, const CopyStmt *stmt, int stmt_location, int stmt_len, uint64 *processed); -extern void ProcessCopyOptions(ParseState *pstate, CopyState cstate, bool is_from, List *options); -extern CopyState BeginCopyFrom(ParseState *pstate, Relation rel, const char *filename, +extern void ProcessCopyOptions(ParseState *pstate, CopyFormatOptions *ops_out, bool is_from, List *options); +extern CopyFromState BeginCopyFrom(ParseState *pstate, Relation rel, Node *whereClause, + const char *filename, bool is_program, copy_data_source_cb data_source_cb, List *attnamelist, List *options); -extern void EndCopyFrom(CopyState cstate); -extern bool NextCopyFrom(CopyState cstate, ExprContext *econtext, +extern void EndCopyFrom(CopyFromState cstate); +extern bool NextCopyFrom(CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls); -extern bool NextCopyFromRawFields(CopyState cstate, +extern bool NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields); extern void CopyFromErrorCallback(void *arg); -extern uint64 CopyFrom(CopyState cstate); +extern uint64 CopyFrom(CopyFromState cstate); extern DestReceiver *CreateCopyDestReceiver(void); +/* + * internal prototypes + */ +extern CopyToState BeginCopyTo(ParseState *pstate, Relation rel, RawStmt *query, + Oid queryRelId, const char *filename, bool is_program, + List *attnamelist, List *options); +extern void EndCopyTo(CopyToState cstate); +extern uint64 DoCopyTo(CopyToState cstate); +extern List *CopyGetAttnums(TupleDesc tupDesc, Relation rel, + List *attnamelist); + #endif /* COPY_H */ diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h new file mode 100644 index 00000000000..c15ea803c32 --- /dev/null +++ b/src/include/commands/copyfrom_internal.h @@ -0,0 +1,164 @@ +/*------------------------------------------------------------------------- + * + * copyfrom_internal.h + * Internal definitions for COPY FROM command. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/commands/copyfrom_internal.h + * + *------------------------------------------------------------------------- + */ +#ifndef COPYFROM_INTERNAL_H +#define COPYFROM_INTERNAL_H + +#include "commands/copy.h" +#include "commands/trigger.h" + +/* + * Represents the different source cases we need to worry about at + * the bottom level + */ +typedef enum CopySource +{ + COPY_FILE, /* from file (or a piped program) */ + COPY_OLD_FE, /* from frontend (2.0 protocol) */ + COPY_NEW_FE, /* from frontend (3.0 protocol) */ + COPY_CALLBACK /* from callback function */ +} CopySource; + +/* + * Represents the end-of-line terminator type of the input + */ +typedef enum EolType +{ + EOL_UNKNOWN, + EOL_NL, + EOL_CR, + EOL_CRNL +} EolType; + +/* + * Represents the heap insert method to be used during COPY FROM. + */ +typedef enum CopyInsertMethod +{ + CIM_SINGLE, /* use table_tuple_insert or fdw routine */ + CIM_MULTI, /* always use table_multi_insert */ + CIM_MULTI_CONDITIONAL /* use table_multi_insert only if valid */ +} CopyInsertMethod; + +/* + * This struct contains all the state variables used throughout a COPY FROM + * operation. + * + * Multi-byte encodings: all supported client-side encodings encode multi-byte + * characters by having the first byte's high bit set. Subsequent bytes of the + * character can have the high bit not set. When scanning data in such an + * encoding to look for a match to a single-byte (ie ASCII) character, we must + * use the full pg_encoding_mblen() machinery to skip over multibyte + * characters, else we might find a false match to a trailing byte. In + * supported server encodings, there is no possibility of a false match, and + * it's faster to make useless comparisons to trailing bytes than it is to + * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true + * when we have to do it the hard way. + */ +typedef struct CopyFromStateData +{ + /* low-level state data */ + CopySource copy_src; /* type of copy source */ + FILE *copy_file; /* used if copy_src == COPY_FILE */ + StringInfo fe_msgbuf; /* used if copy_src == COPY_NEW_FE */ + bool reached_eof; /* true if we read to end of copy data (not + * all copy_src types maintain this) */ + + EolType eol_type; /* EOL type of input */ + int file_encoding; /* file or remote side's character encoding */ + bool need_transcoding; /* file encoding diff from server? */ + bool encoding_embeds_ascii; /* ASCII can be non-first byte? */ + + /* parameters from the COPY command */ + Relation rel; /* relation to copy from */ + List *attnumlist; /* integer list of attnums to copy */ + char *filename; /* filename, or NULL for STDIN */ + bool is_program; /* is 'filename' a program to popen? */ + copy_data_source_cb data_source_cb; /* function for reading data */ + + CopyFormatOptions opts; + bool *convert_select_flags; /* per-column CSV/TEXT CS flags */ + Node *whereClause; /* WHERE condition (or NULL) */ + + /* these are just for error messages, see CopyFromErrorCallback */ + const char *cur_relname; /* table name for error messages */ + uint64 cur_lineno; /* line number for error messages */ + const char *cur_attname; /* current att for error messages */ + const char *cur_attval; /* current att value for error messages */ + + /* + * Working state + */ + MemoryContext copycontext; /* per-copy execution context */ + + AttrNumber num_defaults; + FmgrInfo *in_functions; /* array of input functions for each attrs */ + Oid *typioparams; /* array of element types for in_functions */ + int *defmap; /* array of default att numbers */ + ExprState **defexprs; /* array of default att expressions */ + bool volatile_defexprs; /* is any of defexprs volatile? */ + List *range_table; + ExprState *qualexpr; + + TransitionCaptureState *transition_capture; + + /* + * These variables are used to reduce overhead in COPY FROM. + * + * attribute_buf holds the separated, de-escaped text for each field of + * the current line. The CopyReadAttributes functions return arrays of + * pointers into this buffer. We avoid palloc/pfree overhead by re-using + * the buffer on each cycle. + * + * In binary COPY FROM, attribute_buf holds the binary data for the + * current field, but the usage is otherwise similar. + */ + StringInfoData attribute_buf; + + /* field raw data pointers found by COPY FROM */ + + int max_fields; + char **raw_fields; + + /* + * Similarly, line_buf holds the whole input line being processed. The + * input cycle is first to read the whole line into line_buf, convert it + * to server encoding there, and then extract the individual attribute + * fields into attribute_buf. line_buf is preserved unmodified so that we + * can display it in error messages if appropriate. (In binary mode, + * line_buf is not used.) + */ + StringInfoData line_buf; + bool line_buf_converted; /* converted to server encoding? */ + bool line_buf_valid; /* contains the row being processed? */ + + /* + * Finally, raw_buf holds raw data read from the data source (file or + * client connection). In text mode, CopyReadLine parses this data + * sufficiently to locate line boundaries, then transfers the data to + * line_buf and converts it. In binary mode, CopyReadBinaryData fetches + * appropriate amounts of data from this buffer. In both modes, we + * guarantee that there is a \0 at raw_buf[raw_buf_len]. + */ +#define RAW_BUF_SIZE 65536 /* we palloc RAW_BUF_SIZE+1 bytes */ + char *raw_buf; + int raw_buf_index; /* next byte to process */ + int raw_buf_len; /* total # of bytes stored */ + /* Shorthand for number of unconsumed bytes available in raw_buf */ +#define RAW_BUF_BYTES(cstate) ((cstate)->raw_buf_len - (cstate)->raw_buf_index) +} CopyFromStateData; + +extern void ReceiveCopyBegin(CopyFromState cstate); +extern void ReceiveCopyBinaryHeader(CopyFromState cstate); + +#endif /* COPYFROM_INTERNAL_H */ |