/*
 * test_escape.c Test escape functions
 *
 * Copyright (c) 2022-2025, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *		src/test/modules/test_escape/test_escape.c
 */

#include "postgres_fe.h"

#include <stdio.h>
#include <string.h>

#include "fe_utils/psqlscan.h"
#include "fe_utils/string_utils.h"
#include "getopt_long.h"
#include "libpq-fe.h"
#include "mb/pg_wchar.h"


/*
 * Global state of one test run.
 */
typedef struct pe_test_config
{
	/* incremented by --verbose, decremented by --quiet */
	int			verbosity;
	/* set by --force-unsupported */
	bool		force_unsupported;
	/* connection string given via --conninfo */
	const char *conninfo;
	/* connection used for every test */
	PGconn	   *conn;

	/* counters maintained by report_result() */
	int			test_count;
	int			failure_count;
} pe_test_config;


/*
 * An escape function to be tested by this test.
 */
typedef struct pe_test_escape_func
{
	const char *name;

	/*
	 * Can the escape method report errors? If so, we validate that it does in
	 * case of various invalid inputs.
	 */
	bool		reports_errors;

	/*
	 * Is the escape method known to not handle invalidly encoded input? If
	 * so, we don't run the test unless --force-unsupported is used.
	 */
	bool		supports_only_valid;

	/*
	 * Is the escape method known to only handle encodings in which no byte of
	 * a multi-byte character is a valid ASCII character?
	 */
	bool		supports_only_ascii_overlap;

	/*
	 * The function under test.  It appends the escaped form of the first
	 * unescaped_len bytes of unescaped to target.  On failure it returns
	 * false and describes the problem in escape_err.
	 */
	bool		(*escape) (PGconn *conn, PQExpBuffer target,
						   const char *unescaped, size_t unescaped_len,
						   PQExpBuffer escape_err);
} pe_test_escape_func;

/*
 * A single test input for this test.
 */
typedef struct pe_test_vector
{
	const char *client_encoding;
	/* length of escape in bytes; the data may contain embedded NUL bytes */
	size_t		escape_len;
	const char *escape;
} pe_test_vector;


/*
 * Callback functions from flex lexer. Not currently used by the test.
 */
static const PsqlScanCallbacks test_scan_callbacks = {
	NULL
};


/*
 * Append the connection's current error message to escape_err, dropping a
 * trailing newline if there is one.
 *
 * The length is checked before looking at the last byte, so an empty message
 * (or a buffer that could not be enlarged) cannot cause an access in front of
 * the buffer.
 */
static void
append_conn_error(PGconn *conn, PQExpBuffer escape_err)
{
	appendPQExpBufferStr(escape_err, PQerrorMessage(conn));

	if (escape_err->len > 0 &&
		escape_err->data[escape_err->len - 1] == '\n')
	{
		escape_err->len--;
		escape_err->data[escape_err->len] = '\0';
	}
}

/*
 * Escape via PQescapeLiteral().
 */
static bool
escape_literal(PGconn *conn, PQExpBuffer target,
			   const char *unescaped, size_t unescaped_len,
			   PQExpBuffer escape_err)
{
	char	   *escaped;

	escaped = PQescapeLiteral(conn, unescaped, unescaped_len);
	if (!escaped)
	{
		append_conn_error(conn, escape_err);
		return false;
	}

	appendPQExpBufferStr(target, escaped);
	PQfreemem(escaped);
	return true;
}

/*
 * Escape via PQescapeIdentifier().
 */
static bool
escape_identifier(PGconn *conn, PQExpBuffer target,
				  const char *unescaped, size_t unescaped_len,
				  PQExpBuffer escape_err)
{
	char	   *escaped;

	escaped = PQescapeIdentifier(conn, unescaped, unescaped_len);
	if (!escaped)
	{
		append_conn_error(conn, escape_err);
		return false;
	}

	appendPQExpBufferStr(target, escaped);
	PQfreemem(escaped);
	return true;
}

/*
 * Escape via PQescapeStringConn(), adding the surrounding quotes ourselves.
 *
 * The escaped bytes are written directly into target's storage, so the buffer
 * has to be enlarged first and its length adjusted afterwards.
 */
static bool
escape_string_conn(PGconn *conn, PQExpBuffer target,
				   const char *unescaped, size_t unescaped_len,
				   PQExpBuffer escape_err)
{
	int			error;
	size_t		sz;

	appendPQExpBufferChar(target, '\'');

	/* never write into the buffer unless the space really is there */
	if (!enlargePQExpBuffer(target, unescaped_len * 2 + 1))
	{
		appendPQExpBufferStr(escape_err, "out of memory");
		return false;
	}

	sz = PQescapeStringConn(conn, target->data + target->len,
							unescaped, unescaped_len,
							&error);

	target->len += sz;
	appendPQExpBufferChar(target, '\'');

	if (error)
	{
		append_conn_error(conn, escape_err);
		return false;
	}

	return true;
}

/*
 * Escape via PQescapeString(), adding the surrounding quotes ourselves.
 *
 * PQescapeString() has no way to report errors, so the only failure reported
 * here is being unable to enlarge the target buffer.
 */
static bool
escape_string(PGconn *conn, PQExpBuffer target,
			  const char *unescaped, size_t unescaped_len,
			  PQExpBuffer escape_err)
{
	size_t		sz;

	appendPQExpBufferChar(target, '\'');

	/* never write into the buffer unless the space really is there */
	if (!enlargePQExpBuffer(target, unescaped_len * 2 + 1))
	{
		appendPQExpBufferStr(escape_err, "out of memory");
		return false;
	}

	sz = PQescapeString(target->data + target->len,
						unescaped, unescaped_len);
	target->len += sz;
	appendPQExpBufferChar(target, '\'');

	return true;
}

/*
 * Escape via s/'/''/.  Non-core drivers invariably wrap libpq or use this
 * method.  It suffices iff the input passes encoding validation, so it's
 * marked as supports_only_valid.
 */
static bool
escape_replace(PGconn *conn, PQExpBuffer target,
			   const char *unescaped, size_t unescaped_len,
			   PQExpBuffer escape_err)
{
	appendPQExpBufferChar(target, '\'');

	for (size_t i = 0; i < unescaped_len; i++)
	{
		char		c = unescaped[i];

		if (c == '\'')
			appendPQExpBufferStr(target, "''");
		else
			appendPQExpBufferChar(target, c);
	}

	appendPQExpBufferChar(target, '\'');

	return true;
}

/*
 * Escape via appendStringLiteral().
 *
 * unescaped_len is not passed on; the input is handed over as a plain C
 * string.
 */
static bool
escape_append_literal(PGconn *conn, PQExpBuffer target,
					  const char *unescaped, size_t unescaped_len,
					  PQExpBuffer escape_err)
{
	appendStringLiteral(target, unescaped, PQclientEncoding(conn), true);

	return true;
}

/*
 * Escape via fmtId().
 *
 * unescaped_len is not passed on; the input is handed over as a plain C
 * string.
 */
static bool
escape_fmt_id(PGconn *conn, PQExpBuffer target,
			  const char *unescaped, size_t unescaped_len,
			  PQExpBuffer escape_err)
{
	setFmtEncoding(PQclientEncoding(conn));
	appendPQExpBufferStr(target, fmtId(unescaped));

	return true;
}

/*
 * All escape functions exercised by the test.
 */
static const pe_test_escape_func pe_test_escape_funcs[] =
{
	{
		.name = "PQescapeLiteral",
		.reports_errors = true,
		.escape = escape_literal,
	},
	{
		.name = "PQescapeIdentifier",
		.reports_errors = true,
		.escape = escape_identifier
	},
	{
		.name = "PQescapeStringConn",
		.reports_errors = true,
		.escape = escape_string_conn
	},
	{
		.name = "PQescapeString",
		.reports_errors = false,
		.escape = escape_string
	},
	{
		.name = "replace",
		.reports_errors = false,
		.supports_only_valid = true,
		.supports_only_ascii_overlap = true,
		.escape = escape_replace
	},
	{
		.name = "appendStringLiteral",
		.reports_errors = false,
		.escape = escape_append_literal
	},
	{
		.name = "fmtId",
		.reports_errors = false,
		.escape = escape_fmt_id
	},
};


/*
 * Build a test vector from a string literal.  sizeof is used instead of
 * strlen so that embedded NUL bytes count towards the length.
 */
#define TV(enc, string) {.client_encoding = (enc), .escape=string, .escape_len=sizeof(string) - 1, }
static const pe_test_vector pe_test_vectors[] =
{
	/* expected to work sanity checks */
	TV("UTF-8", "1"),
	TV("UTF-8", "'"),
	TV("UTF-8", "\""),

	TV("UTF-8", "\'"),
	TV("UTF-8", "\""),

	TV("UTF-8", "\\"),

	TV("UTF-8", "\\'"),
	TV("UTF-8", "\\\""),

	/* trailing multi-byte character, paddable in available space */
	TV("UTF-8", "1\xC0"),
	TV("UTF-8", "1\xE0 "),

	/*
	 * NOTE(review): the next three vectors are byte-identical as written
	 * here; presumably they were meant to differ in the amount of padding
	 * after the lead byte -- confirm before changing them.
	 */
	TV("UTF-8", "1\xF0 "),
	TV("UTF-8", "1\xF0 "),
	TV("UTF-8", "1\xF0 "),

	/* trailing multi-byte character, not enough space to pad */
	TV("UTF-8", "1\xE0"),
	TV("UTF-8", "1\xF0"),
	TV("UTF-8", "\xF0"),

	/* try to smuggle in something in invalid characters */
	TV("UTF-8", "1\xE0'"),
	TV("UTF-8", "1\xE0\""),
	TV("UTF-8", "1\xF0'"),
	TV("UTF-8", "1\xF0\""),
	TV("UTF-8", "1\xF0'; "),
	TV("UTF-8", "1\xF0\"; "),
	TV("UTF-8", "1\xF0';;;;"),
	TV("UTF-8", "1\xF0 ';;;;"),
	TV("UTF-8", "1\xF0 \";;;;"),
	TV("UTF-8", "1\xE0'; \\l ; "),
	TV("UTF-8", "1\xE0\"; \\l ; "),

	/* null byte handling */
	TV("UTF-8", "some\0thing"),
	TV("UTF-8", "some\0"),
	TV("UTF-8", "some\xF0'\0"),
	TV("UTF-8", "some\xF0'\0'"),
	/* literal is split so that "ab" is not parsed as part of the \x escape */
	TV("UTF-8", "some\xF0" "ab\0'"),

	/* GB18030's 4 byte encoding requires a 2nd byte limited values */
	TV("GB18030", "\x90\x31"),
	TV("GB18030", "\\\x81\x5c'"),
	TV("GB18030", "\\\x81\x5c\""),
	TV("GB18030", "\\\x81\x5c\0'"),

	/*
	 * \x81 indicates a 2 byte char. ' and " are not a valid second byte, but
	 * that requires encoding verification to know. E.g. the "replace" method
	 * (escape_replace()) doesn't cope.
	 */
	TV("GB18030", "\\\x81';"),
	TV("GB18030", "\\\x81\";"),

	/*
	 * \x81 indicates a 2 byte char. \ is a valid second character.
	 */
	TV("GB18030", "\\\x81\\';"),
	TV("GB18030", "\\\x81\\\";"),
	TV("GB18030", "\\\x81\0;"),
	TV("GB18030", "\\\x81\0'"),
	TV("GB18030", "\\\x81'\0"),

	TV("SJIS", "\xF0\x40;"),

	TV("SJIS", "\xF0';"),
	TV("SJIS", "\xF0\";"),
	TV("SJIS", "\xF0\0'"),
	TV("SJIS", "\\\xF0\\';"),
	TV("SJIS", "\\\xF0\\\";"),

	TV("gbk", "\x80';"),
	TV("gbk", "\x80"),
	TV("gbk", "\x80'"),
	TV("gbk", "\x80\""),
	TV("gbk", "\x80\\"),

	TV("mule_internal", "\\\x9c';\0;"),

	TV("sql_ascii", "1\xC0'"),
};


/*
 * Print the string into buf, making characters outside of plain ascii
 * somewhat easier to recognize.
 *
 * Newline and NUL get the short forms \n and \0, every other byte outside of
 * ' ' .. '~' is printed as \x followed by exactly two hex digits.
 *
 * The output format could stand to be improved significantly, it's not at all
 * unambiguous.
 */
static void
escapify(PQExpBuffer buf, const char *str, size_t len)
{
	for (size_t i = 0; i < len; i++)
	{
		/* look at the byte as unsigned, independent of char's signedness */
		unsigned char c = (unsigned char) str[i];

		if (c == '\n')
			appendPQExpBufferStr(buf, "\\n");
		else if (c == '\0')
			appendPQExpBufferStr(buf, "\\0");
		else if (c < ' ' || c > '~')
			appendPQExpBuffer(buf, "\\x%02x", (unsigned int) c);
		else
			appendPQExpBufferChar(buf, (char) c);
	}
}

/*
 * Count one test result and print it in TAP style.
 *
 * Failures are always printed in full.  For successes the details are only
 * shown with positive verbosity, and the result line is suppressed with
 * negative verbosity.
 */
static void
report_result(pe_test_config *tc,
			  bool success,
			  PQExpBuffer testname,
			  PQExpBuffer details,
			  const char *subname,
			  const char *resultdesc)
{
	int			test_id = ++tc->test_count;
	bool		print_details = true;
	bool		print_result = true;

	if (!success)
		tc->failure_count++;
	else
	{
		print_details = (tc->verbosity > 0);
		print_result = (tc->verbosity >= 0);
	}

	if (print_details)
		printf("%s", details->data);

	if (print_result)
		printf("%s %d - %s: %s: %s\n",
			   success ? "ok" : "not ok",
			   test_id, testname->data,
			   subname,
			   resultdesc);
}

/*
 * Return true for encodings in which bytes in a multi-byte character look
 * like valid ascii characters.
 */
static bool
encoding_conflicts_ascii(int encoding)
{
	/*
	 * We don't store this property directly anywhere, but whether an encoding
	 * is a client-only encoding is a good proxy.
	 */
	return encoding > PG_ENCODING_BE_LAST;
}

/*
 * Return the name of a PsqlScanResult value as a string.
 */
static const char *
scan_res_s(PsqlScanResult res)
{
#define TOSTR_CASE(sym) case sym: return #sym

	switch (res)
	{
			TOSTR_CASE(PSCAN_SEMICOLON);
			TOSTR_CASE(PSCAN_BACKSLASH);
			TOSTR_CASE(PSCAN_INCOMPLETE);
			TOSTR_CASE(PSCAN_EOL);
	}

#undef TOSTR_CASE

	pg_unreachable();
	return "";					/* silence compiler */
}

/*
 * Verify that psql parses the input as a single statement. If this property
 * is violated, the escape function does not effectively protect against
 * smuggling in a second statement.
 */
static void
test_psql_parse(pe_test_config *tc, PQExpBuffer testname,
				PQExpBuffer input_buf, PQExpBuffer details)
{
	PsqlScanState scan_state;
	PsqlScanResult scan_result;
	PQExpBuffer query_buf;
	promptStatus_t prompt_status = PROMPT_READY;
	int			matches = 0;
	bool		test_fails;
	const char *resdesc;

	query_buf = createPQExpBuffer();

	scan_state = psql_scan_create(&test_scan_callbacks);

	/*
	 * TODO: This hardcodes standard conforming strings, it would be useful to
	 * test without as well.
	 */
	psql_scan_setup(scan_state, input_buf->data, input_buf->len,
					PQclientEncoding(tc->conn), true);

	/* scan until the lexer runs out of input, logging every step */
	do
	{
		resetPQExpBuffer(query_buf);

		scan_result = psql_scan(scan_state, query_buf,
								&prompt_status);

		appendPQExpBuffer(details,
						  "#\t\t %d: scan_result: %s prompt: %u, query_buf: ",
						  matches, scan_res_s(scan_result),
						  (unsigned int) prompt_status);
		escapify(details, query_buf->data, query_buf->len);
		appendPQExpBufferChar(details, '\n');

		matches++;
	}
	while (scan_result != PSCAN_INCOMPLETE && scan_result != PSCAN_EOL);

	psql_scan_destroy(scan_state);
	destroyPQExpBuffer(query_buf);

	/* exactly one scan step, ending in PSCAN_EOL, is the only good outcome */
	test_fails = matches > 1 || scan_result != PSCAN_EOL;

	if (matches > 1)
		resdesc = "more than one match";
	else if (scan_result != PSCAN_EOL)
		resdesc = "unexpected end state";
	else
		resdesc = "ok";

	report_result(tc, !test_fails, testname, details,
				  "psql parse",
				  resdesc);
}

/*
 * Run one escape function against one test vector and report the results.
 *
 * Up to three results are reported: whether escape success matches input
 * validity (only for functions that can report errors), whether the validity
 * of the output matches that of the input, and whether psql parses a
 * non-empty output as a single statement.
 */
static void
test_one_vector_escape(pe_test_config *tc, const pe_test_vector *tv,
					   const pe_test_escape_func *ef)
{
	PQExpBuffer testname;
	PQExpBuffer details;
	PQExpBuffer escape_buf;
	PQExpBuffer escape_err;
	size_t		input_encoding_validlen;
	bool		input_encoding_valid;
	size_t		input_len0;
	size_t		input_encoding0_validlen;
	bool		input_encoding0_valid;
	bool		escape_success;
	size_t		escape_encoding_length;
	bool		escape_encoding_valid;
	int			encoding = PQclientEncoding(tc->conn);

	escape_err = createPQExpBuffer();
	testname = createPQExpBuffer();
	details = createPQExpBuffer();
	escape_buf = createPQExpBuffer();

	/* skip escape methods that cannot handle this kind of encoding */
	if (ef->supports_only_ascii_overlap &&
		encoding_conflicts_ascii(encoding))
		goto out;

	/* name to describe the test */
	appendPQExpBufferChar(testname, '>');
	escapify(testname, tv->escape, tv->escape_len);
	appendPQExpBuffer(testname, "< - %s - %s",
					  tv->client_encoding, ef->name);

	/* details to describe the test, to allow for debugging */
	appendPQExpBuffer(details, "#\t input: %zu bytes: ",
					  tv->escape_len);
	escapify(details, tv->escape, tv->escape_len);
	appendPQExpBufferChar(details, '\n');
	appendPQExpBuffer(details, "#\t encoding: %s\n",
					  tv->client_encoding);

	/* check encoding of input, to compare with after the test */
	input_encoding_validlen = pg_encoding_verifymbstr(encoding,
													  tv->escape,
													  tv->escape_len);
	input_encoding_valid = input_encoding_validlen == tv->escape_len;
	appendPQExpBuffer(details, "#\t input encoding valid: %d\n",
					  input_encoding_valid);

	/* the same check, but only up to the first NUL byte */
	input_len0 = strlen(tv->escape);
	input_encoding0_validlen = pg_encoding_verifymbstr(encoding,
													   tv->escape,
													   input_len0);
	input_encoding0_valid = input_encoding0_validlen == input_len0;
	appendPQExpBuffer(details, "#\t input encoding valid till 0: %d\n",
					  input_encoding0_valid);

	appendPQExpBuffer(details, "#\t escape func: %s\n",
					  ef->name);

	if (!input_encoding_valid && ef->supports_only_valid
		&& !tc->force_unsupported)
		goto out;

	/* call the to-be-tested escape function */
	escape_success = ef->escape(tc->conn, escape_buf,
								tv->escape, tv->escape_len,
								escape_err);
	if (!escape_success)
		appendPQExpBuffer(details, "#\t escape error: %s\n",
						  escape_err->data);

	if (escape_buf->len > 0)
	{
		appendPQExpBuffer(details, "#\t escaped string: %zu bytes: ",
						  escape_buf->len);
		escapify(details, escape_buf->data, escape_buf->len);
		appendPQExpBufferChar(details, '\n');

		escape_encoding_length = pg_encoding_verifymbstr(encoding,
														 escape_buf->data,
														 escape_buf->len);
		escape_encoding_valid = escape_encoding_length == escape_buf->len;

		appendPQExpBuffer(details, "#\t escape encoding valid: %d\n",
						  escape_encoding_valid);
	}
	else
	{
		/* no output at all counts as validly encoded */
		escape_encoding_length = 0;
		escape_encoding_valid = true;
	}

	/*
	 * If the test reports errors, and the input was invalidly encoded,
	 * escaping should fail.  One edge-case that we accept for now is that the
	 * input could have an embedded null byte, which the escape functions will
	 * just treat as a shorter string. If the encoding error is after the zero
	 * byte, the output thus won't contain it.
	 */
	if (ef->reports_errors)
	{
		bool		ok = true;
		const char *resdesc = "ok";

		if (escape_success)
		{
			if (!input_encoding0_valid)
			{
				ok = false;
				resdesc = "invalid input escaped successfully";
			}
			else if (!input_encoding_valid)
				resdesc = "invalid input escaped successfully, due to zero byte";
		}
		else
		{
			if (input_encoding0_valid)
			{
				ok = false;
				resdesc = "valid input failed to escape";
			}
			else if (input_encoding_valid)
				resdesc = "valid input failed to escape, due to zero byte";
		}

		report_result(tc, ok, testname, details,
					  "input validity vs escape success",
					  resdesc);
	}

	/*
	 * If the input is invalidly encoded, the output should also be invalidly
	 * encoded. We accept the same zero-byte edge case as above.
	 */
	{
		bool		ok = true;
		const char *resdesc = "ok";

		if (input_encoding0_valid && !input_encoding_valid && escape_encoding_valid)
		{
			resdesc = "invalid input produced valid output, due to zero byte";
		}
		else if (input_encoding0_valid && !escape_encoding_valid)
		{
			ok = false;
			resdesc = "valid input produced invalid output";
		}
		else if (!input_encoding0_valid &&
				 (!ef->reports_errors || escape_success) &&
				 escape_encoding_valid)
		{
			ok = false;
			resdesc = "invalid input produced valid output";
		}

		report_result(tc, ok, testname, details,
					  "input and escaped encoding validity",
					  resdesc);
	}

	/*
	 * Test psql parsing whenever we get any string back, even if the escape
	 * function returned a failure.
	 */
	if (escape_buf->len > 0)
		test_psql_parse(tc, testname,
						escape_buf, details);

out:
	destroyPQExpBuffer(escape_err);
	destroyPQExpBuffer(details);
	destroyPQExpBuffer(testname);
	destroyPQExpBuffer(escape_buf);
}

/*
 * Switch the connection to the vector's client encoding and run every escape
 * function against the vector.  Exits if the encoding cannot be set.
 */
static void
test_one_vector(pe_test_config *tc, const pe_test_vector *tv)
{
	if (PQsetClientEncoding(tc->conn, tv->client_encoding))
	{
		fprintf(stderr, "failed to set encoding to %s:\n%s\n",
				tv->client_encoding, PQerrorMessage(tc->conn));
		exit(1);
	}

	for (size_t escoff = 0; escoff < lengthof(pe_test_escape_funcs); escoff++)
		test_one_vector_escape(tc, tv, &pe_test_escape_funcs[escoff]);
}

/*
 * Print the usage message.  If hint is not NULL it is printed to stderr first
 * and the program exits with status 1 afterwards.
 */
static void
usage(const char *hint)
{
	if (hint)
		fprintf(stderr, "Error: %s\n\n", hint);

	printf("PostgreSQL escape function test\n"
		   "\n"
		   "Usage:\n"
		   " test_escape --conninfo=CONNINFO [OPTIONS]\n"
		   "\n"
		   "Options:\n"
		   " -h, --help show this help\n"
		   " -c, --conninfo=CONNINFO connection information to use\n"
		   " -v, --verbose show test details even for successes\n"
		   " -q, --quiet only show failures\n"
		   " -f, --force-unsupported test invalid input even if unsupported\n"
		);

	if (hint)
		exit(1);
}

/*
 * Parse the command line, connect, run all test vectors and print a TAP
 * summary.  The exit status is 1 if any test failed, 0 otherwise.
 */
int
main(int argc, char *argv[])
{
	pe_test_config tc = {0};
	int			c;
	int			option_index;

	static const struct option long_options[] = {
		{"help", no_argument, NULL, 'h'},
		{"conninfo", required_argument, NULL, 'c'},
		{"verbose", no_argument, NULL, 'v'},
		{"quiet", no_argument, NULL, 'q'},
		{"force-unsupported", no_argument, NULL, 'f'},
		{NULL, 0, NULL, 0},
	};

	/*
	 * The short option string has to list every short option the usage text
	 * advertises, including -c with its argument.
	 */
	while ((c = getopt_long(argc, argv, "c:fhqv", long_options, &option_index)) != -1)
	{
		switch (c)
		{
			case 'h':
				usage(NULL);
				exit(0);
				break;
			case 'c':
				tc.conninfo = optarg;
				break;
			case 'v':
				tc.verbosity++;
				break;
			case 'q':
				tc.verbosity--;
				break;
			case 'f':
				tc.force_unsupported = true;
				break;
			default:
				/* unrecognized option or missing argument */
				usage("invalid command-line option");
				break;
		}
	}

	if (argc - optind >= 1)
		usage("unused option(s) specified");

	if (tc.conninfo == NULL)
		usage("--conninfo needs to be specified");

	tc.conn = PQconnectdb(tc.conninfo);

	if (!tc.conn || PQstatus(tc.conn) != CONNECTION_OK)
	{
		fprintf(stderr, "could not connect: %s\n",
				PQerrorMessage(tc.conn));
		PQfinish(tc.conn);
		exit(1);
	}

	for (size_t i = 0; i < lengthof(pe_test_vectors); i++)
		test_one_vector(&tc, &pe_test_vectors[i]);

	PQfinish(tc.conn);

	printf("# %d failures\n", tc.failure_count);
	printf("1..%d\n", tc.test_count);
	return tc.failure_count > 0;
}