From aafc07c7a191bc807c77fe2a044006a5db07faba Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 19 Dec 2023 15:21:34 -0500 Subject: Move src/bin/pg_verifybackup/parse_manifest.c into src/common. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes it possible for the code to be easily reused by other client-side tools, and/or by the server. Patch by me. Review of this patch in particular by at least Peter Eisentraut; reviewers for the patch series in general include Dilip Kumar, Andres Freund, David Steele, Álvaro Herrera, and Jakub Wartak. Discussion: http://postgr.es/m/CA+TgmoZ6UGZVnSy5iak6s6+AXu_DewXovDjhLs3-su6nmU_x_g@mail.gmail.com --- src/bin/pg_verifybackup/Makefile | 1 - src/bin/pg_verifybackup/meson.build | 1 - src/bin/pg_verifybackup/nls.mk | 4 +- src/bin/pg_verifybackup/parse_manifest.c | 762 ------------------------------ src/bin/pg_verifybackup/parse_manifest.h | 46 -- src/bin/pg_verifybackup/pg_verifybackup.c | 2 +- src/common/Makefile | 1 + src/common/meson.build | 1 + src/common/parse_manifest.c | 762 ++++++++++++++++++++++++++++++ src/include/common/parse_manifest.h | 46 ++ 10 files changed, 813 insertions(+), 813 deletions(-) delete mode 100644 src/bin/pg_verifybackup/parse_manifest.c delete mode 100644 src/bin/pg_verifybackup/parse_manifest.h create mode 100644 src/common/parse_manifest.c create mode 100644 src/include/common/parse_manifest.h (limited to 'src') diff --git a/src/bin/pg_verifybackup/Makefile b/src/bin/pg_verifybackup/Makefile index c96323faa9c..7c045f142e8 100644 --- a/src/bin/pg_verifybackup/Makefile +++ b/src/bin/pg_verifybackup/Makefile @@ -21,7 +21,6 @@ LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) OBJS = \ $(WIN32RES) \ - parse_manifest.o \ pg_verifybackup.o all: pg_verifybackup diff --git a/src/bin/pg_verifybackup/meson.build b/src/bin/pg_verifybackup/meson.build index 9369da1bc65..58f780d1a63 100644 --- a/src/bin/pg_verifybackup/meson.build 
+++ b/src/bin/pg_verifybackup/meson.build @@ -1,7 +1,6 @@ # Copyright (c) 2022-2023, PostgreSQL Global Development Group pg_verifybackup_sources = files( - 'parse_manifest.c', 'pg_verifybackup.c' ) diff --git a/src/bin/pg_verifybackup/nls.mk b/src/bin/pg_verifybackup/nls.mk index eba73a2c058..9e6a6049ba7 100644 --- a/src/bin/pg_verifybackup/nls.mk +++ b/src/bin/pg_verifybackup/nls.mk @@ -1,10 +1,10 @@ # src/bin/pg_verifybackup/nls.mk CATALOG_NAME = pg_verifybackup GETTEXT_FILES = $(FRONTEND_COMMON_GETTEXT_FILES) \ - parse_manifest.c \ pg_verifybackup.c \ ../../common/fe_memutils.c \ - ../../common/jsonapi.c + ../../common/jsonapi.c \ + ../../common/parse_manifest.c GETTEXT_TRIGGERS = $(FRONTEND_COMMON_GETTEXT_TRIGGERS) \ json_manifest_parse_failure:2 \ error_cb:2 \ diff --git a/src/bin/pg_verifybackup/parse_manifest.c b/src/bin/pg_verifybackup/parse_manifest.c deleted file mode 100644 index 850adf90a8d..00000000000 --- a/src/bin/pg_verifybackup/parse_manifest.c +++ /dev/null @@ -1,762 +0,0 @@ -/*------------------------------------------------------------------------- - * - * parse_manifest.c - * Parse a backup manifest in JSON format. - * - * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * src/bin/pg_verifybackup/parse_manifest.c - * - *------------------------------------------------------------------------- - */ - -#include "postgres_fe.h" - -#include "parse_manifest.h" -#include "common/jsonapi.h" - -/* - * Semantic states for JSON manifest parsing. 
- */ -typedef enum -{ - JM_EXPECT_TOPLEVEL_START, - JM_EXPECT_TOPLEVEL_END, - JM_EXPECT_TOPLEVEL_FIELD, - JM_EXPECT_VERSION_VALUE, - JM_EXPECT_FILES_START, - JM_EXPECT_FILES_NEXT, - JM_EXPECT_THIS_FILE_FIELD, - JM_EXPECT_THIS_FILE_VALUE, - JM_EXPECT_WAL_RANGES_START, - JM_EXPECT_WAL_RANGES_NEXT, - JM_EXPECT_THIS_WAL_RANGE_FIELD, - JM_EXPECT_THIS_WAL_RANGE_VALUE, - JM_EXPECT_MANIFEST_CHECKSUM_VALUE, - JM_EXPECT_EOF, -} JsonManifestSemanticState; - -/* - * Possible fields for one file as described by the manifest. - */ -typedef enum -{ - JMFF_PATH, - JMFF_ENCODED_PATH, - JMFF_SIZE, - JMFF_LAST_MODIFIED, - JMFF_CHECKSUM_ALGORITHM, - JMFF_CHECKSUM, -} JsonManifestFileField; - -/* - * Possible fields for one file as described by the manifest. - */ -typedef enum -{ - JMWRF_TIMELINE, - JMWRF_START_LSN, - JMWRF_END_LSN, -} JsonManifestWALRangeField; - -/* - * Internal state used while decoding the JSON-format backup manifest. - */ -typedef struct -{ - JsonManifestParseContext *context; - JsonManifestSemanticState state; - - /* These fields are used for parsing objects in the list of files. */ - JsonManifestFileField file_field; - char *pathname; - char *encoded_pathname; - char *size; - char *algorithm; - pg_checksum_type checksum_algorithm; - char *checksum; - - /* These fields are used for parsing objects in the list of WAL ranges. */ - JsonManifestWALRangeField wal_range_field; - char *timeline; - char *start_lsn; - char *end_lsn; - - /* Miscellaneous other stuff. 
*/ - bool saw_version_field; - char *manifest_checksum; -} JsonManifestParseState; - -static JsonParseErrorType json_manifest_object_start(void *state); -static JsonParseErrorType json_manifest_object_end(void *state); -static JsonParseErrorType json_manifest_array_start(void *state); -static JsonParseErrorType json_manifest_array_end(void *state); -static JsonParseErrorType json_manifest_object_field_start(void *state, char *fname, - bool isnull); -static JsonParseErrorType json_manifest_scalar(void *state, char *token, - JsonTokenType tokentype); -static void json_manifest_finalize_file(JsonManifestParseState *parse); -static void json_manifest_finalize_wal_range(JsonManifestParseState *parse); -static void verify_manifest_checksum(JsonManifestParseState *parse, - char *buffer, size_t size); -static void json_manifest_parse_failure(JsonManifestParseContext *context, - char *msg); - -static int hexdecode_char(char c); -static bool hexdecode_string(uint8 *result, char *input, int nbytes); -static bool parse_xlogrecptr(XLogRecPtr *result, char *input); - -/* - * Main entrypoint to parse a JSON-format backup manifest. - * - * Caller should set up the parsing context and then invoke this function. - * For each file whose information is extracted from the manifest, - * context->per_file_cb is invoked. In case of trouble, context->error_cb is - * invoked and is expected not to return. - */ -void -json_parse_manifest(JsonManifestParseContext *context, char *buffer, - size_t size) -{ - JsonLexContext *lex; - JsonParseErrorType json_error; - JsonSemAction sem; - JsonManifestParseState parse; - - /* Set up our private parsing context. */ - parse.context = context; - parse.state = JM_EXPECT_TOPLEVEL_START; - parse.saw_version_field = false; - - /* Create a JSON lexing context. */ - lex = makeJsonLexContextCstringLen(NULL, buffer, size, PG_UTF8, true); - - /* Set up semantic actions. 
*/ - sem.semstate = &parse; - sem.object_start = json_manifest_object_start; - sem.object_end = json_manifest_object_end; - sem.array_start = json_manifest_array_start; - sem.array_end = json_manifest_array_end; - sem.object_field_start = json_manifest_object_field_start; - sem.object_field_end = NULL; - sem.array_element_start = NULL; - sem.array_element_end = NULL; - sem.scalar = json_manifest_scalar; - - /* Run the actual JSON parser. */ - json_error = pg_parse_json(lex, &sem); - if (json_error != JSON_SUCCESS) - json_manifest_parse_failure(context, "parsing failed"); - if (parse.state != JM_EXPECT_EOF) - json_manifest_parse_failure(context, "manifest ended unexpectedly"); - - /* Verify the manifest checksum. */ - verify_manifest_checksum(&parse, buffer, size); - - freeJsonLexContext(lex); -} - -/* - * Invoked at the start of each object in the JSON document. - * - * The document as a whole is expected to be an object; each file and each - * WAL range is also expected to be an object. If we're anywhere else in the - * document, it's an error. - */ -static JsonParseErrorType -json_manifest_object_start(void *state) -{ - JsonManifestParseState *parse = state; - - switch (parse->state) - { - case JM_EXPECT_TOPLEVEL_START: - parse->state = JM_EXPECT_TOPLEVEL_FIELD; - break; - case JM_EXPECT_FILES_NEXT: - parse->state = JM_EXPECT_THIS_FILE_FIELD; - parse->pathname = NULL; - parse->encoded_pathname = NULL; - parse->size = NULL; - parse->algorithm = NULL; - parse->checksum = NULL; - break; - case JM_EXPECT_WAL_RANGES_NEXT: - parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD; - parse->timeline = NULL; - parse->start_lsn = NULL; - parse->end_lsn = NULL; - break; - default: - json_manifest_parse_failure(parse->context, - "unexpected object start"); - break; - } - - return JSON_SUCCESS; -} - -/* - * Invoked at the end of each object in the JSON document. - * - * The possible cases here are the same as for json_manifest_object_start. 
- * There's nothing special to do at the end of the document, but when we - * reach the end of an object representing a particular file or WAL range, - * we must call json_manifest_finalize_file() to save the associated details. - */ -static JsonParseErrorType -json_manifest_object_end(void *state) -{ - JsonManifestParseState *parse = state; - - switch (parse->state) - { - case JM_EXPECT_TOPLEVEL_END: - parse->state = JM_EXPECT_EOF; - break; - case JM_EXPECT_THIS_FILE_FIELD: - json_manifest_finalize_file(parse); - parse->state = JM_EXPECT_FILES_NEXT; - break; - case JM_EXPECT_THIS_WAL_RANGE_FIELD: - json_manifest_finalize_wal_range(parse); - parse->state = JM_EXPECT_WAL_RANGES_NEXT; - break; - default: - json_manifest_parse_failure(parse->context, - "unexpected object end"); - break; - } - - return JSON_SUCCESS; -} - -/* - * Invoked at the start of each array in the JSON document. - * - * Within the toplevel object, the value associated with the "Files" key - * should be an array. Similarly for the "WAL-Ranges" key. No other arrays - * are expected. - */ -static JsonParseErrorType -json_manifest_array_start(void *state) -{ - JsonManifestParseState *parse = state; - - switch (parse->state) - { - case JM_EXPECT_FILES_START: - parse->state = JM_EXPECT_FILES_NEXT; - break; - case JM_EXPECT_WAL_RANGES_START: - parse->state = JM_EXPECT_WAL_RANGES_NEXT; - break; - default: - json_manifest_parse_failure(parse->context, - "unexpected array start"); - break; - } - - return JSON_SUCCESS; -} - -/* - * Invoked at the end of each array in the JSON document. - * - * The cases here are analogous to those in json_manifest_array_start. 
- */ -static JsonParseErrorType -json_manifest_array_end(void *state) -{ - JsonManifestParseState *parse = state; - - switch (parse->state) - { - case JM_EXPECT_FILES_NEXT: - case JM_EXPECT_WAL_RANGES_NEXT: - parse->state = JM_EXPECT_TOPLEVEL_FIELD; - break; - default: - json_manifest_parse_failure(parse->context, - "unexpected array end"); - break; - } - - return JSON_SUCCESS; -} - -/* - * Invoked at the start of each object field in the JSON document. - */ -static JsonParseErrorType -json_manifest_object_field_start(void *state, char *fname, bool isnull) -{ - JsonManifestParseState *parse = state; - - switch (parse->state) - { - case JM_EXPECT_TOPLEVEL_FIELD: - - /* - * Inside toplevel object. The version indicator should always be - * the first field. - */ - if (!parse->saw_version_field) - { - if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0) - json_manifest_parse_failure(parse->context, - "expected version indicator"); - parse->state = JM_EXPECT_VERSION_VALUE; - parse->saw_version_field = true; - break; - } - - /* Is this the list of files? */ - if (strcmp(fname, "Files") == 0) - { - parse->state = JM_EXPECT_FILES_START; - break; - } - - /* Is this the list of WAL ranges? */ - if (strcmp(fname, "WAL-Ranges") == 0) - { - parse->state = JM_EXPECT_WAL_RANGES_START; - break; - } - - /* Is this the manifest checksum? */ - if (strcmp(fname, "Manifest-Checksum") == 0) - { - parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE; - break; - } - - /* It's not a field we recognize. */ - json_manifest_parse_failure(parse->context, - "unrecognized top-level field"); - break; - - case JM_EXPECT_THIS_FILE_FIELD: - /* Inside object for one file; which key have we got? 
*/ - if (strcmp(fname, "Path") == 0) - parse->file_field = JMFF_PATH; - else if (strcmp(fname, "Encoded-Path") == 0) - parse->file_field = JMFF_ENCODED_PATH; - else if (strcmp(fname, "Size") == 0) - parse->file_field = JMFF_SIZE; - else if (strcmp(fname, "Last-Modified") == 0) - parse->file_field = JMFF_LAST_MODIFIED; - else if (strcmp(fname, "Checksum-Algorithm") == 0) - parse->file_field = JMFF_CHECKSUM_ALGORITHM; - else if (strcmp(fname, "Checksum") == 0) - parse->file_field = JMFF_CHECKSUM; - else - json_manifest_parse_failure(parse->context, - "unexpected file field"); - parse->state = JM_EXPECT_THIS_FILE_VALUE; - break; - - case JM_EXPECT_THIS_WAL_RANGE_FIELD: - /* Inside object for one file; which key have we got? */ - if (strcmp(fname, "Timeline") == 0) - parse->wal_range_field = JMWRF_TIMELINE; - else if (strcmp(fname, "Start-LSN") == 0) - parse->wal_range_field = JMWRF_START_LSN; - else if (strcmp(fname, "End-LSN") == 0) - parse->wal_range_field = JMWRF_END_LSN; - else - json_manifest_parse_failure(parse->context, - "unexpected WAL range field"); - parse->state = JM_EXPECT_THIS_WAL_RANGE_VALUE; - break; - - default: - json_manifest_parse_failure(parse->context, - "unexpected object field"); - break; - } - - return JSON_SUCCESS; -} - -/* - * Invoked at the start of each scalar in the JSON document. - * - * Object field names don't reach this code; those are handled by - * json_manifest_object_field_start. When we're inside of the object for - * a particular file or WAL range, that function will have noticed the name - * of the field, and we'll get the corresponding value here. When we're in - * the toplevel object, the parse state itself tells us which field this is. - * - * In all cases except for PostgreSQL-Backup-Manifest-Version, which we - * can just check on the spot, the goal here is just to save the value in - * the parse state for later use. 
We don't actually do anything until we - * reach either the end of the object representing this file, or the end - * of the manifest, as the case may be. - */ -static JsonParseErrorType -json_manifest_scalar(void *state, char *token, JsonTokenType tokentype) -{ - JsonManifestParseState *parse = state; - - switch (parse->state) - { - case JM_EXPECT_VERSION_VALUE: - if (strcmp(token, "1") != 0) - json_manifest_parse_failure(parse->context, - "unexpected manifest version"); - parse->state = JM_EXPECT_TOPLEVEL_FIELD; - break; - - case JM_EXPECT_THIS_FILE_VALUE: - switch (parse->file_field) - { - case JMFF_PATH: - parse->pathname = token; - break; - case JMFF_ENCODED_PATH: - parse->encoded_pathname = token; - break; - case JMFF_SIZE: - parse->size = token; - break; - case JMFF_LAST_MODIFIED: - pfree(token); /* unused */ - break; - case JMFF_CHECKSUM_ALGORITHM: - parse->algorithm = token; - break; - case JMFF_CHECKSUM: - parse->checksum = token; - break; - } - parse->state = JM_EXPECT_THIS_FILE_FIELD; - break; - - case JM_EXPECT_THIS_WAL_RANGE_VALUE: - switch (parse->wal_range_field) - { - case JMWRF_TIMELINE: - parse->timeline = token; - break; - case JMWRF_START_LSN: - parse->start_lsn = token; - break; - case JMWRF_END_LSN: - parse->end_lsn = token; - break; - } - parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD; - break; - - case JM_EXPECT_MANIFEST_CHECKSUM_VALUE: - parse->state = JM_EXPECT_TOPLEVEL_END; - parse->manifest_checksum = token; - break; - - default: - json_manifest_parse_failure(parse->context, "unexpected scalar"); - break; - } - - return JSON_SUCCESS; -} - -/* - * Do additional parsing and sanity-checking of the details gathered for one - * file, and invoke the per-file callback so that the caller gets those - * details. This happens for each file when the corresponding JSON object is - * completely parsed. 
- */ -static void -json_manifest_finalize_file(JsonManifestParseState *parse) -{ - JsonManifestParseContext *context = parse->context; - size_t size; - char *ep; - int checksum_string_length; - pg_checksum_type checksum_type; - int checksum_length; - uint8 *checksum_payload; - - /* Pathname and size are required. */ - if (parse->pathname == NULL && parse->encoded_pathname == NULL) - json_manifest_parse_failure(parse->context, "missing path name"); - if (parse->pathname != NULL && parse->encoded_pathname != NULL) - json_manifest_parse_failure(parse->context, - "both path name and encoded path name"); - if (parse->size == NULL) - json_manifest_parse_failure(parse->context, "missing size"); - if (parse->algorithm == NULL && parse->checksum != NULL) - json_manifest_parse_failure(parse->context, - "checksum without algorithm"); - - /* Decode encoded pathname, if that's what we have. */ - if (parse->encoded_pathname != NULL) - { - int encoded_length = strlen(parse->encoded_pathname); - int raw_length = encoded_length / 2; - - parse->pathname = palloc(raw_length + 1); - if (encoded_length % 2 != 0 || - !hexdecode_string((uint8 *) parse->pathname, - parse->encoded_pathname, - raw_length)) - json_manifest_parse_failure(parse->context, - "could not decode file name"); - parse->pathname[raw_length] = '\0'; - pfree(parse->encoded_pathname); - parse->encoded_pathname = NULL; - } - - /* Parse size. */ - size = strtoul(parse->size, &ep, 10); - if (*ep) - json_manifest_parse_failure(parse->context, - "file size is not an integer"); - - /* Parse the checksum algorithm, if it's present. */ - if (parse->algorithm == NULL) - checksum_type = CHECKSUM_TYPE_NONE; - else if (!pg_checksum_parse_type(parse->algorithm, &checksum_type)) - context->error_cb(context, "unrecognized checksum algorithm: \"%s\"", - parse->algorithm); - - /* Parse the checksum payload, if it's present. */ - checksum_string_length = parse->checksum == NULL ? 
0 - : strlen(parse->checksum); - if (checksum_string_length == 0) - { - checksum_length = 0; - checksum_payload = NULL; - } - else - { - checksum_length = checksum_string_length / 2; - checksum_payload = palloc(checksum_length); - if (checksum_string_length % 2 != 0 || - !hexdecode_string(checksum_payload, parse->checksum, - checksum_length)) - context->error_cb(context, - "invalid checksum for file \"%s\": \"%s\"", - parse->pathname, parse->checksum); - } - - /* Invoke the callback with the details we've gathered. */ - context->per_file_cb(context, parse->pathname, size, - checksum_type, checksum_length, checksum_payload); - - /* Free memory we no longer need. */ - if (parse->size != NULL) - { - pfree(parse->size); - parse->size = NULL; - } - if (parse->algorithm != NULL) - { - pfree(parse->algorithm); - parse->algorithm = NULL; - } - if (parse->checksum != NULL) - { - pfree(parse->checksum); - parse->checksum = NULL; - } -} - -/* - * Do additional parsing and sanity-checking of the details gathered for one - * WAL range, and invoke the per-WAL-range callback so that the caller gets - * those details. This happens for each WAL range when the corresponding JSON - * object is completely parsed. - */ -static void -json_manifest_finalize_wal_range(JsonManifestParseState *parse) -{ - JsonManifestParseContext *context = parse->context; - TimeLineID tli; - XLogRecPtr start_lsn, - end_lsn; - char *ep; - - /* Make sure all fields are present. */ - if (parse->timeline == NULL) - json_manifest_parse_failure(parse->context, "missing timeline"); - if (parse->start_lsn == NULL) - json_manifest_parse_failure(parse->context, "missing start LSN"); - if (parse->end_lsn == NULL) - json_manifest_parse_failure(parse->context, "missing end LSN"); - - /* Parse timeline. 
*/ - tli = strtoul(parse->timeline, &ep, 10); - if (*ep) - json_manifest_parse_failure(parse->context, - "timeline is not an integer"); - if (!parse_xlogrecptr(&start_lsn, parse->start_lsn)) - json_manifest_parse_failure(parse->context, - "could not parse start LSN"); - if (!parse_xlogrecptr(&end_lsn, parse->end_lsn)) - json_manifest_parse_failure(parse->context, - "could not parse end LSN"); - - /* Invoke the callback with the details we've gathered. */ - context->per_wal_range_cb(context, tli, start_lsn, end_lsn); - - /* Free memory we no longer need. */ - if (parse->timeline != NULL) - { - pfree(parse->timeline); - parse->timeline = NULL; - } - if (parse->start_lsn != NULL) - { - pfree(parse->start_lsn); - parse->start_lsn = NULL; - } - if (parse->end_lsn != NULL) - { - pfree(parse->end_lsn); - parse->end_lsn = NULL; - } -} - -/* - * Verify that the manifest checksum is correct. - * - * The last line of the manifest file is excluded from the manifest checksum, - * because the last line is expected to contain the checksum that covers - * the rest of the file. - */ -static void -verify_manifest_checksum(JsonManifestParseState *parse, char *buffer, - size_t size) -{ - JsonManifestParseContext *context = parse->context; - size_t i; - size_t number_of_newlines = 0; - size_t ultimate_newline = 0; - size_t penultimate_newline = 0; - pg_cryptohash_ctx *manifest_ctx; - uint8 manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH]; - uint8 manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH]; - - /* Find the last two newlines in the file. */ - for (i = 0; i < size; ++i) - { - if (buffer[i] == '\n') - { - ++number_of_newlines; - penultimate_newline = ultimate_newline; - ultimate_newline = i; - } - } - - /* - * Make sure that the last newline is right at the end, and that there are - * at least two lines total. We need this to be true in order for the - * following code, which computes the manifest checksum, to work properly. 
- */ - if (number_of_newlines < 2) - json_manifest_parse_failure(parse->context, - "expected at least 2 lines"); - if (ultimate_newline != size - 1) - json_manifest_parse_failure(parse->context, - "last line not newline-terminated"); - - /* Checksum the rest. */ - manifest_ctx = pg_cryptohash_create(PG_SHA256); - if (manifest_ctx == NULL) - context->error_cb(context, "out of memory"); - if (pg_cryptohash_init(manifest_ctx) < 0) - context->error_cb(context, "could not initialize checksum of manifest"); - if (pg_cryptohash_update(manifest_ctx, (uint8 *) buffer, penultimate_newline + 1) < 0) - context->error_cb(context, "could not update checksum of manifest"); - if (pg_cryptohash_final(manifest_ctx, manifest_checksum_actual, - sizeof(manifest_checksum_actual)) < 0) - context->error_cb(context, "could not finalize checksum of manifest"); - - /* Now verify it. */ - if (parse->manifest_checksum == NULL) - context->error_cb(parse->context, "manifest has no checksum"); - if (strlen(parse->manifest_checksum) != PG_SHA256_DIGEST_LENGTH * 2 || - !hexdecode_string(manifest_checksum_expected, parse->manifest_checksum, - PG_SHA256_DIGEST_LENGTH)) - context->error_cb(context, "invalid manifest checksum: \"%s\"", - parse->manifest_checksum); - if (memcmp(manifest_checksum_actual, manifest_checksum_expected, - PG_SHA256_DIGEST_LENGTH) != 0) - context->error_cb(context, "manifest checksum mismatch"); - pg_cryptohash_free(manifest_ctx); -} - -/* - * Report a parse error. - * - * This is intended to be used for fairly low-level failures that probably - * shouldn't occur unless somebody has deliberately constructed a bad manifest, - * or unless the server is generating bad manifests due to some bug. msg should - * be a short string giving some hint as to what the problem is. 
- */ -static void -json_manifest_parse_failure(JsonManifestParseContext *context, char *msg) -{ - context->error_cb(context, "could not parse backup manifest: %s", msg); -} - -/* - * Convert a character which represents a hexadecimal digit to an integer. - * - * Returns -1 if the character is not a hexadecimal digit. - */ -static int -hexdecode_char(char c) -{ - if (c >= '0' && c <= '9') - return c - '0'; - if (c >= 'a' && c <= 'f') - return c - 'a' + 10; - if (c >= 'A' && c <= 'F') - return c - 'A' + 10; - - return -1; -} - -/* - * Decode a hex string into a byte string, 2 hex chars per byte. - * - * Returns false if invalid characters are encountered; otherwise true. - */ -static bool -hexdecode_string(uint8 *result, char *input, int nbytes) -{ - int i; - - for (i = 0; i < nbytes; ++i) - { - int n1 = hexdecode_char(input[i * 2]); - int n2 = hexdecode_char(input[i * 2 + 1]); - - if (n1 < 0 || n2 < 0) - return false; - result[i] = n1 * 16 + n2; - } - - return true; -} - -/* - * Parse an XLogRecPtr expressed using the usual string format. - */ -static bool -parse_xlogrecptr(XLogRecPtr *result, char *input) -{ - uint32 hi; - uint32 lo; - - if (sscanf(input, "%X/%X", &hi, &lo) != 2) - return false; - *result = ((uint64) hi) << 32 | lo; - return true; -} diff --git a/src/bin/pg_verifybackup/parse_manifest.h b/src/bin/pg_verifybackup/parse_manifest.h deleted file mode 100644 index 001b9a6a110..00000000000 --- a/src/bin/pg_verifybackup/parse_manifest.h +++ /dev/null @@ -1,46 +0,0 @@ -/*------------------------------------------------------------------------- - * - * parse_manifest.h - * Parse a backup manifest in JSON format. 
- * - * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * src/bin/pg_verifybackup/parse_manifest.h - * - *------------------------------------------------------------------------- - */ - -#ifndef PARSE_MANIFEST_H -#define PARSE_MANIFEST_H - -#include "access/xlogdefs.h" -#include "common/checksum_helper.h" -#include "mb/pg_wchar.h" - -struct JsonManifestParseContext; -typedef struct JsonManifestParseContext JsonManifestParseContext; - -typedef void (*json_manifest_per_file_callback) (JsonManifestParseContext *, - char *pathname, - size_t size, pg_checksum_type checksum_type, - int checksum_length, uint8 *checksum_payload); -typedef void (*json_manifest_per_wal_range_callback) (JsonManifestParseContext *, - TimeLineID tli, - XLogRecPtr start_lsn, XLogRecPtr end_lsn); -typedef void (*json_manifest_error_callback) (JsonManifestParseContext *, - const char *fmt,...) pg_attribute_printf(2, 3) - pg_attribute_noreturn(); - -struct JsonManifestParseContext -{ - void *private_data; - json_manifest_per_file_callback per_file_cb; - json_manifest_per_wal_range_callback per_wal_range_cb; - json_manifest_error_callback error_cb; -}; - -extern void json_parse_manifest(JsonManifestParseContext *context, - char *buffer, size_t size); - -#endif diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c index d921d0f003a..88081f66f76 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.c +++ b/src/bin/pg_verifybackup/pg_verifybackup.c @@ -20,9 +20,9 @@ #include "common/hashfn.h" #include "common/logging.h" +#include "common/parse_manifest.h" #include "fe_utils/simple_list.h" #include "getopt_long.h" -#include "parse_manifest.h" #include "pgtime.h" /* diff --git a/src/common/Makefile b/src/common/Makefile index ce4535d7fec..1092dc63df3 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -66,6 +66,7 @@ OBJS_COMMON = \ kwlookup.o \ 
link-canary.o \ md5_common.o \ + parse_manifest.o \ percentrepl.o \ pg_get_line.o \ pg_lzcompress.o \ diff --git a/src/common/meson.build b/src/common/meson.build index 8be145c0fb1..d52dd12bc94 100644 --- a/src/common/meson.build +++ b/src/common/meson.build @@ -18,6 +18,7 @@ common_sources = files( 'kwlookup.c', 'link-canary.c', 'md5_common.c', + 'parse_manifest.c', 'percentrepl.c', 'pg_get_line.c', 'pg_lzcompress.c', diff --git a/src/common/parse_manifest.c b/src/common/parse_manifest.c new file mode 100644 index 00000000000..9f52bfa83b3 --- /dev/null +++ b/src/common/parse_manifest.c @@ -0,0 +1,762 @@ +/*------------------------------------------------------------------------- + * + * parse_manifest.c + * Parse a backup manifest in JSON format. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/common/parse_manifest.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres_fe.h" + +#include "common/jsonapi.h" +#include "common/parse_manifest.h" + +/* + * Semantic states for JSON manifest parsing. + */ +typedef enum +{ + JM_EXPECT_TOPLEVEL_START, + JM_EXPECT_TOPLEVEL_END, + JM_EXPECT_TOPLEVEL_FIELD, + JM_EXPECT_VERSION_VALUE, + JM_EXPECT_FILES_START, + JM_EXPECT_FILES_NEXT, + JM_EXPECT_THIS_FILE_FIELD, + JM_EXPECT_THIS_FILE_VALUE, + JM_EXPECT_WAL_RANGES_START, + JM_EXPECT_WAL_RANGES_NEXT, + JM_EXPECT_THIS_WAL_RANGE_FIELD, + JM_EXPECT_THIS_WAL_RANGE_VALUE, + JM_EXPECT_MANIFEST_CHECKSUM_VALUE, + JM_EXPECT_EOF, +} JsonManifestSemanticState; + +/* + * Possible fields for one file as described by the manifest. + */ +typedef enum +{ + JMFF_PATH, + JMFF_ENCODED_PATH, + JMFF_SIZE, + JMFF_LAST_MODIFIED, + JMFF_CHECKSUM_ALGORITHM, + JMFF_CHECKSUM, +} JsonManifestFileField; + +/* + * Possible fields for one file as described by the manifest. 
+ */ +typedef enum +{ + JMWRF_TIMELINE, + JMWRF_START_LSN, + JMWRF_END_LSN, +} JsonManifestWALRangeField; + +/* + * Internal state used while decoding the JSON-format backup manifest. + */ +typedef struct +{ + JsonManifestParseContext *context; + JsonManifestSemanticState state; + + /* These fields are used for parsing objects in the list of files. */ + JsonManifestFileField file_field; + char *pathname; + char *encoded_pathname; + char *size; + char *algorithm; + pg_checksum_type checksum_algorithm; + char *checksum; + + /* These fields are used for parsing objects in the list of WAL ranges. */ + JsonManifestWALRangeField wal_range_field; + char *timeline; + char *start_lsn; + char *end_lsn; + + /* Miscellaneous other stuff. */ + bool saw_version_field; + char *manifest_checksum; +} JsonManifestParseState; + +static JsonParseErrorType json_manifest_object_start(void *state); +static JsonParseErrorType json_manifest_object_end(void *state); +static JsonParseErrorType json_manifest_array_start(void *state); +static JsonParseErrorType json_manifest_array_end(void *state); +static JsonParseErrorType json_manifest_object_field_start(void *state, char *fname, + bool isnull); +static JsonParseErrorType json_manifest_scalar(void *state, char *token, + JsonTokenType tokentype); +static void json_manifest_finalize_file(JsonManifestParseState *parse); +static void json_manifest_finalize_wal_range(JsonManifestParseState *parse); +static void verify_manifest_checksum(JsonManifestParseState *parse, + char *buffer, size_t size); +static void json_manifest_parse_failure(JsonManifestParseContext *context, + char *msg); + +static int hexdecode_char(char c); +static bool hexdecode_string(uint8 *result, char *input, int nbytes); +static bool parse_xlogrecptr(XLogRecPtr *result, char *input); + +/* + * Main entrypoint to parse a JSON-format backup manifest. + * + * Caller should set up the parsing context and then invoke this function. 
+ * For each file whose information is extracted from the manifest, + * context->per_file_cb is invoked. In case of trouble, context->error_cb is + * invoked and is expected not to return. + */ +void +json_parse_manifest(JsonManifestParseContext *context, char *buffer, + size_t size) +{ + JsonLexContext *lex; + JsonParseErrorType json_error; + JsonSemAction sem; + JsonManifestParseState parse; + + /* Set up our private parsing context. */ + parse.context = context; + parse.state = JM_EXPECT_TOPLEVEL_START; + parse.saw_version_field = false; + + /* Create a JSON lexing context. */ + lex = makeJsonLexContextCstringLen(NULL, buffer, size, PG_UTF8, true); + + /* Set up semantic actions. */ + sem.semstate = &parse; + sem.object_start = json_manifest_object_start; + sem.object_end = json_manifest_object_end; + sem.array_start = json_manifest_array_start; + sem.array_end = json_manifest_array_end; + sem.object_field_start = json_manifest_object_field_start; + sem.object_field_end = NULL; + sem.array_element_start = NULL; + sem.array_element_end = NULL; + sem.scalar = json_manifest_scalar; + + /* Run the actual JSON parser. */ + json_error = pg_parse_json(lex, &sem); + if (json_error != JSON_SUCCESS) + json_manifest_parse_failure(context, "parsing failed"); + if (parse.state != JM_EXPECT_EOF) + json_manifest_parse_failure(context, "manifest ended unexpectedly"); + + /* Verify the manifest checksum. */ + verify_manifest_checksum(&parse, buffer, size); + + freeJsonLexContext(lex); +} + +/* + * Invoked at the start of each object in the JSON document. + * + * The document as a whole is expected to be an object; each file and each + * WAL range is also expected to be an object. If we're anywhere else in the + * document, it's an error. 
+ */ +static JsonParseErrorType +json_manifest_object_start(void *state) +{ + JsonManifestParseState *parse = state; + + switch (parse->state) + { + case JM_EXPECT_TOPLEVEL_START: + parse->state = JM_EXPECT_TOPLEVEL_FIELD; + break; + case JM_EXPECT_FILES_NEXT: + parse->state = JM_EXPECT_THIS_FILE_FIELD; + parse->pathname = NULL; + parse->encoded_pathname = NULL; + parse->size = NULL; + parse->algorithm = NULL; + parse->checksum = NULL; + break; + case JM_EXPECT_WAL_RANGES_NEXT: + parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD; + parse->timeline = NULL; + parse->start_lsn = NULL; + parse->end_lsn = NULL; + break; + default: + json_manifest_parse_failure(parse->context, + "unexpected object start"); + break; + } + + return JSON_SUCCESS; +} + +/* + * Invoked at the end of each object in the JSON document. + * + * The possible cases here are the same as for json_manifest_object_start. + * Nothing special is needed at the end of the document, but at the end of a + * file or WAL-range object we must call json_manifest_finalize_file() or + * json_manifest_finalize_wal_range(), respectively, to save its details. + */ +static JsonParseErrorType +json_manifest_object_end(void *state) +{ + JsonManifestParseState *parse = state; + + switch (parse->state) + { + case JM_EXPECT_TOPLEVEL_END: + parse->state = JM_EXPECT_EOF; + break; + case JM_EXPECT_THIS_FILE_FIELD: + json_manifest_finalize_file(parse); + parse->state = JM_EXPECT_FILES_NEXT; + break; + case JM_EXPECT_THIS_WAL_RANGE_FIELD: + json_manifest_finalize_wal_range(parse); + parse->state = JM_EXPECT_WAL_RANGES_NEXT; + break; + default: + json_manifest_parse_failure(parse->context, + "unexpected object end"); + break; + } + + return JSON_SUCCESS; +} + +/* + * Invoked at the start of each array in the JSON document. + * + * Within the toplevel object, the value associated with the "Files" key + * should be an array. Similarly for the "WAL-Ranges" key. No other arrays + * are expected. 
+ */ +static JsonParseErrorType +json_manifest_array_start(void *state) +{ + JsonManifestParseState *parse = state; + + switch (parse->state) + { + case JM_EXPECT_FILES_START: + parse->state = JM_EXPECT_FILES_NEXT; + break; + case JM_EXPECT_WAL_RANGES_START: + parse->state = JM_EXPECT_WAL_RANGES_NEXT; + break; + default: + json_manifest_parse_failure(parse->context, + "unexpected array start"); + break; + } + + return JSON_SUCCESS; +} + +/* + * Invoked at the end of each array in the JSON document. + * + * The cases here are analogous to those in json_manifest_array_start. + */ +static JsonParseErrorType +json_manifest_array_end(void *state) +{ + JsonManifestParseState *parse = state; + + switch (parse->state) + { + case JM_EXPECT_FILES_NEXT: + case JM_EXPECT_WAL_RANGES_NEXT: + parse->state = JM_EXPECT_TOPLEVEL_FIELD; + break; + default: + json_manifest_parse_failure(parse->context, + "unexpected array end"); + break; + } + + return JSON_SUCCESS; +} + +/* + * Invoked at the start of each object field in the JSON document. + */ +static JsonParseErrorType +json_manifest_object_field_start(void *state, char *fname, bool isnull) +{ + JsonManifestParseState *parse = state; + + switch (parse->state) + { + case JM_EXPECT_TOPLEVEL_FIELD: + + /* + * Inside toplevel object. The version indicator should always be + * the first field. + */ + if (!parse->saw_version_field) + { + if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0) + json_manifest_parse_failure(parse->context, + "expected version indicator"); + parse->state = JM_EXPECT_VERSION_VALUE; + parse->saw_version_field = true; + break; + } + + /* Is this the list of files? */ + if (strcmp(fname, "Files") == 0) + { + parse->state = JM_EXPECT_FILES_START; + break; + } + + /* Is this the list of WAL ranges? */ + if (strcmp(fname, "WAL-Ranges") == 0) + { + parse->state = JM_EXPECT_WAL_RANGES_START; + break; + } + + /* Is this the manifest checksum? 
*/ + if (strcmp(fname, "Manifest-Checksum") == 0) + { + parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE; + break; + } + + /* It's not a field we recognize. */ + json_manifest_parse_failure(parse->context, + "unrecognized top-level field"); + break; + + case JM_EXPECT_THIS_FILE_FIELD: + /* Inside object for one file; which key have we got? */ + if (strcmp(fname, "Path") == 0) + parse->file_field = JMFF_PATH; + else if (strcmp(fname, "Encoded-Path") == 0) + parse->file_field = JMFF_ENCODED_PATH; + else if (strcmp(fname, "Size") == 0) + parse->file_field = JMFF_SIZE; + else if (strcmp(fname, "Last-Modified") == 0) + parse->file_field = JMFF_LAST_MODIFIED; + else if (strcmp(fname, "Checksum-Algorithm") == 0) + parse->file_field = JMFF_CHECKSUM_ALGORITHM; + else if (strcmp(fname, "Checksum") == 0) + parse->file_field = JMFF_CHECKSUM; + else + json_manifest_parse_failure(parse->context, + "unexpected file field"); + parse->state = JM_EXPECT_THIS_FILE_VALUE; + break; + + case JM_EXPECT_THIS_WAL_RANGE_FIELD: + /* Inside object for one WAL range; which key have we got? */ + if (strcmp(fname, "Timeline") == 0) + parse->wal_range_field = JMWRF_TIMELINE; + else if (strcmp(fname, "Start-LSN") == 0) + parse->wal_range_field = JMWRF_START_LSN; + else if (strcmp(fname, "End-LSN") == 0) + parse->wal_range_field = JMWRF_END_LSN; + else + json_manifest_parse_failure(parse->context, + "unexpected WAL range field"); + parse->state = JM_EXPECT_THIS_WAL_RANGE_VALUE; + break; + + default: + json_manifest_parse_failure(parse->context, + "unexpected object field"); + break; + } + + return JSON_SUCCESS; +} + +/* + * Invoked at the start of each scalar in the JSON document. + * + * Object field names don't reach this code; those are handled by + * json_manifest_object_field_start. When we're inside of the object for + * a particular file or WAL range, that function will have noticed the name + * of the field, and we'll get the corresponding value here. 
When we're in + * the toplevel object, the parse state itself tells us which field this is. + * + * In all cases except for PostgreSQL-Backup-Manifest-Version, which we + * can just check on the spot, the goal here is just to save the value in + * the parse state for later use. We don't actually do anything until we + * reach either the end of the object representing this file, or the end + * of the manifest, as the case may be. + */ +static JsonParseErrorType +json_manifest_scalar(void *state, char *token, JsonTokenType tokentype) +{ + JsonManifestParseState *parse = state; + + switch (parse->state) + { + case JM_EXPECT_VERSION_VALUE: + if (strcmp(token, "1") != 0) + json_manifest_parse_failure(parse->context, + "unexpected manifest version"); + parse->state = JM_EXPECT_TOPLEVEL_FIELD; + break; + + case JM_EXPECT_THIS_FILE_VALUE: + switch (parse->file_field) + { + case JMFF_PATH: + parse->pathname = token; + break; + case JMFF_ENCODED_PATH: + parse->encoded_pathname = token; + break; + case JMFF_SIZE: + parse->size = token; + break; + case JMFF_LAST_MODIFIED: + pfree(token); /* unused */ + break; + case JMFF_CHECKSUM_ALGORITHM: + parse->algorithm = token; + break; + case JMFF_CHECKSUM: + parse->checksum = token; + break; + } + parse->state = JM_EXPECT_THIS_FILE_FIELD; + break; + + case JM_EXPECT_THIS_WAL_RANGE_VALUE: + switch (parse->wal_range_field) + { + case JMWRF_TIMELINE: + parse->timeline = token; + break; + case JMWRF_START_LSN: + parse->start_lsn = token; + break; + case JMWRF_END_LSN: + parse->end_lsn = token; + break; + } + parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD; + break; + + case JM_EXPECT_MANIFEST_CHECKSUM_VALUE: + parse->state = JM_EXPECT_TOPLEVEL_END; + parse->manifest_checksum = token; + break; + + default: + json_manifest_parse_failure(parse->context, "unexpected scalar"); + break; + } + + return JSON_SUCCESS; +} + +/* + * Do additional parsing and sanity-checking of the details gathered for one + * file, and invoke the per-file callback so 
that the caller gets those + * details. This happens for each file when the corresponding JSON object is + * completely parsed. + */ +static void +json_manifest_finalize_file(JsonManifestParseState *parse) +{ + JsonManifestParseContext *context = parse->context; + size_t size; + char *ep; + int checksum_string_length; + pg_checksum_type checksum_type; + int checksum_length; + uint8 *checksum_payload; + + /* Pathname and size are required. */ + if (parse->pathname == NULL && parse->encoded_pathname == NULL) + json_manifest_parse_failure(parse->context, "missing path name"); + if (parse->pathname != NULL && parse->encoded_pathname != NULL) + json_manifest_parse_failure(parse->context, + "both path name and encoded path name"); + if (parse->size == NULL) + json_manifest_parse_failure(parse->context, "missing size"); + if (parse->algorithm == NULL && parse->checksum != NULL) + json_manifest_parse_failure(parse->context, + "checksum without algorithm"); + + /* Decode encoded pathname, if that's what we have. */ + if (parse->encoded_pathname != NULL) + { + int encoded_length = strlen(parse->encoded_pathname); + int raw_length = encoded_length / 2; + + parse->pathname = palloc(raw_length + 1); + if (encoded_length % 2 != 0 || + !hexdecode_string((uint8 *) parse->pathname, + parse->encoded_pathname, + raw_length)) + json_manifest_parse_failure(parse->context, + "could not decode file name"); + parse->pathname[raw_length] = '\0'; + pfree(parse->encoded_pathname); + parse->encoded_pathname = NULL; + } + + /* Parse size. */ + size = strtoul(parse->size, &ep, 10); + if (*ep) + json_manifest_parse_failure(parse->context, + "file size is not an integer"); + + /* Parse the checksum algorithm, if it's present. 
*/ + if (parse->algorithm == NULL) + checksum_type = CHECKSUM_TYPE_NONE; + else if (!pg_checksum_parse_type(parse->algorithm, &checksum_type)) + context->error_cb(context, "unrecognized checksum algorithm: \"%s\"", + parse->algorithm); + + /* Parse the checksum payload, if it's present. */ + checksum_string_length = parse->checksum == NULL ? 0 + : strlen(parse->checksum); + if (checksum_string_length == 0) + { + checksum_length = 0; + checksum_payload = NULL; + } + else + { + checksum_length = checksum_string_length / 2; + checksum_payload = palloc(checksum_length); + if (checksum_string_length % 2 != 0 || + !hexdecode_string(checksum_payload, parse->checksum, + checksum_length)) + context->error_cb(context, + "invalid checksum for file \"%s\": \"%s\"", + parse->pathname, parse->checksum); + } + + /* Invoke the callback with the details we've gathered. */ + context->per_file_cb(context, parse->pathname, size, + checksum_type, checksum_length, checksum_payload); + + /* Free memory we no longer need. */ + if (parse->size != NULL) + { + pfree(parse->size); + parse->size = NULL; + } + if (parse->algorithm != NULL) + { + pfree(parse->algorithm); + parse->algorithm = NULL; + } + if (parse->checksum != NULL) + { + pfree(parse->checksum); + parse->checksum = NULL; + } +} + +/* + * Do additional parsing and sanity-checking of the details gathered for one + * WAL range, and invoke the per-WAL-range callback so that the caller gets + * those details. This happens for each WAL range when the corresponding JSON + * object is completely parsed. + */ +static void +json_manifest_finalize_wal_range(JsonManifestParseState *parse) +{ + JsonManifestParseContext *context = parse->context; + TimeLineID tli; + XLogRecPtr start_lsn, + end_lsn; + char *ep; + + /* Make sure all fields are present. 
*/ + if (parse->timeline == NULL) + json_manifest_parse_failure(parse->context, "missing timeline"); + if (parse->start_lsn == NULL) + json_manifest_parse_failure(parse->context, "missing start LSN"); + if (parse->end_lsn == NULL) + json_manifest_parse_failure(parse->context, "missing end LSN"); + + /* Parse timeline. */ + tli = strtoul(parse->timeline, &ep, 10); + if (*ep) + json_manifest_parse_failure(parse->context, + "timeline is not an integer"); + if (!parse_xlogrecptr(&start_lsn, parse->start_lsn)) + json_manifest_parse_failure(parse->context, + "could not parse start LSN"); + if (!parse_xlogrecptr(&end_lsn, parse->end_lsn)) + json_manifest_parse_failure(parse->context, + "could not parse end LSN"); + + /* Invoke the callback with the details we've gathered. */ + context->per_wal_range_cb(context, tli, start_lsn, end_lsn); + + /* Free memory we no longer need. */ + if (parse->timeline != NULL) + { + pfree(parse->timeline); + parse->timeline = NULL; + } + if (parse->start_lsn != NULL) + { + pfree(parse->start_lsn); + parse->start_lsn = NULL; + } + if (parse->end_lsn != NULL) + { + pfree(parse->end_lsn); + parse->end_lsn = NULL; + } +} + +/* + * Verify that the manifest checksum is correct. + * + * The last line of the manifest file is excluded from the manifest checksum, + * because the last line is expected to contain the checksum that covers + * the rest of the file. + */ +static void +verify_manifest_checksum(JsonManifestParseState *parse, char *buffer, + size_t size) +{ + JsonManifestParseContext *context = parse->context; + size_t i; + size_t number_of_newlines = 0; + size_t ultimate_newline = 0; + size_t penultimate_newline = 0; + pg_cryptohash_ctx *manifest_ctx; + uint8 manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH]; + uint8 manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH]; + + /* Find the last two newlines in the file. 
*/ + for (i = 0; i < size; ++i) + { + if (buffer[i] == '\n') + { + ++number_of_newlines; + penultimate_newline = ultimate_newline; + ultimate_newline = i; + } + } + + /* + * Make sure that the last newline is right at the end, and that there are + * at least two lines total. We need this to be true in order for the + * following code, which computes the manifest checksum, to work properly. + */ + if (number_of_newlines < 2) + json_manifest_parse_failure(parse->context, + "expected at least 2 lines"); + if (ultimate_newline != size - 1) + json_manifest_parse_failure(parse->context, + "last line not newline-terminated"); + + /* Checksum everything up to and including the penultimate newline. */ + manifest_ctx = pg_cryptohash_create(PG_SHA256); + if (manifest_ctx == NULL) + context->error_cb(context, "out of memory"); + if (pg_cryptohash_init(manifest_ctx) < 0) + context->error_cb(context, "could not initialize checksum of manifest"); + if (pg_cryptohash_update(manifest_ctx, (uint8 *) buffer, penultimate_newline + 1) < 0) + context->error_cb(context, "could not update checksum of manifest"); + if (pg_cryptohash_final(manifest_ctx, manifest_checksum_actual, + sizeof(manifest_checksum_actual)) < 0) + context->error_cb(context, "could not finalize checksum of manifest"); + + /* Now compare it against the checksum stored in the manifest. */ + if (parse->manifest_checksum == NULL) + context->error_cb(parse->context, "manifest has no checksum"); + if (strlen(parse->manifest_checksum) != PG_SHA256_DIGEST_LENGTH * 2 || + !hexdecode_string(manifest_checksum_expected, parse->manifest_checksum, + PG_SHA256_DIGEST_LENGTH)) + context->error_cb(context, "invalid manifest checksum: \"%s\"", + parse->manifest_checksum); + if (memcmp(manifest_checksum_actual, manifest_checksum_expected, + PG_SHA256_DIGEST_LENGTH) != 0) + context->error_cb(context, "manifest checksum mismatch"); + pg_cryptohash_free(manifest_ctx); +} + +/* + * Report a parse error. 
+ * + * This is intended to be used for fairly low-level failures that probably + * shouldn't occur unless somebody has deliberately constructed a bad manifest, + * or unless the server is generating bad manifests due to some bug. msg should + * be a short string giving some hint as to what the problem is. + */ +static void +json_manifest_parse_failure(JsonManifestParseContext *context, char *msg) +{ + context->error_cb(context, "could not parse backup manifest: %s", msg); +} + +/* + * Convert a character which represents a hexadecimal digit to an integer. + * + * Returns -1 if the character is not a hexadecimal digit. + */ +static int +hexdecode_char(char c) +{ + if (c >= '0' && c <= '9') + return c - '0'; + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + + return -1; +} + +/* + * Decode a hex string into a byte string, 2 hex chars per byte. + * + * Returns false if invalid characters are encountered; otherwise true. + */ +static bool +hexdecode_string(uint8 *result, char *input, int nbytes) +{ + int i; + + for (i = 0; i < nbytes; ++i) + { + int n1 = hexdecode_char(input[i * 2]); + int n2 = hexdecode_char(input[i * 2 + 1]); + + if (n1 < 0 || n2 < 0) + return false; + result[i] = n1 * 16 + n2; + } + + return true; +} + +/* + * Parse an XLogRecPtr expressed using the usual string format. + */ +static bool +parse_xlogrecptr(XLogRecPtr *result, char *input) +{ + uint32 hi; + uint32 lo; + + if (sscanf(input, "%X/%X", &hi, &lo) != 2) + return false; + *result = ((uint64) hi) << 32 | lo; + return true; +} diff --git a/src/include/common/parse_manifest.h b/src/include/common/parse_manifest.h new file mode 100644 index 00000000000..811c9149f43 --- /dev/null +++ b/src/include/common/parse_manifest.h @@ -0,0 +1,46 @@ +/*------------------------------------------------------------------------- + * + * parse_manifest.h + * Parse a backup manifest in JSON format. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/common/parse_manifest.h + * + *------------------------------------------------------------------------- + */ + +#ifndef PARSE_MANIFEST_H +#define PARSE_MANIFEST_H + +#include "access/xlogdefs.h" +#include "common/checksum_helper.h" +#include "mb/pg_wchar.h" + +struct JsonManifestParseContext; +typedef struct JsonManifestParseContext JsonManifestParseContext; + +typedef void (*json_manifest_per_file_callback) (JsonManifestParseContext *, + char *pathname, + size_t size, pg_checksum_type checksum_type, + int checksum_length, uint8 *checksum_payload); +typedef void (*json_manifest_per_wal_range_callback) (JsonManifestParseContext *, + TimeLineID tli, + XLogRecPtr start_lsn, XLogRecPtr end_lsn); +typedef void (*json_manifest_error_callback) (JsonManifestParseContext *, + const char *fmt,...) pg_attribute_printf(2, 3) + pg_attribute_noreturn(); + +struct JsonManifestParseContext +{ + void *private_data; + json_manifest_per_file_callback per_file_cb; + json_manifest_per_wal_range_callback per_wal_range_cb; + json_manifest_error_callback error_cb; +}; + +extern void json_parse_manifest(JsonManifestParseContext *context, + char *buffer, size_t size); + +#endif -- cgit v1.2.3