aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils')
-rw-r--r--src/backend/utils/adt/varlena.c82
1 files changed, 82 insertions, 0 deletions
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 816c66b7e77..1f6e0908216 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -6198,6 +6198,88 @@ rest_of_char_same(const char *s1, const char *s2, int len)
/*
+ * The following *ClosestMatch() functions can be used to determine whether a
+ * user-provided string resembles any known valid values, which is useful for
+ * providing hints in log messages, among other things. Use these functions
+ * like so:
+ *
+ * initClosestMatch(&state, source_string, max_distance);
+ *
+ * for (int i = 0; i < num_valid_strings; i++)
+ * updateClosestMatch(&state, valid_strings[i]);
+ *
+ * closestMatch = getClosestMatch(&state);
+ */
+
+/*
+ * Initialize the given state with the source string and maximum Levenshtein
+ * distance to consider.
+ */
+void
+initClosestMatch(ClosestMatchState *state, const char *source, int max_d)
+{
+ Assert(state);
+ Assert(max_d >= 0);
+
+ state->source = source;
+ state->min_d = -1;
+ state->max_d = max_d;
+ state->match = NULL;
+}
+
+/*
+ * If the candidate string is a closer match than the current one saved (or
+ * there is no match saved), save it as the closest match.
+ *
+ * If the source or candidate string is NULL, empty, or too long, this function
+ * takes no action. Likewise, if the Levenshtein distance exceeds the maximum
+ * allowed or more than half the characters are different, no action is taken.
+ */
+void
+updateClosestMatch(ClosestMatchState *state, const char *candidate)
+{
+ int dist;
+
+ Assert(state);
+
+ if (state->source == NULL || state->source[0] == '\0' ||
+ candidate == NULL || candidate[0] == '\0')
+ return;
+
+ /*
+ * To avoid ERROR-ing, we check the lengths here instead of setting
+ * 'trusted' to false in the call to varstr_levenshtein_less_equal().
+ */
+ if (strlen(state->source) > MAX_LEVENSHTEIN_STRLEN ||
+ strlen(candidate) > MAX_LEVENSHTEIN_STRLEN)
+ return;
+
+ dist = varstr_levenshtein_less_equal(state->source, strlen(state->source),
+ candidate, strlen(candidate), 1, 1, 1,
+ state->max_d, true);
+ if (dist <= state->max_d &&
+ dist <= strlen(state->source) / 2 &&
+ (state->min_d == -1 || dist < state->min_d))
+ {
+ state->min_d = dist;
+ state->match = candidate;
+ }
+}
+
+/*
+ * Return the closest match. If no suitable candidates were provided via
+ * updateClosestMatch(), return NULL.
+ */
+const char *
+getClosestMatch(ClosestMatchState *state)
+{
+ Assert(state);
+
+ return state->match;
+}
+
+
+/*
* Unicode support
*/