Add support for regexps on database and user entries in pg_hba.conf

As of this commit, any database or user entry beginning with a slash (/) is considered as a regular expression. This is particularly useful for users, as there is currently no clean way to match a pattern across multiple HBA lines. For example, a user name mapping with a regular expression needs first to match with an HBA line, and we would skip the follow-up HBA entries if the ident regexp does *not* match with what has matched in the HBA line. pg_hba.conf is able to handle multiple databases and roles with a comma-separated list of these, hence individual regular expressions that include commas need to be double-quoted. At authentication time, user and database names are now checked in the following order: - Arbitrary keywords (like "all", the ones beginning with '+' for membership check), that we know will never have a regexp. A fancy case is for physical WAL senders, we *have* to only match "replication" for the database. - Regular expression matching. - Exact match. The previous logic did the same, but without the regexp step. We have discussed as well the possibility to support regexp pattern matching for host names, but these happen to lead to tricky issues based on what I understand, particularly with host entries that have CIDRs. This commit relies heavily on the refactoring done in a903971 and fc579e1, so that the amount of code required to compile and execute regular expressions is now minimal. When parsing pg_hba.conf, all the computed regexps need to be explicitly free()'d, same as for pg_ident.conf. Documentation and TAP tests are added to cover this feature, including cases where the regexps use commas (for clarity in the docs, coverage for the parsing logic in the tests). Note that this introduces a breakage with older versions, where a database or user name beginning with a slash was treated as something to check for an equal match. 
Per discussion, we have discarded this as being much of an issue in practice as it would require a cluster to have database and/or role names that begin with a slash, as well as HBA entries using these. Hence, the consistency gained with regexps in pg_ident.conf is more appealing in the long term. **This compatibility change should be mentioned in the release notes.** Author: Bertrand Drouvot Reviewed-by: Jacob Champion, Tom Lane, Michael Paquier Discussion: https://postgr.es/m/fff0d7c1-8ad4-76a1-9db3-0ab6ec338bf7@amazon.com
author: Michael Paquier <michael@paquier.xyz> 2022-10-24 11:45:31 +0900
committer: Michael Paquier <michael@paquier.xyz> 2022-10-24 11:45:31 +0900
commit: 8fea86830e1d40961fd3cba59a73fca178417c78 (patch)
tree: 3a1477fc122a008b412935c22cdc8e5ff7998f16 /src
parent: 5035c93c8a5ac6804da79c67403460348b381924 (diff)
download: postgresql-8fea86830e1d40961fd3cba59a73fca178417c78.tar.gz
postgresql-8fea86830e1d40961fd3cba59a73fca178417c78.zip
2 files changed, 121 insertions, 7 deletions
diff --git a/src/backend/libpq/hba.c b/src/backend/libpq/hba.c
index f3539a79299..ea92f02a479 100644
--- a/src/backend/libpq/hba.c
+++ b/src/backend/libpq/hba.c
@@ -294,6 +294,30 @@ free_auth_token(AuthToken *token)
 }
 
 /*
+ * Run free_auth_token() on every role and database AuthToken of a HbaLine, as
+ * these may include regular expressions that need to be cleaned up explicitly.
+ */
+static void
+free_hba_line(HbaLine *line)
+{
+	ListCell   *cell;
+
+	foreach(cell, line->roles)
+	{
+		AuthToken  *tok = lfirst(cell);
+
+		free_auth_token(tok);
+	}
+
+	foreach(cell, line->databases)
+	{
+		AuthToken  *tok = lfirst(cell);
+
+		free_auth_token(tok);
+	}
+}
+
+/*
  * Copy a AuthToken struct into freshly palloc'd memory.
  */
 static AuthToken *
@@ -661,6 +685,10 @@ is_member(Oid userid, const char *role)
 
 /*
  * Check AuthToken list for a match to role, allowing group names.
+ *
+ * Each AuthToken listed is checked one-by-one.  Keywords are processed
+ * first (these cannot have regular expressions), followed by regular
+ * expressions (if any) and the exact match.
  */
 static bool
 check_role(const char *role, Oid roleid, List *tokens)
@@ -676,8 +704,14 @@ check_role(const char *role, Oid roleid, List *tokens)
 			if (is_member(roleid, tok->string + 1))
 				return true;
 		}
-		else if (token_matches(tok, role) ||
-				 token_is_keyword(tok, "all"))
+		else if (token_is_keyword(tok, "all"))
+			return true;
+		else if (token_has_regexp(tok))
+		{
+			if (regexec_auth_token(role, tok, 0, NULL) == REG_OKAY)
+				return true;
+		}
+		else if (token_matches(tok, role))
 			return true;
 	}
 	return false;
@@ -685,6 +719,10 @@ check_role(const char *role, Oid roleid, List *tokens)
 
 /*
  * Check to see if db/role combination matches AuthToken list.
+ *
+ * Each AuthToken listed is checked one-by-one.  Keywords are checked
+ * first (these cannot have regular expressions), followed by regular
+ * expressions (if any) and the exact match.
  */
 static bool
 check_db(const char *dbname, const char *role, Oid roleid, List *tokens)
@@ -719,6 +757,11 @@ check_db(const char *dbname, const char *role, Oid roleid, List *tokens)
 		}
 		else if (token_is_keyword(tok, "replication"))
 			continue;			/* never match this if not walsender */
+		else if (token_has_regexp(tok))
+		{
+			if (regexec_auth_token(dbname, tok, 0, NULL) == REG_OKAY)
+				return true;
+		}
 		else if (token_matches(tok, dbname))
 			return true;
 	}
@@ -1138,8 +1181,13 @@ parse_hba_line(TokenizedAuthLine *tok_line, int elevel)
 	tokens = lfirst(field);
 	foreach(tokencell, tokens)
 	{
-		parsedline->databases = lappend(parsedline->databases,
-										copy_auth_token(lfirst(tokencell)));
+		AuthToken  *tok = copy_auth_token(lfirst(tokencell));
+
+		/* Compile a regexp for the database token, if necessary */
+		if (regcomp_auth_token(tok, HbaFileName, line_num, err_msg, elevel))
+			return NULL;
+
+		parsedline->databases = lappend(parsedline->databases, tok);
 	}
 
 	/* Get the roles. */
@@ -1158,8 +1206,13 @@ parse_hba_line(TokenizedAuthLine *tok_line, int elevel)
 	tokens = lfirst(field);
 	foreach(tokencell, tokens)
 	{
-		parsedline->roles = lappend(parsedline->roles,
-									copy_auth_token(lfirst(tokencell)));
+		AuthToken  *tok = copy_auth_token(lfirst(tokencell));
+
+		/* Compile a regexp from the role token, if necessary */
+		if (regcomp_auth_token(tok, HbaFileName, line_num, err_msg, elevel))
+			return NULL;
+
+		parsedline->roles = lappend(parsedline->roles, tok);
 	}
 
 	if (parsedline->conntype != ctLocal)
@@ -2355,12 +2408,31 @@ load_hba(void)
 
 	if (!ok)
 	{
-		/* File contained one or more errors, so bail out */
+		/*
+		 * File contained one or more errors, so bail out, first being careful
+		 * to clean up whatever we allocated.  Most stuff will go away via
+		 * MemoryContextDelete, but we have to clean up regexes explicitly.
+		 */
+		foreach(line, new_parsed_lines)
+		{
+			HbaLine    *newline = (HbaLine *) lfirst(line);
+
+			free_hba_line(newline);
+		}
 		MemoryContextDelete(hbacxt);
 		return false;
 	}
 
 	/* Loaded new file successfully, replace the one we use */
+	if (parsed_hba_lines != NIL)
+	{
+		foreach(line, parsed_hba_lines)
+		{
+			HbaLine    *newline = (HbaLine *) lfirst(line);
+
+			free_hba_line(newline);
+		}
+	}
 	if (parsed_hba_context != NULL)
 		MemoryContextDelete(parsed_hba_context);
 	parsed_hba_context = hbacxt;
diff --git a/src/test/authentication/t/001_password.pl b/src/test/authentication/t/001_password.pl
index ea664d18f5b..6c0c753b56c 100644
--- a/src/test/authentication/t/001_password.pl
+++ b/src/test/authentication/t/001_password.pl
@@ -81,6 +81,14 @@ $node->safe_psql(
 	 GRANT ALL ON sysuser_data TO md5_role;");
 $ENV{"PGPASSWORD"} = 'pass';
 
+# Create a role that contains a comma to stress the parsing.
+$node->safe_psql('postgres',
+	q{SET password_encryption='md5'; CREATE ROLE "md5,role" LOGIN PASSWORD 'pass';}
+);
+
+# Create a database to test regular expression.
+$node->safe_psql('postgres', "CREATE database regex_testdb;");
+
 # For "trust" method, all users should be able to connect. These users are not
 # considered to be authenticated.
 reset_pg_hba($node, 'all', 'all', 'trust');
@@ -200,6 +208,40 @@ append_to_file(
 
 test_conn($node, 'user=md5_role', 'password from pgpass', 0);
 
+# Testing with regular expression for username.  The third regexp matches.
+reset_pg_hba($node, 'all', '/^.*nomatch.*$, baduser, /^md.*$', 'password');
+test_conn($node, 'user=md5_role', 'password, matching regexp for username',
+	0);
+
+# The third regex does not match anymore.
+reset_pg_hba($node, 'all', '/^.*nomatch.*$, baduser, /^m_d.*$', 'password');
+test_conn($node, 'user=md5_role',
+	'password, non matching regexp for username',
+	2, log_unlike => [qr/connection authenticated:/]);
+
+# Test with a comma in the regular expression.  In this case, the use of
+# double quotes is mandatory so that this is not considered as two elements
+# of the user name list when parsing pg_hba.conf.
+reset_pg_hba($node, 'all', '"/^.*5,.*e$"', 'password');
+test_conn($node, 'user=md5,role', 'password, matching regexp for username',
+	0);
+
+# Testing with regular expression for dbname. The third regex matches.
+reset_pg_hba($node, '/^.*nomatch.*$, baddb, /^regex_t.*b$', 'all',
+	'password');
+test_conn(
+	$node, 'user=md5_role dbname=regex_testdb',
+	'password, matching regexp for dbname', 0);
+
+# The third regexp does not match anymore.
+reset_pg_hba($node, '/^.*nomatch.*$, baddb, /^regex_t.*ba$',
+	'all', 'password');
+test_conn(
+	$node,
+	'user=md5_role dbname=regex_testdb',
+	'password, non matching regexp for dbname',
+	2, log_unlike => [qr/connection authenticated:/]);
+
 unlink($pgpassfile);
 delete $ENV{"PGPASSFILE"};
author	Michael Paquier <michael@paquier.xyz>	2022-10-24 11:45:31 +0900
committer	Michael Paquier <michael@paquier.xyz>	2022-10-24 11:45:31 +0900
commit	8fea86830e1d40961fd3cba59a73fca178417c78 (patch)
tree	3a1477fc122a008b412935c22cdc8e5ff7998f16 /src
parent	5035c93c8a5ac6804da79c67403460348b381924 (diff)
download	postgresql-8fea86830e1d40961fd3cba59a73fca178417c78.tar.gz postgresql-8fea86830e1d40961fd3cba59a73fca178417c78.zip