Skip to content

Commit 711cd29

Browse files
jchampioCommitfest Bot
authored andcommitted
oauth: Track total call count during a client flow
Tracking down the bugs that led to the addition of drain_socket_events() and drain_timer_events() was difficult, because an inefficient flow is not visibly different from one that is working properly. To help maintainers notice when something has gone wrong, track the number of calls into the flow as part of debug mode, and print the total when the flow finishes. A new test makes sure the total count is less than 100. (We expect something on the order of 10.) This isn't foolproof, but it is able to catch several regressions in the logic of the prior two commits, and future work to add TLS support to the oauth_validator test server should strengthen it as well.
1 parent ad61c53 commit 711cd29

File tree

2 files changed

+52
-1
lines changed

2 files changed

+52
-1
lines changed

src/interfaces/libpq-oauth/oauth-curl.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ struct async_ctx
278278
bool user_prompted; /* have we already sent the authz prompt? */
279279
bool used_basic_auth; /* did we send a client secret? */
280280
bool debugging; /* can we give unsafe developer assistance? */
281+
int dbg_num_calls; /* (debug mode) how many times were we called? */
281282

282283
#if defined(HAVE_SYS_EVENT_H)
283284
int nevents; /* how many events are we waiting on? */
@@ -3074,6 +3075,8 @@ PostgresPollingStatusType
30743075
pg_fe_run_oauth_flow(PGconn *conn)
30753076
{
30763077
PostgresPollingStatusType result;
3078+
fe_oauth_state *state = conn_sasl_state(conn);
3079+
struct async_ctx *actx;
30773080
#ifndef WIN32
30783081
sigset_t osigset;
30793082
bool sigpipe_pending;
@@ -3102,6 +3105,25 @@ pg_fe_run_oauth_flow(PGconn *conn)
31023105

31033106
result = pg_fe_run_oauth_flow_impl(conn);
31043107

3108+
/*
3109+
* To assist with finding bugs in drain_socket_events() and
3110+
* drain_timer_events(), when we're in debug mode, track the total number
3111+
* of calls to this function and print that at the end of the flow.
3112+
*
3113+
* Be careful that state->async_ctx could be NULL if early initialization
3114+
* fails during the first call.
3115+
*/
3116+
actx = state->async_ctx;
3117+
Assert(actx || result == PGRES_POLLING_FAILED);
3118+
3119+
if (actx && actx->debugging)
3120+
{
3121+
actx->dbg_num_calls++;
3122+
if (result == PGRES_POLLING_OK || result == PGRES_POLLING_FAILED)
3123+
fprintf(stderr, "[libpq] total number of polls: %d\n",
3124+
actx->dbg_num_calls);
3125+
}
3126+
31053127
#ifndef WIN32
31063128
if (masked)
31073129
{

src/test/modules/oauth_validator/t/001_server.pl

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,35 @@ sub connstr
418418
qr/failed to obtain access token: mutual TLS required for client \(invalid_client\)/
419419
);
420420

421+
# Count the number of calls to the internal flow when multiple retries are
422+
# triggered. The exact number depends on many things -- the TCP stack, the
423+
# version of Curl in use, random chance -- but a ridiculously high number
424+
# suggests something is wrong with our ability to clear multiplexer events after
425+
# they're no longer applicable.
426+
my ($ret, $stdout, $stderr) = $node->psql(
427+
'postgres',
428+
"SELECT 'connected for call count'",
429+
extra_params => ['-w'],
430+
connstr => connstr(stage => 'token', retries => 2),
431+
on_error_stop => 0);
432+
433+
is($ret, 0, "call count connection succeeds");
434+
like(
435+
$stderr,
436+
qr@Visit https://example\.com/ and enter the code: postgresuser@,
437+
"call count: stderr matches");
438+
439+
my $count_pattern = qr/\[libpq\] total number of polls: (\d+)/;
440+
if (like($stderr, $count_pattern, "call count: count is printed"))
441+
{
442+
# For reference, a typical flow with two retries might take between 5-15
443+
# calls to the client implementation. And while this will probably continue
444+
# to change across OSes and Curl updates, we're likely in trouble if we see
445+
# hundreds or thousands of calls.
446+
$stderr =~ $count_pattern;
447+
cmp_ok($1, '<', 100, "call count is reasonably small");
448+
}
449+
421450
# Stress test: make sure our builtin flow operates correctly even if the client
422451
# application isn't respecting PGRES_POLLING_READING/WRITING signals returned
423452
# from PQconnectPoll().
@@ -428,7 +457,7 @@ sub connstr
428457
connstr(stage => 'all', retries => 1, interval => 1));
429458

430459
note "running '" . join("' '", @cmd) . "'";
431-
my ($stdout, $stderr) = run_command(\@cmd);
460+
($stdout, $stderr) = run_command(\@cmd);
432461

433462
like($stdout, qr/connection succeeded/, "stress-async: stdout matches");
434463
unlike(

0 commit comments

Comments
 (0)