From 4f1e6f590933d490a9c0c13fded923305b9f9616 Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Fri, 18 Jul 2025 10:06:13 -0700 Subject: [PATCH 1/2] libpq: Extend "read pending" check from SSL to GSS An extra check for pending bytes in the SSL layer has been part of pqReadReady() for a very long time (79ff2e96d). But when GSS transport encryption was added, it didn't receive the same treatment. (As 79ff2e96d notes, "The bug that I fixed in this patch is exceptionally hard to reproduce reliably.") Without that check, it's possible to hit a hang in gssencmode, if the server splits a large libpq message such that the final message in a streamed response is part of the same wrapped token as the split message: DataRowDataRowDataRowDataRowDataRowData -- token boundary -- RowDataRowCommandCompleteReadyForQuery If the split message takes up enough memory to nearly fill libpq's receive buffer, libpq may return from pqReadData() before the later messages are pulled out of the PqGSSRecvBuffer. Without additional socket activity from the server, pqReadReady() (via pqSocketCheck()) will never again return true, hanging the connection. Pull the pending-bytes check into the pqsecure API layer, where both SSL and GSS now implement it. Note that this does not fix the root problem! Third party clients of libpq have no way to call pqsecure_read_is_pending() in their own polling. This just brings the GSS implementation up to par with the existing SSL workaround; a broader fix is left to a subsequent commit. (However, pgtls_read_pending() is renamed to pgtls_read_is_pending(), to avoid conflation with the forthcoming pgtls_drain_pending().) Discussion: https://postgr.es/m/CAOYmi%2BmpymrgZ76Jre2dx_PwRniS9YZojwH0rZnTuiGHCsj0rA%40mail.gmail.com --- src/interfaces/libpq/fe-misc.c | 6 ++---- src/interfaces/libpq/fe-secure-gssapi.c | 6 ++++++ src/interfaces/libpq/fe-secure-openssl.c | 2 +- src/interfaces/libpq/fe-secure.c | 19 +++++++++++++++++++ src/interfaces/libpq/libpq-int.h | 4 +++- 5 files changed, 31 insertions(+), 6 deletions(-) diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c index dca44fdc5d2f..434216ff89f0 100644 --- a/src/interfaces/libpq/fe-misc.c +++ b/src/interfaces/libpq/fe-misc.c @@ -1099,14 +1099,12 @@ pqSocketCheck(PGconn *conn, int forRead, int forWrite, pg_usec_time_t end_time) return -1; } -#ifdef USE_SSL - /* Check for SSL library buffering read bytes */ - if (forRead && conn->ssl_in_use && pgtls_read_pending(conn)) + /* Check for SSL/GSS library buffering read bytes */ + if (forRead && pqsecure_read_is_pending(conn)) { /* short-circuit the select */ return 1; } -#endif } /* We will retry as long as we get EINTR */ diff --git a/src/interfaces/libpq/fe-secure-gssapi.c b/src/interfaces/libpq/fe-secure-gssapi.c index 843b31e175fe..5067b3de9f41 100644 --- a/src/interfaces/libpq/fe-secure-gssapi.c +++ b/src/interfaces/libpq/fe-secure-gssapi.c @@ -471,6 +471,12 @@ gss_read(PGconn *conn, void *recv_buffer, size_t length, ssize_t *ret) return PGRES_POLLING_OK; } +bool +pg_GSS_read_is_pending(PGconn *conn) +{ + return PqGSSResultLength > PqGSSResultNext; +} + /* * Negotiate GSSAPI transport for a connection. When complete, returns * PGRES_POLLING_OK. Will return PGRES_POLLING_READING or diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c index 51dd7b9fec0a..8f975561e518 100644 --- a/src/interfaces/libpq/fe-secure-openssl.c +++ b/src/interfaces/libpq/fe-secure-openssl.c @@ -231,7 +231,7 @@ pgtls_read(PGconn *conn, void *ptr, size_t len) } bool -pgtls_read_pending(PGconn *conn) +pgtls_read_is_pending(PGconn *conn) { return SSL_pending(conn->ssl) > 0; } diff --git a/src/interfaces/libpq/fe-secure.c b/src/interfaces/libpq/fe-secure.c index e686681ba155..94c97ec26fb3 100644 --- a/src/interfaces/libpq/fe-secure.c +++ b/src/interfaces/libpq/fe-secure.c @@ -243,6 +243,25 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len) return n; } +/* + * Returns true if there are any bytes available in the transport buffer. + */ +bool +pqsecure_read_is_pending(PGconn *conn) +{ +#ifdef USE_SSL + if (conn->ssl_in_use) + return pgtls_read_is_pending(conn); +#endif +#ifdef ENABLE_GSS + if (conn->gssenc) + return pg_GSS_read_is_pending(conn); +#endif + + /* Plaintext connections have no transport buffer. */ + return 0; +} + /* * Write data to a secure connection. * diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index 02c114f14052..902f6b92e005 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -810,6 +810,7 @@ extern int pqWriteReady(PGconn *conn); extern PostgresPollingStatusType pqsecure_open_client(PGconn *); extern void pqsecure_close(PGconn *); extern ssize_t pqsecure_read(PGconn *, void *ptr, size_t len); +extern bool pqsecure_read_is_pending(PGconn *); extern ssize_t pqsecure_write(PGconn *, const void *ptr, size_t len); extern ssize_t pqsecure_raw_read(PGconn *, void *ptr, size_t len); extern ssize_t pqsecure_raw_write(PGconn *, const void *ptr, size_t len); @@ -848,7 +849,7 @@ extern ssize_t pgtls_read(PGconn *conn, void *ptr, size_t len); /* * Is there unread data waiting in the SSL read buffer? */ -extern bool pgtls_read_pending(PGconn *conn); +extern bool pgtls_read_is_pending(PGconn *conn); /* * Write data to a secure connection. @@ -896,6 +897,7 @@ extern PostgresPollingStatusType pqsecure_open_gss(PGconn *conn); */ extern ssize_t pg_GSS_write(PGconn *conn, const void *ptr, size_t len); extern ssize_t pg_GSS_read(PGconn *conn, void *ptr, size_t len); +extern bool pg_GSS_read_is_pending(PGconn *conn); #endif /* === in fe-trace.c === */ From 0bb1ed16ce76d16ec7d2dceb4b77ae3043ae1656 Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Thu, 17 Jul 2025 08:45:45 -0700 Subject: [PATCH 2/2] libpq: Drain all pending bytes from SSL/GSS during pqReadData() The previous commit strengthened a workaround for a hang when large messages are split across TLS records/GSS tokens. Because that workaround is implemented in libpq internals, it can only help us when libpq itself is polling on the socket. In nonblocking situations, where the client above libpq is expected to poll, the same bugs can show up. As a contrived example, consider a large protocol-2.0 error coming back from a server during PQconnectPoll(), split in an odd way across two records: -- TLS record (8192-byte payload) -- EEEE[...repeated a total of 8192 times] -- TLS record (8193-byte payload) -- EEEE[...repeated a total of 8192 times]\0 The first record will fill the first half of the libpq receive buffer, which is 16k long by default. The second record completely fills the last half with its first 8192 bytes, leaving the terminating NULL in the OpenSSL buffer. Since we still haven't seen the terminator at our level, PQconnectPoll() will return PGRES_POLLING_READING, expecting to come back when the server has sent "the rest" of the data. But there is nothing left to read from the socket; OpenSSL had to pull all of the data in the 8193-byte record off of the wire to decrypt it. (A real server would probably not split up the records this way, nor keep the connection open after sending a fatal connection error. But servers that regularly use larger TLS records can get the libpq receive buffer into the same state if DataRows are big enough, as reported on the list.) This is a layering violation. libpq makes decisions based on data in the application buffer, above the transport buffer (whether SSL or GSS), but clients are polling the socket, below the transport buffer. One way to fix this in a backportable way, without changing APIs too much, is to ensure data never stays in the transport buffer. Then pqReadData's postconditions will look similar for both raw sockets and SSL/GSS: any available data is either in the application buffer, or still on the socket. Building on the prior commit, add a function to the pqsecure API layer which drains all pending data from the transport layer into conn->inBuffer, expanding the buffer as necessary. pqReadData() calls this function before returning when pending data exists. This is not particularly efficient from an architectural perspective (the pqsecure_read() implementations take care to fit their packets into the current buffer, and that effort is now completely discarded), but it's hopefully easier to reason about than a full rewrite would be for the back branches. Reported-by: Lars Kanis Discussion: https://postgr.es/m/2039ac58-d3e0-434b-ac1a-2a987f3b4cb1%40greiz-reinsdorf.de Backpatch-through: 13 --- src/interfaces/libpq/fe-misc.c | 51 ++++++++++- src/interfaces/libpq/fe-secure-gssapi.c | 27 ++++++ src/interfaces/libpq/fe-secure-openssl.c | 109 +++++++++++++++++++++++ src/interfaces/libpq/fe-secure.c | 97 +++++++++++++++++++- src/interfaces/libpq/libpq-int.h | 7 ++ 5 files changed, 288 insertions(+), 3 deletions(-) diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c index 434216ff89f0..49cf32e1bb7e 100644 --- a/src/interfaces/libpq/fe-misc.c +++ b/src/interfaces/libpq/fe-misc.c @@ -55,6 +55,7 @@ static int pqPutMsgBytes(const void *buf, size_t len, PGconn *conn); static int pqSendSome(PGconn *conn, int len); static int pqSocketCheck(PGconn *conn, int forRead, int forWrite, pg_usec_time_t end_time); +static int pqReadData_internal(PGconn *conn); /* * PQlibVersion: return the libpq version number @@ -593,6 +594,13 @@ pqPutMsgEnd(PGconn *conn) /* ---------- * pqReadData: read more data, if any is available + * + * Upon a successful return, callers may assume that either 1) all available + * bytes have been consumed from the socket, or 2) the socket is still marked + * readable by the OS. (In other words: after a successful pqReadData, it's safe + * to tell a client to poll for readable bytes on the socket without any further + * draining of the SSL/GSS transport buffers.) + * * Possible return values: * 1: successfully loaded at least one more byte * 0: no data is presently available, but no error detected @@ -605,8 +613,8 @@ pqPutMsgEnd(PGconn *conn) int pqReadData(PGconn *conn) { - int someread = 0; - int nread; + int available; + bool pending; if (conn->sock == PGINVALID_SOCKET) { @@ -614,6 +622,45 @@ pqReadData(PGconn *conn) return -1; } + available = pqReadData_internal(conn); + if (available < 0) + return -1; + + /* + * Make sure there are no bytes stuck in layers between conn->inBuffer and + * the socket, to make it safe for clients to poll on PQsocket(). See + * pqsecure_drain_pending's documentation for details. + */ + pending = pqsecure_read_is_pending(conn); + + if (available && pending) + { + if (pqsecure_drain_pending(conn)) + return -1; + } + else if (!available) + { + /* + * If we're not returning any bytes from the underlying transport, + * that must imply there aren't any in the transport buffer... + */ + Assert(!pending); + } + + return available; +} + +/* + * Workhorse for pqReadData(). It's kept separate from the + * pqsecure_drain_pending() logic to avoid adding to this function's goto + * complexity. + */ +static int +pqReadData_internal(PGconn *conn) +{ + int someread = 0; + int nread; + /* Left-justify any data in the buffer to make room */ if (conn->inStart < conn->inEnd) { diff --git a/src/interfaces/libpq/fe-secure-gssapi.c b/src/interfaces/libpq/fe-secure-gssapi.c index 5067b3de9f41..6720c6fc1519 100644 --- a/src/interfaces/libpq/fe-secure-gssapi.c +++ b/src/interfaces/libpq/fe-secure-gssapi.c @@ -477,6 +477,33 @@ pg_GSS_read_is_pending(PGconn *conn) return PqGSSResultLength > PqGSSResultNext; } +int +pg_GSS_drain_pending(PGconn *conn) +{ + int pending; + + /* Figure out how many bytes to take off the connection. */ + Assert(PqGSSResultLength >= PqGSSResultNext); + pending = PqGSSResultLength - PqGSSResultNext; + + if (!pending) + { + /* Nothing to do. */ + return 0; + } + + /* Expand the input buffer if necessary. */ + if (pqCheckInBufferSpace(conn->inEnd + (size_t) pending, conn)) + return -1; /* errorMessage already set */ + + /* Now read the buffered data. */ + memcpy(conn->inBuffer + conn->inEnd, PqGSSResultBuffer + PqGSSResultNext, pending); + conn->inEnd += pending; + PqGSSResultNext += pending; + + return 0; +} + /* * Negotiate GSSAPI transport for a connection. When complete, returns * PGRES_POLLING_OK. Will return PGRES_POLLING_READING or diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c index 8f975561e518..b95e229bd08d 100644 --- a/src/interfaces/libpq/fe-secure-openssl.c +++ b/src/interfaces/libpq/fe-secure-openssl.c @@ -236,6 +236,115 @@ pgtls_read_is_pending(PGconn *conn) return SSL_pending(conn->ssl) > 0; } +/* + * Helper callback for use with ERR_print_errors_cb(). This appends the raw + * error queue to the provided PQExpBuffer, one entry per line. + * + * Note that this is not pretty output; it's meant for debugging. + */ +static int +append_error_queue(const char *str, size_t len, void *u) +{ + PQExpBuffer buf = u; + + appendBinaryPQExpBuffer(buf, str, len); + appendPQExpBufferChar(buf, '\n'); + + return 0; +} + +int +pgtls_drain_pending(PGconn *conn) +{ + int pending; + size_t drained; + + /* + * OpenSSL readahead is documented to break SSL_pending(). Plus, we can't + * afford to have OpenSSL take bytes off the socket without processing + * them; that breaks the postconditions for pqsecure_drain_pending(). + */ + Assert(!SSL_get_read_ahead(conn->ssl)); + + /* Figure out how many bytes to take off the connection. */ + pending = SSL_pending(conn->ssl); + + if (!pending) + { + /* Nothing to do. */ + return 0; + } + else if (pending < 0) + { + /* Shouldn't be possible, but don't let it mess up the math below. */ + Assert(false); + libpq_append_conn_error(conn, "OpenSSL reports negative bytes pending"); + return -1; + } + else if (pending == INT_MAX) + { + /* + * If we ever found a legitimate way to hit this, we'd need to loop + * around to call SSL_pending() again. Throw an error rather than + * complicate the code in that way, because SSL_read() should be + * bounded to the size of a single TLS record, and conn->inBuffer + * can't currently go past INT_MAX in size anyway. + */ + libpq_append_conn_error(conn, "OpenSSL reports INT_MAX bytes pending"); + return -1; + } + + /* Expand the input buffer if necessary. */ + if (pqCheckInBufferSpace(conn->inEnd + (size_t) pending, conn)) + return -1; /* errorMessage already set */ + + /* + * Now read the buffered data. + * + * Don't defer to pgtls_read(); OpenSSL should guarantee that pending data + * comes off in a single call, and we don't want to use the more + * complicated read-loop behavior. We still have to manage the error + * queue. + */ + ERR_clear_error(); + if (!SSL_read_ex(conn->ssl, conn->inBuffer + conn->inEnd, pending, &drained)) + { + int err = SSL_get_error(conn->ssl, 0); + + /* + * Something is very wrong. Report the error code and the entirety of + * the error queue without any attempt at interpretation. Probably not + * worth complicating things for the sake of translation, either. + */ + appendPQExpBuffer(&conn->errorMessage, + "unexpected error code %d while draining SSL buffer; ", + err); + + if (ERR_peek_error()) + { + appendPQExpBufferStr(&conn->errorMessage, "error queue follows:\n"); + ERR_print_errors_cb(append_error_queue, &conn->errorMessage); + } + else + appendPQExpBufferStr(&conn->errorMessage, + "no error queue provided\n"); + + return -1; + } + + /* Final consistency check. */ + if (drained != pending) + { + libpq_append_conn_error(conn, + "drained only %zu of %d pending bytes in SSL buffer", + drained, pending); + return -1; + } + + conn->inEnd += pending; + return 0; +} + ssize_t pgtls_write(PGconn *conn, const void *ptr, size_t len) { diff --git a/src/interfaces/libpq/fe-secure.c b/src/interfaces/libpq/fe-secure.c index 94c97ec26fb3..82ae07be6744 100644 --- a/src/interfaces/libpq/fe-secure.c +++ b/src/interfaces/libpq/fe-secure.c @@ -244,7 +244,9 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len) } /* - * Returns true if there are any bytes available in the transport buffer. + * Returns true if there are any bytes available in the transport buffer. See + * pqsecure_drain_pending() for a more complete discussion of the concepts + * involved. */ bool pqsecure_read_is_pending(PGconn *conn) @@ -262,6 +264,99 @@ pqsecure_read_is_pending(PGconn *conn) return 0; } +/*--- + * Drains any transport data that is already buffered in userspace and adds it + * to conn->inBuffer, enlarging inBuffer if necessary. The drain fails if + * inBuffer cannot be made to hold all available transport data. + * + * Implementations should not attempt to read any more data from the socket + * while draining the transport buffer. After a successful return, + * pqsecure_bytes_pending() must be zero. + * + * This operation is necessary to prevent deadlock, due to a layering violation + * designed into our asynchronous client API: pqReadData() and all the parsing + * routines above it receive data from the SSL/GSS transport buffer, but clients + * poll on the raw PQsocket() handle. So data can be "lost" in the intermediate + * layer if we don't take it out here. + * + * To illustrate what we're trying to prevent, say that the server is sending + * two messages at once in response to a query (Aaaa and Bb), the libpq buffer + * is five characters in size, and TLS records max out at three-character + * payloads. + * + * Client libpq SSL Socket + * | | | | + * | [ ] [ ] [ ] [1] Buffers are empty, client is + * x --------------------------> | polling on socket + * | | | | + * | [ ] [ ] [xxx] [2] First record is received; poll + * | <-------------------------- | signals read-ready + * | | | | + * x ---> [ ] [ ] [xxx] [3] Client calls PQconsumeInput() + * | | | | + * | [ ] -> [ ] [xxx] [4] libpq calls pqReadData() to fill + * | | | | the receive buffer + * | [ ] [Aaa] <-- [ ] [5] SSL pulls payload off the wire + * | | | | and decrypts it + * | [Aaa ] <- [ ] [ ] [6] pqsecure_read() takes all data + * | | | | + * | <--- [Aaa ] [ ] [ ] [7] PQconsumeInput() returns with a + * x --------------------------> | partial message, PQisBusy() is + * | | | | still true, client polls again + * | [Aaa ] [ ] [xxx] [8] Second record is received; poll + * | <-------------------------- | signals read-ready + * | | | | + * x ---> [Aaa ] [ ] [xxx] [9] Client calls PQconsumeInput() + * | | | | + * | [Aaa ] -> [ ] [xxx] [10] libpq calls pqReadData() to fill + * | | | | the receive buffer + * | [Aaa ] [aBb] <-- [ ] [11] SSL decrypts + * | | | | + * | [AaaaB] <- [b ] [ ] [12] pqsecure_read() fills its + * | | | | buffer, taking only two bytes + * | <--- [AaaaB] [b ] [ ] [13] PQconsumeInput() returns with a + * | | | | complete message buffered; + * | | | | PQisBusy() is false + * x ---> [AaaaB] [b ] [ ] [14] Client calls PQgetResult() + * | | | | + * | <--- [B ] [b ] [ ] [15] Aaaa is returned; PQisBusy() is + * x --------------------------> | true and client polls again + * . | | . + * . [B ] [b ] . [16] No packets, and client hangs. + * . | | . + * + */ +int +pqsecure_drain_pending(PGconn *conn) +{ + int ret; + +#ifdef USE_SSL + if (conn->ssl_in_use) + { + ret = pgtls_drain_pending(conn); + } + else +#endif +#ifdef ENABLE_GSS + if (conn->gssenc) + { + ret = pg_GSS_drain_pending(conn); + } + else +#endif + { + /* Plaintext connections have no transport buffer. */ + ret = 0; + } + + /* Keep the implementation honest. */ + if (ret == 0) + Assert(!pqsecure_read_is_pending(conn)); + + return ret; +} + /* * Write data to a secure connection. * diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index 902f6b92e005..e7488f122d78 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -811,6 +811,7 @@ extern PostgresPollingStatusType pqsecure_open_client(PGconn *); extern void pqsecure_close(PGconn *); extern ssize_t pqsecure_read(PGconn *, void *ptr, size_t len); extern bool pqsecure_read_is_pending(PGconn *); +extern int pqsecure_drain_pending(PGconn *); extern ssize_t pqsecure_write(PGconn *, const void *ptr, size_t len); extern ssize_t pqsecure_raw_read(PGconn *, void *ptr, size_t len); extern ssize_t pqsecure_raw_write(PGconn *, const void *ptr, size_t len); @@ -851,6 +852,11 @@ extern ssize_t pgtls_read(PGconn *conn, void *ptr, size_t len); */ extern bool pgtls_read_is_pending(PGconn *conn); +/* + * Reads any data waiting in the SSL read buffer into the connection buffer. + */ +extern int pgtls_drain_pending(PGconn *conn); + /* * Write data to a secure connection. * @@ -898,6 +904,7 @@ extern PostgresPollingStatusType pqsecure_open_gss(PGconn *conn); extern ssize_t pg_GSS_write(PGconn *conn, const void *ptr, size_t len); extern ssize_t pg_GSS_read(PGconn *conn, void *ptr, size_t len); extern bool pg_GSS_read_is_pending(PGconn *conn); +extern int pg_GSS_drain_pending(PGconn *conn); #endif /* === in fe-trace.c === */