From 849a6119b8ceb0dc12771f8071621573d285dccf Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 14 Aug 2025 13:45:44 +0300 Subject: [PATCH 1/3] libpq: Don't hang on out-of-memory If an allocation failed while handling an async notification, we returned EOF, which stopped processing any further data until more data was received from the server. If more data never arrives, e.g. because the connection was used just to wait for the notification, or because the next ReadyForQuery was already received and buffered, it would get stuck forever. Instead, silently ignore the notification. Silently ignoring the notification is not a great way to handle the situation, but at least the connection doesn't get stuck, and it's consistent with how the malloc() later in the function is handled, and with e.g. how pqSaveParameterStatus() handles allocation failures. Fix the same issue with OOM on receiving BackendKeyData message. That one is new in v18. Discussion: https://www.postgresql.org/message-id/df892f9f-5923-4046-9d6f-8c48d8980b50@iki.fi --- src/interfaces/libpq/fe-protocol3.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index 1599de757d13..5683229e32e8 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -1550,9 +1550,8 @@ getBackendKeyData(PGconn *conn, int msgLength) conn->be_cancel_key = malloc(cancel_key_len); if (conn->be_cancel_key == NULL) { - libpq_append_conn_error(conn, "out of memory"); - /* discard the message */ - return EOF; + /* continue without cancel key */ + return 0; } if (pqGetnchar(conn->be_cancel_key, cancel_key_len, conn)) { @@ -1588,14 +1587,19 @@ getNotify(PGconn *conn) return EOF; /* must save name while getting extra string */ svname = strdup(conn->workBuffer.data); - if (!svname) - return EOF; if (pqGets(&conn->workBuffer, conn)) { - free(svname); + if (svname) + free(svname); return EOF; } + if (!svname) + { + /* out of memory; silently ignore the notification */ + return 0; + } + /* * Store the strings right after the PGnotify structure so it can all be * freed at once. We don't use NAMEDATALEN because we don't want to tie From 281757db1a94a0d18d8c3ad086ad0b6e911b74d2 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 14 Aug 2025 13:45:44 +0300 Subject: [PATCH 2/3] libpq: Handle OOMs by disconnecting instead of silently skipping messages As noted in the previous commit, silently ignoring async notifications is not great. Our options for reporting errors are limited, but it seems better to terminate the connection than try to soldier on. Applications should handle connection loss gracefully, whereas silently missing a notification could cause much weirder problems. Similarly, if we run out of memory while saving a received ParameterStatus or cancellation key, disconnect. This also changes the error message on OOM while expanding the input buffer. It used to report "cannot allocate memory for input buffer", followed by "lost synchronization with server: got message type ...". The "lost synchronization" message seems unnecessary, so remove that and report only "cannot allocate memory for input buffer". (The comment speculated that the out of memory could indeed be caused by loss of sync, but that seems highly unlikely.) Discussion: https://www.postgresql.org/message-id/df892f9f-5923-4046-9d6f-8c48d8980b50@iki.fi --- src/interfaces/libpq/fe-exec.c | 13 +++- src/interfaces/libpq/fe-protocol3.c | 116 ++++++++++++++++++---------- src/interfaces/libpq/libpq-int.h | 2 +- 3 files changed, 88 insertions(+), 43 deletions(-) diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index 4256ae5c0cc5..0b1e37ec30bb 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -1076,8 +1076,12 @@ pqSaveMessageField(PGresult *res, char code, const char *value) /* * pqSaveParameterStatus - remember parameter status sent by backend + * + * Returns 1 on success, 0 on out-of-memory. (Note that on out-of-memory, we + * have already released the old value of the parameter, if any. The only + * really safe way to recover is to terminate the connection.) */ -void +int pqSaveParameterStatus(PGconn *conn, const char *name, const char *value) { pgParameterStatus *pstatus; @@ -1119,6 +1123,11 @@ pqSaveParameterStatus(PGconn *conn, const char *name, const char *value) pstatus->next = conn->pstatus; conn->pstatus = pstatus; } + else + { + /* out of memory */ + return 0; + } /* * Save values of settings that are of interest to libpq in fields of the @@ -1190,6 +1199,8 @@ pqSaveParameterStatus(PGconn *conn, const char *name, const char *value) { conn->scram_sha_256_iterations = atoi(value); } + + return 1; } diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index 5683229e32e8..0cca832c06ac 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -43,6 +43,7 @@ (id) == PqMsg_RowDescription) +static void handleFatalError(PGconn *conn); static void handleSyncLoss(PGconn *conn, char id, int msgLength); static int getRowDescriptions(PGconn *conn, int msgLength); static int getParamDescriptions(PGconn *conn, int msgLength); @@ -120,12 +121,12 @@ pqParseInput3(PGconn *conn) conn)) { /* - * XXX add some better recovery code... plan is to skip over - * the message using its length, then report an error. For the - * moment, just treat this like loss of sync (which indeed it - * might be!) + * Abandon the connection. There's not much else we can + * safely do; we can't just ignore the message or we could + * miss important changes to the connection state. + * pqCheckInBufferSpace() already reported the error. */ - handleSyncLoss(conn, id, msgLength); + handleFatalError(conn); } return; } @@ -456,6 +457,11 @@ pqParseInput3(PGconn *conn) /* Normal case: parsing agrees with specified length */ pqParseDone(conn, conn->inCursor); } + else if (conn->error_result && conn->status == CONNECTION_BAD) + { + /* The connection was abandoned and we already reported it */ + return; + } else { /* Trouble --- report it */ @@ -470,15 +476,14 @@ pqParseInput3(PGconn *conn) } /* - * handleSyncLoss: clean up after loss of message-boundary sync + * handleFatalError: clean up after a nonrecoverable error * - * There isn't really a lot we can do here except abandon the connection. + * This is for errors where we need to abandon the connection. The caller has + * already saved the error message in conn->errorMessage. */ static void -handleSyncLoss(PGconn *conn, char id, int msgLength) +handleFatalError(PGconn *conn) { - libpq_append_conn_error(conn, "lost synchronization with server: got message type \"%c\", length %d", - id, msgLength); /* build an error result holding the error message */ pqSaveErrorResult(conn); conn->asyncStatus = PGASYNC_READY; /* drop out of PQgetResult wait loop */ @@ -487,6 +492,19 @@ handleSyncLoss(PGconn *conn, char id, int msgLength) conn->status = CONNECTION_BAD; /* No more connection to backend */ } +/* + * handleSyncLoss: clean up after loss of message-boundary sync + * + * There isn't really a lot we can do here except abandon the connection. + */ +static void +handleSyncLoss(PGconn *conn, char id, int msgLength) +{ + libpq_append_conn_error(conn, "lost synchronization with server: got message type \"%c\", length %d", + id, msgLength); + handleFatalError(conn); +} + /* * parseInput subroutine to read a 'T' (row descriptions) message. * We'll build a new PGresult structure (unless called for a Describe @@ -1519,7 +1537,11 @@ getParameterStatus(PGconn *conn) return EOF; } /* And save it */ - pqSaveParameterStatus(conn, conn->workBuffer.data, valueBuf.data); + if (!pqSaveParameterStatus(conn, conn->workBuffer.data, valueBuf.data)) + { + libpq_append_conn_error(conn, "out of memory"); + handleFatalError(conn); + } termPQExpBuffer(&valueBuf); return 0; } @@ -1550,7 +1572,8 @@ getBackendKeyData(PGconn *conn, int msgLength) conn->be_cancel_key = malloc(cancel_key_len); if (conn->be_cancel_key == NULL) { - /* continue without cancel key */ + libpq_append_conn_error(conn, "out of memory"); + handleFatalError(conn); return 0; } if (pqGetnchar(conn->be_cancel_key, cancel_key_len, conn)) @@ -1587,6 +1610,18 @@ getNotify(PGconn *conn) return EOF; /* must save name while getting extra string */ svname = strdup(conn->workBuffer.data); + if (!svname) + { + /* + * Notify messages can arrive at any state, so we cannot associate the + * error with any particular query. There's no way to return back an + * "async error", so the best we can do is drop the connection. That + * seems better than silently ignoring the notification. + */ + libpq_append_conn_error(conn, "out of memory"); + handleFatalError(conn); + return 0; + } if (pqGets(&conn->workBuffer, conn)) { if (svname) @@ -1594,12 +1629,6 @@ getNotify(PGconn *conn) return EOF; } - if (!svname) - { - /* out of memory; silently ignore the notification */ - return 0; - } - /* * Store the strings right after the PGnotify structure so it can all be * freed at once. We don't use NAMEDATALEN because we don't want to tie @@ -1608,21 +1637,26 @@ getNotify(PGconn *conn) nmlen = strlen(svname); extralen = strlen(conn->workBuffer.data); newNotify = (PGnotify *) malloc(sizeof(PGnotify) + nmlen + extralen + 2); - if (newNotify) - { - newNotify->relname = (char *) newNotify + sizeof(PGnotify); - strcpy(newNotify->relname, svname); - newNotify->extra = newNotify->relname + nmlen + 1; - strcpy(newNotify->extra, conn->workBuffer.data); - newNotify->be_pid = be_pid; - newNotify->next = NULL; - if (conn->notifyTail) - conn->notifyTail->next = newNotify; - else - conn->notifyHead = newNotify; - conn->notifyTail = newNotify; + if (!newNotify) + { + free(svname); + libpq_append_conn_error(conn, "out of memory"); + handleFatalError(conn); + return 0; } + newNotify->relname = (char *) newNotify + sizeof(PGnotify); + strcpy(newNotify->relname, svname); + newNotify->extra = newNotify->relname + nmlen + 1; + strcpy(newNotify->extra, conn->workBuffer.data); + newNotify->be_pid = be_pid; + newNotify->next = NULL; + if (conn->notifyTail) + conn->notifyTail->next = newNotify; + else + conn->notifyHead = newNotify; + conn->notifyTail = newNotify; + free(svname); return 0; } @@ -1756,12 +1790,12 @@ getCopyDataMessage(PGconn *conn) conn)) { /* - * XXX add some better recovery code... plan is to skip over - * the message using its length, then report an error. For the - * moment, just treat this like loss of sync (which indeed it - * might be!) + * Abandon the connection. There's not much else we can + * safely do; we can't just ignore the message or we could + * miss important changes to the connection state. + * pqCheckInBufferSpace() already reported the error. */ - handleSyncLoss(conn, id, msgLength); + handleFatalError(conn); return -2; } return 0; @@ -2190,12 +2224,12 @@ pqFunctionCall3(PGconn *conn, Oid fnid, conn)) { /* - * XXX add some better recovery code... plan is to skip over - * the message using its length, then report an error. For the - * moment, just treat this like loss of sync (which indeed it - * might be!) + * Abandon the connection. There's not much else we can + * safely do; we can't just ignore the message or we could + * miss important changes to the connection state. + * pqCheckInBufferSpace() already reported the error. */ - handleSyncLoss(conn, id, msgLength); + handleFatalError(conn); break; } continue; diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index a701c25038a7..02c114f14052 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -746,7 +746,7 @@ extern PGresult *pqPrepareAsyncResult(PGconn *conn); extern void pqInternalNotice(const PGNoticeHooks *hooks, const char *fmt,...) pg_attribute_printf(2, 3); extern void pqSaveMessageField(PGresult *res, char code, const char *value); -extern void pqSaveParameterStatus(PGconn *conn, const char *name, +extern int pqSaveParameterStatus(PGconn *conn, const char *name, const char *value); extern int pqRowProcessor(PGconn *conn, const char **errmsgp); extern void pqCommandQueueAdvance(PGconn *conn, bool isReadyForQuery, From 00e2825097eb910d54776be5b2ca1b4a36a626a3 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 14 Aug 2025 14:28:49 +0300 Subject: [PATCH 3/3] libpq: Be strict about cancel key lengths The protocol documentation states that the maximum length of a cancel key is 256 bytes. This starts checking for that limit in libpq. Otherwise third party backend implementations will probably start using more bytes anyway. We also start requiring that a protocol 3.0 connection does not send a longer cancel key, to make sure that servers don't start breaking old 3.0-only clients by accident. Finally this also restricts the minimum key length to 4 bytes (both in the protocol spec and in the libpq implementation). Author: Jelte Fennema-Nio Discussion: https://www.postgresql.org/message-id/DAVFE8ECY631.1KKWX7L8S4DAQ@jeltef.nl --- doc/src/sgml/protocol.sgml | 2 +- src/interfaces/libpq/fe-protocol3.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index e63647c093ba..b5395604fb8b 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -4136,7 +4136,7 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;" message, indicated by the length field. - The maximum key length is 256 bytes. The + The minimum and maximum key length are 4 and 256 bytes, respectively. The PostgreSQL server only sends keys up to 32 bytes, but the larger maximum size allows for future server versions, as well as connection poolers and other middleware, to use diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index 0cca832c06ac..223cd8fb51c6 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -1569,6 +1569,27 @@ getBackendKeyData(PGconn *conn, int msgLength) cancel_key_len = 5 + msgLength - (conn->inCursor - conn->inStart); + if (cancel_key_len != 4 && conn->pversion == PG_PROTOCOL(3, 0)) + { + libpq_append_conn_error(conn, "received invalid BackendKeyData message: cancel key length %d is different from 4, which is not supported in version 3.0 of the protocol", cancel_key_len); + handleFatalError(conn); + return 0; + } + + if (cancel_key_len < 4) + { + libpq_append_conn_error(conn, "received invalid BackendKeyData message: cancel key length %d is below minimum of 4 bytes", cancel_key_len); + handleFatalError(conn); + return 0; + } + + if (cancel_key_len > 256) + { + libpq_append_conn_error(conn, "received invalid BackendKeyData message: cancel key length %d exceeds maximum of 256 bytes", cancel_key_len); + handleFatalError(conn); + return 0; + } + conn->be_cancel_key = malloc(cancel_key_len); if (conn->be_cancel_key == NULL) {