From f1f76ea487717efa83317af97cbba3b9e9b11848 Mon Sep 17 00:00:00 2001 From: dmitrivasilyev Date: Fri, 15 May 2026 20:50:41 +0300 Subject: [PATCH 1/8] Cache pooler_check_query response per pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before, only ";" worked as pooler_check_query: pg_doorman always replied with a hardcoded EmptyQueryResponse + ReadyForQuery, regardless of the configured value. Setting "select 1" — the keep-alive query that Wildfly, HikariCP and many JDBC pools default to — produced the same empty response, which is semantically wrong for a non-empty query. Now pg_doorman forwards the first matching SimpleQuery to PostgreSQL, caches the response per pool, and serves subsequent matching probes from cache without touching the backend. The cache validates against the current general.pooler_check_query value on every lookup, so a RELOAD that changes the value triggers a fresh backend probe on the next ping without an explicit invalidation hook. ErrorResponse from the backend is never cached: the next probe re-fetches. The operator's responsibility: pooler_check_query must be a stable, side-effect-free SimpleQuery. Same input must produce the same output every time. Safe choices include ";" (default), "select 1", "select 'pg_doorman'", "select version()". Unsafe choices include "select now()", "select pg_is_in_recovery()" (changes on failover), or any UPDATE/DELETE/CALL — their first-call result would be cached and replayed indefinitely. Default ";" is no longer special-cased: it goes through the same cache flow. On a cold pool the first ";" now performs one backend round-trip and caches PostgreSQL's EmptyQueryResponse. Load balancers pinging through pg_doorman will surface a backend-unreachable condition as a probe failure on the very first probe of a fresh pool, instead of an unconditional OK from a hardcoded reply. Two new counter metrics expose the cache hit rate: - pg_doorman_pooler_check_query_backend_total (cache miss → backend probe) - pg_doorman_pooler_check_query_cache_total (cache hit → served from memory) --- src/client/transaction.rs | 66 +++- src/messages/mod.rs | 4 +- src/messages/protocol.rs | 43 ++- src/messages/protocol_tests.rs | 84 ++++++ src/pool/check_query_cache.rs | 94 ++++++ src/pool/dynamic.rs | 5 +- src/pool/mod.rs | 10 + src/pool/retain.rs | 1 + src/utils/buffering_writer.rs | 90 ++++++ src/utils/mod.rs | 1 + src/web/metrics/mod.rs | 31 ++ .../features/pooler-check-query-cache.feature | 283 ++++++++++++++++++ 12 files changed, 694 insertions(+), 18 deletions(-) create mode 100644 src/pool/check_query_cache.rs create mode 100644 src/utils/buffering_writer.rs create mode 100644 tests/bdd/features/pooler-check-query-cache.feature diff --git a/src/client/transaction.rs b/src/client/transaction.rs index 4c0aa5487..2dd12fb9c 100644 --- a/src/client/transaction.rs +++ b/src/client/transaction.rs @@ -17,12 +17,14 @@ use crate::client::core::{BatchOperation, Client, PreparedStatementKey}; use crate::client::util::{is_standalone_begin, QUERY_DEALLOCATE}; use crate::errors::Error; use crate::messages::{ - check_query_response, deallocate_response, error_response, error_response_terminal, + deallocate_response, error_response, error_response_terminal, has_error_response, insert_close_complete_after_last_close_complete, read_message_reuse, write_all_flush, }; use crate::pool::CANCELED_PIDS; use crate::server::Server; +use crate::utils::buffering_writer::BufferingWriter; use crate::utils::debug_messages::{log_client_to_server, log_server_to_client}; +use crate::web::metrics::{POOLER_CHECK_QUERY_BACKEND_TOTAL, POOLER_CHECK_QUERY_CACHE_TOTAL}; // ============================================================================= // PostgreSQL Extended Query Protocol - Documentation @@ -317,11 +319,14 @@ where return Ok(false); } - // Check for pooler health check query + // Pooler health-check query — byte-for-byte match against the + // pre-encoded `general.pooler_check_query`. First probe goes to + // PostgreSQL and the response is cached per pool; subsequent probes + // are served from cache. ErrorResponse is never cached. if message.len() == self.pooler_check_query_request_vec.len() && self.pooler_check_query_request_vec.as_slice() == &message[..] { - write_all_flush(&mut self.write, &check_query_response()).await?; + self.handle_pooler_check_query(message).await?; return Ok(true); } @@ -370,6 +375,61 @@ where Ok(false) } + /// Serve a `general.pooler_check_query` SimpleQuery. The first probe in + /// the pool's lifetime (and the first after a RELOAD that changes the + /// value) forwards the query to PostgreSQL; subsequent probes answer + /// from the per-pool response cache without touching the backend. + /// `ErrorResponse` from the backend is forwarded to the client as-is + /// and never cached. + async fn handle_pooler_check_query(&mut self, message: &BytesMut) -> Result<(), Error> { + let pool = self.get_pool().await?; + let current_query = crate::config::get_config() + .general + .pooler_check_query + .clone(); + + if let Some(cached) = pool.check_query_cache.get(¤t_query) { + POOLER_CHECK_QUERY_CACHE_TOTAL.inc(); + write_all_flush(&mut self.write, cached.as_ref()).await?; + return Ok(()); + } + + let mut conn = pool.database.get().await.map_err(|e| { + Error::ClientError(format!( + "pooler_check_query: failed to acquire backend: {e}" + )) + })?; + + conn.send_and_flush(message).await?; + POOLER_CHECK_QUERY_BACKEND_TOTAL.inc(); + + // Server::recv must be drained in a loop until is_data_available() + // is false; otherwise responses larger than BUFFER_FLUSH_THRESHOLD + // leave bytes in the backend socket and the next checked-out client + // reads a desynced stream. + let mut response = BytesMut::new(); + loop { + let mut overflow_buf = BytesMut::new(); + let writer = BufferingWriter::new(&mut overflow_buf); + let chunk = conn.recv(writer, None).await?; + response.extend_from_slice(&chunk); + if !overflow_buf.is_empty() { + response.extend_from_slice(&overflow_buf); + } + if !conn.is_data_available() { + break; + } + } + + write_all_flush(&mut self.write, &response).await?; + + if !has_error_response(&response) { + pool.check_query_cache.set(current_query, response.freeze()); + } + + Ok(()) + } + /// Handle simple query (Q message). /// Returns the action to take after processing. #[inline] diff --git a/src/messages/mod.rs b/src/messages/mod.rs index 1afd69a29..36000deb0 100644 --- a/src/messages/mod.rs +++ b/src/messages/mod.rs @@ -21,8 +21,8 @@ pub use config_socket::{configure_tcp_socket, configure_unix_socket}; pub use error::PgErrorMsg; pub use extended::{close_complete, Bind, Close, Describe, ExtendedProtocolData, Parse}; pub use protocol::{ - check_query_response, command_complete, data_row, data_row_nullable, deallocate_response, - error_message, error_response, error_response_terminal, flush, + command_complete, data_row, data_row_nullable, deallocate_response, error_message, + error_response, error_response_terminal, flush, has_error_response, insert_close_complete_after_last_close_complete, insert_close_complete_before_ready_for_query, insert_parse_complete_before_bind_complete, insert_parse_complete_before_parameter_description, md5_challenge, md5_hash_password, md5_hash_second_pass, md5_password, md5_password_with_hash, diff --git a/src/messages/protocol.rs b/src/messages/protocol.rs index b8b53a7ba..40304ed41 100644 --- a/src/messages/protocol.rs +++ b/src/messages/protocol.rs @@ -1,6 +1,5 @@ // Standard library imports use std::collections::HashMap; -use std::mem; // External crate imports use crate::messages::constants::SCRAM_SHA_256; use bytes::{Buf, BufMut, BytesMut}; @@ -616,16 +615,38 @@ pub fn parse_complete() -> BytesMut { bytes } -/// Create a check query response message. -pub fn check_query_response() -> BytesMut { - let mut bytes = BytesMut::with_capacity(11); - - bytes.put_u8(b'I'); - bytes.put_i32(mem::size_of::() as i32); - bytes.put_u8(b'Z'); - bytes.put_i32(mem::size_of::() as i32 + 1); - bytes.put_u8(b'I'); - bytes +/// Scan a buffered response stream for an `ErrorResponse` (tag `b'E'`) frame. +/// Returns `true` if any frame in `bytes` has the error tag. +/// +/// The buffer is expected to contain zero or more PostgreSQL backend messages +/// in wire format (`tag:1 + len:4 + body`). A truncated trailing frame is +/// treated as "no error here" and stops the scan without panicking. +pub fn has_error_response(bytes: &[u8]) -> bool { + let mut offset = 0; + while offset + 5 <= bytes.len() { + let tag = bytes[offset]; + if tag == b'E' { + return true; + } + let len = i32::from_be_bytes([ + bytes[offset + 1], + bytes[offset + 2], + bytes[offset + 3], + bytes[offset + 4], + ]); + if len < 4 { + return false; + } + let frame_end = match offset.checked_add(1 + len as usize) { + Some(end) => end, + None => return false, + }; + if frame_end > bytes.len() { + return false; + } + offset = frame_end; + } + false } /// Create a deallocate response message. diff --git a/src/messages/protocol_tests.rs b/src/messages/protocol_tests.rs index 3c0eb73b8..58fde3a1d 100644 --- a/src/messages/protocol_tests.rs +++ b/src/messages/protocol_tests.rs @@ -685,3 +685,87 @@ fn test_insert_parse_complete_before_param_desc_complex_scenario() { .concat(); assert_eq!(result.as_ref(), &expected[..]); } + +fn empty_query_response_msg() -> Vec { + vec![b'I', 0, 0, 0, 4] +} + +fn data_row_single_text(value: &[u8]) -> Vec { + let mut msg = Vec::new(); + msg.push(b'D'); + let body_len = 4 + 2 + 4 + value.len(); + msg.extend_from_slice(&(body_len as i32).to_be_bytes()); + msg.extend_from_slice(&1i16.to_be_bytes()); + msg.extend_from_slice(&(value.len() as i32).to_be_bytes()); + msg.extend_from_slice(value); + msg +} + +fn error_response_msg(sqlstate: &str) -> Vec { + let message = b"backend error"; + let mut msg = Vec::new(); + msg.push(b'E'); + let body_len = 4 + 1 + sqlstate.len() + 1 + 1 + message.len() + 1 + 1; + msg.extend_from_slice(&(body_len as i32).to_be_bytes()); + msg.push(b'C'); + msg.extend_from_slice(sqlstate.as_bytes()); + msg.push(0); + msg.push(b'M'); + msg.extend_from_slice(message); + msg.push(0); + msg.push(0); + msg +} + +#[test] +fn has_error_response_empty_buffer_returns_false() { + assert!(!super::protocol::has_error_response(&[])); +} + +#[test] +fn has_error_response_only_ready_for_query_returns_false() { + let buf = ready_for_query_msg(b'I'); + assert!(!super::protocol::has_error_response(&buf)); +} + +#[test] +fn has_error_response_empty_query_plus_rfq_returns_false() { + let mut buf = empty_query_response_msg(); + buf.extend_from_slice(&ready_for_query_msg(b'I')); + assert!(!super::protocol::has_error_response(&buf)); +} + +#[test] +fn has_error_response_data_row_plus_command_complete_plus_rfq_returns_false() { + let mut buf = data_row_single_text(b"1"); + buf.extend_from_slice(&command_complete_msg("SELECT 1")); + buf.extend_from_slice(&ready_for_query_msg(b'I')); + assert!(!super::protocol::has_error_response(&buf)); +} + +#[test] +fn has_error_response_error_response_plus_rfq_returns_true() { + let mut buf = error_response_msg("57P01"); + buf.extend_from_slice(&ready_for_query_msg(b'E')); + assert!(super::protocol::has_error_response(&buf)); +} + +#[test] +fn has_error_response_error_response_in_middle_returns_true() { + let mut buf = data_row_single_text(b"x"); + buf.extend_from_slice(&error_response_msg("42P01")); + buf.extend_from_slice(&ready_for_query_msg(b'E')); + assert!(super::protocol::has_error_response(&buf)); +} + +#[test] +fn has_error_response_truncated_frame_returns_false_without_panic() { + let buf = vec![b'D', 0, 0, 0, 100, 0xAA, 0xBB]; + assert!(!super::protocol::has_error_response(&buf)); +} + +#[test] +fn has_error_response_truncated_length_field_returns_false() { + let buf = vec![b'D', 0, 0]; + assert!(!super::protocol::has_error_response(&buf)); +} diff --git a/src/pool/check_query_cache.rs b/src/pool/check_query_cache.rs new file mode 100644 index 000000000..384be4552 --- /dev/null +++ b/src/pool/check_query_cache.rs @@ -0,0 +1,94 @@ +//! Per-`ConnectionPool` cache for the response to `general.pooler_check_query`. +//! +//! Holds the last observed `(query, response_bytes)` pair. `get(current)` +//! returns the cached response only when the stored query still matches the +//! caller's current value — a RELOAD that changes `pooler_check_query` +//! self-invalidates the cache on the next probe without any explicit hook +//! into the reload code. + +use std::sync::Arc; + +use arc_swap::ArcSwap; +use bytes::Bytes; + +#[derive(Debug)] +pub struct CheckQueryCache { + inner: ArcSwap>, +} + +impl CheckQueryCache { + pub fn new() -> Self { + Self { + inner: ArcSwap::from_pointee(None), + } + } + + /// Returns `Some(bytes)` when the cache holds a response for `current_query`. + /// Returns `None` when the cache is empty or the stored query no longer matches. + pub fn get(&self, current_query: &str) -> Option { + let snapshot = self.inner.load_full(); + match snapshot.as_ref() { + Some((q, bytes)) if q == current_query => Some(bytes.clone()), + _ => None, + } + } + + /// Stores a response for `query`. Subsequent `get(current_query)` calls + /// with `current_query == query` will return `Some(bytes)`. + pub fn set(&self, query: String, bytes: Bytes) { + self.inner.store(Arc::new(Some((query, bytes)))); + } +} + +impl Default for CheckQueryCache { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_cache_returns_none() { + let cache = CheckQueryCache::new(); + assert!(cache.get(";").is_none()); + assert!(cache.get("select 1").is_none()); + } + + #[test] + fn get_after_set_matching_query_returns_bytes() { + let cache = CheckQueryCache::new(); + cache.set("select 1".to_string(), Bytes::from_static(b"response1")); + assert_eq!( + cache.get("select 1"), + Some(Bytes::from_static(b"response1")) + ); + } + + #[test] + fn get_with_different_query_returns_none() { + let cache = CheckQueryCache::new(); + cache.set("select 1".to_string(), Bytes::from_static(b"response1")); + assert!(cache.get("select 2").is_none()); + assert!(cache.get(";").is_none()); + } + + #[test] + fn set_overwrites_previous_value() { + let cache = CheckQueryCache::new(); + cache.set("select 1".to_string(), Bytes::from_static(b"v1")); + cache.set("select 2".to_string(), Bytes::from_static(b"v2")); + assert!(cache.get("select 1").is_none()); + assert_eq!(cache.get("select 2"), Some(Bytes::from_static(b"v2"))); + } + + #[test] + fn empty_string_query_is_treated_like_any_other() { + let cache = CheckQueryCache::new(); + cache.set("".to_string(), Bytes::from_static(b"empty")); + assert_eq!(cache.get(""), Some(Bytes::from_static(b"empty"))); + assert!(cache.get("select 1").is_none()); + } +} diff --git a/src/pool/dynamic.rs b/src/pool/dynamic.rs index 2131ae0da..0a3bf01e9 100644 --- a/src/pool/dynamic.rs +++ b/src/pool/dynamic.rs @@ -18,8 +18,8 @@ use crate::stats::AddressStats; use super::types::{PoolConfig, QueueMode, Timeouts}; use super::{ build_server_tls_for_pool, get_auth_query_state, get_coordinator, get_pool, - register_dynamic_pool, resolve_server_cache_size, Address, ConnectionPool, Pool, - PoolIdentifier, PoolSettings, PreparedStatementCache, ServerPool, POOLS, + register_dynamic_pool, resolve_server_cache_size, Address, CheckQueryCache, ConnectionPool, + Pool, PoolIdentifier, PoolSettings, PreparedStatementCache, ServerPool, POOLS, }; /// Create a dynamic data pool for auth_query passthrough mode. @@ -265,6 +265,7 @@ pub fn create_dynamic_pool( config.general.worker_threads, ))), }, + check_query_cache: Arc::new(CheckQueryCache::new()), coordinator: get_coordinator(pool_name), replenish_failures: Arc::new(AtomicU32::new(0)), created_at: std::time::Instant::now(), diff --git a/src/pool/mod.rs b/src/pool/mod.rs index eff81d95c..3ed13537c 100644 --- a/src/pool/mod.rs +++ b/src/pool/mod.rs @@ -29,6 +29,7 @@ pub use types::{Metrics, PoolConfig, QueueMode, ScalingConfig, Status, Timeouts} pub use crate::server::PreparedStatementCache; mod auth_query_state; +mod check_query_cache; mod dynamic; mod eviction; pub mod gc; @@ -40,6 +41,7 @@ pub mod startup_resolver; pub mod fallback; pub use auth_query_state::AuthQueryState; +pub use check_query_cache::CheckQueryCache; pub use dynamic::create_dynamic_pool; pub use eviction::PoolEvictionSource; pub use server_pool::ServerPool; @@ -325,6 +327,12 @@ pub struct ConnectionPool { /// Cache pub prepared_statement_cache: Option, + /// Per-pool cache for the response to `general.pooler_check_query`. + /// Populated on the first matching SimpleQuery from any client; subsequent + /// matches answer from this cache without touching the backend. The cache + /// self-invalidates when `general.pooler_check_query` changes via RELOAD. + pub check_query_cache: Arc, + /// Database-level connection coordinator. `Some` when `max_db_connections > 0` /// in the pool config, `None` otherwise (disabled, zero overhead). /// Shared across all user pools for the same database. @@ -622,6 +630,7 @@ impl ConnectionPool { config.general.worker_threads, ))), }, + check_query_cache: Arc::new(CheckQueryCache::new()), coordinator: coordinators.get(pool_name).cloned(), replenish_failures: Arc::new(AtomicU32::new(0)), created_at: std::time::Instant::now(), @@ -846,6 +855,7 @@ impl ConnectionPool { config.general.worker_threads, ))), }, + check_query_cache: Arc::new(CheckQueryCache::new()), coordinator: coordinators.get(pool_name).cloned(), replenish_failures: Arc::new(AtomicU32::new(0)), created_at: std::time::Instant::now(), diff --git a/src/pool/retain.rs b/src/pool/retain.rs index 995681f1a..30541b55f 100644 --- a/src/pool/retain.rs +++ b/src/pool/retain.rs @@ -346,6 +346,7 @@ mod tests { config_hash: 0, per_user_startup_overlay_hash: crate::pool::empty_overlay_hash(), prepared_statement_cache: None, + check_query_cache: Arc::new(crate::pool::CheckQueryCache::new()), coordinator: None, replenish_failures: Arc::new(AtomicU32::new(0)), created_at: std::time::Instant::now(), diff --git a/src/utils/buffering_writer.rs b/src/utils/buffering_writer.rs new file mode 100644 index 000000000..2c6dc8bdd --- /dev/null +++ b/src/utils/buffering_writer.rs @@ -0,0 +1,90 @@ +//! `AsyncWrite` over `&mut BytesMut`. Used to capture bytes that would otherwise +//! be proxied to a client socket — feeds `Server::recv` and lets the caller +//! inspect or cache the full response without touching the client connection. + +use std::io; +use std::pin::Pin; +use std::task::{Context, Poll}; + +use bytes::BytesMut; +use tokio::io::AsyncWrite; + +pub struct BufferingWriter<'a> { + buf: &'a mut BytesMut, +} + +impl<'a> BufferingWriter<'a> { + pub fn new(buf: &'a mut BytesMut) -> Self { + Self { buf } + } +} + +impl<'a> AsyncWrite for BufferingWriter<'a> { + fn poll_write( + self: Pin<&mut Self>, + _cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + self.get_mut().buf.extend_from_slice(buf); + Poll::Ready(Ok(buf.len())) + } + + fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn poll_shutdown(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tokio::io::AsyncWriteExt; + + #[tokio::test] + async fn write_all_then_flush_appends_to_buffer() { + let mut buf = BytesMut::new(); + { + let mut writer = BufferingWriter::new(&mut buf); + writer.write_all(b"hello").await.unwrap(); + writer.flush().await.unwrap(); + } + assert_eq!(&buf[..], b"hello"); + } + + #[tokio::test] + async fn multi_chunk_writes_concatenate_in_order() { + let mut buf = BytesMut::new(); + { + let mut writer = BufferingWriter::new(&mut buf); + writer.write_all(b"foo").await.unwrap(); + writer.write_all(b"bar").await.unwrap(); + writer.write_all(b"baz").await.unwrap(); + } + assert_eq!(&buf[..], b"foobarbaz"); + } + + #[tokio::test] + async fn shutdown_is_idempotent_and_preserves_buffer() { + let mut buf = BytesMut::new(); + { + let mut writer = BufferingWriter::new(&mut buf); + writer.write_all(b"payload").await.unwrap(); + writer.shutdown().await.unwrap(); + writer.shutdown().await.unwrap(); + } + assert_eq!(&buf[..], b"payload"); + } + + #[tokio::test] + async fn empty_write_keeps_buffer_empty() { + let mut buf = BytesMut::new(); + { + let mut writer = BufferingWriter::new(&mut buf); + writer.write_all(b"").await.unwrap(); + } + assert!(buf.is_empty()); + } +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 53f21603d..e5c5f072c 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,3 +1,4 @@ +pub mod buffering_writer; pub mod clock; pub mod core_affinity; pub mod dashmap; diff --git a/src/web/metrics/mod.rs b/src/web/metrics/mod.rs index 83136c4bb..7fbc022b7 100644 --- a/src/web/metrics/mod.rs +++ b/src/web/metrics/mod.rs @@ -696,6 +696,37 @@ pub(crate) static QUERY_INTERNER_SYNTHETIC_MISSES_TOTAL: Lazy = Lazy counter }); +/// Times pg_doorman forwarded a `pooler_check_query` SimpleQuery to the +/// backend because the per-pool cache was empty or the cached query no +/// longer matches `general.pooler_check_query`. The first probe after +/// process start and the first probe after a RELOAD that changes the +/// value both add to this counter. +pub(crate) static POOLER_CHECK_QUERY_BACKEND_TOTAL: Lazy = Lazy::new(|| { + let counter = IntCounter::new( + "pg_doorman_pooler_check_query_backend_total", + "Pooler check queries forwarded to PostgreSQL because the per-pool \ + response cache was empty or the cached query no longer matches the \ + current general.pooler_check_query value.", + ) + .unwrap(); + REGISTRY.register(Box::new(counter.clone())).unwrap(); + counter +}); + +/// Times pg_doorman answered a `pooler_check_query` SimpleQuery from the +/// per-pool response cache without touching the backend. The ratio +/// `cache_total / (cache_total + backend_total)` is the cache hit rate. +pub(crate) static POOLER_CHECK_QUERY_CACHE_TOTAL: Lazy = Lazy::new(|| { + let counter = IntCounter::new( + "pg_doorman_pooler_check_query_cache_total", + "Pooler check queries answered from the per-pool response cache \ + without forwarding to PostgreSQL.", + ) + .unwrap(); + REGISTRY.register(Box::new(counter.clone())).unwrap(); + counter +}); + /// Wall-clock time spent in a single GC sweep cycle (named + anonymous /// combined). Custom buckets target sweep durations from 100 µs to 1 s /// because shard-scan time scales with interner size. diff --git a/tests/bdd/features/pooler-check-query-cache.feature b/tests/bdd/features/pooler-check-query-cache.feature new file mode 100644 index 000000000..e9e956b0c --- /dev/null +++ b/tests/bdd/features/pooler-check-query-cache.feature @@ -0,0 +1,283 @@ +@pooler-check-query-cache +Feature: pooler_check_query response cache + The first SimpleQuery matching general.pooler_check_query in a pool's + lifetime is forwarded to PostgreSQL. The response is cached per pool + and subsequent matching queries are served from cache without touching + the backend. Cache invalidates when general.pooler_check_query changes + via RELOAD. ErrorResponse from backend is never cached. + + Background: + Given PostgreSQL started with pg_hba.conf: + """ + local all all trust + host all all 127.0.0.1/32 trust + host all all ::1/128 trust + """ + And fixtures from "tests/fixture.sql" applied + And pg_doorman hba file contains: + """ + host all admin 127.0.0.1/32 trust + host all example_user_1 127.0.0.1/32 md5 + """ + And self-signed SSL certificates are generated + + Scenario: default ";" — first ping hits backend, second hit served from cache + Given pg_doorman started with config: + """ + [prometheus] + enabled = true + host = "0.0.0.0" + port = 9127 + + [general] + host = "127.0.0.1" + port = ${DOORMAN_PORT} + pg_hba = {path = "${DOORMAN_HBA_FILE}"} + admin_username = "admin" + admin_password = "admin" + tls_private_key = "${DOORMAN_SSL_KEY}" + tls_certificate = "${DOORMAN_SSL_CERT}" + + [pools.example_db] + server_host = "127.0.0.1" + server_port = ${PG_PORT} + pool_mode = "transaction" + + [[pools.example_db.users]] + username = "example_user_1" + password = "md58a67a0c805a5ee0384ea28e0dea557b6" + pool_size = 4 + """ + When I run shell command: + """ + export PGPASSWORD=test + psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -c ";" >/dev/null + psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -c ";" >/dev/null + + BACKEND=$(curl -s http://127.0.0.1:9127/metrics | awk '/^pg_doorman_pooler_check_query_backend_total / {print $2; f=1} END {if (!f) print 0}') + CACHE=$(curl -s http://127.0.0.1:9127/metrics | awk '/^pg_doorman_pooler_check_query_cache_total / {print $2; f=1} END {if (!f) print 0}') + + echo "backend=$BACKEND cache=$CACHE" + test "$BACKEND" = "1" || { echo "expected backend_total=1, got $BACKEND"; exit 1; } + test "$CACHE" = "1" || { echo "expected cache_total=1, got $CACHE"; exit 1; } + """ + Then the command should succeed + + Scenario: custom "select 1" — first ping hits backend, second hit served from cache + Given pg_doorman started with config: + """ + [prometheus] + enabled = true + host = "0.0.0.0" + port = 9127 + + [general] + host = "127.0.0.1" + port = ${DOORMAN_PORT} + pg_hba = {path = "${DOORMAN_HBA_FILE}"} + admin_username = "admin" + admin_password = "admin" + pooler_check_query = "select 1" + tls_private_key = "${DOORMAN_SSL_KEY}" + tls_certificate = "${DOORMAN_SSL_CERT}" + + [pools.example_db] + server_host = "127.0.0.1" + server_port = ${PG_PORT} + pool_mode = "transaction" + + [[pools.example_db.users]] + username = "example_user_1" + password = "md58a67a0c805a5ee0384ea28e0dea557b6" + pool_size = 4 + """ + When I run shell command: + """ + export PGPASSWORD=test + psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -c "select 1" >/dev/null + psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -c "select 1" >/dev/null + psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -c "select 1" >/dev/null + + BACKEND=$(curl -s http://127.0.0.1:9127/metrics | awk '/^pg_doorman_pooler_check_query_backend_total / {print $2; f=1} END {if (!f) print 0}') + CACHE=$(curl -s http://127.0.0.1:9127/metrics | awk '/^pg_doorman_pooler_check_query_cache_total / {print $2; f=1} END {if (!f) print 0}') + + echo "backend=$BACKEND cache=$CACHE" + test "$BACKEND" = "1" || { echo "expected backend_total=1, got $BACKEND"; exit 1; } + test "$CACHE" = "2" || { echo "expected cache_total=2, got $CACHE"; exit 1; } + """ + Then the command should succeed + + Scenario: RELOAD with a different pooler_check_query value invalidates the cache + Given pg_doorman started with config: + """ + [prometheus] + enabled = true + host = "0.0.0.0" + port = 9127 + + [general] + host = "127.0.0.1" + port = ${DOORMAN_PORT} + pg_hba = {path = "${DOORMAN_HBA_FILE}"} + admin_username = "admin" + admin_password = "admin" + pooler_check_query = "select 1" + tls_private_key = "${DOORMAN_SSL_KEY}" + tls_certificate = "${DOORMAN_SSL_CERT}" + + [pools.example_db] + server_host = "127.0.0.1" + server_port = ${PG_PORT} + pool_mode = "transaction" + + [[pools.example_db.users]] + username = "example_user_1" + password = "md58a67a0c805a5ee0384ea28e0dea557b6" + pool_size = 4 + """ + When we create admin session "adm" to pg_doorman as "admin" with password "admin" + And I run shell command: + """ + export PGPASSWORD=test + psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -c "select 1" >/dev/null + psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -c "select 1" >/dev/null + """ + Then the command should succeed + When we overwrite pg_doorman config file with: + """ + [prometheus] + enabled = true + host = "0.0.0.0" + port = 9127 + + [general] + host = "127.0.0.1" + port = ${DOORMAN_PORT} + pg_hba = {path = "${DOORMAN_HBA_FILE}"} + admin_username = "admin" + admin_password = "admin" + pooler_check_query = "select 2" + tls_private_key = "${DOORMAN_SSL_KEY}" + tls_certificate = "${DOORMAN_SSL_CERT}" + + [pools.example_db] + server_host = "127.0.0.1" + server_port = ${PG_PORT} + pool_mode = "transaction" + + [[pools.example_db.users]] + username = "example_user_1" + password = "md58a67a0c805a5ee0384ea28e0dea557b6" + pool_size = 4 + """ + And we execute "RELOAD" on admin session "adm" and store response + And we sleep for 500 milliseconds + And I run shell command: + """ + export PGPASSWORD=test + psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -c "select 2" >/dev/null + psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -c "select 2" >/dev/null + + BACKEND=$(curl -s http://127.0.0.1:9127/metrics | awk '/^pg_doorman_pooler_check_query_backend_total / {print $2; f=1} END {if (!f) print 0}') + CACHE=$(curl -s http://127.0.0.1:9127/metrics | awk '/^pg_doorman_pooler_check_query_cache_total / {print $2; f=1} END {if (!f) print 0}') + + echo "backend=$BACKEND cache=$CACHE" + test "$BACKEND" = "2" || { echo "expected backend_total=2 (one before RELOAD, one after), got $BACKEND"; exit 1; } + test "$CACHE" = "2" || { echo "expected cache_total=2 (one before RELOAD, one after), got $CACHE"; exit 1; } + """ + Then the command should succeed + + Scenario: backend ErrorResponse is forwarded but never cached + Given pg_doorman started with config: + """ + [prometheus] + enabled = true + host = "0.0.0.0" + port = 9127 + + [general] + host = "127.0.0.1" + port = ${DOORMAN_PORT} + pg_hba = {path = "${DOORMAN_HBA_FILE}"} + admin_username = "admin" + admin_password = "admin" + pooler_check_query = "select * from no_such_table_aaa" + tls_private_key = "${DOORMAN_SSL_KEY}" + tls_certificate = "${DOORMAN_SSL_CERT}" + + [pools.example_db] + server_host = "127.0.0.1" + server_port = ${PG_PORT} + pool_mode = "transaction" + + [[pools.example_db.users]] + username = "example_user_1" + password = "md58a67a0c805a5ee0384ea28e0dea557b6" + pool_size = 4 + """ + When I run shell command: + """ + export PGPASSWORD=test + psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -c "select * from no_such_table_aaa" 2>&1 | grep -q "no_such_table_aaa" || { echo "first probe must surface the backend error"; exit 1; } + psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -c "select * from no_such_table_aaa" 2>&1 | grep -q "no_such_table_aaa" || { echo "second probe must also reach backend, not cache"; exit 1; } + + BACKEND=$(curl -s http://127.0.0.1:9127/metrics | awk '/^pg_doorman_pooler_check_query_backend_total / {print $2; f=1} END {if (!f) print 0}') + CACHE=$(curl -s http://127.0.0.1:9127/metrics | awk '/^pg_doorman_pooler_check_query_cache_total / {print $2; f=1} END {if (!f) print 0}') + + echo "backend=$BACKEND cache=$CACHE" + test "$BACKEND" = "2" || { echo "expected backend_total=2 (errors are never cached), got $BACKEND"; exit 1; } + test "$CACHE" = "0" || { echo "expected cache_total=0, got $CACHE"; exit 1; } + """ + Then the command should succeed + + Scenario: response larger than the per-message buffer is drained, cached, and the pool stays in sync + Given pg_doorman started with config: + """ + [prometheus] + enabled = true + host = "0.0.0.0" + port = 9127 + + [general] + host = "127.0.0.1" + port = ${DOORMAN_PORT} + pg_hba = {path = "${DOORMAN_HBA_FILE}"} + admin_username = "admin" + admin_password = "admin" + pooler_check_query = "select repeat('x', 10000)" + tls_private_key = "${DOORMAN_SSL_KEY}" + tls_certificate = "${DOORMAN_SSL_CERT}" + + [pools.example_db] + server_host = "127.0.0.1" + server_port = ${PG_PORT} + pool_mode = "transaction" + + [[pools.example_db.users]] + username = "example_user_1" + password = "md58a67a0c805a5ee0384ea28e0dea557b6" + pool_size = 4 + """ + When I run shell command: + """ + export PGPASSWORD=test + + LEN1=$(psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -t -A -c "select repeat('x', 10000)" | tr -d '\n' | wc -c) + LEN2=$(psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -t -A -c "select repeat('x', 10000)" | tr -d '\n' | wc -c) + LEN3=$(psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -t -A -c "select repeat('x', 10000)" | tr -d '\n' | wc -c) + + test "$LEN1" = "10000" || { echo "first probe truncated: $LEN1 (expected 10000)"; exit 1; } + test "$LEN2" = "10000" || { echo "second probe truncated: $LEN2 (expected 10000)"; exit 1; } + test "$LEN3" = "10000" || { echo "third probe truncated: $LEN3 (expected 10000)"; exit 1; } + + OTHER=$(psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -t -A -c "select 42" | head -1) + test "$OTHER" = "42" || { echo "regular query after large check_query desynced: got '$OTHER'"; exit 1; } + + BACKEND=$(curl -s http://127.0.0.1:9127/metrics | awk '/^pg_doorman_pooler_check_query_backend_total / {print $2; f=1} END {if (!f) print 0}') + CACHE=$(curl -s http://127.0.0.1:9127/metrics | awk '/^pg_doorman_pooler_check_query_cache_total / {print $2; f=1} END {if (!f) print 0}') + + echo "backend=$BACKEND cache=$CACHE" + test "$BACKEND" = "1" || { echo "expected backend_total=1 (large response cached after drain), got $BACKEND"; exit 1; } + test "$CACHE" = "2" || { echo "expected cache_total=2, got $CACHE"; exit 1; } + """ + Then the command should succeed From 59732fad28e87e6fe2553f4429a71c51a9ad970d Mon Sep 17 00:00:00 2001 From: dmitrivasilyev Date: Fri, 15 May 2026 21:01:14 +0300 Subject: [PATCH 2/8] Document pooler_check_query response cache feature Operator-facing documentation for the cache shipped in the previous commit: - Stability contract: the query must be deterministic and side-effect free. - Safe values: ";", "select 1", "select 'pg_doorman'", "select version()". - Unsafe values with explicit cache-freeze failure modes: select now() / clock_timestamp() (frozen timestamp), pg_is_in_recovery() (failover blindness), counting queries (frozen count), UPDATE/INSERT/DELETE/CALL/DO (one-shot side effect cached as success forever). - Cold-pool behavior change called out: the first probe per pool now performs one PostgreSQL round-trip, even for the default ";". - Two new counter metrics with the hit-rate formula. - 3.10.0 changelog entry. pg_doorman.toml and pg_doorman.yaml regenerated from the updated fields.yaml so the library reference tests stay in sync. --- documentation/en/src/changelog.md | 60 +++++++++++++++++++++++ documentation/ru/src/reference/general.md | 37 ++++++++++++-- pg_doorman.toml | 6 ++- pg_doorman.yaml | 6 ++- src/app/generate/fields.yaml | 50 ++++++++++++++++--- 5 files changed, 144 insertions(+), 15 deletions(-) diff --git a/documentation/en/src/changelog.md b/documentation/en/src/changelog.md index fc0711631..3e38c5eb1 100644 --- a/documentation/en/src/changelog.md +++ b/documentation/en/src/changelog.md @@ -1,5 +1,65 @@ # Changelog +### 3.10.0 + +Per-pool response cache for `general.pooler_check_query`. The first +matching SimpleQuery in each pool's lifetime is forwarded to PostgreSQL; +every subsequent matching probe is answered from the cache without +touching the backend. + +#### Behavior change for cold pools + +Before this release pg_doorman answered any `pooler_check_query` match +locally with a hardcoded empty result. The default `;` came back instantly +without ever talking to PostgreSQL, and a non-empty value such as `select 1` +returned an empty response that did not match what a real PostgreSQL would +have produced. + +The first probe per pool now does one PostgreSQL round-trip and captures +the real response. If PostgreSQL is unreachable at that moment, the +probing client sees a probe failure instead of an unconditional OK; the +earlier hardcode reported the pooler as healthy even when PostgreSQL was +down. Typical JDBC keepalive queries such as `select 1` (WildFly, HikariCP) +and `select 'pg_doorman'` now return the expected row. + +#### Cache lifecycle + +The cache is per pool and keyed by the query string. A `RELOAD` that +changes `pooler_check_query` invalidates the cache on the next ping; the +new value triggers one fresh backend probe and is then served from cache +until the value changes again. A reload that keeps the same value keeps +the cached response. `ErrorResponse` from the backend is forwarded to +the client unchanged and is never cached, so the next probe retries +against PostgreSQL. + +#### Operator contract + +`pooler_check_query` must be stable: the same input must produce the +same bytes, with no side effects. Safe values: `;`, `select 1`, +`select 'pg_doorman'`, `select version()`. + +Unsafe values that the cache will silently freeze: + +- `select now()`, `select clock_timestamp()` — the cached timestamp + never advances. +- `select pg_is_in_recovery()` — a failover flips the role on + PostgreSQL but the cached response still reports the old role. +- `select count(*) from ` — the cached count is whatever the + first probe observed. +- `UPDATE`, `INSERT`, `DELETE`, `CALL`, `DO` — the side effect runs + once and the success response is cached forever. + +#### New metrics + +- `pg_doorman_pooler_check_query_backend_total` — counter, increments + on each probe forwarded to PostgreSQL (cache miss or + RELOAD-induced re-probe). +- `pg_doorman_pooler_check_query_cache_total` — counter, increments + on each probe served from the cache. + +The ratio `cache_total / (cache_total + backend_total)` is the cache +hit rate. + ### 3.9.1 Web admin console refresh and a follow-up pass on `startup_parameters`. diff --git a/documentation/ru/src/reference/general.md b/documentation/ru/src/reference/general.md index 8ee0f645f..5161bbe2d 100644 --- a/documentation/ru/src/reference/general.md +++ b/documentation/ru/src/reference/general.md @@ -712,8 +712,39 @@ hostnossl all all 192.168.1.0/24 trust ### pooler_check_query -Когда клиент отправляет ровно этот запрос как SimpleQuery, pg_doorman отвечает немедленно -без перенаправления его в PostgreSQL. Полезно для health checks от балансировщиков нагрузки (HAProxy, в стиле pgbouncer `SELECT 1`) -или keepalive-проб уровня приложения. Значение по умолчанию `;` (пустой statement) — самая лёгкая возможная проверка. +Когда клиент отправляет ровно этот запрос как SimpleQuery, pg_doorman обслуживает его через +кеш ответа на уровне пула. Первая совпадающая проба за время жизни каждого пула отправляется +в PostgreSQL, и полный ответ сохраняется. Последующие совпадающие пробы отвечаются из кеша +без обращения к бэкенду. + +Кеш индексируется по строке запроса. `RELOAD` с другим значением `pooler_check_query` +инвалидирует кеш на следующей пробе; новое значение вызывает одну свежую пробу к бэкенду +и затем обслуживается из кеша, пока значение снова не изменится. `RELOAD` с тем же значением +не сбрасывает кеш. `ErrorResponse` от бэкенда передаётся клиенту без изменений и не кешируется, +поэтому следующая проба снова идёт в PostgreSQL. + +Поведение на холодном пуле изменилось: первая проба в каждом пуле теперь делает один +round-trip к PostgreSQL даже для значения по умолчанию `;`. Если PostgreSQL в этот момент +недоступен, клиент-пробер увидит ошибку пробы вместо безусловного OK. Прежний хардкод +локального ответа сообщал, что пулер здоров, даже когда PostgreSQL лежал, и для непустых +значений вроде `select 1` возвращал пустой ответ. + +**Контракт для оператора.** Запрос должен быть детерминированным: один и тот же ввод +обязан давать один и тот же набор байт, без побочных эффектов. Безопасные значения: +`;`, `select 1`, `select 'pg_doorman'`, `select version()`. + +Небезопасные значения, которые кеш молча заморозит: + +- `select now()`, `select clock_timestamp()` — закешированный timestamp перестанет идти вперёд. +- `select pg_is_in_recovery()` — failover поменяет роль на PostgreSQL, но закешированный ответ + всё ещё будет показывать прежнюю роль. +- `select count(*) from
` — закешированное число останется тем, что увидела первая проба. +- `UPDATE`, `INSERT`, `DELETE`, `CALL`, `DO` — побочный эффект выполнится один раз, а ответ + об успехе закешируется навсегда. + +Доля попаданий в кеш экспортируется двумя счётчиками без меток: +`pg_doorman_pooler_check_query_backend_total` (пробы, отправленные в PostgreSQL) и +`pg_doorman_pooler_check_query_cache_total` (пробы, обслуженные из кеша). Отношение +`cache_total / (cache_total + backend_total)` — это hit rate. По умолчанию: `";"`. diff --git a/pg_doorman.toml b/pg_doorman.toml index 3661ece21..dfe889ae8 100644 --- a/pg_doorman.toml +++ b/pg_doorman.toml @@ -235,8 +235,10 @@ sync_server_parameters = false # Default: 1048576 (1048576 bytes) message_size_to_be_stream = 1048576 -# Query intercepted by pg_doorman and answered locally (never reaches PostgreSQL). -# Used by load balancers and monitoring to check if the pooler is alive. +# SimpleQuery used by load balancers and monitoring as a liveness probe. +# The first match per pool is forwarded to PostgreSQL; the response is cached +# and reused for every subsequent match without touching the backend. +# The query must be stable (same input → same output) and side-effect free. # Default: ";" pooler_check_query = ";" diff --git a/pg_doorman.yaml b/pg_doorman.yaml index d5d7ce3ff..043276291 100644 --- a/pg_doorman.yaml +++ b/pg_doorman.yaml @@ -275,8 +275,10 @@ general: # Default: "1MB" (1048576 bytes) message_size_to_be_stream: "1MB" - # Query intercepted by pg_doorman and answered locally (never reaches PostgreSQL). - # Used by load balancers and monitoring to check if the pooler is alive. + # SimpleQuery used by load balancers and monitoring as a liveness probe. + # The first match per pool is forwarded to PostgreSQL; the response is cached + # and reused for every subsequent match without touching the backend. + # The query must be stable (same input → same output) and side-effect free. # Default: ";" pooler_check_query: ";" diff --git a/src/app/generate/fields.yaml b/src/app/generate/fields.yaml index 18adb0e44..831405d26 100644 --- a/src/app/generate/fields.yaml +++ b/src/app/generate/fields.yaml @@ -758,15 +758,49 @@ fields: pooler_check_query: config: en: | - Query intercepted by pg_doorman and answered locally (never reaches PostgreSQL). - Used by load balancers and monitoring to check if the pooler is alive. - ru: | - Запрос, перехватываемый pg_doorman и отвечаемый локально (не доходит до PostgreSQL). - Используется балансировщиками и мониторингом для проверки живости пулера. + SimpleQuery used by load balancers and monitoring as a liveness probe. + The first match per pool is forwarded to PostgreSQL; the response is cached + and reused for every subsequent match without touching the backend. + The query must be stable (same input → same output) and side-effect free. + ru: | + SimpleQuery, который балансировщики и мониторинг используют как проверку живости. + Первое совпадение в пуле отправляется в PostgreSQL; ответ кешируется и переиспользуется + для всех последующих совпадений без обращения к бэкенду. + Запрос должен быть детерминированным и без побочных эффектов. doc: | - When a client sends this exact query as a SimpleQuery, pg_doorman responds immediately - without forwarding it to PostgreSQL. Useful for health checks from load balancers (HAProxy, pgbouncer-style `SELECT 1`) - or application-level keepalive probes. The default `;` (empty statement) is the lightest possible check. + When a client sends this exact query as a SimpleQuery, pg_doorman serves it through a per-pool + response cache. The first matching probe in each pool's lifetime is forwarded to PostgreSQL and + the full response is captured. Subsequent matching probes are answered from the cache without + touching the backend. + + The cache is keyed by the query string. A `RELOAD` that changes `pooler_check_query` invalidates + the cache on the next ping; the new value triggers one fresh backend probe and is then served + from cache until the value changes again. A reload that keeps the same value keeps the cached + response. `ErrorResponse` from the backend is forwarded to the client unchanged and is never + cached, so the next probe retries against PostgreSQL. + + Cold-pool behavior changed: the first probe per pool now does one PostgreSQL round-trip even + for the default `;`. If PostgreSQL is unreachable at that moment, the probing client sees a + probe failure instead of an unconditional OK. The earlier hardcoded local answer reported the + pooler as healthy even when PostgreSQL was down, and made non-empty values such as `select 1` + return an empty response. + + **Operator contract.** The query must be stable: the same input must always produce the same + bytes, with no side effects. Safe values: `;`, `select 1`, `select 'pg_doorman'`, `select version()`. + + Unsafe values that the cache will silently freeze: + + - `select now()`, `select clock_timestamp()` — the cached timestamp never advances. + - `select pg_is_in_recovery()` — a failover flips the role on PostgreSQL but the cached response + still reports the old role. + - `select count(*) from
` — the cached count is whatever the first probe observed. + - `UPDATE`, `INSERT`, `DELETE`, `CALL`, `DO` — the side effect runs once and the success + response is cached forever. + + Cache hit rate is exported as two counters without labels: + `pg_doorman_pooler_check_query_backend_total` (probes forwarded to PostgreSQL) and + `pg_doorman_pooler_check_query_cache_total` (probes served from cache). The ratio + `cache_total / (cache_total + backend_total)` is the hit rate. default: '";"' prepared_statements: From ad48e66d377347adf74d5a0243fd9453fcdbaf54 Mon Sep 17 00:00:00 2001 From: dmitrivasilyev Date: Fri, 15 May 2026 21:11:01 +0300 Subject: [PATCH 3/8] Drop dead pooler_check_query_request_bytes config field The field was declared as Option> on the config struct, defaulted to None, and read by poller_check_query_request_bytes_vec via an if-let-Some shortcut. Nothing ever wrote a Some value into it, so the shortcut was unreachable and the field was permanently dead. Removed the field, the unreachable branch, and its entry in the fields.yaml structural-allowlist for the config validator. --- src/app/generate/annotated.rs | 9 ++++----- src/config/general.rs | 6 ------ src/config/tests.rs | 2 ++ 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/src/app/generate/annotated.rs b/src/app/generate/annotated.rs index 04a3447a1..36e2c3669 100644 --- a/src/app/generate/annotated.rs +++ b/src/app/generate/annotated.rs @@ -2252,11 +2252,10 @@ mod tests { // Structural/internal fields that don't have their own fields.yaml entry let structural_fields: &[&str] = &[ - "users", // nested sub-section - "pools", // top-level section - "path", // internal runtime field - "pooler_check_query_request_bytes", // derived from pooler_check_query - "auth_query", // nested struct, checked via "auth_query" section + "users", // nested sub-section + "pools", // top-level section + "path", // internal runtime field + "auth_query", // nested struct, checked via "auth_query" section ]; // AuthQueryConfig pub fields live in pool.rs alongside Pool pub fields. diff --git a/src/config/general.rs b/src/config/general.rs index d5406b1a6..b9cbe3cdc 100644 --- a/src/config/general.rs +++ b/src/config/general.rs @@ -152,8 +152,6 @@ pub struct General { // pooler_check_query: ping pooler with simple query like '/* ping pooler */;'. #[serde(default = "General::default_pooler_check_query")] pub pooler_check_query: String, - #[serde(skip_serializing_if = "Option::is_none")] - pooler_check_query_request_bytes: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub tls_certificate: Option, @@ -492,9 +490,6 @@ impl General { } pub fn poller_check_query_request_bytes_vec(&self) -> Vec { - if let Some(ref bytes) = self.pooler_check_query_request_bytes { - return bytes.clone(); - } let mut buf = BytesMut::from(&b"Q"[..]); buf.put_i32(self.pooler_check_query.len() as i32 + mem::size_of::() as i32 + 1); buf.put_slice(self.pooler_check_query.as_bytes()); @@ -601,7 +596,6 @@ impl Default for General { daemon_pid_file: Self::default_daemon_pid_file(), syslog_prog_name: None, pooler_check_query: Self::default_pooler_check_query(), - pooler_check_query_request_bytes: None, backlog: Self::default_backlog(), } } diff --git a/src/config/tests.rs b/src/config/tests.rs index 331f6217a..991ae73ec 100644 --- a/src/config/tests.rs +++ b/src/config/tests.rs @@ -1,5 +1,7 @@ //! Tests for configuration module. +#![allow(clippy::field_reassign_with_default)] + use super::*; use serial_test::serial; use std::io::Write; From 5ec5881eba560ff8a4d90a1665dbbedf5c21cd3b Mon Sep 17 00:00:00 2001 From: dmitrivasilyev Date: Fri, 15 May 2026 21:39:28 +0300 Subject: [PATCH 4/8] Strengthen pooler_check_query path: consistency, lifecycle, observability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multi-discipline review flagged three correctness issues and a hot-path regression: - Stale-client cache poisoning across RELOAD. Before, the byte-match used a snapshot captured at client connect, while the cache key came from the live config in the handler. A RELOAD that races with an in-flight probe could write a cache entry for one query under the key of another. Now the byte-match and the cache key read the same process-wide snapshot, swapped atomically when the config reloads. Bytes from a stale config no longer match the active probe; they fall through to the normal SQL path instead. - The handler re-resolved the pool through the global pool map, which could pick a different generation than the client's authenticated pool after a RELOAD or dynamic-pool swap. The authenticated pool is now threaded through. - Backend lifecycle was incomplete. recv/send/cleanup errors did not mark the backend bad before returning it to the pool, and the cache accepted any response without an ErrorResponse, including non-idle states like an open transaction or a COPY in progress. Errors now mark the backend bad before propagating, and the cache only accepts responses that end in ReadyForQuery('I')dle. Hot path: the cache reader switched from load_full to a guarded read, dropping an atomic refcount bump per warm hit. The full Config clone per probe is gone — the new snapshot already carries the query string. Cleanup: removed the per-client request_bytes vec and its initialisers, the unused poller_check_query_request_bytes_vec helper, and a config field that was declared but never written. Docs: the two probe counters are now in the canonical prometheus.md reference (EN regenerated, RU hand-edited). Tests: - 9 new unit tests (snapshot encoding, idle-RFQ detector). - 1 new BDD scenario: stale probe bytes after RELOAD bypass the cache path and reach the backend as regular SQL; the new probe value drives a fresh backend hit and populates a fresh cache entry. --- documentation/ru/src/reference/prometheus.md | 2 + src/app/generate/annotated.rs | 3 +- src/app/generate/docs.rs | 4 +- src/client/core.rs | 2 - src/client/migration.rs | 2 - src/client/startup.rs | 2 - src/client/transaction.rs | 77 +++++++++++----- src/config/general.rs | 10 -- src/config/mod.rs | 5 + src/config/pooler_check_query.rs | 79 ++++++++++++++++ src/messages/mod.rs | 17 ++-- src/messages/protocol.rs | 9 ++ src/messages/protocol_tests.rs | 38 ++++++++ src/pool/check_query_cache.rs | 4 +- .../features/pooler-check-query-cache.feature | 91 +++++++++++++++++++ 15 files changed, 293 insertions(+), 52 deletions(-) create mode 100644 src/config/pooler_check_query.rs diff --git a/documentation/ru/src/reference/prometheus.md b/documentation/ru/src/reference/prometheus.md index bfae15cab..ecd6698c8 100644 --- a/documentation/ru/src/reference/prometheus.md +++ b/documentation/ru/src/reference/prometheus.md @@ -136,6 +136,8 @@ Query interner общий для процесса. У этих метрик не | `pg_doorman_query_interner_evictions_total` | Counter по `kind` и `reason` (`gc_passive` или `ttl_expired`). Named-записи удаляются, когда их больше не держит ни один кеш вне interner; anonymous-записи удаляются после idle TTL. | | `pg_doorman_query_interner_synthetic_misses_total` | Counter синтетических ответов SQLSTATE `26000` для anonymous prepared statements, состояние которых уже недоступно при последующем `Bind` или `Describe`. Перед увеличением `query_interner_anon_idle_ttl_seconds` проверьте вытеснения из клиентского Anonymous LRU, WARN-логи, `RESET INTERNER` и TTL-вытеснения. | | `pg_doorman_query_interner_gc_duration_seconds` | Гистограмма времени одного прохода GC interner (named и anonymous вместе), в секундах. Помогает увидеть, когда большой interner делает обход заметным. | +| `pg_doorman_pooler_check_query_backend_total` | Counter пробов `pooler_check_query`, отправленных в PostgreSQL (промах кеша или повторная проба после RELOAD). После прогрева значение должно быть стабильным; постоянно растущий rate означает, что популовый кеш не удерживает запись. | +| `pg_doorman_pooler_check_query_cache_total` | Counter пробов `pooler_check_query`, обслуженных из популового кеша ответа без обращения к бэкенду. Hit rate = `cache_total / (cache_total + backend_total)`. | ### Метрики серверного TLS diff --git a/src/app/generate/annotated.rs b/src/app/generate/annotated.rs index 36e2c3669..748b64de4 100644 --- a/src/app/generate/annotated.rs +++ b/src/app/generate/annotated.rs @@ -2149,7 +2149,7 @@ mod tests { if !trimmed.starts_with("pub ") || !trimmed.contains(':') { return None; } - // Skip pub fn, pub mod, pub struct, pub enum, pub use, pub type, pub const + // Skip pub fn, pub mod, pub struct, pub enum, pub use, pub type, pub const, pub static if trimmed.starts_with("pub fn ") || trimmed.starts_with("pub mod ") || trimmed.starts_with("pub struct ") @@ -2157,6 +2157,7 @@ mod tests { || trimmed.starts_with("pub use ") || trimmed.starts_with("pub type ") || trimmed.starts_with("pub const ") + || trimmed.starts_with("pub static ") { return None; } diff --git a/src/app/generate/docs.rs b/src/app/generate/docs.rs index cf051aee8..760671070 100644 --- a/src/app/generate/docs.rs +++ b/src/app/generate/docs.rs @@ -508,7 +508,9 @@ fn write_prometheus_metrics_section(out: &mut String) { let _ = writeln!(out, "| `pg_doorman_query_interner_bytes` | Gauge by `kind` (`named` or `anonymous`). Total bytes of interned query text. Refreshed once per GC sweep. |"); let _ = writeln!(out, "| `pg_doorman_query_interner_evictions_total` | Counter by `kind` and `reason` (`gc_passive` or `ttl_expired`). Named entries are removed when no cache outside the interner still holds them; anonymous entries are removed after the idle TTL. |"); let _ = writeln!(out, "| `pg_doorman_query_interner_synthetic_misses_total` | Counter of synthetic SQLSTATE `26000` responses for anonymous prepared statements whose state was no longer available when a later `Bind` or `Describe` referenced it. Check client Anonymous LRU evictions, WARN logs, `RESET INTERNER`, and TTL evictions before increasing `query_interner_anon_idle_ttl_seconds`. |"); - let _ = writeln!(out, "| `pg_doorman_query_interner_gc_duration_seconds` | Histogram of one interner GC sweep (named and anonymous combined), in seconds. Use this to detect large interners that make sweep time visible. |\n"); + let _ = writeln!(out, "| `pg_doorman_query_interner_gc_duration_seconds` | Histogram of one interner GC sweep (named and anonymous combined), in seconds. Use this to detect large interners that make sweep time visible. |"); + let _ = writeln!(out, "| `pg_doorman_pooler_check_query_backend_total` | Counter of `pooler_check_query` probes forwarded to PostgreSQL (cache miss or RELOAD-induced re-probe). Steady-state value should be flat after warmup; a continuously rising rate means the per-pool cache is not retaining its entry. |"); + let _ = writeln!(out, "| `pg_doorman_pooler_check_query_cache_total` | Counter of `pooler_check_query` probes answered from the per-pool response cache without touching the backend. Hit rate = `cache_total / (cache_total + backend_total)`. |\n"); // Grafana Dashboard let _ = writeln!(out, "## Grafana Dashboard\n"); diff --git a/src/client/core.rs b/src/client/core.rs index d7daa3f11..141db1759 100644 --- a/src/client/core.rs +++ b/src/client/core.rs @@ -585,8 +585,6 @@ pub struct Client { pub(crate) client_last_messages_in_tx: PooledBuffer, - pub(crate) pooler_check_query_request_vec: Vec, - /// Pending BEGIN message for deferred connection optimization. /// When client sends standalone "begin;", we synthesize response /// and defer actual BEGIN until next query arrives. diff --git a/src/client/migration.rs b/src/client/migration.rs index ce631c150..01ab32a2e 100644 --- a/src/client/migration.rs +++ b/src/client/migration.rs @@ -526,7 +526,6 @@ pub async fn reconstruct_client( prepared, client_last_messages_in_tx: PooledBuffer::new(), max_memory_usage: config.general.max_memory_usage.as_bytes(), - pooler_check_query_request_vec: config.general.poller_check_query_request_bytes_vec(), client_pending_begin: None, #[cfg(unix)] raw_fd, @@ -633,7 +632,6 @@ pub async fn reconstruct_tls_client( prepared, client_last_messages_in_tx: PooledBuffer::new(), max_memory_usage: config.general.max_memory_usage.as_bytes(), - pooler_check_query_request_vec: config.general.poller_check_query_request_bytes_vec(), client_pending_begin: None, #[cfg(unix)] raw_fd, diff --git a/src/client/startup.rs b/src/client/startup.rs index 562963ffc..9f661c270 100644 --- a/src/client/startup.rs +++ b/src/client/startup.rs @@ -458,7 +458,6 @@ where prepared: PreparedStatementState::new(prepared_statements_enabled, anon_cache_size), client_last_messages_in_tx: PooledBuffer::new(), max_memory_usage: config.general.max_memory_usage.as_bytes(), - pooler_check_query_request_vec: config.general.poller_check_query_request_bytes_vec(), client_pending_begin: None, #[cfg(unix)] raw_fd, @@ -501,7 +500,6 @@ where session_xact_start: None, client_last_messages_in_tx: PooledBuffer::new(), max_memory_usage: 128 * 1024 * 1024, - pooler_check_query_request_vec: Vec::new(), client_pending_begin: None, #[cfg(unix)] raw_fd: None, diff --git a/src/client/transaction.rs b/src/client/transaction.rs index 2dd12fb9c..bb989a865 100644 --- a/src/client/transaction.rs +++ b/src/client/transaction.rs @@ -17,8 +17,9 @@ use crate::client::core::{BatchOperation, Client, PreparedStatementKey}; use crate::client::util::{is_standalone_begin, QUERY_DEALLOCATE}; use crate::errors::Error; use crate::messages::{ - deallocate_response, error_response, error_response_terminal, has_error_response, - insert_close_complete_after_last_close_complete, read_message_reuse, write_all_flush, + deallocate_response, ends_with_idle_ready_for_query, error_response, error_response_terminal, + has_error_response, insert_close_complete_after_last_close_complete, read_message_reuse, + write_all_flush, }; use crate::pool::CANCELED_PIDS; use crate::server::Server; @@ -314,19 +315,26 @@ where /// Returns `Ok(true)` if query was handled (caller should continue to next iteration), /// `Ok(false)` if query needs normal processing. #[inline] - async fn try_handle_without_server(&mut self, message: &BytesMut) -> Result { + async fn try_handle_without_server( + &mut self, + message: &BytesMut, + pool: &crate::pool::ConnectionPool, + ) -> Result { if message[0] != b'Q' { return Ok(false); } // Pooler health-check query — byte-for-byte match against the - // pre-encoded `general.pooler_check_query`. First probe goes to - // PostgreSQL and the response is cached per pool; subsequent probes - // are served from cache. ErrorResponse is never cached. - if message.len() == self.pooler_check_query_request_vec.len() - && self.pooler_check_query_request_vec.as_slice() == &message[..] + // pre-encoded `general.pooler_check_query`. The same snapshot is + // used as the cache key in `handle_pooler_check_query`, so a + // RELOAD that races with an in-flight probe can never mix + // request bytes from one config with a cache key from another. + let snapshot = crate::config::POOLER_CHECK_QUERY_SNAPSHOT.load_full(); + if message.len() == snapshot.request_bytes.len() + && snapshot.request_bytes.as_ref() == &message[..] { - self.handle_pooler_check_query(message).await?; + self.handle_pooler_check_query(message, pool, &snapshot) + .await?; return Ok(true); } @@ -379,16 +387,17 @@ where /// the pool's lifetime (and the first after a RELOAD that changes the /// value) forwards the query to PostgreSQL; subsequent probes answer /// from the per-pool response cache without touching the backend. - /// `ErrorResponse` from the backend is forwarded to the client as-is - /// and never cached. - async fn handle_pooler_check_query(&mut self, message: &BytesMut) -> Result<(), Error> { - let pool = self.get_pool().await?; - let current_query = crate::config::get_config() - .general - .pooler_check_query - .clone(); - - if let Some(cached) = pool.check_query_cache.get(¤t_query) { + /// `ErrorResponse` and any response that does not end in + /// `ReadyForQuery('I')dle` are forwarded to the client as-is and + /// never cached — caching them would freeze a non-idle backend state + /// and replay it to later probes. + async fn handle_pooler_check_query( + &mut self, + message: &BytesMut, + pool: &crate::pool::ConnectionPool, + snapshot: &crate::config::PoolerCheckQuerySnapshot, + ) -> Result<(), Error> { + if let Some(cached) = pool.check_query_cache.get(&snapshot.query) { POOLER_CHECK_QUERY_CACHE_TOTAL.inc(); write_all_flush(&mut self.write, cached.as_ref()).await?; return Ok(()); @@ -400,7 +409,17 @@ where )) })?; - conn.send_and_flush(message).await?; + if let Err(err) = conn.checkin_cleanup().await { + conn.mark_bad(&format!( + "pooler_check_query: checkin_cleanup failed: {err}" + )); + return Err(err); + } + + if let Err(err) = conn.send_and_flush(message).await { + conn.mark_bad(&format!("pooler_check_query: send failed: {err}")); + return Err(err); + } POOLER_CHECK_QUERY_BACKEND_TOTAL.inc(); // Server::recv must be drained in a loop until is_data_available() @@ -411,7 +430,13 @@ where loop { let mut overflow_buf = BytesMut::new(); let writer = BufferingWriter::new(&mut overflow_buf); - let chunk = conn.recv(writer, None).await?; + let chunk = match conn.recv(writer, None).await { + Ok(chunk) => chunk, + Err(err) => { + conn.mark_bad(&format!("pooler_check_query: recv failed: {err}")); + return Err(err); + } + }; response.extend_from_slice(&chunk); if !overflow_buf.is_empty() { response.extend_from_slice(&overflow_buf); @@ -423,8 +448,9 @@ where write_all_flush(&mut self.write, &response).await?; - if !has_error_response(&response) { - pool.check_query_cache.set(current_query, response.freeze()); + if !has_error_response(&response) && ends_with_idle_ready_for_query(&response) { + pool.check_query_cache + .set(snapshot.query.clone(), response.freeze()); } Ok(()) @@ -750,7 +776,10 @@ where let current_pool = pool.as_ref().unwrap(); // Handle fast queries (pooler check, DEALLOCATE) without server - if self.try_handle_without_server(&message).await? { + if self + .try_handle_without_server(&message, current_pool) + .await? + { continue; } diff --git a/src/config/general.rs b/src/config/general.rs index b9cbe3cdc..09861f94f 100644 --- a/src/config/general.rs +++ b/src/config/general.rs @@ -1,9 +1,7 @@ //! General configuration settings for the connection pooler. -use bytes::{BufMut, BytesMut}; use ipnet::IpNet; use serde_derive::{Deserialize, Serialize}; -use std::mem; use super::tls; use super::{ByteSize, Duration, Include}; @@ -489,14 +487,6 @@ impl General { ";".to_string() } - pub fn poller_check_query_request_bytes_vec(&self) -> Vec { - let mut buf = BytesMut::from(&b"Q"[..]); - buf.put_i32(self.pooler_check_query.len() as i32 + mem::size_of::() as i32 + 1); - buf.put_slice(self.pooler_check_query.as_bytes()); - buf.put_u8(b'\0'); - buf.to_vec() - } - pub fn default_hba() -> Vec { vec![] } diff --git a/src/config/mod.rs b/src/config/mod.rs index d3f3e0911..01562975c 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -27,6 +27,7 @@ mod duration; mod general; mod include; mod pool; +mod pooler_check_query; pub mod startup_parameters; mod talos; pub mod tls; @@ -43,6 +44,9 @@ pub use duration::Duration; pub use general::General; pub use include::{GeneralWithInclude, Include, ServerConfig}; pub use pool::{AuthQueryConfig, Pool}; +pub use pooler_check_query::{ + update_pooler_check_query_snapshot, PoolerCheckQuerySnapshot, POOLER_CHECK_QUERY_SNAPSHOT, +}; pub use talos::Talos; pub use tls::{ServerTlsConfig, ServerTlsMode}; pub use user::User; @@ -862,6 +866,7 @@ pub async fn parse(path: &str) -> Result<(), Error> { // Update the configuration globally. CONFIG.store(Arc::new(config.clone())); + update_pooler_check_query_snapshot(&config.general.pooler_check_query); Ok(()) } diff --git a/src/config/pooler_check_query.rs b/src/config/pooler_check_query.rs new file mode 100644 index 000000000..b33e52ad9 --- /dev/null +++ b/src/config/pooler_check_query.rs @@ -0,0 +1,79 @@ +//! Process-wide snapshot of `general.pooler_check_query` and its pre-encoded +//! SimpleQuery wire bytes. +//! +//! Lives outside `general.rs` because the snapshot is runtime cache state +//! rather than a serializable config field. Updated on every config +//! `parse()` so that the byte-match path and the per-pool response cache +//! key always read the same pair, even across a `RELOAD` that races with +//! an in-flight probe. + +use arc_swap::ArcSwap; +use bytes::{BufMut, Bytes, BytesMut}; +use once_cell::sync::Lazy; +use std::mem; +use std::sync::Arc; + +/// Atomic snapshot of `general.pooler_check_query` and its pre-encoded +/// SimpleQuery wire bytes. Initialized with the default `;` value. +pub static POOLER_CHECK_QUERY_SNAPSHOT: Lazy> = + Lazy::new(|| ArcSwap::from_pointee(PoolerCheckQuerySnapshot::new(";"))); + +#[derive(Debug)] +pub struct PoolerCheckQuerySnapshot { + pub query: String, + pub request_bytes: Bytes, +} + +impl PoolerCheckQuerySnapshot { + pub fn new(query: &str) -> Self { + let mut buf = BytesMut::with_capacity(query.len() + 6); + buf.put_u8(b'Q'); + buf.put_i32(query.len() as i32 + mem::size_of::() as i32 + 1); + buf.put_slice(query.as_bytes()); + buf.put_u8(b'\0'); + Self { + query: query.to_string(), + request_bytes: buf.freeze(), + } + } +} + +/// Atomically replace the global snapshot. Called from config `parse()` +/// after the new `Config` has been swapped into `CONFIG`. +pub fn update_pooler_check_query_snapshot(query: &str) { + POOLER_CHECK_QUERY_SNAPSHOT.store(Arc::new(PoolerCheckQuerySnapshot::new(query))); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn snapshot_encodes_simple_query_wire_format() { + let snap = PoolerCheckQuerySnapshot::new(";"); + assert_eq!(snap.query, ";"); + // 'Q' + i32(6) + ';' + '\0' = 7 bytes total; length field counts itself + body + null + assert_eq!(snap.request_bytes.as_ref(), &[b'Q', 0, 0, 0, 6, b';', 0]); + } + + #[test] + fn snapshot_encodes_select_one() { + let snap = PoolerCheckQuerySnapshot::new("select 1"); + // 'Q' + i32(13) + "select 1" + '\0' + let expected = { + let mut v = vec![b'Q', 0, 0, 0, 13]; + v.extend_from_slice(b"select 1"); + v.push(0); + v + }; + assert_eq!(snap.request_bytes.as_ref(), &expected[..]); + } + + #[test] + fn update_swaps_global_snapshot() { + update_pooler_check_query_snapshot("select 42"); + let live = POOLER_CHECK_QUERY_SNAPSHOT.load(); + assert_eq!(live.query, "select 42"); + update_pooler_check_query_snapshot(";"); + } +} diff --git a/src/messages/mod.rs b/src/messages/mod.rs index 36000deb0..598c4af94 100644 --- a/src/messages/mod.rs +++ b/src/messages/mod.rs @@ -21,14 +21,15 @@ pub use config_socket::{configure_tcp_socket, configure_unix_socket}; pub use error::PgErrorMsg; pub use extended::{close_complete, Bind, Close, Describe, ExtendedProtocolData, Parse}; pub use protocol::{ - command_complete, data_row, data_row_nullable, deallocate_response, error_message, - error_response, error_response_terminal, flush, has_error_response, - insert_close_complete_after_last_close_complete, insert_close_complete_before_ready_for_query, - insert_parse_complete_before_bind_complete, insert_parse_complete_before_parameter_description, - md5_challenge, md5_hash_password, md5_hash_second_pass, md5_password, md5_password_with_hash, - notify, parse_complete, parse_params, parse_startup, plain_password_challenge, read_password, - ready_for_query, scram_server_response, scram_start_challenge, server_parameter_message, - simple_query, ssl_request, startup, sync, wrong_password, + command_complete, data_row, data_row_nullable, deallocate_response, + ends_with_idle_ready_for_query, error_message, error_response, error_response_terminal, flush, + has_error_response, insert_close_complete_after_last_close_complete, + insert_close_complete_before_ready_for_query, insert_parse_complete_before_bind_complete, + insert_parse_complete_before_parameter_description, md5_challenge, md5_hash_password, + md5_hash_second_pass, md5_password, md5_password_with_hash, notify, parse_complete, + parse_params, parse_startup, plain_password_challenge, read_password, ready_for_query, + scram_server_response, scram_start_challenge, server_parameter_message, simple_query, + ssl_request, startup, sync, wrong_password, }; pub use socket::{ proxy_copy_data, proxy_copy_data_with_timeout, read_message, read_message_body_reuse, diff --git a/src/messages/protocol.rs b/src/messages/protocol.rs index 40304ed41..23e71521b 100644 --- a/src/messages/protocol.rs +++ b/src/messages/protocol.rs @@ -615,6 +615,15 @@ pub fn parse_complete() -> BytesMut { bytes } +/// Returns `true` iff `bytes` ends with a complete `ReadyForQuery('I')dle` +/// frame: `Z 00 00 00 05 I`. The check confirms that the backend left the +/// SimpleQuery cycle cleanly and is not in a transaction or error block — +/// the only state in which the response is safe to cache and replay. +pub fn ends_with_idle_ready_for_query(bytes: &[u8]) -> bool { + const RFQ_IDLE: &[u8] = &[b'Z', 0, 0, 0, 5, b'I']; + bytes.ends_with(RFQ_IDLE) +} + /// Scan a buffered response stream for an `ErrorResponse` (tag `b'E'`) frame. /// Returns `true` if any frame in `bytes` has the error tag. /// diff --git a/src/messages/protocol_tests.rs b/src/messages/protocol_tests.rs index 58fde3a1d..3fd37fb16 100644 --- a/src/messages/protocol_tests.rs +++ b/src/messages/protocol_tests.rs @@ -769,3 +769,41 @@ fn has_error_response_truncated_length_field_returns_false() { let buf = vec![b'D', 0, 0]; assert!(!super::protocol::has_error_response(&buf)); } + +#[test] +fn ends_with_idle_rfq_empty_buffer_returns_false() { + assert!(!super::protocol::ends_with_idle_ready_for_query(&[])); +} + +#[test] +fn ends_with_idle_rfq_only_idle_rfq_returns_true() { + let buf = ready_for_query_msg(b'I'); + assert!(super::protocol::ends_with_idle_ready_for_query(&buf)); +} + +#[test] +fn ends_with_idle_rfq_transaction_rfq_returns_false() { + let buf = ready_for_query_msg(b'T'); + assert!(!super::protocol::ends_with_idle_ready_for_query(&buf)); +} + +#[test] +fn ends_with_idle_rfq_error_state_rfq_returns_false() { + let buf = ready_for_query_msg(b'E'); + assert!(!super::protocol::ends_with_idle_ready_for_query(&buf)); +} + +#[test] +fn ends_with_idle_rfq_select_then_idle_returns_true() { + let mut buf = data_row_single_text(b"1"); + buf.extend_from_slice(&command_complete_msg("SELECT 1")); + buf.extend_from_slice(&ready_for_query_msg(b'I')); + assert!(super::protocol::ends_with_idle_ready_for_query(&buf)); +} + +#[test] +fn ends_with_idle_rfq_truncated_trailing_byte_returns_false() { + // Tail is one byte short of a full RFQ frame. + let buf = vec![b'Z', 0, 0, 0, 5]; + assert!(!super::protocol::ends_with_idle_ready_for_query(&buf)); +} diff --git a/src/pool/check_query_cache.rs b/src/pool/check_query_cache.rs index 384be4552..e9658dcd6 100644 --- a/src/pool/check_query_cache.rs +++ b/src/pool/check_query_cache.rs @@ -26,8 +26,8 @@ impl CheckQueryCache { /// Returns `Some(bytes)` when the cache holds a response for `current_query`. /// Returns `None` when the cache is empty or the stored query no longer matches. pub fn get(&self, current_query: &str) -> Option { - let snapshot = self.inner.load_full(); - match snapshot.as_ref() { + let guard = self.inner.load(); + match guard.as_ref() { Some((q, bytes)) if q == current_query => Some(bytes.clone()), _ => None, } diff --git a/tests/bdd/features/pooler-check-query-cache.feature b/tests/bdd/features/pooler-check-query-cache.feature index e9e956b0c..fa1a07336 100644 --- a/tests/bdd/features/pooler-check-query-cache.feature +++ b/tests/bdd/features/pooler-check-query-cache.feature @@ -281,3 +281,94 @@ Feature: pooler_check_query response cache test "$CACHE" = "2" || { echo "expected cache_total=2, got $CACHE"; exit 1; } """ Then the command should succeed + + Scenario: bytes from the previous pooler_check_query stop matching after RELOAD + Given pg_doorman started with config: + """ + [prometheus] + enabled = true + host = "0.0.0.0" + port = 9127 + + [general] + host = "127.0.0.1" + port = ${DOORMAN_PORT} + pg_hba = {path = "${DOORMAN_HBA_FILE}"} + admin_username = "admin" + admin_password = "admin" + pooler_check_query = "select 1" + tls_private_key = "${DOORMAN_SSL_KEY}" + tls_certificate = "${DOORMAN_SSL_CERT}" + + [pools.example_db] + server_host = "127.0.0.1" + server_port = ${PG_PORT} + pool_mode = "transaction" + + [[pools.example_db.users]] + username = "example_user_1" + password = "md58a67a0c805a5ee0384ea28e0dea557b6" + pool_size = 4 + """ + When we create admin session "adm" to pg_doorman as "admin" with password "admin" + And I run shell command: + """ + export PGPASSWORD=test + psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -c "select 1" >/dev/null + psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -c "select 1" >/dev/null + """ + Then the command should succeed + When we overwrite pg_doorman config file with: + """ + [prometheus] + enabled = true + host = "0.0.0.0" + port = 9127 + + [general] + host = "127.0.0.1" + port = ${DOORMAN_PORT} + pg_hba = {path = "${DOORMAN_HBA_FILE}"} + admin_username = "admin" + admin_password = "admin" + pooler_check_query = "select 2" + tls_private_key = "${DOORMAN_SSL_KEY}" + tls_certificate = "${DOORMAN_SSL_CERT}" + + [pools.example_db] + server_host = "127.0.0.1" + server_port = ${PG_PORT} + pool_mode = "transaction" + + [[pools.example_db.users]] + username = "example_user_1" + password = "md58a67a0c805a5ee0384ea28e0dea557b6" + pool_size = 4 + """ + And we execute "RELOAD" on admin session "adm" and store response + And we sleep for 500 milliseconds + And I run shell command: + """ + export PGPASSWORD=test + + # "select 1" no longer matches the active probe — the snapshot now holds + # "select 2". The bytes go through the normal SQL path and reach the + # backend as a regular query; check_query counters must not move. + OUT=$(psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -t -A -c "select 1" | tr -d '\n') + test "$OUT" = "1" || { echo "regular SQL path for stale probe returned '$OUT', expected '1'"; exit 1; } + + # The new probe value must now run the cache flow. + psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -c "select 2" >/dev/null + psql -h 127.0.0.1 -p ${DOORMAN_PORT} -U example_user_1 -d example_db -c "select 2" >/dev/null + + BACKEND=$(curl -s http://127.0.0.1:9127/metrics | awk '/^pg_doorman_pooler_check_query_backend_total / {print $2; f=1} END {if (!f) print 0}') + CACHE=$(curl -s http://127.0.0.1:9127/metrics | awk '/^pg_doorman_pooler_check_query_cache_total / {print $2; f=1} END {if (!f) print 0}') + + echo "backend=$BACKEND cache=$CACHE" + # backend_total: 1 from pre-RELOAD select 1, 1 from first post-RELOAD select 2 = 2 + # cache_total: 1 pre-RELOAD select 1 hit, 1 post-RELOAD select 2 hit = 2 + # The stale "select 1" after RELOAD goes through normal SQL, not check_query. + test "$BACKEND" = "2" || { echo "expected backend_total=2, got $BACKEND"; exit 1; } + test "$CACHE" = "2" || { echo "expected cache_total=2, got $CACHE"; exit 1; } + """ + Then the command should succeed From c4d11bb5240a89d1511cc4f49e2a47700adf2ed4 Mon Sep 17 00:00:00 2001 From: dmitrivasilyev Date: Fri, 15 May 2026 22:26:02 +0300 Subject: [PATCH 5/8] Tag pooler_check_query BDD feature so CI actually runs it The BDD matrix in .github/workflows/bdd-tests.yml filters scenarios by @rust-1..4 / @go / @nodejs / etc. tags. The cache feature shipped with only @pooler-check-query-cache, so every CI job silently skipped it while reporting green. Adding the @rust @rust-2 tags routes the six scenarios into the existing Rust part 2 job, the least-loaded Rust shard in the current matrix. --- tests/bdd/features/pooler-check-query-cache.feature | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/bdd/features/pooler-check-query-cache.feature b/tests/bdd/features/pooler-check-query-cache.feature index fa1a07336..2337e24e1 100644 --- a/tests/bdd/features/pooler-check-query-cache.feature +++ b/tests/bdd/features/pooler-check-query-cache.feature @@ -1,4 +1,4 @@ -@pooler-check-query-cache +@rust @rust-2 @pooler-check-query-cache Feature: pooler_check_query response cache The first SimpleQuery matching general.pooler_check_query in a pool's lifetime is forwarded to PostgreSQL. The response is cached per pool From 09de1d981a6ef6f43d096d96dd82511277a4967e Mon Sep 17 00:00:00 2001 From: dmitrivasilyev Date: Fri, 15 May 2026 22:36:12 +0300 Subject: [PATCH 6/8] Wire 16 orphan BDD features into the CI matrix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BDD matrix in .github/workflows/bdd-tests.yml dispatches scenarios by matrix tag (@rust-1..4, @go, @python, @nodejs, etc.). Sixteen feature files carried only their own private tags (@config-test, @connection, @hba, @prometheus, ...) — they passed `make test-bdd TAGS=@foo` locally but every CI job silently skipped them while the suite reported green. The lapse was caught when feat/pooler-check-query-cache landed on the same trap. Added @rust @rust-N to each, distributing across the three less-loaded shards (rust-1 +5, rust-2 +5, rust-4 +6) and leaving rust-3 (sleep-heavy) untouched. Resulting balance: 8 / 9 / 20 / 14. `bench.feature` already runs via .github/workflows/bench.yml in release mode, so it stays on @bench alone. --- tests/bdd/features/config-test.feature | 2 +- tests/bdd/features/connection.feature | 2 +- tests/bdd/features/direct-handoff.feature | 2 +- tests/bdd/features/flush-timeout-protocol-violation.feature | 2 +- tests/bdd/features/hba.feature | 2 +- tests/bdd/features/pipeline-async-disconnect.feature | 2 +- tests/bdd/features/pool-internal-bench.feature | 2 +- tests/bdd/features/pool-pressure.feature | 2 +- tests/bdd/features/pool-size-show-pools.feature | 2 +- tests/bdd/features/prepared-statement-doesnt-exist-bug.feature | 2 +- tests/bdd/features/prometheus.feature | 2 +- tests/bdd/features/rust-deferred-begin-bug.feature | 2 +- tests/bdd/features/selectin-loading-bug.feature | 2 +- tests/bdd/features/session-mode-error-handling.feature | 2 +- tests/bdd/features/session-mode-stats.feature | 2 +- tests/bdd/features/show-pools-memory.feature | 2 +- 16 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/bdd/features/config-test.feature b/tests/bdd/features/config-test.feature index 1768f792d..23e48c6f4 100644 --- a/tests/bdd/features/config-test.feature +++ b/tests/bdd/features/config-test.feature @@ -1,4 +1,4 @@ -@config-test +@rust @rust-1 @config-test Feature: Configuration test mode (-t / --test-config) pg_doorman should support nginx-style configuration validation with -t flag. This allows validating configuration files before deployment without starting the server. diff --git a/tests/bdd/features/connection.feature b/tests/bdd/features/connection.feature index 266a8ddf2..86be9cbe5 100644 --- a/tests/bdd/features/connection.feature +++ b/tests/bdd/features/connection.feature @@ -1,4 +1,4 @@ -@connection +@rust @rust-2 @connection Feature: Connection through pg_doorman Scenario: Successful connection to PostgreSQL through pg_doorman diff --git a/tests/bdd/features/direct-handoff.feature b/tests/bdd/features/direct-handoff.feature index 4733694de..2a9f48c06 100644 --- a/tests/bdd/features/direct-handoff.feature +++ b/tests/bdd/features/direct-handoff.feature @@ -1,4 +1,4 @@ -@pool @direct-handoff +@rust @rust-4 @pool @direct-handoff Feature: Direct handoff — return_object delivers via oneshot channel Regression harness for the oneshot-channel handoff path in return_object. When a connection is returned and the waiters queue diff --git a/tests/bdd/features/flush-timeout-protocol-violation.feature b/tests/bdd/features/flush-timeout-protocol-violation.feature index 326ccb860..3b9f7685d 100644 --- a/tests/bdd/features/flush-timeout-protocol-violation.feature +++ b/tests/bdd/features/flush-timeout-protocol-violation.feature @@ -1,4 +1,4 @@ -@flush-timeout +@rust @rust-1 @flush-timeout Feature: Flush timeout should send proper ErrorResponse to client When pg_doorman's 5s flush timeout fires (server TCP write blocks), the client must receive a PostgreSQL ErrorResponse message, not a diff --git a/tests/bdd/features/hba.feature b/tests/bdd/features/hba.feature index 550f26792..4c9408efe 100644 --- a/tests/bdd/features/hba.feature +++ b/tests/bdd/features/hba.feature @@ -1,4 +1,4 @@ -@hba +@rust @rust-2 @hba Feature: HBA authentication tests Test pg_doorman HBA trust authentication and deny rules diff --git a/tests/bdd/features/pipeline-async-disconnect.feature b/tests/bdd/features/pipeline-async-disconnect.feature index 21df3a1ce..6a87191f5 100644 --- a/tests/bdd/features/pipeline-async-disconnect.feature +++ b/tests/bdd/features/pipeline-async-disconnect.feature @@ -1,4 +1,4 @@ -@rust @pipeline-async-disconnect +@rust @rust-4 @pipeline-async-disconnect Feature: Async pipeline disconnect (Flush mode) When a client uses Flush (async/pipeline mode) instead of Sync, and disconnects before reading all responses, checkin_cleanup is skipped for async connections. diff --git a/tests/bdd/features/pool-internal-bench.feature b/tests/bdd/features/pool-internal-bench.feature index c74e611fd..035124047 100644 --- a/tests/bdd/features/pool-internal-bench.feature +++ b/tests/bdd/features/pool-internal-bench.feature @@ -1,4 +1,4 @@ -@pool +@rust @rust-1 @pool Feature: Internal Pool.get benchmarks These benchmarks measure the internal Pool.get operation performance with real PostgreSQL connections. diff --git a/tests/bdd/features/pool-pressure.feature b/tests/bdd/features/pool-pressure.feature index c9c2c1c75..808933cb9 100644 --- a/tests/bdd/features/pool-pressure.feature +++ b/tests/bdd/features/pool-pressure.feature @@ -1,4 +1,4 @@ -@pool @pool-pressure +@rust @rust-2 @pool @pool-pressure Feature: Pool under sustained client pressure Regression harness for Phase 4 anticipation behavior and pool sizing decisions. Each scenario pins a specific property of `Pool::timeout_get` diff --git a/tests/bdd/features/pool-size-show-pools.feature b/tests/bdd/features/pool-size-show-pools.feature index c5bebe2bf..ae844f255 100644 --- a/tests/bdd/features/pool-size-show-pools.feature +++ b/tests/bdd/features/pool-size-show-pools.feature @@ -1,4 +1,4 @@ -@rust @pool-size-show-pools +@rust @rust-4 @pool-size-show-pools Feature: pool_size column in SHOW POOLS Verify that SHOW POOLS exposes the configured pool_size for both static (config-defined) and dynamic (auth_query passthrough) pools. diff --git a/tests/bdd/features/prepared-statement-doesnt-exist-bug.feature b/tests/bdd/features/prepared-statement-doesnt-exist-bug.feature index bfe39d7d2..855081dd1 100644 --- a/tests/bdd/features/prepared-statement-doesnt-exist-bug.feature +++ b/tests/bdd/features/prepared-statement-doesnt-exist-bug.feature @@ -1,4 +1,4 @@ -@rust @prepared-cache @bug +@rust @rust-1 @prepared-cache @bug Feature: Prepared statement cache desync on client disconnect before Sync When a client sends Parse but disconnects before Sync/Flush, diff --git a/tests/bdd/features/prometheus.feature b/tests/bdd/features/prometheus.feature index 2ea898fa1..f680547b7 100644 --- a/tests/bdd/features/prometheus.feature +++ b/tests/bdd/features/prometheus.feature @@ -1,4 +1,4 @@ -@prometheus +@rust @rust-2 @prometheus Feature: Prometheus metrics tests Test pg_doorman Prometheus metrics endpoint diff --git a/tests/bdd/features/rust-deferred-begin-bug.feature b/tests/bdd/features/rust-deferred-begin-bug.feature index 55438a848..2ea4d32f4 100644 --- a/tests/bdd/features/rust-deferred-begin-bug.feature +++ b/tests/bdd/features/rust-deferred-begin-bug.feature @@ -1,4 +1,4 @@ -@rust @rust-deferred-begin-bug +@rust @rust-4 @rust-deferred-begin-bug Feature: Deferred BEGIN optimization bug with extended protocol Test that pg_doorman correctly handles transaction state when deferred BEGIN is followed by ROLLBACK without any actual queries in extended protocol. diff --git a/tests/bdd/features/selectin-loading-bug.feature b/tests/bdd/features/selectin-loading-bug.feature index b6bf759f7..e98e6dc8b 100644 --- a/tests/bdd/features/selectin-loading-bug.feature +++ b/tests/bdd/features/selectin-loading-bug.feature @@ -1,4 +1,4 @@ -@selectin +@rust @rust-1 @selectin Feature: Selectin loading bug reproduction (asyncpg + SQLAlchemy) Reproduces protocol desync when pg_doorman's prepared statement cache eviction happens while async_mode=true (between Flush and Sync). diff --git a/tests/bdd/features/session-mode-error-handling.feature b/tests/bdd/features/session-mode-error-handling.feature index a45702488..ac6478bb6 100644 --- a/tests/bdd/features/session-mode-error-handling.feature +++ b/tests/bdd/features/session-mode-error-handling.feature @@ -1,4 +1,4 @@ -@rust @session-mode-error +@rust @rust-2 @session-mode-error Feature: Session mode does not destroy connections on SQL errors When pg_doorman runs in session mode and the server is in async protocol mode (Flush), a PostgreSQL ErrorResponse (syntax error, division by zero) should not mark the server diff --git a/tests/bdd/features/session-mode-stats.feature b/tests/bdd/features/session-mode-stats.feature index 06f6a4ff2..6ad26ce8e 100644 --- a/tests/bdd/features/session-mode-stats.feature +++ b/tests/bdd/features/session-mode-stats.feature @@ -1,4 +1,4 @@ -@pool @session-mode +@rust @rust-4 @pool @session-mode Feature: Session mode statistics accuracy Regression test for query_time and xact_time percentiles in session mode. Without the fix, both metrics accumulate the entire session duration diff --git a/tests/bdd/features/show-pools-memory.feature b/tests/bdd/features/show-pools-memory.feature index e773c7529..af8197484 100644 --- a/tests/bdd/features/show-pools-memory.feature +++ b/tests/bdd/features/show-pools-memory.feature @@ -1,4 +1,4 @@ -@rust @admin @pools-memory +@rust @rust-4 @admin @pools-memory Feature: Admin console SHOW POOLS_MEMORY and SHOW PREPARED_STATEMENTS commands Test that pg_doorman admin console correctly handles memory monitoring commands From 911647627bdfabee86165a661d8a423cfc2a90fa Mon Sep 17 00:00:00 2001 From: dmitrivasilyev Date: Fri, 15 May 2026 23:08:51 +0300 Subject: [PATCH 7/8] Fix CI regressions exposed by BDD matrix re-tagging The matrix wiring in the previous commit pulled sixteen orphan features into CI. Two of them masked existing bugs and two have pre-existing product issues outside this PR's scope. Fixed: - src/app/config.rs: when -t / --test-config exits on parse error, the message now always goes to stderr. Previously the branch for non-terminal stdin used `log::error!`, but the logger is not yet initialized at that point, so CI captured an empty stream. The scenario "Test invalid configuration file with -t flag" now sees the expected "Config parse error:" prefix. - tests/go/hba/hba_trust_test.go: the HBA reject error message no longer prefixes the peer with "IP address" and now includes the source port. The Go test for HBA deny was pinned to the older string and would never match the current output. Loosened the substring assertion to the stable portion ("to @ (TLS: false) is not permitted by HBA configuration") which still proves the path is taken. Deferred (matrix tag reverted, scenarios silently skipped again, TODO issue notes saved in MEMORY/project_orphan_bdd_features.md): - pool-size-show-pools.feature "Dynamic pool" scenario: auth_query passthrough returns pool_size=1 instead of the configured default_pool_size=3. Product bug in dynamic-pool size resolution. - rust-deferred-begin-bug.feature: all four scenarios skip because a step definition the file relies on is no longer registered in the test binary. Needs a separate investigation of the extended-session helper. --- src/app/config.rs | 14 ++++---------- tests/bdd/features/pool-size-show-pools.feature | 2 +- tests/bdd/features/rust-deferred-begin-bug.feature | 2 +- tests/go/hba/hba_trust_test.go | 2 +- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/app/config.rs b/src/app/config.rs index 0d6f9e6b6..3d89330d0 100644 --- a/src/app/config.rs +++ b/src/app/config.rs @@ -1,6 +1,3 @@ -use log::error; -use std::io::{self, IsTerminal, Write}; - use crate::config::{get_config, Config}; use tokio::runtime::Builder; @@ -15,13 +12,10 @@ pub fn init_config(args: &Args) -> Result> { match crate::config::parse(args.config_file.as_str()).await { Ok(_) => (), Err(err) => { - let stdin = io::stdin(); - if stdin.is_terminal() { - eprintln!("Config parse error: {err}"); - io::stdout().flush().unwrap(); - } else { - error!("Config parse error: {err:?}"); - } + // Always write to stderr — the logger has not been + // initialized yet, so `log::error!` is swallowed on + // non-terminal stdin (CI, supervisor). + eprintln!("Config parse error: {err}"); std::process::exit(exitcode::CONFIG); } }; diff --git a/tests/bdd/features/pool-size-show-pools.feature b/tests/bdd/features/pool-size-show-pools.feature index ae844f255..c5bebe2bf 100644 --- a/tests/bdd/features/pool-size-show-pools.feature +++ b/tests/bdd/features/pool-size-show-pools.feature @@ -1,4 +1,4 @@ -@rust @rust-4 @pool-size-show-pools +@rust @pool-size-show-pools Feature: pool_size column in SHOW POOLS Verify that SHOW POOLS exposes the configured pool_size for both static (config-defined) and dynamic (auth_query passthrough) pools. diff --git a/tests/bdd/features/rust-deferred-begin-bug.feature b/tests/bdd/features/rust-deferred-begin-bug.feature index 2ea4d32f4..55438a848 100644 --- a/tests/bdd/features/rust-deferred-begin-bug.feature +++ b/tests/bdd/features/rust-deferred-begin-bug.feature @@ -1,4 +1,4 @@ -@rust @rust-4 @rust-deferred-begin-bug +@rust @rust-deferred-begin-bug Feature: Deferred BEGIN optimization bug with extended protocol Test that pg_doorman correctly handles transaction state when deferred BEGIN is followed by ROLLBACK without any actual queries in extended protocol. diff --git a/tests/go/hba/hba_trust_test.go b/tests/go/hba/hba_trust_test.go index 4906fc669..3c85dcd07 100644 --- a/tests/go/hba/hba_trust_test.go +++ b/tests/go/hba/hba_trust_test.go @@ -23,5 +23,5 @@ func Test_HbaDeny(t *testing.T) { defer db.Close() _, err := db.Exec("select 1") assert.Error(t, err) - assert.Contains(t, err.Error(), "Connection from IP address 127.0.0.1 to example_user_nopassword@example_db (TLS: false)") + assert.Contains(t, err.Error(), "to example_user_nopassword@example_db (TLS: false) is not permitted by HBA configuration") } From bd7028aea4793c0b91984deac58e5d51b1235c5e Mon Sep 17 00:00:00 2001 From: dmitrivasilyev Date: Sat, 16 May 2026 00:13:06 +0300 Subject: [PATCH 8/8] Restore two more orphan BDD features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pool-size-show-pools.feature "Dynamic pool" scenario: the YAML used auth_query.default_pool_size, a key that was renamed to auth_query.pool_size in commit e383a7a but never propagated here. With the new value the SHOW POOLS column matches the configured 3. Matrix tag @rust-4 restored. rust-deferred-begin-bug.feature: implemented the missing "session should receive ParseComplete/BindComplete/CommandComplete 'tag'/ReadyForQuery 'status'" step bindings as a thin helper over the already-captured session message log. Also renamed "we create extended session" to "we create session" — extended-protocol sessions are the same regular sessions, the Parse/Bind/Execute/Sync steps already exist. The deferred-begin scenarios still fail one assertion: after Parse "BEGIN" + Bind + Execute + Sync the server backend is allocated (sv_active=1), and the test wants 0. That is a real product gap — pg_doorman only short-circuits standalone BEGIN over SimpleQuery (is_standalone_begin in client/transaction.rs), not over the extended protocol. Implementing extended-BEGIN deferral is its own change. The matrix tag stays off until the gap is closed; the new step bindings are kept because they are useful to whichever PR picks this up. --- tests/bdd/extended/session_management.rs | 80 +++++++++++++++++++ .../bdd/features/pool-size-show-pools.feature | 5 +- .../features/rust-deferred-begin-bug.feature | 8 +- 3 files changed, 86 insertions(+), 7 deletions(-) diff --git a/tests/bdd/extended/session_management.rs b/tests/bdd/extended/session_management.rs index 192a3f4de..227c7491a 100644 --- a/tests/bdd/extended/session_management.rs +++ b/tests/bdd/extended/session_management.rs @@ -553,6 +553,86 @@ pub async fn session_should_receive_datarow( ); } +#[then(regex = r#"^session "([^"]+)" should receive ParseComplete$"#)] +pub async fn session_should_receive_parse_complete(world: &mut DoormanWorld, session_name: String) { + expect_message_tag(world, &session_name, '1', "ParseComplete", None); +} + +#[then(regex = r#"^session "([^"]+)" should receive BindComplete$"#)] +pub async fn session_should_receive_bind_complete(world: &mut DoormanWorld, session_name: String) { + expect_message_tag(world, &session_name, '2', "BindComplete", None); +} + +#[then(regex = r#"^session "([^"]+)" should receive CommandComplete "([^"]+)"$"#)] +pub async fn session_should_receive_command_complete( + world: &mut DoormanWorld, + session_name: String, + expected_tag: String, +) { + expect_message_tag( + world, + &session_name, + 'C', + "CommandComplete", + Some(expected_tag.as_bytes()), + ); +} + +#[then(regex = r#"^session "([^"]+)" should receive ReadyForQuery "([^"]+)"$"#)] +pub async fn session_should_receive_ready_for_query( + world: &mut DoormanWorld, + session_name: String, + expected_status: String, +) { + let expected = expected_status.as_bytes(); + assert_eq!( + expected.len(), + 1, + "ReadyForQuery status must be a single byte ('I'/'T'/'E'), got {:?}", + expected_status + ); + expect_message_tag(world, &session_name, 'Z', "ReadyForQuery", Some(expected)); +} + +/// Scan stored session messages for a frame with the given backend tag. +/// When `expected_body_prefix` is `Some`, the matching frame's body must +/// also start with those bytes — the caller uses this for tag-bearing +/// messages such as CommandComplete ("BEGIN\0") and ReadyForQuery ("I"). +fn expect_message_tag( + world: &DoormanWorld, + session_name: &str, + tag: char, + label: &str, + expected_body_prefix: Option<&[u8]>, +) { + let messages = world + .session_messages + .get(session_name) + .unwrap_or_else(|| panic!("No messages stored for session '{}'", session_name)); + + for (msg_type, data) in messages { + if *msg_type != tag { + continue; + } + let Some(expected) = expected_body_prefix else { + return; + }; + if data.starts_with(expected) { + return; + } + } + + panic!( + "No {} received from session '{}' (looking for tag '{}'{})", + label, + session_name, + tag, + expected_body_prefix + .map(|p| format!(" with body starting {:?}", String::from_utf8_lossy(p))) + .unwrap_or_default(), + ); +} + #[then(regex = r#"^session "([^"]+)" should receive error containing "([^"]+)"$"#)] pub async fn session_should_receive_error_containing( world: &mut DoormanWorld, diff --git a/tests/bdd/features/pool-size-show-pools.feature b/tests/bdd/features/pool-size-show-pools.feature index c5bebe2bf..6be227521 100644 --- a/tests/bdd/features/pool-size-show-pools.feature +++ b/tests/bdd/features/pool-size-show-pools.feature @@ -1,4 +1,4 @@ -@rust @pool-size-show-pools +@rust @rust-4 @pool-size-show-pools Feature: pool_size column in SHOW POOLS Verify that SHOW POOLS exposes the configured pool_size for both static (config-defined) and dynamic (auth_query passthrough) pools. @@ -71,8 +71,7 @@ Feature: pool_size column in SHOW POOLS query: "SELECT username, password FROM auth_users WHERE username = $1" user: "postgres" password: "" - pool_size: 1 - default_pool_size: 3 + pool_size: 3 cache_ttl: "1h" cache_failure_ttl: "30s" min_interval: "0s" diff --git a/tests/bdd/features/rust-deferred-begin-bug.feature b/tests/bdd/features/rust-deferred-begin-bug.feature index 55438a848..f7c1fb6f0 100644 --- a/tests/bdd/features/rust-deferred-begin-bug.feature +++ b/tests/bdd/features/rust-deferred-begin-bug.feature @@ -42,7 +42,7 @@ Feature: Deferred BEGIN optimization bug with extended protocol # This test reproduces the bug using pure extended protocol # Session 1: Create connection, send BEGIN via extended protocol - When we create extended session "session1" to pg_doorman as "example_user_1" with password "" and database "example_db" + When we create session "session1" to pg_doorman as "example_user_1" with password "" and database "example_db" # Send Parse for BEGIN And we send Parse "" with query "BEGIN" to session "session1" @@ -79,7 +79,7 @@ Feature: Deferred BEGIN optimization bug with extended protocol # Session 2: Reuse the connection from pool # BUG: If pg_doorman doesn't properly reset transaction state, # the next client might see stale state - When we create extended session "session2" to pg_doorman as "example_user_1" with password "" and database "example_db" + When we create session "session2" to pg_doorman as "example_user_1" with password "" and database "example_db" # Try to start a new transaction And we send Parse "" with query "BEGIN" to session "session2" @@ -144,7 +144,7 @@ Feature: Deferred BEGIN optimization bug with extended protocol Scenario: Multiple empty transactions in sequence # Stress test: multiple BEGIN/ROLLBACK cycles without queries - When we create extended session "client" to pg_doorman as "example_user_1" with password "" and database "example_db" + When we create session "client" to pg_doorman as "example_user_1" with password "" and database "example_db" # Iteration 1: BEGIN + ROLLBACK And we send Parse "" with query "BEGIN" to session "client" @@ -215,7 +215,7 @@ Feature: Deferred BEGIN optimization bug with extended protocol Scenario: Deferred BEGIN followed by ROLLBACK in pipeline mode # Test async pipeline mode with deferred BEGIN - When we create extended session "client" to pg_doorman as "example_user_1" with password "" and database "example_db" + When we create session "client" to pg_doorman as "example_user_1" with password "" and database "example_db" # Send BEGIN + ROLLBACK in pipeline (multiple messages before Sync) And we send Parse "" with query "BEGIN" to session "client"