From 1806eb3965ad89c09dac66754f02b47284ef36f1 Mon Sep 17 00:00:00 2001 From: Anthonin Bonnefoy Date: Wed, 16 Jul 2025 11:17:30 +0200 Subject: [PATCH 1/2] PG: Add per index blocks hit and blocks read pg_statio_user_indexes provides per index block usage. It uses pg_stat_get_blocks_fetched and pg_stat_get_blocks_hit under the hood. We can emit those metrics directly from our IDX_METRICS query. The metric name index.index_blocks_read was used as index_blocks_read already exists for the per table block usage. --- postgres/changelog.d/20767.added | 1 + postgres/datadog_checks/postgres/relationsmanager.py | 8 +++++++- postgres/metadata.csv | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 postgres/changelog.d/20767.added diff --git a/postgres/changelog.d/20767.added b/postgres/changelog.d/20767.added new file mode 100644 index 0000000000000..9c6d8b24c2a5f --- /dev/null +++ b/postgres/changelog.d/20767.added @@ -0,0 +1 @@ +PG: Add per index blocks hit and blocks read diff --git a/postgres/datadog_checks/postgres/relationsmanager.py b/postgres/datadog_checks/postgres/relationsmanager.py index 7a8bcc2f30344..f53286f3877a0 100644 --- a/postgres/datadog_checks/postgres/relationsmanager.py +++ b/postgres/datadog_checks/postgres/relationsmanager.py @@ -61,7 +61,7 @@ } -# This is similaar to pg_stat_user_indexes view +# This is similar to pg_stat_user_indexes view IDX_METRICS = { 'name': 'pg_index', 'query': """ @@ -74,6 +74,8 @@ idx_scan, idx_tup_read, idx_tup_fetch, + idx_blks_read, + idx_blks_hit, index_size FROM (SELECT N.nspname AS schemaname, @@ -83,6 +85,8 @@ pg_stat_get_numscans(I.oid) AS idx_scan, pg_stat_get_tuples_returned(I.oid) AS idx_tup_read, pg_stat_get_tuples_fetched(I.oid) AS idx_tup_fetch, + pg_stat_get_blocks_fetched(I.oid) - pg_stat_get_blocks_hit(I.oid) AS idx_blks_read, + pg_stat_get_blocks_hit(I.oid) AS idx_blks_hit, pg_relation_size(indexrelid) as index_size FROM pg_class C JOIN pg_index X ON C.oid = X.indrelid JOIN @@ -101,6 
+105,8 @@ {'name': 'index_scans', 'type': 'rate'}, {'name': 'index_rows_read', 'type': 'rate'}, {'name': 'index_rows_fetched', 'type': 'rate'}, + {'name': 'index.index_blocks_read', 'type': 'rate'}, + {'name': 'index.index_blocks_hit', 'type': 'rate'}, {'name': 'individual_index_size', 'type': 'gauge'}, ], } diff --git a/postgres/metadata.csv b/postgres/metadata.csv index 9692e07e7262b..21a05201ff2e1 100644 --- a/postgres/metadata.csv +++ b/postgres/metadata.csv @@ -73,6 +73,8 @@ postgresql.function.self_time,rate,,,,"Enabled with `collect_function_metrics`. postgresql.function.total_time,rate,,,,"Enabled with `collect_function_metrics`. Total time spent in this function and all other functions called by it. This metric is tagged with db, schema, function.",0,postgres,postgres_function_total_time,, postgresql.heap_blocks_hit,gauge,,hit,second,"Enabled with `relations`. The number of buffer hits in this table. This metric is tagged with db, schema, table.",0,postgres,heap blks hit,, postgresql.heap_blocks_read,gauge,,block,second,"Enabled with `relations`. The number of disk blocks read from this table. This metric is tagged with db, schema, table.",0,postgres,heap blks read,, +postgresql.index.index_blocks_hit,gauge,,hit,second,"Enabled with `relations`. The number of buffer hits for a specific index. This metric is tagged with db, schema, table, index.",0,postgres,index blks hit,, +postgresql.index.index_blocks_read,gauge,,block,second,"Enabled with `relations`. The number of disk blocks read for a specific index. This metric is tagged with db, schema, table, index.",0,postgres,index blks read,, postgresql.index_bloat,gauge,,percent,,"Enabled with `collect_bloat_metrics`. The estimated percentage of index bloat. This metric is tagged with db, schema, table, index.",0,postgres,ibloat,, postgresql.index_blocks_hit,gauge,,hit,second,"Enabled with `relations`. The number of buffer hits in all indexes on this table. 
This metric is tagged with db, schema, table.",0,postgres,idx blks hit,, postgresql.index_blocks_read,gauge,,block,second,"Enabled with `relations`. The number of disk blocks read from all indexes on this table. This metric is tagged with db, schema, table.",0,postgres,idx blks read,, From 8720d7b986c87628331197f4504cd914dd88188e Mon Sep 17 00:00:00 2001 From: Anthonin Bonnefoy Date: Mon, 21 Jul 2025 08:21:21 +0200 Subject: [PATCH 2/2] PG: Remove wal flaky tests WAL metrics presence is already covered in common pg integration tests --- postgres/tests/test_pg_integration.py | 70 --------------------------- 1 file changed, 70 deletions(-) diff --git a/postgres/tests/test_pg_integration.py b/postgres/tests/test_pg_integration.py index 2ad073222f56a..fbf4ee821b8cb 100644 --- a/postgres/tests/test_pg_integration.py +++ b/postgres/tests/test_pg_integration.py @@ -605,49 +605,6 @@ def throw_exception_first_time(*args, **kwargs): assert_state_clean(check) -@requires_over_14 -@pytest.mark.parametrize( - 'is_aurora', - [True, False], -) -@pytest.mark.flaky(max_runs=5) -def test_wal_stats(aggregator, integration_check, pg_instance, is_aurora): - conn = _get_superconn(pg_instance) - with conn.cursor() as cur: - cur.execute("select wal_records, wal_fpi, wal_bytes from pg_stat_wal;") - (wal_records, wal_fpi, wal_bytes) = cur.fetchall()[0] - cur.execute("insert into persons (lastname) values ('test');") - - # Wait for pg_stat_wal to be updated - for _ in range(50): - with conn.cursor() as cur: - cur.execute("select wal_records, wal_bytes from pg_stat_wal;") - new_wal_records = cur.fetchall()[0][0] - if new_wal_records > wal_records: - break - time.sleep(0.1) - - check = integration_check(pg_instance) - check.is_aurora = is_aurora - if is_aurora is True: - return - check.run() - - expected_tags = _get_expected_tags(check, pg_instance) - aggregator.assert_metric('postgresql.wal.records', count=1, tags=expected_tags) - aggregator.assert_metric('postgresql.wal.bytes', count=1, 
tags=expected_tags) - - # Expect at least one Heap + one Transaction additional records in the WAL - assert_metric_at_least( - aggregator, 'postgresql.wal.records', tags=expected_tags, count=1, lower_bound=wal_records + 2 - ) - # We should have at least one full page write - assert_metric_at_least(aggregator, 'postgresql.wal.bytes', tags=expected_tags, count=1, lower_bound=wal_bytes + 100) - assert_metric_at_least( - aggregator, 'postgresql.wal.full_page_images', tags=expected_tags, count=1, lower_bound=wal_fpi - ) - - def test_query_timeout(integration_check, pg_instance): pg_instance['query_timeout'] = 1000 check = integration_check(pg_instance) @@ -658,33 +615,6 @@ def test_query_timeout(integration_check, pg_instance): cursor.execute("select pg_sleep(2000)") -@requires_over_10 -@pytest.mark.parametrize( - 'is_aurora', - [True, False], -) -def test_wal_metrics(aggregator, integration_check, pg_instance, is_aurora): - check = integration_check(pg_instance) - check.is_aurora = is_aurora - - if is_aurora is True: - return - # Default PG's wal size is 16MB - wal_size = 16777216 - - postgres_conn = _get_superconn(pg_instance) - with postgres_conn.cursor() as cur: - cur.execute("select count(*) from pg_ls_waldir();") - expected_num_wals = cur.fetchall()[0][0] - - check.run() - - expected_wal_size = expected_num_wals * wal_size - dd_agent_tags = _get_expected_tags(check, pg_instance) - aggregator.assert_metric('postgresql.wal_count', count=1, value=expected_num_wals, tags=dd_agent_tags) - aggregator.assert_metric('postgresql.wal_size', count=1, value=expected_wal_size, tags=dd_agent_tags) - - def test_pg_control(aggregator, integration_check, pg_instance): check = integration_check(pg_instance) check.run()