From 1806eb3965ad89c09dac66754f02b47284ef36f1 Mon Sep 17 00:00:00 2001
From: Anthonin Bonnefoy <anthonin.bonnefoy@datadoghq.com>
Date: Wed, 16 Jul 2025 11:17:30 +0200
Subject: [PATCH 1/2] PG: Add per index blocks hit and blocks read

pg_statio_user_indexes provides per-index block usage. It uses
pg_stat_get_blocks_fetched and pg_stat_get_blocks_hit under the hood,
so we can emit those metrics directly from our IDX_METRICS query.

The metric name index.index_blocks_read was used because index_blocks_read
already exists for the per-table block usage.
---
 postgres/changelog.d/20767.added                     | 1 +
 postgres/datadog_checks/postgres/relationsmanager.py | 8 +++++++-
 postgres/metadata.csv                                | 2 ++
 3 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 postgres/changelog.d/20767.added

diff --git a/postgres/changelog.d/20767.added b/postgres/changelog.d/20767.added
new file mode 100644
index 0000000000000..9c6d8b24c2a5f
--- /dev/null
+++ b/postgres/changelog.d/20767.added
@@ -0,0 +1 @@
+PG: Add per index blocks hit and blocks read
diff --git a/postgres/datadog_checks/postgres/relationsmanager.py b/postgres/datadog_checks/postgres/relationsmanager.py
index 7a8bcc2f30344..f53286f3877a0 100644
--- a/postgres/datadog_checks/postgres/relationsmanager.py
+++ b/postgres/datadog_checks/postgres/relationsmanager.py
@@ -61,7 +61,7 @@
 }
 
 
-# This is similaar to pg_stat_user_indexes view
+# This is similar to pg_stat_user_indexes view
 IDX_METRICS = {
     'name': 'pg_index',
     'query': """
@@ -74,6 +74,8 @@
   idx_scan,
   idx_tup_read,
   idx_tup_fetch,
+  idx_blks_read,
+  idx_blks_hit,
   index_size
 FROM (SELECT
       N.nspname AS schemaname,
@@ -83,6 +85,8 @@
       pg_stat_get_numscans(I.oid) AS idx_scan,
       pg_stat_get_tuples_returned(I.oid) AS idx_tup_read,
       pg_stat_get_tuples_fetched(I.oid) AS idx_tup_fetch,
+      pg_stat_get_blocks_fetched(I.oid) - pg_stat_get_blocks_hit(I.oid) AS idx_blks_read,
+      pg_stat_get_blocks_hit(I.oid) AS idx_blks_hit,
       pg_relation_size(indexrelid) as index_size
     FROM pg_class C JOIN
       pg_index X ON C.oid = X.indrelid JOIN
@@ -101,6 +105,8 @@
         {'name': 'index_scans', 'type': 'rate'},
         {'name': 'index_rows_read', 'type': 'rate'},
         {'name': 'index_rows_fetched', 'type': 'rate'},
+        {'name': 'index.index_blocks_read', 'type': 'rate'},
+        {'name': 'index.index_blocks_hit', 'type': 'rate'},
         {'name': 'individual_index_size', 'type': 'gauge'},
     ],
 }
diff --git a/postgres/metadata.csv b/postgres/metadata.csv
index 9692e07e7262b..21a05201ff2e1 100644
--- a/postgres/metadata.csv
+++ b/postgres/metadata.csv
@@ -73,6 +73,8 @@ postgresql.function.self_time,rate,,,,"Enabled with `collect_function_metrics`.
 postgresql.function.total_time,rate,,,,"Enabled with `collect_function_metrics`. Total time spent in this function and all other functions called by it. This metric is tagged with db, schema, function.",0,postgres,postgres_function_total_time,,
 postgresql.heap_blocks_hit,gauge,,hit,second,"Enabled with `relations`. The number of buffer hits in this table. This metric is tagged with db, schema, table.",0,postgres,heap blks hit,,
 postgresql.heap_blocks_read,gauge,,block,second,"Enabled with `relations`. The number of disk blocks read from this table. This metric is tagged with db, schema, table.",0,postgres,heap blks read,,
+postgresql.index.index_blocks_hit,gauge,,hit,second,"Enabled with `relations`. The number of buffer hits for a specific index. This metric is tagged with db, schema, table, index.",0,postgres,index blks hit,,
+postgresql.index.index_blocks_read,gauge,,block,second,"Enabled with `relations`. The number of disk blocks read for a specific index. This metric is tagged with db, schema, table, index.",0,postgres,index blks read,,
 postgresql.index_bloat,gauge,,percent,,"Enabled with `collect_bloat_metrics`. The estimated percentage of index bloat. This metric is tagged with db, schema, table, index.",0,postgres,ibloat,,
 postgresql.index_blocks_hit,gauge,,hit,second,"Enabled with `relations`. The number of buffer hits in all indexes on this table. This metric is tagged with db, schema, table.",0,postgres,idx blks hit,,
 postgresql.index_blocks_read,gauge,,block,second,"Enabled with `relations`. The number of disk blocks read from all indexes on this table. This metric is tagged with db, schema, table.",0,postgres,idx blks read,,

From 8720d7b986c87628331197f4504cd914dd88188e Mon Sep 17 00:00:00 2001
From: Anthonin Bonnefoy <anthonin.bonnefoy@datadoghq.com>
Date: Mon, 21 Jul 2025 08:21:21 +0200
Subject: [PATCH 2/2] PG: Remove wal flaky tests

The presence of WAL metrics is already covered by the common pg integration tests
---
 postgres/tests/test_pg_integration.py | 70 ---------------------------
 1 file changed, 70 deletions(-)

diff --git a/postgres/tests/test_pg_integration.py b/postgres/tests/test_pg_integration.py
index 2ad073222f56a..fbf4ee821b8cb 100644
--- a/postgres/tests/test_pg_integration.py
+++ b/postgres/tests/test_pg_integration.py
@@ -605,49 +605,6 @@ def throw_exception_first_time(*args, **kwargs):
     assert_state_clean(check)
 
 
-@requires_over_14
-@pytest.mark.parametrize(
-    'is_aurora',
-    [True, False],
-)
-@pytest.mark.flaky(max_runs=5)
-def test_wal_stats(aggregator, integration_check, pg_instance, is_aurora):
-    conn = _get_superconn(pg_instance)
-    with conn.cursor() as cur:
-        cur.execute("select wal_records, wal_fpi, wal_bytes from pg_stat_wal;")
-        (wal_records, wal_fpi, wal_bytes) = cur.fetchall()[0]
-        cur.execute("insert into persons (lastname) values ('test');")
-
-    # Wait for pg_stat_wal to be updated
-    for _ in range(50):
-        with conn.cursor() as cur:
-            cur.execute("select wal_records, wal_bytes from pg_stat_wal;")
-            new_wal_records = cur.fetchall()[0][0]
-            if new_wal_records > wal_records:
-                break
-        time.sleep(0.1)
-
-    check = integration_check(pg_instance)
-    check.is_aurora = is_aurora
-    if is_aurora is True:
-        return
-    check.run()
-
-    expected_tags = _get_expected_tags(check, pg_instance)
-    aggregator.assert_metric('postgresql.wal.records', count=1, tags=expected_tags)
-    aggregator.assert_metric('postgresql.wal.bytes', count=1, tags=expected_tags)
-
-    # Expect at least one Heap + one Transaction additional records in the WAL
-    assert_metric_at_least(
-        aggregator, 'postgresql.wal.records', tags=expected_tags, count=1, lower_bound=wal_records + 2
-    )
-    # We should have at least one full page write
-    assert_metric_at_least(aggregator, 'postgresql.wal.bytes', tags=expected_tags, count=1, lower_bound=wal_bytes + 100)
-    assert_metric_at_least(
-        aggregator, 'postgresql.wal.full_page_images', tags=expected_tags, count=1, lower_bound=wal_fpi
-    )
-
-
 def test_query_timeout(integration_check, pg_instance):
     pg_instance['query_timeout'] = 1000
     check = integration_check(pg_instance)
@@ -658,33 +615,6 @@ def test_query_timeout(integration_check, pg_instance):
                 cursor.execute("select pg_sleep(2000)")
 
 
-@requires_over_10
-@pytest.mark.parametrize(
-    'is_aurora',
-    [True, False],
-)
-def test_wal_metrics(aggregator, integration_check, pg_instance, is_aurora):
-    check = integration_check(pg_instance)
-    check.is_aurora = is_aurora
-
-    if is_aurora is True:
-        return
-    # Default PG's wal size is 16MB
-    wal_size = 16777216
-
-    postgres_conn = _get_superconn(pg_instance)
-    with postgres_conn.cursor() as cur:
-        cur.execute("select count(*) from pg_ls_waldir();")
-        expected_num_wals = cur.fetchall()[0][0]
-
-    check.run()
-
-    expected_wal_size = expected_num_wals * wal_size
-    dd_agent_tags = _get_expected_tags(check, pg_instance)
-    aggregator.assert_metric('postgresql.wal_count', count=1, value=expected_num_wals, tags=dd_agent_tags)
-    aggregator.assert_metric('postgresql.wal_size', count=1, value=expected_wal_size, tags=dd_agent_tags)
-
-
 def test_pg_control(aggregator, integration_check, pg_instance):
     check = integration_check(pg_instance)
     check.run()