
PG: Add per index blocks hit and blocks read #20767

Open: wants to merge 2 commits into master
1 change: 1 addition & 0 deletions postgres/changelog.d/20767.added
@@ -0,0 +1 @@
+PG: Add per index blocks hit and blocks read
8 changes: 7 additions & 1 deletion postgres/datadog_checks/postgres/relationsmanager.py
@@ -61,7 +61,7 @@
}


-# This is similaar to pg_stat_user_indexes view
+# This is similar to pg_stat_user_indexes view
IDX_METRICS = {
'name': 'pg_index',
'query': """
@@ -74,6 +74,8 @@
idx_scan,
idx_tup_read,
idx_tup_fetch,
+idx_blks_read,
+idx_blks_hit,
index_size
FROM (SELECT
N.nspname AS schemaname,
@@ -83,6 +85,8 @@
pg_stat_get_numscans(I.oid) AS idx_scan,
pg_stat_get_tuples_returned(I.oid) AS idx_tup_read,
pg_stat_get_tuples_fetched(I.oid) AS idx_tup_fetch,
+pg_stat_get_blocks_fetched(I.oid) - pg_stat_get_blocks_hit(I.oid) AS idx_blks_read,
+pg_stat_get_blocks_hit(I.oid) AS idx_blks_hit,
pg_relation_size(indexrelid) as index_size
FROM pg_class C JOIN
pg_index X ON C.oid = X.indrelid JOIN
@@ -101,6 +105,8 @@
{'name': 'index_scans', 'type': 'rate'},
{'name': 'index_rows_read', 'type': 'rate'},
{'name': 'index_rows_fetched', 'type': 'rate'},
+{'name': 'index.blocks_read', 'type': 'rate'},
+{'name': 'index.blocks_hit', 'type': 'rate'},
{'name': 'individual_index_size', 'type': 'gauge'},
],
}
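Note on the derived column: PostgreSQL's pg_stat_get_blocks_fetched() returns the total number of block requests for a relation and pg_stat_get_blocks_hit() the number satisfied from shared_buffers, so their difference is the number of blocks actually read in, which is how the built-in pg_statio_* views define their *_blks_read columns. A quick way to sanity-check the two new columns is to compare them against pg_statio_user_indexes, which exposes the same counters per index. A minimal sketch, assuming any database with stats collection enabled:

-- Per-index I/O counters from the built-in view; these should match the
-- idx_blks_read/idx_blks_hit values computed by the check's query above.
SELECT schemaname,
       relname,
       indexrelname,
       idx_blks_read,  -- blocks fetched minus buffer hits
       idx_blks_hit    -- block requests served from shared_buffers
FROM pg_statio_user_indexes
ORDER BY idx_blks_read + idx_blks_hit DESC
LIMIT 10;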
2 changes: 2 additions & 0 deletions postgres/metadata.csv
@@ -73,6 +73,8 @@ postgresql.function.self_time,rate,,,,"Enabled with `collect_function_metrics`.
postgresql.function.total_time,rate,,,,"Enabled with `collect_function_metrics`. Total time spent in this function and all other functions called by it. This metric is tagged with db, schema, function.",0,postgres,postgres_function_total_time,,
postgresql.heap_blocks_hit,gauge,,hit,second,"Enabled with `relations`. The number of buffer hits in this table. This metric is tagged with db, schema, table.",0,postgres,heap blks hit,,
postgresql.heap_blocks_read,gauge,,block,second,"Enabled with `relations`. The number of disk blocks read from this table. This metric is tagged with db, schema, table.",0,postgres,heap blks read,,
+postgresql.index.blocks_hit,gauge,,hit,second,"Enabled with `relations`. The number of buffer hits for a specific index. This metric is tagged with db, schema, table, index.",0,postgres,index blks hit,,
+postgresql.index.blocks_read,gauge,,block,second,"Enabled with `relations`. The number of disk blocks read for a specific index. This metric is tagged with db, schema, table, index.",0,postgres,index blks read,,
postgresql.index_bloat,gauge,,percent,,"Enabled with `collect_bloat_metrics`. The estimated percentage of index bloat. This metric is tagged with db, schema, table, index.",0,postgres,ibloat,,
postgresql.index_blocks_hit,gauge,,hit,second,"Enabled with `relations`. The number of buffer hits in all indexes on this table. This metric is tagged with db, schema, table.",0,postgres,idx blks hit,,
postgresql.index_blocks_read,gauge,,block,second,"Enabled with `relations`. The number of disk blocks read from all indexes on this table. This metric is tagged with db, schema, table.",0,postgres,idx blks read,,
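The main use of the two new series is deriving a per-index buffer cache hit ratio (postgresql.index.blocks_hit divided by the sum of the two rates); a low ratio on a heavily scanned index suggests it no longer fits in shared_buffers. The same computation server-side, as a minimal sketch against pg_statio_user_indexes (NULLIF guards indexes with no recorded I/O yet):

-- Buffer cache hit ratio per index: hits / (hits + reads).
SELECT indexrelname,
       idx_blks_hit::float8 / NULLIF(idx_blks_hit + idx_blks_read, 0) AS hit_ratio
FROM pg_statio_user_indexes
ORDER BY hit_ratio NULLS LAST;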
98 changes: 0 additions & 98 deletions postgres/tests/test_pg_integration.py
@@ -605,49 +605,6 @@ def throw_exception_first_time(*args, **kwargs):
    assert_state_clean(check)


-@requires_over_14
-@pytest.mark.parametrize(
-    'is_aurora',
-    [True, False],
-)
-@pytest.mark.flaky(max_runs=5)
-def test_wal_stats(aggregator, integration_check, pg_instance, is_aurora):
-    conn = _get_superconn(pg_instance)
-    with conn.cursor() as cur:
-        cur.execute("select wal_records, wal_fpi, wal_bytes from pg_stat_wal;")
-        (wal_records, wal_fpi, wal_bytes) = cur.fetchall()[0]
-        cur.execute("insert into persons (lastname) values ('test');")
-
-    # Wait for pg_stat_wal to be updated
-    for _ in range(10):
-        with conn.cursor() as cur:
-            cur.execute("select wal_records, wal_bytes from pg_stat_wal;")
-            new_wal_records = cur.fetchall()[0][0]
-        if new_wal_records > wal_records:
-            break
-        time.sleep(0.1)
-
-    check = integration_check(pg_instance)
-    check.is_aurora = is_aurora
-    if is_aurora is True:
-        return
-    check.run()
-
-    expected_tags = _get_expected_tags(check, pg_instance)
-    aggregator.assert_metric('postgresql.wal.records', count=1, tags=expected_tags)
-    aggregator.assert_metric('postgresql.wal.bytes', count=1, tags=expected_tags)
-
-    # Expect at least one Heap + one Transaction additional records in the WAL
-    assert_metric_at_least(
-        aggregator, 'postgresql.wal.records', tags=expected_tags, count=1, lower_bound=wal_records + 2
-    )
-    # We should have at least one full page write
-    assert_metric_at_least(aggregator, 'postgresql.wal.bytes', tags=expected_tags, count=1, lower_bound=wal_bytes + 100)
-    assert_metric_at_least(
-        aggregator, 'postgresql.wal.full_page_images', tags=expected_tags, count=1, lower_bound=wal_fpi
-    )
-
-
def test_query_timeout(integration_check, pg_instance):
    pg_instance['query_timeout'] = 1000
    check = integration_check(pg_instance)
@@ -658,61 +615,6 @@ def test_query_timeout(integration_check, pg_instance):
cursor.execute("select pg_sleep(2000)")


-@requires_over_10
-@pytest.mark.parametrize(
-    'is_aurora',
-    [True, False],
-)
-def test_wal_metrics(aggregator, integration_check, pg_instance, is_aurora):
-    check = integration_check(pg_instance)
-    check.is_aurora = is_aurora
-
-    if is_aurora is True:
-        return
-    # Default PG's wal size is 16MB
-    wal_size = 16777216
-
-    postgres_conn = _get_superconn(pg_instance)
-    with postgres_conn.cursor() as cur:
-        cur.execute("select count(*) from pg_ls_waldir();")
-        expected_num_wals = cur.fetchall()[0][0]
-
-    check.run()
-
-    expected_wal_size = expected_num_wals * wal_size
-    dd_agent_tags = _get_expected_tags(check, pg_instance)
-    aggregator.assert_metric('postgresql.wal_count', count=1, value=expected_num_wals, tags=dd_agent_tags)
-    aggregator.assert_metric('postgresql.wal_size', count=1, value=expected_wal_size, tags=dd_agent_tags)
-
-
-def test_pg_control(aggregator, integration_check, pg_instance):
-    check = integration_check(pg_instance)
-    check.run()
-
-    dd_agent_tags = _get_expected_tags(check, pg_instance)
-    aggregator.assert_metric('postgresql.control.timeline_id', count=1, value=1, tags=dd_agent_tags)
-
-    postgres_conn = _get_superconn(pg_instance)
-    with postgres_conn.cursor() as cur:
-        cur.execute("CHECKPOINT;")
-
-    aggregator.reset()
-    check.run()
-    # checkpoint should be less than 2s old
-    assert_metric_at_least(
-        aggregator, 'postgresql.control.checkpoint_delay', count=1, higher_bound=2.0, tags=dd_agent_tags
-    )
-    # After a checkpoint, we have the CHECKPOINT_ONLINE record (114 bytes) and also
-    # likely receive RUNNING_XACTS (50 bytes) record
-    assert_metric_at_least(
-        aggregator, 'postgresql.control.checkpoint_delay_bytes', count=1, higher_bound=250, tags=dd_agent_tags
-    )
-    # And restart should be slightly more than checkpoint delay
-    assert_metric_at_least(
-        aggregator, 'postgresql.control.redo_delay_bytes', count=1, higher_bound=300, tags=dd_agent_tags
-    )
-
-
def test_pg_control_wal_level(aggregator, integration_check, pg_instance):
"""
Makes sure that we only get the control checkpoint metrics in the correct environment