From 0ed7da6b348fa230edf721726ce87069c704265d Mon Sep 17 00:00:00 2001 From: Seth Samuel Date: Thu, 12 Jun 2025 09:12:27 -0400 Subject: [PATCH] Cache obfuscated SQL on Python side --- .../datadog_checks/base/utils/db/utils.py | 12 ++++++++++++ postgres/tests/test_statements.py | 7 ++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/datadog_checks_base/datadog_checks/base/utils/db/utils.py b/datadog_checks_base/datadog_checks/base/utils/db/utils.py index f94ca3853a5af..99751a738d6f6 100644 --- a/datadog_checks_base/datadog_checks/base/utils/db/utils.py +++ b/datadog_checks_base/datadog_checks/base/utils/db/utils.py @@ -10,6 +10,7 @@ import socket import threading import time +from hashlib import md5 from concurrent.futures.thread import ThreadPoolExecutor from enum import Enum, auto from ipaddress import IPv4Address @@ -218,6 +219,7 @@ def default_json_event_encoding(o): return o.decode('utf-8') raise TypeError +obfuscate_cache = {} def obfuscate_sql_with_metadata(query, options=None, replace_null_character=False): """ @@ -238,6 +240,14 @@ def obfuscate_sql_with_metadata(query, options=None, replace_null_character=Fals # replace embedded null characters \x00 before obfuscating query = query.replace('\x00', '') + cache_key = md5((query + options).encode('utf-8')).hexdigest() + # print(query) + if cache_key in obfuscate_cache: + # print("Cache hit") + # Return cached result if available + return obfuscate_cache[cache_key] + + # print("Cache miss") statement = datadog_agent.obfuscate_sql(query, options) # The `obfuscate_sql` testing stub returns bytes, so we have to handle that here. # The actual `obfuscate_sql` method in the agent's Go code returns a JSON string. @@ -256,6 +266,8 @@ def obfuscate_sql_with_metadata(query, options=None, replace_null_character=Fals tables = metadata.pop('tables_csv', None) tables = [table.strip() for table in tables.split(',') if table != ''] if tables else None statement_with_metadata['metadata']['tables'] = tables + + obfuscate_cache[cache_key] = statement_with_metadata return statement_with_metadata diff --git a/postgres/tests/test_statements.py b/postgres/tests/test_statements.py index 2c07c5e639943..8944862736ce1 100644 --- a/postgres/tests/test_statements.py +++ b/postgres/tests/test_statements.py @@ -968,10 +968,11 @@ def test_statement_metadata( # Metrics will match to the normalized query signature normalized_query_signature = 'ca85e8d659051b3a' - def obfuscate_sql(query, options=None): - if query.startswith('SELECT city FROM persons WHERE city'): + def obfuscate_sql(query_in, options=None): + if query == query_in: + print("test query found") return json.dumps({'query': normalized_query, 'metadata': metadata}) - return json.dumps({'query': query, 'metadata': metadata}) + return json.dumps({'query': query_in, 'metadata': metadata}) check = integration_check(dbm_instance) check._connect()