blockscout · akolotov · Aug 13, 2025 · Aug 13, 2025 · Aug 13, 2025 · Aug 13, 2025
diff --git a/.env.example b/.env.example
@@ -36,3 +36,8 @@ BLOCKSCOUT_RPC_POOL_PER_HOST=50
 # The server version is appended automatically.
 BLOCKSCOUT_MCP_USER_AGENT="Blockscout MCP"
 
+# Optional Mixpanel analytics (HTTP mode only). Set the token to enable; leave it empty to disable.
+# Set the API host only for regional endpoints (e.g., EU); leave it empty for the default. No tracking occurs in stdio mode.
+BLOCKSCOUT_MIXPANEL_TOKEN=""
+BLOCKSCOUT_MIXPANEL_API_HOST=""
+
diff --git a/AGENTS.md b/AGENTS.md
@@ -19,6 +19,8 @@ mcp-server/
 │   ├── config.py               # Configuration management (e.g., API keys, timeouts, cache settings)
 │   ├── constants.py            # Centralized constants used throughout the application, including data truncation limits
 │   ├── logging_utils.py        # Logging utilities for production-ready log formatting
+│   ├── analytics.py            # Centralized Mixpanel analytics for tool invocations (HTTP mode only)
+│   ├── client_meta.py          # Shared client metadata extraction helpers and defaults
 │   ├── cache.py                # Simple in-memory cache for chain data
 │   ├── web3_pool.py            # Async Web3 connection pool manager
 │   ├── models.py               # Defines standardized Pydantic models for all tool responses
@@ -206,6 +208,17 @@ mcp-server/
     * **`logging_utils.py`**:
         * Provides utilities for configuring production-ready logging.
         * Contains the `replace_rich_handlers_with_standard()` function that eliminates multi-line Rich formatting from MCP SDK logs.
+    * **`analytics.py`**:
+        * Centralized Mixpanel analytics for MCP tool invocations.
+        * Enabled only in HTTP mode when `BLOCKSCOUT_MIXPANEL_TOKEN` is set.
+        * Generates a deterministic `distinct_id` from a fingerprint of client IP, client name, and client version.
+        * Tracks tool invocations with client metadata, protocol version, and call source (MCP vs REST).
+        * Includes IP geolocation metadata for Mixpanel and graceful error handling to avoid breaking tool execution.
+    * **`client_meta.py`**:
+        * Shared utilities for extracting client metadata (name, version, protocol, user_agent) from MCP Context.
+        * Provides `ClientMeta` dataclass and `extract_client_meta_from_ctx()` function.
+        * Falls back to User-Agent header when MCP client name is unavailable.
+        * Ensures consistent sentinel defaults ("N/A", "Unknown") across logging and analytics modules.
     * **`cache.py`**:
         * Encapsulates in-memory caching of chain data with TTL management.
     * **`web3_pool.py`**:

diff --git a/Dockerfile b/Dockerfile
@@ -32,5 +32,7 @@ ENV BLOCKSCOUT_ADVANCED_FILTERS_PAGE_SIZE="10"
 ENV BLOCKSCOUT_RPC_REQUEST_TIMEOUT="60.0"
 ENV BLOCKSCOUT_RPC_POOL_PER_HOST="50"
 ENV BLOCKSCOUT_MCP_USER_AGENT="Blockscout MCP"
+# ENV BLOCKSCOUT_MIXPANEL_TOKEN="" # Intentionally commented out: pass at runtime to avoid embedding secrets in image
+ENV BLOCKSCOUT_MIXPANEL_API_HOST=""
 
 CMD ["python", "-m", "blockscout_mcp_server"]
diff --git a/README.md b/README.md
@@ -144,13 +144,7 @@ Refer to [TESTING.md](TESTING.md) for comprehensive instructions on running both
 ## Example Prompts for AI Agents
 
 ```plaintext
-On which popular networks is `ens.eth` deployed as a contract?
-```
-
-```plaintext
-What are the usual activities performed by `ens.eth` on the Ethereum Mainnet?
-Since it is a contract, what is the most used functionality of this contract?
-Which address interacts with the contract the most?
+Is any approval set for OP token on Optimism chain by `zeaver.eth`?
 ```
 
 ```plaintext
@@ -163,9 +157,22 @@ before `Nov 08 2024 04:21:35 AM (-06:00 UTC)`?
 ```
 
 ```plaintext
-What is the most recent transaction made to queue a proposal on `0x323A76393544d5ecca80cd6ef2A560C6a395b7E3`
-in the Ethereum mainnet? What is the proposal ID? What are the current vote
-statistics for this proposal?
+Tell me more about the transaction `0xf8a55721f7e2dcf85690aaf81519f7bc820bc58a878fa5f81b12aef5ccda0efb`
+on Redstone rollup.
+```
+
+```plaintext
+Is there any blacklisting functionality of USDT token on Arbitrum One?
+```
+
+```plaintext
+What is the latest block on Gnosis Chain and who is the block minter?
+Were any funds moved from this minter recently?
+```
+
+```plaintext
+When was the most recent reward distribution of Kinto token made to the wallet
+`0x7D467D99028199D99B1c91850C4dea0c82aDDF52` on Kinto chain?
 ```
 
 ## Development & Deployment

diff --git a/SPEC.md b/SPEC.md
@@ -493,8 +493,38 @@ Implemented via the `@log_tool_invocation` decorator, these logs capture:
 - The arguments provided to the tool.
 - The identity of the MCP client that initiated the call, including its **name**, **version**, and the **MCP protocol version** it is using.
 
+If the client name cannot be determined from the MCP session parameters, the server falls back to the HTTP `User-Agent` header as the client identifier.
+
 This provides a clear audit trail, helping to diagnose issues that may be specific to certain client versions or protocol implementations. For stateless calls, such as those from the REST API where no client is present, this information is gracefully omitted.
 
+#### 3. Mixpanel Analytics for Tool Invocation
+
+To gain insight into tool usage patterns, the server can optionally report tool invocations to Mixpanel.
+
+- Activation (opt-in only):
+  - Enabled exclusively in HTTP modes (MCP-over-HTTP and REST).
+  - Requires `BLOCKSCOUT_MIXPANEL_TOKEN` to be set; otherwise analytics are disabled.
+
+- Integration point:
+  - Tracking is centralized in `blockscout_mcp_server/analytics.py` and invoked from the shared `@log_tool_invocation` decorator so every tool is tracked consistently without altering tool implementations.
+
+- Tracked properties (per event):
+  - Client IP address derived from the HTTP request, preferring proxy headers when present: `X-Forwarded-For` (first value), then `X-Real-IP`, otherwise connection `client.host`.
+  - MCP client name (or the HTTP `User-Agent` when the client name is unavailable).
+  - MCP client version.
+  - MCP protocol version.
+  - Tool arguments (currently sent as-is, without truncation).
+  - Call source: whether the tool was invoked by MCP or via the REST API.
+
+- Anonymous identity (distinct_id) (as per Mixpanel's [documentation](https://docs.mixpanel.com/docs/tracking-methods/id-management/identifying-users-simplified#server-side-identity-management)):
+  - A stable `distinct_id` is generated to anonymously identify unique users.
+  - The `distinct_id` is a UUIDv5 (URL namespace) derived from a fingerprint string: the namespace URL (`"https://blockscout.com/mcp/"`) followed by the client IP, client name, and client version joined with `|`.
+  - This provides stable identification even when multiple clients share the same name/version (e.g., Claude Desktop), because their IPs differ.
+
+- REST API support and source attribution:
+  - The REST context mock is extended with a request context wrapper so analytics can extract IP and headers consistently (see `blockscout_mcp_server/api/dependencies.py`).
+  - A `call_source` field is introduced on the REST mock context and set to `"rest"`, allowing analytics to reliably distinguish REST API calls from MCP tool calls without coupling to specific URL paths.
+
 ### Smart Contract Interaction Tools
 
 This server exposes a tool for on-chain smart contract read-only state access. It uses the JSON-RPC `eth_call` semantics under the hood and aligns with the standardized `ToolResponse` model.

diff --git a/blockscout_mcp_server/__init__.py b/blockscout_mcp_server/__init__.py
@@ -1,3 +1,3 @@
 """Blockscout MCP Server package."""
 
-__version__ = "0.7.0"
+__version__ = "0.8.0-dev"
diff --git a/blockscout_mcp_server/analytics.py b/blockscout_mcp_server/analytics.py
@@ -0,0 +1,188 @@
+"""Centralized Mixpanel analytics for MCP tool invocations.
+
+Tracking is enabled only when:
+- BLOCKSCOUT_MIXPANEL_TOKEN is set, and
+- server runs in HTTP mode (set via set_http_mode(True)).
+
+Events are emitted via Mixpanel with a deterministic distinct_id based on a
+connection fingerprint composed of client IP, client name, and client version.
+"""
+
+from __future__ import annotations
+
+import logging
+import uuid
+from typing import Any
+
+try:
+    # Optional dependency, imported eagerly at module load; tests will mock this
+    from mixpanel import Consumer, Mixpanel
+except ImportError:  # pragma: no cover
+
+    class _MissingMixpanel:  # noqa: D401 - simple placeholder
+        """Stand-in bound to Consumer/Mixpanel when 'mixpanel' is not installed; instantiating it raises ImportError."""
+
+        def __init__(self, *args: Any, **kwargs: Any) -> None:  # noqa: D401 - simple placeholder
+            raise ImportError("Mixpanel library is not installed. Please install 'mixpanel' to use analytics features.")
+
+    Consumer = _MissingMixpanel  # type: ignore[assignment]
+    Mixpanel = _MissingMixpanel  # type: ignore[assignment]
+
+from blockscout_mcp_server.client_meta import (
+    ClientMeta,
+    extract_client_meta_from_ctx,
+    get_header_case_insensitive,
+)
+from blockscout_mcp_server.config import config
+
+logger = logging.getLogger(__name__)
+
+
+_is_http_mode_enabled: bool = False  # flipped by set_http_mode(); tracking is skipped while False
+_mp_client: Any | None = None  # Mixpanel client cached by _get_mixpanel_client() after first successful creation
+
+
+def set_http_mode(is_http: bool) -> None:
+    """Set the HTTP-mode flag gating analytics; when enabling, log whether Mixpanel tracking is active."""
+    global _is_http_mode_enabled
+    _is_http_mode_enabled = bool(is_http)
+    # Status is logged on every call that enables HTTP mode (presumably once, at startup)
+    if _is_http_mode_enabled:
+        token = getattr(config, "mixpanel_token", "")
+        if token:
+            # Report "enabled" only if the client really initialized; failures are logged by the getter
+            if _get_mixpanel_client() is not None:
+                api_host = getattr(config, "mixpanel_api_host", "") or "default"
+                logger.info("Mixpanel analytics enabled (api_host=%s)", api_host)
+        else:
+            logger.debug("Mixpanel analytics not enabled: BLOCKSCOUT_MIXPANEL_TOKEN is not set")
+
+
+def _get_mixpanel_client() -> Any | None:
+    """Return the cached Mixpanel client, creating it on first use; None if no token is set or creation fails."""
+    global _mp_client
+    if _mp_client is not None:
+        return _mp_client  # reuse the instance cached in the module-level global
+    token = getattr(config, "mixpanel_token", "")
+    if not token:
+        return None
+    try:
+        api_host = getattr(config, "mixpanel_api_host", "")
+        if api_host:
+            consumer = Consumer(api_host=api_host)  # custom host configured, e.g. a regional endpoint
+            _mp_client = Mixpanel(token, consumer=consumer)
+        else:
+            _mp_client = Mixpanel(token)  # no host configured: rely on the library default
+        return _mp_client
+    except Exception as exc:  # pragma: no cover - defensive
+        logger.debug("Failed to initialize Mixpanel client: %s", exc)
+        return None  # nothing is cached on failure, so the next call retries
+
+
+def _extract_request_ip(ctx: Any) -> str:
+    """Return the client IP from ctx.request_context.request, or an empty string when it cannot be determined."""
+    ip = ""
+    try:
+        request = getattr(getattr(ctx, "request_context", None), "request", None)
+        if request is not None:
+            headers = request.headers or {}
+            # Precedence: X-Forwarded-For, then X-Real-IP, then the connection peer (request.client.host)
+            xff = get_header_case_insensitive(headers, "x-forwarded-for", "") or ""
+            if xff:
+                # First (left-most) entry of the comma-separated list, whitespace-trimmed
+                ip = xff.split(",")[0].strip()
+            else:
+                x_real_ip = get_header_case_insensitive(headers, "x-real-ip", "") or ""
+                if x_real_ip:
+                    ip = x_real_ip  # used verbatim (not trimmed)
+                else:
+                    client = getattr(request, "client", None)
+                    if client and getattr(client, "host", None):
+                        ip = client.host
+    except Exception:  # pragma: no cover - tolerate all shapes
+        pass  # best-effort: errors are swallowed and the current ip value is returned
+    return ip
+
+
+def _build_distinct_id(ip: str, client_name: str, client_version: str) -> str:
+    # Deterministic UUIDv5 (NAMESPACE_URL) over the namespace URL plus "ip|name|version"; falsy parts become "".
+    # User-Agent is omitted: extract_client_meta_from_ctx already merges it into client_name when name is unavailable.
+    composite = "|".join([ip or "", client_name or "", client_version or ""])
+    return str(uuid.uuid5(uuid.NAMESPACE_URL, "https://blockscout.com/mcp/" + composite))
+
+
+def _determine_call_source(ctx: Any) -> str:
+    """Return the call-source label: the explicit marker if set, 'mcp' by default, or 'unknown' on error.
+
+    Priority:
+    1) A non-empty string `call_source` attribute on ctx (e.g., 'rest' set by the REST mock context), returned as-is.
+    2) Without such a marker the call is treated as 'mcp' (applies to MCP-over-HTTP).
+    """
+    try:
+        explicit = getattr(ctx, "call_source", None)
+        if isinstance(explicit, str) and explicit:
+            return explicit
+        # No explicit marker: treat as MCP (covers MCP-over-HTTP)
+        return "mcp"
+    except Exception:  # pragma: no cover
+        pass
+    return "unknown"  # reached only if reading the attribute raised
+
+
+def track_tool_invocation(
+    ctx: Any,
+    tool_name: str,
+    tool_args: dict[str, Any],
+    client_meta: ClientMeta | None = None,
+) -> None:
+    """Emit a Mixpanel event named after the tool if HTTP mode is on and a client exists; failures are only logged."""
+    if not _is_http_mode_enabled:
+        return
+    mp = _get_mixpanel_client()
+    if mp is None:
+        return
+
+    try:
+        ip = _extract_request_ip(ctx)
+
+        # Prefer provided client metadata from the decorator; otherwise, fall back to context
+        if client_meta is not None:
+            client_name = client_meta.name
+            client_version = client_meta.version
+            protocol_version = client_meta.protocol
+            user_agent = client_meta.user_agent
+        else:
+            meta = extract_client_meta_from_ctx(ctx)
+            client_name = meta.name
+            client_version = meta.version
+            protocol_version = meta.protocol
+            user_agent = meta.user_agent
+
+        distinct_id = _build_distinct_id(ip, client_name, client_version)  # same inputs always yield the same id
+
+        properties: dict[str, Any] = {
+            "ip": ip,
+            "client_name": client_name,
+            "client_version": client_version,
+            "user_agent": user_agent,
+            "tool_args": tool_args,  # passed through unmodified
+            "protocol_version": protocol_version,
+            "source": _determine_call_source(ctx),
+        }
+
+        # TODO: Remove this log after validating Mixpanel analytics end-to-end (logs IP and raw tool_args at INFO)
+        logger.info(
+            "Mixpanel event prepared: distinct_id=%s tool=%s properties=%s",
+            distinct_id,
+            tool_name,
+            properties,
+        )
+
+        meta = {"ip": ip} if ip else None  # NOTE: rebinds `meta` (a ClientMeta in the else-branch above) to a dict
+        # Mixpanel Python SDK allows meta for IP geolocation mapping
+        if meta is not None:
+            mp.track(distinct_id, tool_name, properties, meta=meta)  # type: ignore[call-arg]
+        else:
+            mp.track(distinct_id, tool_name, properties)
+    except Exception as exc:  # pragma: no cover - do not break tool flow
+        logger.debug("Mixpanel tracking failed for %s: %s", tool_name, exc)
diff --git a/blockscout_mcp_server/api/dependencies.py b/blockscout_mcp_server/api/dependencies.py
@@ -1,14 +1,35 @@
 """Dependencies for the REST API, such as mock context providers."""
 
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:  # pragma: no cover - typing-only import
+    from starlette.requests import Request
+
+
+class _RequestContextWrapper:
+    """Holds the Starlette request under `.request`, mimicking MCP's request_context shape for analytics."""
+
+    def __init__(self, request: Request) -> None:
+        self.request: Request = request  # exposed so callers can read ctx.request_context.request
+
 
 class MockCtx:
     """A mock context for stateless REST calls.
 
     Tool functions require a ``ctx`` object to report progress. Since REST
     endpoints are stateless and have no MCP session, this mock provides the
     required ``info`` and ``report_progress`` methods as no-op async functions.
+    It also exposes a ``request_context`` with the current Starlette request so
+    analytics can extract connection fingerprint data.
     """
 
+    def __init__(self, request: Request | None = None) -> None:
+        self.request_context = _RequestContextWrapper(request) if request is not None else None
+        # Explicit marker so analytics labels these calls as REST (not MCP) without coupling to URL paths
+        self.call_source = "rest"
+
     async def info(self, message: str) -> None:
         """Simulate the ``info`` method of an MCP ``Context``."""
         pass
@@ -18,6 +39,6 @@ async def report_progress(self, *args, **kwargs) -> None:
         pass
 
 
-def get_mock_context() -> MockCtx:
+def get_mock_context(request: Request | None = None) -> MockCtx:
     """Dependency provider to get a mock context for stateless REST calls."""
-    return MockCtx()
+    return MockCtx(request=request)  # forward the request (may be None) so analytics can read IP and headers