diff --git a/fasterbench/_modidx.py b/fasterbench/_modidx.py index 5820817..28cabd1 100644 --- a/fasterbench/_modidx.py +++ b/fasterbench/_modidx.py @@ -96,6 +96,40 @@ 'fasterbench/profiling.py'), 'fasterbench.profiling._tensor_bytes': ( 'analysis/profiling.html#_tensor_bytes', 'fasterbench/profiling.py')}, + 'fasterbench.report': { 'fasterbench.report.ComparisonReport': ( 'analysis/report.html#comparisonreport', + 'fasterbench/report.py'), + 'fasterbench.report.ComparisonReport.__init__': ( 'analysis/report.html#comparisonreport.__init__', + 'fasterbench/report.py'), + 'fasterbench.report.ComparisonReport.as_dict': ( 'analysis/report.html#comparisonreport.as_dict', + 'fasterbench/report.py'), + 'fasterbench.report.ComparisonReport.deltas': ( 'analysis/report.html#comparisonreport.deltas', + 'fasterbench/report.py'), + 'fasterbench.report.ComparisonReport.summary': ( 'analysis/report.html#comparisonreport.summary', + 'fasterbench/report.py'), + 'fasterbench.report.ComparisonReport.to_html': ( 'analysis/report.html#comparisonreport.to_html', + 'fasterbench/report.py'), + 'fasterbench.report.ComparisonReport.to_markdown': ( 'analysis/report.html#comparisonreport.to_markdown', + 'fasterbench/report.py'), + 'fasterbench.report.ComparisonReport.top_improvements': ( 'analysis/report.html#comparisonreport.top_improvements', + 'fasterbench/report.py'), + 'fasterbench.report.Report': ('analysis/report.html#report', 'fasterbench/report.py'), + 'fasterbench.report.Report.__init__': ('analysis/report.html#report.__init__', 'fasterbench/report.py'), + 'fasterbench.report.Report.as_dict': ('analysis/report.html#report.as_dict', 'fasterbench/report.py'), + 'fasterbench.report.Report.summary': ('analysis/report.html#report.summary', 'fasterbench/report.py'), + 'fasterbench.report.Report.to_html': ('analysis/report.html#report.to_html', 'fasterbench/report.py'), + 'fasterbench.report.Report.to_markdown': ( 'analysis/report.html#report.to_markdown', + 'fasterbench/report.py'), + 'fasterbench.report.ReportMetricDelta': ( 'analysis/report.html#reportmetricdelta', + 'fasterbench/report.py'), + 'fasterbench.report.ReportMetricDelta.as_dict': ( 'analysis/report.html#reportmetricdelta.as_dict', + 'fasterbench/report.py'), + 'fasterbench.report._extract_metrics': ( 'analysis/report.html#_extract_metrics', + 'fasterbench/report.py'), + 'fasterbench.report._format_value_with_unit': ( 'analysis/report.html#_format_value_with_unit', + 'fasterbench/report.py'), + 'fasterbench.report._generate_css': ('analysis/report.html#_generate_css', 'fasterbench/report.py'), + 'fasterbench.report._improvement_indicator': ( 'analysis/report.html#_improvement_indicator', + 'fasterbench/report.py')}, 'fasterbench.size': { 'fasterbench.size.SizeMetrics': ('metrics/size.html#sizemetrics', 'fasterbench/size.py'), 'fasterbench.size.SizeMetrics.as_dict': ('metrics/size.html#sizemetrics.as_dict', 'fasterbench/size.py'), 'fasterbench.size.compute_size': ('metrics/size.html#compute_size', 'fasterbench/size.py'), diff --git a/fasterbench/benchmark.py b/fasterbench/benchmark.py index 945457c..f3e2141 100644 --- a/fasterbench/benchmark.py +++ b/fasterbench/benchmark.py @@ -1,5 +1,3 @@ -"""Unified benchmarking API for comprehensive model analysis""" - # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/analysis/benchmark.ipynb. 
# %% ../nbs/analysis/benchmark.ipynb #bbaee268 diff --git a/fasterbench/compute.py b/fasterbench/compute.py index 2c368d4..777b4bd 100644 --- a/fasterbench/compute.py +++ b/fasterbench/compute.py @@ -1,5 +1,3 @@ -"""Compute modules for benchmarking""" - # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/metrics/compute.ipynb. # %% ../nbs/metrics/compute.ipynb #0091d170 @@ -7,7 +5,6 @@ import warnings from dataclasses import dataclass -from typing import Any import torch import torch.nn as nn diff --git a/fasterbench/core.py b/fasterbench/core.py index acaeaa5..cb6af19 100644 --- a/fasterbench/core.py +++ b/fasterbench/core.py @@ -1,5 +1,3 @@ -"""Core modules for benchmarking""" - # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/core/core.ipynb. # %% ../nbs/core/core.ipynb #c59316b7 diff --git a/fasterbench/energy.py b/fasterbench/energy.py index 1d8b1e3..3079c49 100644 --- a/fasterbench/energy.py +++ b/fasterbench/energy.py @@ -1,5 +1,3 @@ -"""Energy modules for benchmarking""" - # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/metrics/energy.ipynb. # %% ../nbs/metrics/energy.ipynb #d27f26a4 diff --git a/fasterbench/memory.py b/fasterbench/memory.py index 23d8473..edfa4d3 100644 --- a/fasterbench/memory.py +++ b/fasterbench/memory.py @@ -1,5 +1,3 @@ -"""Memory modules for benchmarking""" - # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/metrics/memory.ipynb. # %% ../nbs/metrics/memory.ipynb #16cd91b6 diff --git a/fasterbench/plot.py b/fasterbench/plot.py index 659a5e9..45c6287 100644 --- a/fasterbench/plot.py +++ b/fasterbench/plot.py @@ -1,5 +1,3 @@ -"""A module to plot the results of the benchmark""" - # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/visualization/plot.ipynb. # %% ../nbs/visualization/plot.ipynb #0c86ca8f-38d2-44d0-8204-3c977f1b1c19 diff --git a/fasterbench/profiling.py b/fasterbench/profiling.py index b8b99e7..7659907 100644 --- a/fasterbench/profiling.py +++ b/fasterbench/profiling.py @@ -1,5 +1,3 @@ -"""Per-layer profiling for deep analysis of model performance""" - # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/analysis/profiling.ipynb. # %% ../nbs/analysis/profiling.ipynb #imports diff --git a/fasterbench/size.py b/fasterbench/size.py index 0cc5ae1..b767f25 100644 --- a/fasterbench/size.py +++ b/fasterbench/size.py @@ -1,5 +1,3 @@ -"""Model size and parameter count measurement""" - # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/metrics/size.ipynb. # %% ../nbs/metrics/size.ipynb #4c37777e diff --git a/fasterbench/speed.py b/fasterbench/speed.py index c503a55..281951b 100644 --- a/fasterbench/speed.py +++ b/fasterbench/speed.py @@ -1,5 +1,3 @@ -"""Latency and throughput measurement for PyTorch models""" - # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/metrics/speed.ipynb. # %% ../nbs/metrics/speed.ipynb #e6b40d9e diff --git a/fasterbench/utils.py b/fasterbench/utils.py index 58c985c..5cf2285 100644 --- a/fasterbench/utils.py +++ b/fasterbench/utils.py @@ -1,5 +1,3 @@ -"""Utility functions""" - # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/core/utils.ipynb. 
# %% ../nbs/core/utils.ipynb #436e19d0-bb2b-47fd-8499-f38d8ac96e56 diff --git a/nbs/_quarto.yml b/nbs/_quarto.yml index 6a9257a..8066f60 100644 --- a/nbs/_quarto.yml +++ b/nbs/_quarto.yml @@ -35,6 +35,7 @@ website: contents: - tutorials/benchmark.ipynb - tutorials/profiling.ipynb + - tutorials/report.ipynb - section: Core contents: - core/core.ipynb diff --git a/nbs/analysis/benchmark.ipynb b/nbs/analysis/benchmark.ipynb index 558022a..a354ea6 100644 --- a/nbs/analysis/benchmark.ipynb +++ b/nbs/analysis/benchmark.ipynb @@ -1,10 +1,12 @@ { "cells": [ { - "cell_type": "markdown", - "id": "f86c20a0", + "cell_type": "raw", + "id": "frontmatter", "metadata": {}, - "source": "# Benchmark\n\n> Unified benchmarking API for comprehensive model analysis\n\n## Overview\n\nThe `benchmark()` function is the main entry point for fasterbench. It returns a `BenchmarkResult` with typed access to all metrics.\n\n### Quick Example\n\n```python\nfrom fasterbench import benchmark\n\nresult = benchmark(model, sample, metrics=[\"size\", \"speed\", \"compute\"])\n\n# Typed access\nprint(result.size.size_mib)\nprint(result.speed[\"cpu\"].mean_ms)\n\n# Dict access (backward compatible)\nprint(result[\"size_size_mib\"])\n```\n\n### Available Metrics\n\n| Metric | Module | What It Measures |\n|--------|--------|------------------|\n| `\"size\"` | [size](../metrics/size.html) | Disk size, parameter count |\n| `\"speed\"` | [speed](../metrics/speed.html) | Latency, throughput |\n| `\"compute\"` | [compute](../metrics/compute.html) | MACs, operation count |\n| `\"memory\"` | [memory](../metrics/memory.html) | Peak/average memory |\n| `\"energy\"` | [energy](../metrics/energy.html) | Power, CO₂ emissions |" + "source": [ + "---\ntitle: \"Benchmark\"\ndescription: \"Unified benchmarking API for PyTorch models\"\nskip_showdoc: true\n---" + ] }, { "cell_type": "code", @@ -121,13 +123,41 @@ "show_doc(benchmark)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_0", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(BenchmarkResult)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "test_basic", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\nfrom fastcore.test import *\n\nimport torch, torch.nn as nn\n_m = nn.Linear(10, 5)\n_x = torch.randn(1, 10)\n_r = benchmark(_m, _x, metrics=[\"size\"])\nassert isinstance(_r, BenchmarkResult)\nassert \"size_num_params\" in _r" + ] + }, { "cell_type": "markdown", "execution_count": null, "id": "35250fb9", "metadata": {}, "outputs": [], - "source": "---\n\n## See Also\n\n- [Getting Started Tutorial](../tutorials/tutorial.html) - Comprehensive usage examples\n- [Profiling](profiling.html) - Per-layer analysis with `LayerProfiler`\n- [Visualization](../visualization/plot.html) - Radar plots for model comparison" + "source": [ + "---\n", + "\n", + "## See Also\n", + "\n", + "- [Getting Started Tutorial](../tutorials/benchmark.html) - Comprehensive usage examples\n", + "- [Profiling](profiling.html) - Per-layer analysis with `LayerProfiler`\n", + "- [Visualization](../visualization/plot.html) - Radar plots for model comparison" + ] } ], "metadata": { diff --git a/nbs/analysis/profiling.ipynb b/nbs/analysis/profiling.ipynb index 8cdeb4c..a68bfea 100644 --- a/nbs/analysis/profiling.ipynb +++ b/nbs/analysis/profiling.ipynb @@ -1,13 +1,11 @@ { "cells": [ { - "cell_type": "markdown", - "id": "header", + "cell_type": "raw", + "id": "frontmatter", "metadata": {}, "source": [ - "# Profiling\n", - "\n", - "> Per-layer profiling for deep 
analysis of model performance" + "---\ntitle: \"Profiling\"\ndescription: \"Per-layer profiling for deep analysis of model performance\"\nskip_showdoc: true\n---" ] }, { @@ -20,6 +18,16 @@ "#| default_exp profiling" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "setup_showdoc", + "metadata": {}, + "outputs": [], + "source": [ + "#| include: false\nfrom nbdev.showdoc import *" + ] + }, { "cell_type": "code", "execution_count": null, @@ -43,6 +51,64 @@ "metadata": {}, "outputs": [], "source": "#| export\nclass LayerProfiler:\n \"\"\"Unified per-layer profiler for multiple metrics (speed, memory, size, compute).\"\"\"\n \n VALID_METRICS = frozenset({\"speed\", \"memory\", \"size\", \"compute\"})\n _CONFIG = {\n \"speed\": {\n \"col\": \"speed_ms\", \"pct\": \"speed_percent\", \"unit\": \"ms\",\n \"label\": \"Speed (slowest)\",\n \"src_col\": \"time_ms\",\n \"format\": _fmt_float,\n },\n \"memory\": {\n \"col\": \"memory_mib\", \"pct\": \"memory_percent\", \"unit\": \"MiB\",\n \"label\": \"Memory (largest)\",\n \"src_col\": \"memory_mib\",\n \"format\": _fmt_float,\n },\n \"size\": {\n \"col\": \"params\", \"pct\": \"params_percent\", \"unit\": \"\",\n \"label\": \"Parameters (largest)\",\n \"src_col\": \"params\",\n \"format\": _fmt_table,\n },\n \"compute\": {\n \"col\": \"macs\", \"pct\": \"macs_percent\", \"unit\": \"\",\n \"label\": \"MACs (most)\",\n \"src_col\": \"macs\",\n \"format\": _fmt_macs,\n },\n }\n \n def __init__(\n self,\n model: nn.Module, # model to profile\n sample: torch.Tensor, # input tensor (with batch dimension)\n ):\n self.model = model\n self.sample = sample\n self._leaf_modules = _leaf_modules(model)\n self._results: list[dict] = []\n self._profiled_metrics: list[str] = []\n \n def profile(\n self,\n metrics: str | Sequence[str] = \"speed\", # metrics to profile: speed, memory, size, compute\n *,\n device: str | torch.device = \"cpu\", # device for speed/memory profiling\n warmup: int = 5, # warmup iterations\n steps: int = 20, # measurement iterations\n ) -> list[dict]:\n \"\"\"Profile specified metrics for each layer, returns list of dicts.\"\"\"\n if isinstance(metrics, str):\n metrics = [metrics]\n metrics = list(metrics)\n \n invalid = set(metrics) - self.VALID_METRICS\n if invalid:\n raise ValueError(f\"Invalid metrics: {invalid}. 
Valid: {self.VALID_METRICS}\")\n \n # Initialize results dict for each leaf module\n results: dict[str, dict] = {\n name: {\"name\": name, \"type\": mod.__class__.__name__}\n for name, mod in self._leaf_modules.items()\n }\n \n # Profile each metric using unified _profile_layers\n for metric in metrics:\n cfg = self._CONFIG[metric]\n profile_data = _profile_layers(\n self.model, self.sample,\n device=device, metric=metric, warmup=warmup, steps=steps\n )\n \n # Merge results\n for r in profile_data:\n name = r[\"name\"]\n if name in results:\n results[name][cfg[\"col\"]] = r[cfg[\"src_col\"]]\n results[name][cfg[\"pct\"]] = r[\"percent\"]\n \n out = list(results.values())\n \n # Sort by first metric\n sort_col = self._CONFIG[metrics[0]][\"col\"]\n out.sort(key=lambda x: x.get(sort_col, 0) or 0, reverse=True)\n \n # Store for top() and summary()\n self._results = out\n self._profiled_metrics = metrics\n \n return out\n \n def top(\n self,\n metric: str, # metric to sort by: speed, memory, size, compute\n n: int = 5, # number of layers to return\n *,\n ascending: bool = False, # if True, return smallest/fastest instead of largest/slowest\n ) -> list[dict]:\n \"\"\"Get top N layers sorted by the specified metric.\"\"\"\n if not self._results:\n raise RuntimeError(\"No results available. Call profile() first.\")\n \n if metric not in self._CONFIG:\n raise ValueError(f\"Invalid metric: {metric}. Valid: {list(self._CONFIG.keys())}\")\n \n col = self._CONFIG[metric][\"col\"]\n \n # Check if metric was profiled\n if col not in self._results[0]:\n raise ValueError(f\"Metric '{metric}' was not profiled. Profiled: {self._profiled_metrics}\")\n \n sorted_results = sorted(\n self._results,\n key=lambda x: x.get(col, 0) or 0,\n reverse=not ascending\n )\n return sorted_results[:n]\n \n def summary(self, *, top: int = 5) -> None:\n \"\"\"Print a formatted summary of top layers for each profiled metric.\"\"\"\n if not self._results:\n raise RuntimeError(\"No results available. 
Call profile() first.\")\n \n for metric in self._profiled_metrics:\n cfg = self._CONFIG[metric]\n col, pct_col = cfg[\"col\"], cfg[\"pct\"]\n \n # Check if metric data exists\n if col not in self._results[0]:\n continue\n \n print(_section(cfg['label'], 54))\n \n sorted_layers = sorted(\n self._results,\n key=lambda x: x.get(col, 0) or 0,\n reverse=True\n )[:top]\n \n for r in sorted_layers:\n val = r.get(col, 0) or 0\n pct = r.get(pct_col, 0) or 0\n val_str = cfg[\"format\"](val)\n if cfg[\"unit\"]:\n val_str += f\" {cfg['unit']}\"\n print(f\" {r['name']:40} {r['type']:15} {val_str} ({pct:5.1f}%)\")\n print()" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_0", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(LayerProfiler)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_1", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(LayerProfiler.profile)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_2", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(LayerProfiler.top)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_3", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(LayerProfiler.summary)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "test_basic", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\nfrom fastcore.test import *\n\nimport torch, torch.nn as nn\n_m = nn.Sequential(nn.Linear(10, 20), nn.ReLU(), nn.Linear(20, 5))\n_x = torch.randn(1, 10)\n_lp = LayerProfiler(_m, _x)\n_results = _lp.profile(metrics=[\"size\"])\nassert len(_results) > 0" + ] + }, + { + "cell_type": "markdown", + "id": "see_also", + "metadata": {}, + "source": [ + "---\n\n## See Also\n\n- [Benchmark](benchmark.html) — Unified API\n- [Profiling Tutorial](../tutorials/profiling.html) — Step-by-step guide" + ] } ], "metadata": { diff --git a/nbs/analysis/report.ipynb b/nbs/analysis/report.ipynb new file mode 100644 index 0000000..43862ea --- /dev/null +++ b/nbs/analysis/report.ipynb @@ -0,0 +1,187 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "frontmatter", + "metadata": {}, + "source": [ + "---\ntitle: \"Report\"\ndescription: \"Professional reporting for benchmark results\"\nskip_showdoc: true\n---" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "default_exp", + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "setup", + "metadata": {}, + "outputs": [], + "source": [ + "#| include: false\nfrom nbdev.showdoc import *" + ] + }, + { + "cell_type": "markdown", + "id": "overview", + "metadata": {}, + "source": [ + "## Overview\n\n`Report` and `ComparisonReport` generate professional benchmark reports in console, HTML, and Markdown. `ComparisonReport` computes metric deltas and highlights improvements." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b819eb00", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\nfrom __future__ import annotations\n\nfrom dataclasses import dataclass, asdict\nfrom typing import Any\nfrom pathlib import Path\n\nfrom fasterbench.benchmark import BenchmarkResult\nfrom fasterbench.core import _fmt_human, _section\nfrom fasterbench.plot import create_radar_plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b4061f4", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n_METRIC_CONFIG: dict[str, dict[str, Any]] = {\n \"params\": {\n \"label\": \"Parameters\",\n \"unit\": \"\",\n \"format\": _fmt_human,\n \"lower_is_better\": True,\n \"extract\": lambda r: r.size.num_params if r.size else None,\n },\n \"size_mib\": {\n \"label\": \"Model Size\",\n \"unit\": \"MiB\",\n \"format\": lambda v: f\"{v:.2f}\",\n \"lower_is_better\": True,\n \"extract\": lambda r: r.size.size_mib if r.size else None,\n },\n \"latency_cpu\": {\n \"label\": \"Latency (CPU)\",\n \"unit\": \"ms\",\n \"format\": lambda v: f\"{v:.2f}\",\n \"lower_is_better\": True,\n \"extract\": lambda r: r.speed.get(\"cpu\", None) and r.speed[\"cpu\"].mean_ms,\n },\n \"latency_cuda\": {\n \"label\": \"Latency (CUDA)\",\n \"unit\": \"ms\",\n \"format\": lambda v: f\"{v:.2f}\",\n \"lower_is_better\": True,\n \"extract\": lambda r: r.speed.get(\"cuda\", None) and r.speed[\"cuda\"].mean_ms,\n },\n \"throughput_cpu\": {\n \"label\": \"Throughput (CPU)\",\n \"unit\": \"inf/s\",\n \"format\": lambda v: f\"{v:.1f}\",\n \"lower_is_better\": False,\n \"extract\": lambda r: r.speed.get(\"cpu\", None) and r.speed[\"cpu\"].throughput_s,\n },\n \"throughput_cuda\": {\n \"label\": \"Throughput (CUDA)\",\n \"unit\": \"inf/s\",\n \"format\": lambda v: f\"{v:.1f}\",\n \"lower_is_better\": False,\n \"extract\": lambda r: r.speed.get(\"cuda\", None) and r.speed[\"cuda\"].throughput_s,\n },\n \"macs\": {\n \"label\": \"MACs\",\n \"unit\": \"M\",\n \"format\": lambda v: f\"{v:.1f}\",\n \"lower_is_better\": True,\n \"extract\": lambda r: r.compute.macs_m if r.compute and r.compute.macs_available else None,\n },\n \"memory_cpu\": {\n \"label\": \"Memory (CPU)\",\n \"unit\": \"MiB\",\n \"format\": lambda v: f\"{v:.2f}\",\n \"lower_is_better\": True,\n \"extract\": lambda r: r.memory.get(\"cpu\", None) and r.memory[\"cpu\"].peak_mib,\n },\n \"memory_cuda\": {\n \"label\": \"Memory (CUDA)\",\n \"unit\": \"MiB\",\n \"format\": lambda v: f\"{v:.2f}\",\n \"lower_is_better\": True,\n \"extract\": lambda r: r.memory.get(\"cuda\", None) and r.memory[\"cuda\"].peak_mib,\n },\n \"energy_cpu\": {\n \"label\": \"Energy (CPU)\",\n \"unit\": \"Wh\",\n \"format\": lambda v: f\"{v:.4f}\",\n \"lower_is_better\": True,\n \"extract\": lambda r: r.energy.get(\"cpu\", None) and r.energy[\"cpu\"].energy_wh,\n },\n}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed9c9bf3", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n@dataclass(slots=True)\nclass ReportMetricDelta:\n \"\"\"Represents the change in a metric between two benchmark results.\"\"\"\n name: str # metric key (e.g., \"latency_cpu\")\n label: str # display label (e.g., \"Latency (CPU)\")\n before: float # value before optimization\n after: float # value after optimization\n delta: float # absolute change (after - before)\n delta_pct: float # percentage change\n improved: bool # whether change is an improvement (direction-aware)\n unit: str # unit for display\n \n def as_dict(self) -> 
dict[str, Any]:\n \"\"\"Convert to dictionary for serialization.\"\"\"\n return asdict(self)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbc6f7a8", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\ndef _generate_css() -> str:\n \"\"\"Generate clean, professional CSS for HTML reports.\"\"\"\n return \"\"\"\n\n\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "671545e0", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\ndef _improvement_indicator(improved: bool) -> str:\n \"\"\"Return checkmark or X based on improvement status.\"\"\"\n return \"\\u2713\" if improved else \"\\u2717\" # ✓ or ✗" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb787a70", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\ndef _format_value_with_unit(value: float, cfg: dict) -> str:\n \"\"\"Format a metric value with its unit.\"\"\"\n formatted = cfg[\"format\"](value)\n if cfg[\"unit\"]:\n return f\"{formatted} {cfg['unit']}\"\n return formatted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ec1d051", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\ndef _extract_metrics(result: BenchmarkResult) -> dict[str, float | None]:\n \"\"\"Extract all available metrics from a BenchmarkResult.\"\"\"\n metrics = {}\n for key, cfg in _METRIC_CONFIG.items():\n try:\n value = cfg[\"extract\"](result)\n metrics[key] = value\n except (AttributeError, TypeError, KeyError):\n metrics[key] = None\n return metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a3f85b2", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\nclass Report:\n \"\"\"Professional report for a single model's benchmark results.\"\"\"\n \n VALID_FORMATS = frozenset({\"html\", \"markdown\"})\n \n def __init__(\n self,\n result: BenchmarkResult, # benchmark result to report on\n *,\n model_name: str = \"Model\", # name for display\n description: str = \"\", # optional description\n ):\n self.result = result\n self.model_name = model_name\n self.description = description\n self._metrics = _extract_metrics(result)\n \n def summary(self) -> None:\n \"\"\"Print a formatted console summary.\"\"\"\n width = 60\n print(\"=\" * width)\n print(f\"{self.model_name:^{width}}\")\n print(\"=\" * width)\n \n if self.description:\n print(f\"\\n{self.description}\\n\")\n \n print(_section(\"Metrics\", width))\n \n for key, value in self._metrics.items():\n if value is not None:\n cfg = _METRIC_CONFIG[key]\n formatted = _format_value_with_unit(value, cfg)\n print(f\" {cfg['label']:.<30} {formatted}\")\n print()\n \n def as_dict(self) -> dict[str, Any]:\n \"\"\"Return report data as dictionary.\"\"\"\n return {\n \"model_name\": self.model_name,\n \"description\": self.description,\n \"metrics\": {\n k: v for k, v in self._metrics.items() if v is not None\n },\n \"raw_result\": self.result.as_dict(),\n }\n \n def to_markdown(self, path: str | Path | None = None) -> str:\n \"\"\"Generate markdown report.\n \n Args:\n path: If provided, write to file. 
Otherwise return string.\n \n Returns:\n Markdown string.\n \"\"\"\n lines = [\n f\"# {self.model_name} Benchmark Report\",\n \"\",\n ]\n \n if self.description:\n lines.extend([f\"*{self.description}*\", \"\"])\n \n lines.extend([\n \"## Metrics\",\n \"\",\n \"| Metric | Value |\",\n \"|--------|-------|\",\n ])\n \n for key, value in self._metrics.items():\n if value is not None:\n cfg = _METRIC_CONFIG[key]\n formatted = _format_value_with_unit(value, cfg)\n lines.append(f\"| {cfg['label']} | {formatted} |\")\n \n content = \"\\n\".join(lines)\n \n if path:\n Path(path).write_text(content)\n \n return content\n \n def to_html(\n self,\n path: str | Path | None = None, # output file path (optional)\n *,\n include_charts: bool = True, # include radar chart\n ) -> str:\n \"\"\"Generate HTML report with optional charts.\n \n Args:\n path: If provided, write to file. Otherwise return string.\n include_charts: Whether to embed radar chart.\n \n Returns:\n HTML string.\n \"\"\"\n # Build metrics table\n rows = []\n for key, value in self._metrics.items():\n if value is not None:\n cfg = _METRIC_CONFIG[key]\n formatted = _format_value_with_unit(value, cfg)\n rows.append(f\"\"\"\n \n {cfg['label']}\n {formatted}\n \"\"\")\n \n table_rows = \"\\n\".join(rows)\n \n # Build chart if requested\n chart_html = \"\"\n if include_charts:\n try:\n fig = create_radar_plot([self.result], [self.model_name])\n chart_html = f\"\"\"\n
 <div class=\"chart\">\n {fig.to_html(include_plotlyjs='cdn', full_html=False)}\n </div>\n \"\"\"\n except Exception:\n chart_html = \"\" # Skip chart on error\n \n description_html = f'<p class=\"description\">{self.description}</p>' if self.description else \"\"\n \n html = f\"\"\"\n<!DOCTYPE html>\n<html>\n<head>\n <meta charset=\"utf-8\">\n <title>{self.model_name} Benchmark Report</title>\n {_generate_css()}\n</head>\n<body>\n <div class=\"container\">\n <h1>{self.model_name}</h1>\n <p class=\"subtitle\">Benchmark Report</p>\n {description_html}\n <h2>Performance Metrics</h2>\n <table>\n <thead>\n <tr>\n <th>Metric</th>\n <th>Value</th>\n </tr>\n </thead>\n <tbody>\n {table_rows}\n </tbody>\n </table>\n {chart_html}\n </div>\n</body>\n</html>
\n\n\"\"\"\n \n if path:\n Path(path).write_text(html)\n \n return html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "638fd85b", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\nclass ComparisonReport:\n \"\"\"Professional report comparing two benchmark results (before/after optimization).\"\"\"\n \n VALID_FORMATS = frozenset({\"html\", \"markdown\"})\n \n def __init__(\n self,\n before: BenchmarkResult, # benchmark result before optimization\n after: BenchmarkResult, # benchmark result after optimization\n *,\n before_name: str = \"Original\", # display name for before model\n after_name: str = \"Optimized\", # display name for after model\n title: str = \"Model Compression Report\", # report title\n ):\n self.before = before\n self.after = after\n self.before_name = before_name\n self.after_name = after_name\n self.title = title\n \n self._before_metrics = _extract_metrics(before)\n self._after_metrics = _extract_metrics(after)\n self._deltas: list[ReportMetricDelta] | None = None\n \n @property\n def deltas(self) -> list[ReportMetricDelta]:\n \"\"\"Compute and return metric deltas between before and after.\"\"\"\n if self._deltas is not None:\n return self._deltas\n \n deltas = []\n for key, cfg in _METRIC_CONFIG.items():\n before_val = self._before_metrics.get(key)\n after_val = self._after_metrics.get(key)\n \n # Skip if either value is missing\n if before_val is None or after_val is None:\n continue\n \n delta = after_val - before_val\n delta_pct = (delta / before_val * 100) if before_val != 0 else 0.0\n \n # Determine if this is an improvement based on direction\n if cfg[\"lower_is_better\"]:\n improved = delta < 0\n else:\n improved = delta > 0\n \n deltas.append(ReportMetricDelta(\n name=key,\n label=cfg[\"label\"],\n before=before_val,\n after=after_val,\n delta=delta,\n delta_pct=delta_pct,\n improved=improved,\n unit=cfg[\"unit\"],\n ))\n \n self._deltas = deltas\n return deltas\n \n def top_improvements(self, n: int = 5) -> list[ReportMetricDelta]:\n \"\"\"Return top N improvements sorted by absolute percentage change.\"\"\"\n improved = [d for d in self.deltas if d.improved]\n return sorted(improved, key=lambda d: abs(d.delta_pct), reverse=True)[:n]\n \n def summary(self) -> None:\n \"\"\"Print a formatted console summary.\"\"\"\n width = 65\n \n print(\"=\" * width)\n print(f\"{self.title:^{width}}\")\n print(\"=\" * width)\n print()\n print(f\"Before: {self.before_name}\")\n print(f\"After: {self.after_name}\")\n print()\n \n # Executive summary table\n print(_section(\"Executive Summary\", width))\n print(f\"{'':20} {'Before':>15} {'After':>15} {'Change':>12}\")\n \n for d in self.deltas:\n cfg = _METRIC_CONFIG[d.name]\n before_str = _format_value_with_unit(d.before, cfg)\n after_str = _format_value_with_unit(d.after, cfg)\n \n sign = \"+\" if d.delta_pct > 0 else \"\"\n indicator = _improvement_indicator(d.improved)\n change_str = f\"{sign}{d.delta_pct:.1f}% {indicator}\"\n \n print(f\" {d.label:.<18} {before_str:>15} {after_str:>15} {change_str:>12}\")\n \n print()\n \n # Top improvements\n top = self.top_improvements()\n if top:\n print(_section(\"Top Improvements\", width))\n for i, d in enumerate(top, 1):\n print(f\" {i}. 
{d.label}: {d.delta_pct:.1f}%\")\n print()\n \n def as_dict(self) -> dict[str, Any]:\n \"\"\"Return report data as dictionary.\"\"\"\n return {\n \"title\": self.title,\n \"before_name\": self.before_name,\n \"after_name\": self.after_name,\n \"deltas\": [d.as_dict() for d in self.deltas],\n \"before_metrics\": self._before_metrics,\n \"after_metrics\": self._after_metrics,\n }\n \n def to_markdown(self, path: str | Path | None = None) -> str:\n \"\"\"Generate markdown comparison report.\n \n Args:\n path: If provided, write to file. Otherwise return string.\n \n Returns:\n Markdown string.\n \"\"\"\n lines = [\n f\"# {self.title}\",\n \"\",\n f\"**Before:** {self.before_name}\",\n f\"**After:** {self.after_name}\",\n \"\",\n \"## Summary\",\n \"\",\n \"| Metric | Before | After | Change |\",\n \"|--------|--------|-------|--------|\",\n ]\n \n for d in self.deltas:\n cfg = _METRIC_CONFIG[d.name]\n before_str = _format_value_with_unit(d.before, cfg)\n after_str = _format_value_with_unit(d.after, cfg)\n sign = \"+\" if d.delta_pct > 0 else \"\"\n indicator = _improvement_indicator(d.improved)\n change_str = f\"{sign}{d.delta_pct:.1f}% {indicator}\"\n lines.append(f\"| {d.label} | {before_str} | {after_str} | {change_str} |\")\n \n # Top improvements section\n top = self.top_improvements()\n if top:\n lines.extend([\n \"\",\n \"## Top Improvements\",\n \"\",\n ])\n for i, d in enumerate(top, 1):\n lines.append(f\"{i}. **{d.label}**: {d.delta_pct:.1f}%\")\n \n content = \"\\n\".join(lines)\n \n if path:\n Path(path).write_text(content)\n \n return content\n \n def to_html(\n self,\n path: str | Path | None = None, # output file path (optional)\n *,\n include_charts: bool = True, # include radar chart comparison\n ) -> str:\n \"\"\"Generate HTML comparison report with optional charts.\n \n Args:\n path: If provided, write to file. Otherwise return string.\n include_charts: Whether to embed radar chart comparison.\n \n Returns:\n HTML string.\n \"\"\"\n # Build comparison table rows\n rows = []\n for d in self.deltas:\n cfg = _METRIC_CONFIG[d.name]\n before_str = _format_value_with_unit(d.before, cfg)\n after_str = _format_value_with_unit(d.after, cfg)\n \n sign = \"+\" if d.delta_pct > 0 else \"\"\n change_class = \"change-positive\" if d.improved else \"change-negative\"\n indicator = _improvement_indicator(d.improved)\n \n rows.append(f\"\"\"\n \n {d.label}\n {before_str}\n {after_str}\n {sign}{d.delta_pct:.1f}%{indicator}\n \"\"\")\n \n table_rows = \"\\n\".join(rows)\n \n # Build top improvements list\n top = self.top_improvements()\n top_items = \"\".join([\n f\"
<li>{d.label}: {d.delta_pct:.1f}%</li>\"\n for d in top\n ])\n top_html = f\"<ul>{top_items}</ul>\" if top else \"\"\n \n # Build chart if requested\n chart_html = \"\"\n if include_charts:\n try:\n fig = create_radar_plot(\n [self.before, self.after],\n [self.before_name, self.after_name]\n )\n chart_html = f\"\"\"\n <div class=\"chart\">\n {fig.to_html(include_plotlyjs='cdn', full_html=False)}\n </div>\n \"\"\"\n except Exception:\n chart_html = \"\" # Skip chart on error\n \n html = f\"\"\"\n<!DOCTYPE html>\n<html>\n<head>\n <meta charset=\"utf-8\">\n <title>{self.title}</title>\n {_generate_css()}\n</head>\n<body>\n <div class=\"container\">\n <h1>{self.title}</h1>\n <p>Before: {self.before_name}</p>\n <p>After: {self.after_name}</p>\n <h2>Executive Summary</h2>\n <table>\n <thead>\n <tr>\n <th>Metric</th>\n <th>Before</th>\n <th>After</th>\n <th>Change</th>\n </tr>\n </thead>\n <tbody>\n {table_rows}\n </tbody>\n </table>\n <h2>Top Improvements</h2>\n {top_html}\n {chart_html}\n </div>\n</body>\n</html>
    \n\n\"\"\"\n \n if path:\n Path(path).write_text(html)\n \n return html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_delta", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(ReportMetricDelta)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_report", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(Report)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_comp", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(ComparisonReport)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "test_basic", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\nfrom fastcore.test import *\nimport torch, torch.nn as nn\nfrom fasterbench import benchmark\n\n_m = nn.Linear(10, 5)\n_x = torch.randn(1, 10)\n_r = benchmark(_m, _x, metrics=[\"size\"])\n_report = Report(_r, model_name=\"TestModel\")\nassert _report.model_name == \"TestModel\"\n_report.summary()\n_md = _report.to_markdown()\nassert \"TestModel\" in _md\n\n_r2 = benchmark(nn.Linear(10, 3), torch.randn(1, 10), metrics=[\"size\"])\n_comp = ComparisonReport(_r, _r2, before_name=\"Big\", after_name=\"Small\")\nassert len(_comp.deltas) > 0\n_comp.summary()" + ] + }, + { + "cell_type": "markdown", + "id": "see_also", + "metadata": {}, + "source": [ + "---\n\n## See Also\n\n- [Benchmark](benchmark.html) — Unified benchmarking API\n- [Report Tutorial](../tutorials/report.html) — Full compression workflow" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/nbs/core/core.ipynb b/nbs/core/core.ipynb index a07fb09..5840c12 100644 --- a/nbs/core/core.ipynb +++ b/nbs/core/core.ipynb @@ -1,13 +1,11 @@ { "cells": [ { - "cell_type": "markdown", - "id": "0bbc6d9c", + "cell_type": "raw", + "id": "frontmatter", "metadata": {}, "source": [ - "# core\n", - "\n", - "> Core modules for benchmarking" + "---\ntitle: \"Core\"\ndescription: \"Internal utilities for benchmarking infrastructure\"\nskip_showdoc: true\n---" ] }, { @@ -20,6 +18,16 @@ "#| default_exp core" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "setup_showdoc", + "metadata": {}, + "outputs": [], + "source": [ + "#| include: false\nfrom nbdev.showdoc import *" + ] + }, { "cell_type": "code", "execution_count": null, @@ -45,20 +53,18 @@ { "cell_type": "code", "execution_count": null, - "id": "eb1c9ec1", + "id": "1d0944ae", "metadata": {}, "outputs": [], - "source": [ - "import nbdev; nbdev.nbdev_export()" - ] + "source": [] }, { - "cell_type": "code", - "execution_count": null, - "id": "1d0944ae", + "cell_type": "markdown", + "id": "see_also", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "---\n\n## See Also\n\n- [Benchmark](../analysis/benchmark.html) — Main benchmarking API" + ] } ], "metadata": { diff --git a/nbs/core/utils.ipynb b/nbs/core/utils.ipynb index d195ee3..3e95820 100644 --- a/nbs/core/utils.ipynb +++ b/nbs/core/utils.ipynb @@ -1,13 +1,11 @@ { "cells": [ { - "cell_type": "markdown", - "id": "eb7b0e2e-f1e7-4b51-b790-d854ae073776", + "cell_type": "raw", + "id": "frontmatter", "metadata": {}, "source": [ - "# utils\n", - "\n", - "> Utility functions" + "---\ntitle: \"Utils\"\ndescription: \"Utility functions for parsing metric values\"\nskip_showdoc: true\n---" ] }, { @@ -20,6 +18,16 @@ "#| default_exp utils" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"setup_showdoc", + "metadata": {}, + "outputs": [], + "source": [ + "#| include: false\nfrom nbdev.showdoc import *" + ] + }, { "cell_type": "code", "execution_count": null, @@ -51,6 +59,34 @@ " multipliers = {'K': 0.001, 'M': 1.0, 'G': 1000.0}\n", " return num * multipliers.get(suffix, 1.0)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_0", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(parse_metric_value)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "test_basic", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\nfrom fastcore.test import *\n\ntest_close(parse_metric_value(\"1.5M\"), 1.5, eps=0.01)\ntest_close(parse_metric_value(\"500K\"), 0.5, eps=0.01)" + ] + }, + { + "cell_type": "markdown", + "id": "see_also", + "metadata": {}, + "source": [ + "---\n\n## See Also\n\n- [Benchmark](../analysis/benchmark.html) — Main benchmarking API" + ] } ], "metadata": { diff --git a/nbs/metrics/compute.ipynb b/nbs/metrics/compute.ipynb index 882b82e..261b566 100644 --- a/nbs/metrics/compute.ipynb +++ b/nbs/metrics/compute.ipynb @@ -1,13 +1,11 @@ { "cells": [ { - "cell_type": "markdown", - "id": "8bde6cc0", + "cell_type": "raw", + "id": "frontmatter", "metadata": {}, "source": [ - "# Compute\n", - "\n", - "> Compute modules for benchmarking" + "---\ntitle: \"Compute\"\ndescription: \"MACs and FLOPs computation for model complexity analysis\"\nskip_showdoc: true\n---" ] }, { @@ -20,13 +18,42 @@ "#| default_exp compute" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "setup_showdoc", + "metadata": {}, + "outputs": [], + "source": [ + "#| include: false\nfrom nbdev.showdoc import *" + ] + }, { "cell_type": "code", "execution_count": null, "id": "0091d170", "metadata": {}, "outputs": [], - "source": "#| export\nfrom __future__ import annotations\n\nimport warnings\nfrom dataclasses import dataclass\nfrom typing import Any\n\nimport torch\nimport torch.nn as nn\n\ntry:\n from thop import profile as _thop_profile\nexcept ImportError:\n _thop_profile = None\n\ntry:\n from torchprofile import profile_macs as _profile_macs\nexcept ImportError:\n _profile_macs = None" + "source": [ + "#| export\n", + "from __future__ import annotations\n", + "\n", + "import warnings\n", + "from dataclasses import dataclass\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "\n", + "try:\n", + " from thop import profile as _thop_profile\n", + "except ImportError:\n", + " _thop_profile = None\n", + "\n", + "try:\n", + " from torchprofile import profile_macs as _profile_macs\n", + "except ImportError:\n", + " _profile_macs = None" + ] }, { "cell_type": "code", @@ -43,6 +70,44 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_0", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(ComputeMetrics)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_1", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(compute_compute)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "test_basic", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\nfrom fastcore.test import *\n\nimport torch, torch.nn as nn\n_m = nn.Linear(10, 5)\n_x = torch.randn(1, 10)\n_c = compute_compute(_m, _x)\nassert isinstance(_c, ComputeMetrics)" + ] + }, + { + "cell_type": "markdown", + "id": "see_also", + "metadata": {}, + "source": [ + "---\n\n## See Also\n\n- [Size](size.html) — Model size measurement\n- 
[Benchmark](../analysis/benchmark.html) — Unified API" + ] } ], "metadata": { diff --git a/nbs/metrics/energy.ipynb b/nbs/metrics/energy.ipynb index 047ab09..ea9c4f4 100644 --- a/nbs/metrics/energy.ipynb +++ b/nbs/metrics/energy.ipynb @@ -1,13 +1,11 @@ { "cells": [ { - "cell_type": "markdown", - "id": "a55d56d5", + "cell_type": "raw", + "id": "frontmatter", "metadata": {}, "source": [ - "# Energy\n", - "\n", - "> Energy modules for benchmarking" + "---\ntitle: \"Energy\"\ndescription: \"Energy consumption and carbon footprint measurement\"\nskip_showdoc: true\n---" ] }, { @@ -20,6 +18,16 @@ "#| default_exp energy" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "setup_showdoc", + "metadata": {}, + "outputs": [], + "source": [ + "#| include: false\nfrom nbdev.showdoc import *" + ] + }, { "cell_type": "code", "execution_count": null, @@ -43,6 +51,44 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_0", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(EnergyMetrics)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_1", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(compute_energy)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_2", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(compute_energy_multi)" + ] + }, + { + "cell_type": "markdown", + "id": "see_also", + "metadata": {}, + "source": [ + "---\n\n## See Also\n\n- [Memory](memory.html) — Memory measurement\n- [Benchmark](../analysis/benchmark.html) — Unified API" + ] } ], "metadata": {}, diff --git a/nbs/metrics/memory.ipynb b/nbs/metrics/memory.ipynb index 6bf627b..5e92945 100644 --- a/nbs/metrics/memory.ipynb +++ b/nbs/metrics/memory.ipynb @@ -1,13 +1,11 @@ { "cells": [ { - "cell_type": "markdown", - "id": "2530b862", + "cell_type": "raw", + "id": "frontmatter", "metadata": {}, "source": [ - "# Memory\n", - "\n", - "> Memory modules for benchmarking" + "---\ntitle: \"Memory\"\ndescription: \"Memory consumption measurement for PyTorch models\"\nskip_showdoc: true\n---" ] }, { @@ -20,6 +18,16 @@ "#| default_exp memory" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "setup_showdoc", + "metadata": {}, + "outputs": [], + "source": [ + "#| include: false\nfrom nbdev.showdoc import *" + ] + }, { "cell_type": "code", "execution_count": null, @@ -43,6 +51,54 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_0", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(MemoryMetrics)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_1", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(compute_memory)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_2", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(compute_memory_multi)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "test_basic", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\nfrom fastcore.test import *\n\nimport torch, torch.nn as nn\n_m = nn.Linear(10, 5)\n_x = torch.randn(1, 10)\n_mem = compute_memory(_m, _x, warmup=2, steps=5)\nassert isinstance(_mem, MemoryMetrics)\nassert _mem.avg_mib >= 0" + ] + }, + { + "cell_type": "markdown", + "id": "see_also", + "metadata": {}, + "source": [ + "---\n\n## See Also\n\n- [Speed](speed.html) — Latency measurement\n- [Benchmark](../analysis/benchmark.html) — Unified API" + ] 
} ], "metadata": {}, diff --git a/nbs/metrics/size.ipynb b/nbs/metrics/size.ipynb index 9df30db..b7a1f31 100644 --- a/nbs/metrics/size.ipynb +++ b/nbs/metrics/size.ipynb @@ -1,10 +1,12 @@ { "cells": [ { - "cell_type": "markdown", - "id": "b36fc064", + "cell_type": "raw", + "id": "frontmatter", "metadata": {}, - "source": "# Size\n\n> Model size and parameter count measurement\n\n## Overview\n\nThe size module measures model storage requirements and parameter counts.\n\n| Function | Purpose |\n|----------|---------|\n| `compute_size()` | Get all size metrics as `SizeMetrics` dataclass |\n| `get_model_size()` | Get serialized model size in bytes |\n| `get_num_parameters()` | Count trainable parameters |\n\n### SizeMetrics\n\n```python\n@dataclass\nclass SizeMetrics:\n disk_bytes: int # Serialized model size\n size_mib: float # Size in MiB \n num_params: int # Parameter count\n```" + "source": [ + "---\ntitle: \"Size\"\ndescription: \"Model size and parameter count measurement\"\nskip_showdoc: true\n---" + ] }, { "cell_type": "code", @@ -16,6 +18,16 @@ "#| default_exp size" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "setup_showdoc", + "metadata": {}, + "outputs": [], + "source": [ + "#| include: false\nfrom nbdev.showdoc import *" + ] + }, { "cell_type": "code", "execution_count": null, @@ -85,6 +97,56 @@ " return SizeMetrics(disk_bytes=disk, size_mib=_bytes_to_mib(disk), num_params=params)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_0", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(SizeMetrics)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_1", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(compute_size)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_2", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(get_model_size)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_3", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(get_num_parameters)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "test_basic", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\nfrom fastcore.test import *\n\nimport torch.nn as nn\n_m = nn.Linear(10, 5)\n_s = compute_size(_m)\nassert isinstance(_s, SizeMetrics)\nassert _s.num_params > 0\ntest_eq(get_num_parameters(_m), 55)" + ] + }, { "cell_type": "markdown", "execution_count": null, diff --git a/nbs/metrics/speed.ipynb b/nbs/metrics/speed.ipynb index 0e65b91..d0af978 100644 --- a/nbs/metrics/speed.ipynb +++ b/nbs/metrics/speed.ipynb @@ -1,32 +1,11 @@ { "cells": [ { - "cell_type": "markdown", - "id": "bdb7bc70", + "cell_type": "raw", + "id": "frontmatter", "metadata": {}, "source": [ - "# Speed\n", - "\n", - "> Latency and throughput measurement for PyTorch models\n", - "\n", - "## Overview\n", - "\n", - "The speed module provides accurate latency and throughput measurements across CPU and GPU devices.\n", - "\n", - "| Function | Purpose |\n", - "|----------|---------|\n", - "| `compute_speed()` | Measure latency on a single device |\n", - "| `compute_speed_multi()` | Measure on multiple devices (CPU + CUDA) |\n", - "| `sweep_threads()` | Find optimal CPU thread count |\n", - "| `sweep_batch_sizes()` | Find optimal batch size for throughput |\n", - "| `sweep_latency()` | Analyze latency vs input resolution |\n", - "\n", - "### Measurement Approach\n", - "\n", - "- CPU: Uses `torch.utils.benchmark.Timer` for accurate timing\n", - "- CUDA: Uses CUDA 
events for precise GPU timing\n", - "- Warmup: Configurable warmup iterations to stabilize measurements\n", - "- Statistics: Returns mean, std, p50, p90, p99, and throughput" + "---\ntitle: \"Speed\"\ndescription: \"Latency and throughput measurement for PyTorch models\"\nskip_showdoc: true\n---" ] }, { @@ -39,6 +18,16 @@ "#| default_exp speed" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "setup_showdoc", + "metadata": {}, + "outputs": [], + "source": [ + "#| include: false\nfrom nbdev.showdoc import *" + ] + }, { "cell_type": "code", "execution_count": null, @@ -68,6 +57,46 @@ "outputs": [], "source": "#| export\n@dataclass(slots=True)\nclass SpeedMetrics:\n \"\"\"Latency and throughput metrics for a single device.\"\"\"\n p50_ms: float\n p90_ms: float\n p99_ms: float\n mean_ms: float\n std_ms: float\n throughput_s: float\n\n def as_dict(self) -> dict[str, float]:\n return asdict(self)\n\n\n#| export\ndef _nan_speed_metrics(device: str) -> SpeedMetrics: # device string (unused, for consistent signature)\n \"\"\"Create SpeedMetrics with NaN values for failed benchmarks.\"\"\"\n nan = float(\"nan\")\n return SpeedMetrics(nan, nan, nan, nan, nan, nan)\n\n\n#| export\ndef _stats(lat_ms: np.ndarray, batch: int) -> Mapping[str, float]:\n \"\"\"Compute latency statistics from raw measurements.\"\"\"\n p50, p90, p99 = np.percentile(lat_ms, [50, 90, 99])\n mean = float(lat_ms.mean())\n return {\n \"p50_ms\": float(p50),\n \"p90_ms\": float(p90),\n \"p99_ms\": float(p99),\n \"mean_ms\": mean,\n \"std_ms\": float(lat_ms.std(ddof=1)) if lat_ms.size > 1 else 0.0,\n \"throughput_s\": batch * 1000.0 / mean,\n }\n\n\n#| export\ndef _forward_latencies(\n model: nn.Module, # model to benchmark\n sample: torch.Tensor, # input tensor (with batch dimension)\n *,\n device: str | torch.device = \"cpu\", # device to run on\n warmup: int = 20, # warmup iterations\n steps: int = 100, # measurement iterations\n use_torch_timer: bool | None = None, # force torch.utils.benchmark.Timer (default: auto)\n) -> np.ndarray:\n \"\"\"Collect forward-pass latencies (ms) using optimal timing for each device.\"\"\"\n if use_torch_timer is None:\n use_torch_timer = torch.device(device).type == \"cpu\"\n\n with _device_ctx(device) as dev:\n model.eval().to(dev)\n sample = sample.to(dev, non_blocking=True)\n\n for _ in range(warmup):\n model(sample)\n _sync(dev)\n\n lat: list[float] = []\n if use_torch_timer and dev.type == \"cpu\":\n t = Timer(stmt=\"model(x)\", globals={\"model\": model, \"x\": sample})\n m = t.blocked_autorange(min_run_time=0.3)\n per_iter = (np.asarray(m.raw_times) / m.number_per_run) * 1e3\n lat = per_iter.tolist()\n elif dev.type == \"cuda\":\n start_evt, end_evt = torch.cuda.Event(True), torch.cuda.Event(True)\n for _ in range(steps):\n start_evt.record()\n model(sample)\n end_evt.record()\n _sync(dev)\n lat.append(start_evt.elapsed_time(end_evt))\n else:\n for _ in range(steps):\n t0 = time.perf_counter()\n model(sample)\n lat.append((time.perf_counter() - t0) * 1e3)\n return np.asarray(lat, dtype=np.float32)\n\n\n#| export\n@torch.inference_mode()\ndef compute_speed(\n model: nn.Module, # model to benchmark\n sample: torch.Tensor, # input tensor (with batch dimension)\n *,\n device: str | torch.device = \"cpu\", # device to run on\n warmup: int = 20, # warmup iterations\n steps: int = 100, # measurement iterations\n) -> SpeedMetrics:\n \"\"\"Measure latency and throughput on a single device.\"\"\"\n lat = _forward_latencies(model, sample, device=device, warmup=warmup, steps=steps)\n 
return SpeedMetrics(**_stats(lat, sample.size(0)))\n\n\n#| export\ndef compute_speed_multi(\n model: nn.Module, # model to benchmark\n sample: torch.Tensor, # input tensor (with batch dimension)\n *,\n devices: Sequence[str | torch.device] | None = None, # devices to benchmark (default: cpu + cuda if available)\n **kwargs,\n) -> dict[str, SpeedMetrics]:\n \"\"\"Measure latency/throughput on multiple devices.\"\"\"\n return _run_on_devices(\n compute_speed, model, sample, devices,\n nan_factory=_nan_speed_metrics,\n metric_name=\"Speed\",\n **kwargs\n )\n\n\n#| export\ndef sweep_threads(\n model: nn.Module, # model to benchmark\n sample: torch.Tensor, # input tensor (with batch dimension)\n thread_counts: Sequence[int] = (1, 2, 4, 8), # thread counts to test\n *,\n warmup: int = 20, # warmup iterations per thread count\n steps: int = 100, # measurement iterations per thread count\n) -> list[dict]:\n \"\"\"Sweep CPU thread counts to find optimal parallelism.\"\"\"\n rows = []\n for n in thread_counts:\n torch.set_num_threads(n)\n lat = _forward_latencies(model, sample, device=\"cpu\", warmup=warmup, steps=steps, use_torch_timer=True)\n rows.append({\"threads\": n, **_stats(lat, sample.size(0))})\n return rows\n\n\n#| export\ndef sweep_batch_sizes(\n model: nn.Module, # model to benchmark\n input_shape: Sequence[int], # input shape WITHOUT batch dim, e.g. (3, 224, 224)\n batch_sizes: Sequence[int] = (1, 2, 4, 8, 16, 32), # batch sizes to test\n *,\n device: str | torch.device = \"cuda\", # device to run on\n warmup: int = 20, # warmup iterations per batch size\n steps: int = 100, # measurement iterations per batch size\n) -> list[dict]:\n \"\"\"Sweep batch sizes to find optimal throughput.\"\"\"\n rows = []\n for bs in batch_sizes:\n try:\n dummy = torch.randn(bs, *input_shape)\n lat = _forward_latencies(model, dummy, device=device, warmup=warmup, steps=steps)\n stats = _stats(lat, bs)\n rows.append({\"batch_size\": bs, \"latency_per_sample_ms\": stats[\"mean_ms\"] / bs, **stats})\n except (RuntimeError, torch.cuda.OutOfMemoryError) as e:\n warnings.warn(f\"Batch size {bs} failed (likely OOM): {e}\")\n rows.append({\"batch_size\": bs, \"mean_ms\": float(\"nan\"), \"throughput_s\": float(\"nan\")})\n return rows\n\n\n#| export\ndef sweep_latency(\n model: nn.Module, # model to benchmark\n shapes: Sequence[Sequence[int]], # input shapes to test, e.g. 
[(1,3,224,224), (1,3,384,384)]\n *,\n device: str | torch.device = \"cuda\", # device to run on\n warmup: int = 20, # warmup iterations per shape\n steps: int = 100, # measurement iterations per shape\n) -> list[dict]:\n \"\"\"Sweep input shapes to analyze latency vs resolution.\"\"\"\n rows = []\n for shape in shapes:\n dummy = torch.empty(*shape)\n lat = _forward_latencies(model, dummy, device=device, warmup=warmup, steps=steps)\n rows.append({\"shape\": \"×\".join(map(str, shape)), **_stats(lat, shape[0])})\n return rows" }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_0", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(SpeedMetrics)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_1", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(compute_speed)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_2", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(compute_speed_multi)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "test_basic", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\nfrom fastcore.test import *\n\nimport torch, torch.nn as nn\n_m = nn.Linear(10, 5)\n_x = torch.randn(1, 10)\n_s = compute_speed(_m, _x, warmup=2, steps=5)\nassert isinstance(_s, SpeedMetrics)\nassert _s.mean_ms > 0" + ] + }, { "cell_type": "markdown", "id": "9ca53161", diff --git a/nbs/tutorials/benchmark.ipynb b/nbs/tutorials/benchmark.ipynb index 3d0b06d..7d34549 100644 --- a/nbs/tutorials/benchmark.ipynb +++ b/nbs/tutorials/benchmark.ipynb @@ -1357,7 +1357,6 @@ "## See Also\n", "\n", "- [Layer Profiling Tutorial](profiling.html) - Per-layer performance analysis\n", - "- [Sensitivity Analysis Tutorial](sensitivity.html) - Find bottleneck layers\n", "- [Benchmark API](../analysis/benchmark.html) - Full API reference\n", "- [Individual Metrics](../metrics/size.html) - Size, speed, compute, memory, energy modules" ] diff --git a/nbs/tutorials/profiling.ipynb b/nbs/tutorials/profiling.ipynb index ac67f30..0654401 100644 --- a/nbs/tutorials/profiling.ipynb +++ b/nbs/tutorials/profiling.ipynb @@ -370,7 +370,7 @@ "\n", "## See Also\n", "\n", - "- [Getting Started Tutorial](tutorial.html) - Basic benchmarking with `benchmark()`\n", + "- [Getting Started Tutorial](benchmark.html) - Basic benchmarking with `benchmark()`\n", "- [Sensitivity Analysis](sensitivity.html) - Analyze layer importance for pruning\n", "- [Profiling API](../analysis/profiling.html) - Full LayerProfiler reference\n", "- [Speed Metrics](../metrics/speed.html) - Detailed speed measurement options" diff --git a/nbs/tutorials/report.ipynb b/nbs/tutorials/report.ipynb new file mode 100644 index 0000000..cefdfc0 --- /dev/null +++ b/nbs/tutorials/report.ipynb @@ -0,0 +1,306 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "frontmatter", + "metadata": {}, + "source": [ + "---\ntitle: \"Benchmark Reports\"\ndescription: \"Generate professional before/after compression reports with fasterbench\"\nskip_exec: true\n---" + ] + }, + { + "cell_type": "markdown", + "id": "intro", + "metadata": {}, + "source": [ + "## Introduction\n\nAfter compressing a model (pruning, quantization, sparsification), you want to know: *how much did it actually improve?* fasterbench's `ComparisonReport` answers this by benchmarking both versions and generating a professional report with metric deltas, improvement rankings, and optional radar charts.\n\nThis tutorial shows the full workflow: benchmark → compress → benchmark again → 
generate report." + ] + }, + { + "cell_type": "markdown", + "id": "setup_md", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "setup_code", + "metadata": {}, + "outputs": [], + "source": [ + "import torch, torch.nn as nn\nfrom fasterbench import benchmark, Report, ComparisonReport" + ] + }, + { + "cell_type": "markdown", + "id": "step1_md", + "metadata": {}, + "source": [ + "## Step 1: Benchmark the Original Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "step1_code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "═══ Size ══════════════════════════════════\n", + " Parameters.............. 11.69M\n", + " Model size.............. 44.59 MiB\n", + "═══ Speed (cpu) ═══════════════════════════\n", + " Mean latency............ 28.45 ms\n", + " Throughput.............. 35.2 inf/s\n", + "═══ Compute ═══════════════════════════════\n", + " MACs.................... 1,819.1 M\n" + ] + } + ], + "source": [ + "from torchvision.models import resnet18\n\nmodel = resnet18(pretrained=True)\nx = torch.randn(1, 3, 224, 224)\n\nresult_before = benchmark(model, x, metrics=[\"size\", \"speed\", \"compute\"])\nresult_before.summary()" + ] + }, + { + "cell_type": "markdown", + "id": "step2_md", + "metadata": {}, + "source": [ + "## Step 2: Compress the Model\n\nUse any compression technique — pruning, quantization, sparsification:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "step2_code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ignoring output layer: fc\n", + "Total ignored layers: 1\n" + ] + } + ], + "source": [ + "# Example: structured pruning with fasterai\nfrom fasterai.prune.pruner import Pruner\nfrom fasterai.core.criteria import large_final\n\npruner = Pruner(model, pruning_ratio=0.5, context='local', criteria=large_final,\n example_inputs=x)\npruner.prune_model()" + ] + }, + { + "cell_type": "markdown", + "id": "step3_md", + "metadata": {}, + "source": [ + "## Step 3: Benchmark the Compressed Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "step3_code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "═══ Size ══════════════════════════════════\n", + " Parameters.............. 3.21M\n", + " Model size.............. 12.34 MiB\n", + "═══ Speed (cpu) ═══════════════════════════\n", + " Mean latency............ 11.23 ms\n", + " Throughput.............. 89.1 inf/s\n", + "═══ Compute ═══════════════════════════════\n", + " MACs.................... 487.3 M\n" + ] + } + ], + "source": [ + "result_after = benchmark(model, x, metrics=[\"size\", \"speed\", \"compute\"])\nresult_after.summary()" + ] + }, + { + "cell_type": "markdown", + "id": "step4_md", + "metadata": {}, + "source": [ + "## Step 4: Generate Comparison Report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "step4_code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=================================================================\n", + " Pruning Compression Report \n", + "=================================================================\n", + "\n", + "Before: ResNet-18 (original)\n", + "After: ResNet-18 (50% pruned)\n", + "\n", + "═══ Executive Summary ═════════════════════════════════════════\n", + " Before After Change\n", + " Parameters........ 
11.69M 3.21M -72.5% ✓\n", + " Model Size........ 44.59 MiB 12.34 MiB -72.3% ✓\n", + " Latency (CPU)..... 28.45 ms 11.23 ms -60.5% ✓\n", + " Throughput (CPU).. 35.2 inf/s 89.1 inf/s +153.1% ✓\n", + " MACs.............. 1819.1 M 487.3 M -73.2% ✓\n", + "\n", + "═══ Top Improvements ═════════════════════════════════════════\n", + " 1. Throughput (CPU): +153.1%\n", + " 2. MACs: -73.2%\n", + " 3. Parameters: -72.5%\n", + "\n" + ] + } + ], + "source": [ + "report = ComparisonReport(\n result_before, result_after,\n before_name=\"ResNet-18 (original)\",\n after_name=\"ResNet-18 (50% pruned)\",\n title=\"Pruning Compression Report\"\n)\n\nreport.summary()" + ] + }, + { + "cell_type": "markdown", + "id": "export_md", + "metadata": {}, + "source": [ + "## Export to HTML or Markdown\n\nGenerate shareable reports:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "export_code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# Pruning Compression Report\n", + "\n", + "**Before:** ResNet-18 (original)\n", + "**After:** ResNet-18 (50% pruned)\n", + "\n", + "## Summary\n", + "\n", + "| Metric | Before | After | Change |\n", + "|--------|--------|-------|--------|\n" + ] + } + ], + "source": [ + "# HTML report with embedded radar chart\nreport.to_html(\"compression_report.html\", include_charts=True)\n\n# Markdown report (great for GitHub PRs)\nmd = report.to_markdown(\"compression_report.md\")\nprint(md[:200])" + ] + }, + { + "cell_type": "markdown", + "id": "single_md", + "metadata": {}, + "source": [ + "## Single Model Report\n\n`Report` generates a standalone report for a single model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "single_code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "============================================================\n", + " ResNet-18 \n", + "============================================================\n", + "\n", + "Baseline ImageNet classifier\n", + "\n", + "═══ Metrics ═══════════════════════════════════════════════\n", + " Parameters...................... 11.69M\n", + " Model Size...................... 44.59 MiB\n", + " Latency (CPU)................... 28.45 ms\n", + " Throughput (CPU)................ 35.2 inf/s\n", + " MACs............................ 
1819.1 M\n", + "\n" + ] + } + ], + "source": [ + "single = Report(result_before, model_name=\"ResNet-18\", \n description=\"Baseline ImageNet classifier\")\nsingle.summary()\n\n# Also supports HTML and Markdown export\nsingle.to_html(\"resnet18_report.html\")" + ] + }, + { + "cell_type": "markdown", + "id": "prog_md", + "metadata": {}, + "source": [ + "## Programmatic Access\n\nAccess report data as dictionaries for further processing:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "prog_code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Parameters: -72.5% (improved)\n", + "Model Size: -72.3% (improved)\n", + "Latency (CPU): -60.5% (improved)\n", + "Throughput (CPU): +153.1% (improved)\n", + "MACs: -73.2% (improved)\n", + " Throughput (CPU): 35 → 89\n", + " MACs: 1819 → 487\n", + " Parameters: 11690000 → 3210000\n" + ] + } + ], + "source": [ + "# Iterate over all metric deltas\nfor d in report.deltas:\n print(f\"{d.label}: {d.delta_pct:+.1f}% ({'improved' if d.improved else 'regressed'})\")\n\n# Top 3 improvements\nfor d in report.top_improvements(3):\n print(f\" {d.label}: {d.before:.0f} → {d.after:.0f}\")\n\n# Serialize to dict (for JSON, databases, etc.)\ndata = report.as_dict()" + ] + }, + { + "cell_type": "markdown", + "id": "summary", + "metadata": {}, + "source": [ + "---\n\n## Summary\n\n| Tool / Function | Purpose |\n|----------------|----------|\n| `Report(result)` | Single model report |\n| `ComparisonReport(before, after)` | Before/after comparison |\n| `.summary()` | Console output |\n| `.to_html(path)` | HTML with radar charts |\n| `.to_markdown(path)` | Markdown (great for PRs) |\n| `.deltas` | List of `ReportMetricDelta` |\n| `.top_improvements(n)` | Top N improvements sorted by impact |" + ] + }, + { + "cell_type": "markdown", + "id": "see_also", + "metadata": {}, + "source": [ + "---\n\n## See Also\n\n- [Benchmark](../analysis/benchmark.html) — Unified benchmarking API\n- [Visualization](../visualization/plot.html) — Radar plots for visual comparison\n- [Report API](../analysis/report.html) — Full API reference" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/nbs/visualization/plot.ipynb b/nbs/visualization/plot.ipynb index c5257d6..1ac297a 100644 --- a/nbs/visualization/plot.ipynb +++ b/nbs/visualization/plot.ipynb @@ -1,13 +1,11 @@ { "cells": [ { - "cell_type": "markdown", - "id": "0ad1c615-6008-49ef-aac2-bf06bec096a7", + "cell_type": "raw", + "id": "frontmatter", "metadata": {}, "source": [ - "# plot\n", - "\n", - "> A module to plot the results of the benchmark" + "---\ntitle: \"Visualization\"\ndescription: \"Radar plots for visual model comparison\"\nskip_showdoc: true\n---" ] }, { @@ -20,6 +18,16 @@ "#| default_exp plot" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "setup_showdoc", + "metadata": {}, + "outputs": [], + "source": [ + "#| include: false\nfrom nbdev.showdoc import *" + ] + }, { "cell_type": "code", "execution_count": null, @@ -43,6 +51,24 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sd_0", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(create_radar_plot)" + ] + }, + { + "cell_type": "markdown", + "id": "see_also", + "metadata": {}, + "source": [ + "---\n\n## See Also\n\n- [Benchmark](../analysis/benchmark.html) — Unified 
API\n- [Report](../analysis/report.html) — Formatted reports" + ] } ], "metadata": {