FasterAI-Labs · nathanhubens · Apr 13, 2026 · Apr 10, 2026
diff --git a/fasterbench/_modidx.py b/fasterbench/_modidx.py
@@ -96,6 +96,40 @@
                                                                                      'fasterbench/profiling.py'),
                                        'fasterbench.profiling._tensor_bytes': ( 'analysis/profiling.html#_tensor_bytes',
                                                                                 'fasterbench/profiling.py')},
+            'fasterbench.report': { 'fasterbench.report.ComparisonReport': ( 'analysis/report.html#comparisonreport',
+                                                                             'fasterbench/report.py'),
+                                    'fasterbench.report.ComparisonReport.__init__': ( 'analysis/report.html#comparisonreport.__init__',
+                                                                                      'fasterbench/report.py'),
+                                    'fasterbench.report.ComparisonReport.as_dict': ( 'analysis/report.html#comparisonreport.as_dict',
+                                                                                     'fasterbench/report.py'),
+                                    'fasterbench.report.ComparisonReport.deltas': ( 'analysis/report.html#comparisonreport.deltas',
+                                                                                    'fasterbench/report.py'),
+                                    'fasterbench.report.ComparisonReport.summary': ( 'analysis/report.html#comparisonreport.summary',
+                                                                                     'fasterbench/report.py'),
+                                    'fasterbench.report.ComparisonReport.to_html': ( 'analysis/report.html#comparisonreport.to_html',
+                                                                                     'fasterbench/report.py'),
+                                    'fasterbench.report.ComparisonReport.to_markdown': ( 'analysis/report.html#comparisonreport.to_markdown',
+                                                                                         'fasterbench/report.py'),
+                                    'fasterbench.report.ComparisonReport.top_improvements': ( 'analysis/report.html#comparisonreport.top_improvements',
+                                                                                              'fasterbench/report.py'),
+                                    'fasterbench.report.Report': ('analysis/report.html#report', 'fasterbench/report.py'),
+                                    'fasterbench.report.Report.__init__': ('analysis/report.html#report.__init__', 'fasterbench/report.py'),
+                                    'fasterbench.report.Report.as_dict': ('analysis/report.html#report.as_dict', 'fasterbench/report.py'),
+                                    'fasterbench.report.Report.summary': ('analysis/report.html#report.summary', 'fasterbench/report.py'),
+                                    'fasterbench.report.Report.to_html': ('analysis/report.html#report.to_html', 'fasterbench/report.py'),
+                                    'fasterbench.report.Report.to_markdown': ( 'analysis/report.html#report.to_markdown',
+                                                                               'fasterbench/report.py'),
+                                    'fasterbench.report.ReportMetricDelta': ( 'analysis/report.html#reportmetricdelta',
+                                                                              'fasterbench/report.py'),
+                                    'fasterbench.report.ReportMetricDelta.as_dict': ( 'analysis/report.html#reportmetricdelta.as_dict',
+                                                                                      'fasterbench/report.py'),
+                                    'fasterbench.report._extract_metrics': ( 'analysis/report.html#_extract_metrics',
+                                                                             'fasterbench/report.py'),
+                                    'fasterbench.report._format_value_with_unit': ( 'analysis/report.html#_format_value_with_unit',
+                                                                                    'fasterbench/report.py'),
+                                    'fasterbench.report._generate_css': ('analysis/report.html#_generate_css', 'fasterbench/report.py'),
+                                    'fasterbench.report._improvement_indicator': ( 'analysis/report.html#_improvement_indicator',
+                                                                                   'fasterbench/report.py')},
             'fasterbench.size': { 'fasterbench.size.SizeMetrics': ('metrics/size.html#sizemetrics', 'fasterbench/size.py'),
                                   'fasterbench.size.SizeMetrics.as_dict': ('metrics/size.html#sizemetrics.as_dict', 'fasterbench/size.py'),
                                   'fasterbench.size.compute_size': ('metrics/size.html#compute_size', 'fasterbench/size.py'),

diff --git a/fasterbench/benchmark.py b/fasterbench/benchmark.py
@@ -1,5 +1,3 @@
-"""Unified benchmarking API for comprehensive model analysis"""
-
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/analysis/benchmark.ipynb.
 
 # %% ../nbs/analysis/benchmark.ipynb #bbaee268

diff --git a/fasterbench/compute.py b/fasterbench/compute.py
@@ -1,13 +1,10 @@
-"""Compute modules for benchmarking"""
-
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/metrics/compute.ipynb.
 
 # %% ../nbs/metrics/compute.ipynb #0091d170
 from __future__ import annotations
 
 import warnings
 from dataclasses import dataclass
-from typing import Any
 
 import torch
 import torch.nn as nn

diff --git a/fasterbench/core.py b/fasterbench/core.py
@@ -1,5 +1,3 @@
-"""Core modules for benchmarking"""
-
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/core/core.ipynb.
 
 # %% ../nbs/core/core.ipynb #c59316b7

diff --git a/fasterbench/energy.py b/fasterbench/energy.py
@@ -1,5 +1,3 @@
-"""Energy modules for benchmarking"""
-
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/metrics/energy.ipynb.
 
 # %% ../nbs/metrics/energy.ipynb #d27f26a4

diff --git a/fasterbench/memory.py b/fasterbench/memory.py
@@ -1,5 +1,3 @@
-"""Memory modules for benchmarking"""
-
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/metrics/memory.ipynb.
 
 # %% ../nbs/metrics/memory.ipynb #16cd91b6

diff --git a/fasterbench/plot.py b/fasterbench/plot.py
@@ -1,5 +1,3 @@
-"""A module to plot the results of the benchmark"""
-
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/visualization/plot.ipynb.
 
 # %% ../nbs/visualization/plot.ipynb #0c86ca8f-38d2-44d0-8204-3c977f1b1c19

diff --git a/fasterbench/profiling.py b/fasterbench/profiling.py
@@ -1,5 +1,3 @@
-"""Per-layer profiling for deep analysis of model performance"""
-
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/analysis/profiling.ipynb.
 
 # %% ../nbs/analysis/profiling.ipynb #imports

diff --git a/fasterbench/size.py b/fasterbench/size.py
@@ -1,5 +1,3 @@
-"""Model size and parameter count measurement"""
-
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/metrics/size.ipynb.
 
 # %% ../nbs/metrics/size.ipynb #4c37777e

diff --git a/fasterbench/speed.py b/fasterbench/speed.py
@@ -1,5 +1,3 @@
-"""Latency and throughput measurement for PyTorch models"""
-
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/metrics/speed.ipynb.
 
 # %% ../nbs/metrics/speed.ipynb #e6b40d9e

diff --git a/fasterbench/utils.py b/fasterbench/utils.py
@@ -1,5 +1,3 @@
-"""Utility functions"""
-
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/core/utils.ipynb.
 
 # %% ../nbs/core/utils.ipynb #436e19d0-bb2b-47fd-8499-f38d8ac96e56

diff --git a/nbs/_quarto.yml b/nbs/_quarto.yml
@@ -35,6 +35,7 @@ website:
         contents:
         - tutorials/benchmark.ipynb
         - tutorials/profiling.ipynb
+        - tutorials/report.ipynb
       - section: Core
         contents:
         - core/core.ipynb

diff --git a/nbs/analysis/benchmark.ipynb b/nbs/analysis/benchmark.ipynb
@@ -1,10 +1,12 @@
 {
  "cells": [
   {
-   "cell_type": "markdown",
-   "id": "f86c20a0",
+   "cell_type": "raw",
+   "id": "frontmatter",
    "metadata": {},
-   "source": "# Benchmark\n\n> Unified benchmarking API for comprehensive model analysis\n\n## Overview\n\nThe `benchmark()` function is the main entry point for fasterbench. It returns a `BenchmarkResult` with typed access to all metrics.\n\n### Quick Example\n\n```python\nfrom fasterbench import benchmark\n\nresult = benchmark(model, sample, metrics=[\"size\", \"speed\", \"compute\"])\n\n# Typed access\nprint(result.size.size_mib)\nprint(result.speed[\"cpu\"].mean_ms)\n\n# Dict access (backward compatible)\nprint(result[\"size_size_mib\"])\n```\n\n### Available Metrics\n\n| Metric | Module | What It Measures |\n|--------|--------|------------------|\n| `\"size\"` | [size](../metrics/size.html) | Disk size, parameter count |\n| `\"speed\"` | [speed](../metrics/speed.html) | Latency, throughput |\n| `\"compute\"` | [compute](../metrics/compute.html) | MACs, operation count |\n| `\"memory\"` | [memory](../metrics/memory.html) | Peak/average memory |\n| `\"energy\"` | [energy](../metrics/energy.html) | Power, CO₂ emissions |"
+   "source": [
+    "---\ntitle: \"Benchmark\"\ndescription: \"Unified benchmarking API for PyTorch models\"\nskip_showdoc: true\n---"
+   ]
   },
   {
    "cell_type": "code",
@@ -121,13 +123,41 @@
     "show_doc(benchmark)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "sd_0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "show_doc(BenchmarkResult)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "test_basic",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| hide\nfrom fastcore.test import *\n\nimport torch, torch.nn as nn\n_m = nn.Linear(10, 5)\n_x = torch.randn(1, 10)\n_r = benchmark(_m, _x, metrics=[\"size\"])\nassert isinstance(_r, BenchmarkResult)\nassert \"size_num_params\" in _r"
+   ]
+  },
   {
    "cell_type": "markdown",
    "execution_count": null,
    "id": "35250fb9",
    "metadata": {},
    "outputs": [],
-   "source": "---\n\n## See Also\n\n- [Getting Started Tutorial](../tutorials/tutorial.html) - Comprehensive usage examples\n- [Profiling](profiling.html) - Per-layer analysis with `LayerProfiler`\n- [Visualization](../visualization/plot.html) - Radar plots for model comparison"
+   "source": [
+    "---\n",
+    "\n",
+    "## See Also\n",
+    "\n",
+    "- [Getting Started Tutorial](../tutorials/benchmark.html) - Comprehensive usage examples\n",
+    "- [Profiling](profiling.html) - Per-layer analysis with `LayerProfiler`\n",
+    "- [Visualization](../visualization/plot.html) - Radar plots for model comparison"
+   ]
   }
  ],
  "metadata": {

diff --git a/nbs/analysis/profiling.ipynb b/nbs/analysis/profiling.ipynb
@@ -1,13 +1,11 @@
 {
  "cells": [
   {
-   "cell_type": "markdown",
-   "id": "header",
+   "cell_type": "raw",
+   "id": "frontmatter",
    "metadata": {},
    "source": [
-    "# Profiling\n",
-    "\n",
-    "> Per-layer profiling for deep analysis of model performance"
+    "---\ntitle: \"Profiling\"\ndescription: \"Per-layer profiling for deep analysis of model performance\"\nskip_showdoc: true\n---"
    ]
   },
   {
@@ -20,6 +18,16 @@
     "#| default_exp profiling"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "setup_showdoc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| include: false\nfrom nbdev.showdoc import *"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -43,6 +51,64 @@
    "metadata": {},
    "outputs": [],
    "source": "#| export\nclass LayerProfiler:\n    \"\"\"Unified per-layer profiler for multiple metrics (speed, memory, size, compute).\"\"\"\n    \n    VALID_METRICS = frozenset({\"speed\", \"memory\", \"size\", \"compute\"})\n    _CONFIG = {\n        \"speed\": {\n            \"col\": \"speed_ms\", \"pct\": \"speed_percent\", \"unit\": \"ms\",\n            \"label\": \"Speed (slowest)\",\n            \"src_col\": \"time_ms\",\n            \"format\": _fmt_float,\n        },\n        \"memory\": {\n            \"col\": \"memory_mib\", \"pct\": \"memory_percent\", \"unit\": \"MiB\",\n            \"label\": \"Memory (largest)\",\n            \"src_col\": \"memory_mib\",\n            \"format\": _fmt_float,\n        },\n        \"size\": {\n            \"col\": \"params\", \"pct\": \"params_percent\", \"unit\": \"\",\n            \"label\": \"Parameters (largest)\",\n            \"src_col\": \"params\",\n            \"format\": _fmt_table,\n        },\n        \"compute\": {\n            \"col\": \"macs\", \"pct\": \"macs_percent\", \"unit\": \"\",\n            \"label\": \"MACs (most)\",\n            \"src_col\": \"macs\",\n            \"format\": _fmt_macs,\n        },\n    }\n    \n    def __init__(\n        self,\n        model: nn.Module,      # model to profile\n        sample: torch.Tensor,  # input tensor (with batch dimension)\n    ):\n        self.model = model\n        self.sample = sample\n        self._leaf_modules = _leaf_modules(model)\n        self._results: list[dict] = []\n        self._profiled_metrics: list[str] = []\n    \n    def profile(\n        self,\n        metrics: str | Sequence[str] = \"speed\",  # metrics to profile: speed, memory, size, compute\n        *,\n        device: str | torch.device = \"cpu\",      # device for speed/memory profiling\n        warmup: int = 5,                         # warmup iterations\n        steps: int = 20,                         # measurement iterations\n    ) -> list[dict]:\n        \"\"\"Profile specified metrics for each layer, returns list of dicts.\"\"\"\n        if isinstance(metrics, str):\n            metrics = [metrics]\n        metrics = list(metrics)\n        \n        invalid = set(metrics) - self.VALID_METRICS\n        if invalid:\n            raise ValueError(f\"Invalid metrics: {invalid}. Valid: {self.VALID_METRICS}\")\n        \n        # Initialize results dict for each leaf module\n        results: dict[str, dict] = {\n            name: {\"name\": name, \"type\": mod.__class__.__name__}\n            for name, mod in self._leaf_modules.items()\n        }\n        \n        # Profile each metric using unified _profile_layers\n        for metric in metrics:\n            cfg = self._CONFIG[metric]\n            profile_data = _profile_layers(\n                self.model, self.sample,\n                device=device, metric=metric, warmup=warmup, steps=steps\n            )\n            \n            # Merge results\n            for r in profile_data:\n                name = r[\"name\"]\n                if name in results:\n                    results[name][cfg[\"col\"]] = r[cfg[\"src_col\"]]\n                    results[name][cfg[\"pct\"]] = r[\"percent\"]\n        \n        out = list(results.values())\n        \n        # Sort by first metric\n        sort_col = self._CONFIG[metrics[0]][\"col\"]\n        out.sort(key=lambda x: x.get(sort_col, 0) or 0, reverse=True)\n        \n        # Store for top() and summary()\n        self._results = out\n        self._profiled_metrics = metrics\n        \n        return out\n    \n    def top(\n        self,\n        metric: str,              # metric to sort by: speed, memory, size, compute\n        n: int = 5,               # number of layers to return\n        *,\n        ascending: bool = False,  # if True, return smallest/fastest instead of largest/slowest\n    ) -> list[dict]:\n        \"\"\"Get top N layers sorted by the specified metric.\"\"\"\n        if not self._results:\n            raise RuntimeError(\"No results available. Call profile() first.\")\n        \n        if metric not in self._CONFIG:\n            raise ValueError(f\"Invalid metric: {metric}. Valid: {list(self._CONFIG.keys())}\")\n        \n        col = self._CONFIG[metric][\"col\"]\n        \n        # Check if metric was profiled\n        if col not in self._results[0]:\n            raise ValueError(f\"Metric '{metric}' was not profiled. Profiled: {self._profiled_metrics}\")\n        \n        sorted_results = sorted(\n            self._results,\n            key=lambda x: x.get(col, 0) or 0,\n            reverse=not ascending\n        )\n        return sorted_results[:n]\n    \n    def summary(self, *, top: int = 5) -> None:\n        \"\"\"Print a formatted summary of top layers for each profiled metric.\"\"\"\n        if not self._results:\n            raise RuntimeError(\"No results available. Call profile() first.\")\n        \n        for metric in self._profiled_metrics:\n            cfg = self._CONFIG[metric]\n            col, pct_col = cfg[\"col\"], cfg[\"pct\"]\n            \n            # Check if metric data exists\n            if col not in self._results[0]:\n                continue\n            \n            print(_section(cfg['label'], 54))\n            \n            sorted_layers = sorted(\n                self._results,\n                key=lambda x: x.get(col, 0) or 0,\n                reverse=True\n            )[:top]\n            \n            for r in sorted_layers:\n                val = r.get(col, 0) or 0\n                pct = r.get(pct_col, 0) or 0\n                val_str = cfg[\"format\"](val)\n                if cfg[\"unit\"]:\n                    val_str += f\" {cfg['unit']}\"\n                print(f\"  {r['name']:40} {r['type']:15} {val_str} ({pct:5.1f}%)\")\n            print()"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "sd_0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "show_doc(LayerProfiler)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "sd_1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "show_doc(LayerProfiler.profile)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "sd_2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "show_doc(LayerProfiler.top)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "sd_3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "show_doc(LayerProfiler.summary)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "test_basic",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| hide\nfrom fastcore.test import *\n\nimport torch, torch.nn as nn\n_m = nn.Sequential(nn.Linear(10, 20), nn.ReLU(), nn.Linear(20, 5))\n_x = torch.randn(1, 10)\n_lp = LayerProfiler(_m, _x)\n_results = _lp.profile(metrics=[\"size\"])\nassert len(_results) > 0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "see_also",
+   "metadata": {},
+   "source": [
+    "---\n\n## See Also\n\n- [Benchmark](benchmark.html) — Unified API\n- [Profiling Tutorial](../tutorials/profiling.html) — Step-by-step guide"
+   ]
   }
  ],
  "metadata": {