From fb4eebcaa93cf6569c594c88420a36bbb1fd41bc Mon Sep 17 00:00:00 2001 From: nathanhubens Date: Mon, 13 Apr 2026 21:03:56 +0200 Subject: [PATCH] =?UTF-8?q?fix:=20misc=20module=20cleanup=20=E2=80=94=20de?= =?UTF-8?q?precate=20cpu=5Foptimizer,=20remove=20dead=20imports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 of misc module revamp: - Remove unused `import F` from bn_folding and fc_decomposer - Replace cpu_optimizer with optimize_for_cpu (torch.compile backend) - Old accelerate_model_for_cpu deprecated with shim - Fixed bug: torch.jit.script doesn't use example_input (was dead param) - Removed dependency on deprecated optimize_for_mobile - Added tests (was skip_exec with zero coverage) - Add conv_decomposer.ipynb and cpu_optimizer.ipynb to _quarto.yml sidebar - Add cpu_optimizer to misc/all.py exports - Fix rank_ratio → percent_removed doc bug in fc_decomposer tutorial --- fasterai/_modidx.py | 4 +- fasterai/misc/all.py | 3 +- fasterai/misc/bn_folding.py | 1 - fasterai/misc/cpu_optimizer.py | 37 +++++-- fasterai/misc/fc_decomposer.py | 1 - nbs/_quarto.yml | 2 + nbs/misc/bn_folding.ipynb | 3 +- nbs/misc/cpu_optimizer.ipynb | 143 +++++++------------------ nbs/misc/fc_decomposer.ipynb | 3 +- nbs/tutorials/misc/fc_decomposer.ipynb | 10 +- 10 files changed, 77 insertions(+), 130 deletions(-) diff --git a/fasterai/_modidx.py b/fasterai/_modidx.py index 500ab14..efe8195 100644 --- a/fasterai/_modidx.py +++ b/fasterai/_modidx.py @@ -234,7 +234,9 @@ 'fasterai.misc.conv_decomposer._unfold': ( 'misc/conv_decomposer.html#_unfold', 'fasterai/misc/conv_decomposer.py')}, 'fasterai.misc.cpu_optimizer': { 'fasterai.misc.cpu_optimizer.accelerate_model_for_cpu': ( 'misc/cpu_optimizer.html#accelerate_model_for_cpu', - 'fasterai/misc/cpu_optimizer.py')}, + 'fasterai/misc/cpu_optimizer.py'), + 'fasterai.misc.cpu_optimizer.optimize_for_cpu': ( 'misc/cpu_optimizer.html#optimize_for_cpu', + 'fasterai/misc/cpu_optimizer.py')}, 
'fasterai.misc.fc_decomposer': { 'fasterai.misc.fc_decomposer.FC_Decomposer': ( 'misc/fc_decomposer.html#fc_decomposer', 'fasterai/misc/fc_decomposer.py'), 'fasterai.misc.fc_decomposer.FC_Decomposer.SVD': ( 'misc/fc_decomposer.html#fc_decomposer.svd', diff --git a/fasterai/misc/all.py b/fasterai/misc/all.py index 545f64c..f071eec 100644 --- a/fasterai/misc/all.py +++ b/fasterai/misc/all.py @@ -1,3 +1,4 @@ from .bn_folding import * from .fc_decomposer import * -from .conv_decomposer import * \ No newline at end of file +from .conv_decomposer import * +from .cpu_optimizer import * \ No newline at end of file diff --git a/fasterai/misc/bn_folding.py b/fasterai/misc/bn_folding.py index 758b722..6334d33 100644 --- a/fasterai/misc/bn_folding.py +++ b/fasterai/misc/bn_folding.py @@ -6,7 +6,6 @@ # %% ../../nbs/misc/bn_folding.ipynb #productive-preparation import torch import torch.nn as nn -import torch.nn.functional as F import copy # %% ../../nbs/misc/bn_folding.ipynb #83000749 diff --git a/fasterai/misc/cpu_optimizer.py b/fasterai/misc/cpu_optimizer.py index b5fd869..bf5e91c 100644 --- a/fasterai/misc/cpu_optimizer.py +++ b/fasterai/misc/cpu_optimizer.py @@ -1,20 +1,37 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/misc/cpu_optimizer.ipynb. 
# %% auto #0 -__all__ = ['accelerate_model_for_cpu'] +__all__ = ['optimize_for_cpu', 'accelerate_model_for_cpu'] # %% ../../nbs/misc/cpu_optimizer.ipynb #fbbccd4a import torch import torch.nn as nn -from torch.utils.mobile_optimizer import optimize_for_mobile +import warnings # %% ../../nbs/misc/cpu_optimizer.ipynb #6524ac31 -def accelerate_model_for_cpu(model: nn.Module, example_input: torch.Tensor): - model.eval() - example_input = example_input.to(memory_format=torch.channels_last) - - model = model.to(memory_format=torch.channels_last) - model = torch.jit.script(model) - model = optimize_for_mobile(model) +def optimize_for_cpu( + model: nn.Module, # The PyTorch model to optimize + sample: torch.Tensor, # Sample input for tracing (with batch dim) + *, + backend: str = "compile", # "compile" (torch.compile) or "trace" (torch.jit.trace) + compile_mode: str = "default", # torch.compile mode +) -> nn.Module: + "Optimize model for CPU inference via channels-last layout + compilation" + model = model.eval().to(memory_format=torch.channels_last) + sample = sample.to(memory_format=torch.channels_last) + + if backend == "compile": + return torch.compile(model, mode=compile_mode) + elif backend == "trace": + with torch.no_grad(): + return torch.jit.trace(model, sample) + else: + raise ValueError(f"Unknown backend: {backend!r}. 
Use 'compile' or 'trace'.") - return model +def accelerate_model_for_cpu(model: nn.Module, example_input: torch.Tensor): + "Deprecated: use optimize_for_cpu() instead" + warnings.warn( + "accelerate_model_for_cpu is deprecated, use optimize_for_cpu(model, sample) instead", + DeprecationWarning, stacklevel=2, + ) + return optimize_for_cpu(model, example_input, backend="trace") diff --git a/fasterai/misc/fc_decomposer.py b/fasterai/misc/fc_decomposer.py index c1fa113..b466716 100644 --- a/fasterai/misc/fc_decomposer.py +++ b/fasterai/misc/fc_decomposer.py @@ -6,7 +6,6 @@ # %% ../../nbs/misc/fc_decomposer.ipynb #fbbccd4a import torch import torch.nn as nn -import torch.nn.functional as F import copy # %% ../../nbs/misc/fc_decomposer.ipynb #6524ac31 diff --git a/nbs/_quarto.yml b/nbs/_quarto.yml index 3490807..972f71b 100644 --- a/nbs/_quarto.yml +++ b/nbs/_quarto.yml @@ -114,6 +114,8 @@ website: contents: - misc/bn_folding.ipynb - misc/fc_decomposer.ipynb + - misc/conv_decomposer.ipynb + - misc/cpu_optimizer.ipynb - section: Export contents: - export/onnx_exporter.ipynb diff --git a/nbs/misc/bn_folding.ipynb b/nbs/misc/bn_folding.ipynb index ea50d7c..408e303 100644 --- a/nbs/misc/bn_folding.ipynb +++ b/nbs/misc/bn_folding.ipynb @@ -45,7 +45,6 @@ "#| export\n", "import torch\n", "import torch.nn as nn\n", - "import torch.nn.functional as F\n", "import copy" ] }, @@ -388,4 +387,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/nbs/misc/cpu_optimizer.ipynb b/nbs/misc/cpu_optimizer.ipynb index b953d8d..fb41767 100644 --- a/nbs/misc/cpu_optimizer.ipynb +++ b/nbs/misc/cpu_optimizer.ipynb @@ -6,11 +6,10 @@ "metadata": {}, "source": [ "---\n", - "description: Further optimize for CPU inference\n", + "description: Optimize models for CPU inference\n", "output-file: cpu_optimizer.html\n", - "title: Further optimize for CPU inference\n", + "title: CPU Optimizer\n", "skip_showdoc: true\n", - "skip_exec: true\n", "---" ] }, @@ -41,31 +40,13 
@@ "id": "fbbccd4a", "metadata": {}, "outputs": [], - "source": [ - "#| export\n", - "import torch\n", - "import torch.nn as nn\n", - "from torch.utils.mobile_optimizer import optimize_for_mobile" - ] + "source": "#| export\nimport torch\nimport torch.nn as nn\nimport warnings" }, { "cell_type": "markdown", "id": "hbzsrd6sl1h", "metadata": {}, - "source": [ - "## Overview\n", - "\n", - "The `accelerate_model_for_cpu` function applies optimizations to prepare a PyTorch model for efficient CPU inference. It combines several techniques:\n", - "\n", - "1. **Channels-last memory format**: Optimizes memory layout for CNN operations on CPU\n", - "2. **TorchScript compilation**: JIT compiles the model for faster execution\n", - "3. **Mobile optimization**: Applies `optimize_for_mobile` for operator fusion and other optimizations\n", - "\n", - "**When to use:**\n", - "- Deploying models on CPU-only servers\n", - "- Edge deployment without GPU\n", - "- After quantization for maximum CPU performance" - ] + "source": "## Overview\n\n`optimize_for_cpu` prepares a model for efficient CPU inference by combining:\n\n1. **Channels-last memory format** — optimizes layout for CNN operations on CPU\n2. 
**Compilation** — `torch.compile` (default) or `torch.jit.trace` for operator fusion\n\n| Backend | Speed | Compatibility | Best For |\n|---------|-------|---------------|----------|\n| `\"compile\"` | Faster | Most models | Default choice |\n| `\"trace\"` | Good | Requires static shapes | Legacy / mobile |" }, { "cell_type": "code", @@ -73,104 +54,52 @@ "id": "6524ac31", "metadata": {}, "outputs": [], - "source": [ - "#| export\n", - "def accelerate_model_for_cpu(model: nn.Module, example_input: torch.Tensor):\n", - " model.eval()\n", - " example_input = example_input.to(memory_format=torch.channels_last)\n", - " \n", - " model = model.to(memory_format=torch.channels_last)\n", - " model = torch.jit.script(model)\n", - " model = optimize_for_mobile(model)\n", - "\n", - " return model" - ] + "source": "#| export\ndef optimize_for_cpu(\n model: nn.Module, # The PyTorch model to optimize\n sample: torch.Tensor, # Sample input for tracing (with batch dim)\n *,\n backend: str = \"compile\", # \"compile\" (torch.compile) or \"trace\" (torch.jit.trace)\n compile_mode: str = \"default\", # torch.compile mode\n) -> nn.Module:\n \"Optimize model for CPU inference via channels-last layout + compilation\"\n model = model.eval().to(memory_format=torch.channels_last)\n sample = sample.to(memory_format=torch.channels_last)\n\n if backend == \"compile\":\n return torch.compile(model, mode=compile_mode)\n elif backend == \"trace\":\n with torch.no_grad():\n return torch.jit.trace(model, sample)\n else:\n raise ValueError(f\"Unknown backend: {backend!r}. 
Use 'compile' or 'trace'.\")\n\ndef accelerate_model_for_cpu(model: nn.Module, example_input: torch.Tensor):\n \"Deprecated: use optimize_for_cpu() instead\"\n warnings.warn(\n \"accelerate_model_for_cpu is deprecated, use optimize_for_cpu(model, sample) instead\",\n DeprecationWarning, stacklevel=2,\n )\n return optimize_for_cpu(model, example_input, backend=\"trace\")" }, { "cell_type": "code", "execution_count": null, "id": "50222d43", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found permutation search CUDA kernels\n", - "[ASP][Info] permutation_search_kernels can be imported.\n" - ] - }, - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/FasterAI-Labs/fasterai/tree/master/blob/master/fasterai/misc/cpu_optimizer.py#L12){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### accelerate_model_for_cpu\n", - "\n", - "```python\n", - "\n", - "def accelerate_model_for_cpu(\n", - " model:Module, example_input:Tensor\n", - "):\n", - "\n", - "\n", - "```" - ], - "text/plain": [ - "```python\n", - "\n", - "def accelerate_model_for_cpu(\n", - " model:Module, example_input:Tensor\n", - "):\n", - "\n", - "\n", - "```" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "show_doc(accelerate_model_for_cpu)" - ] + "outputs": [], + "source": "show_doc(optimize_for_cpu)" }, { "cell_type": "markdown", "id": "78818w1gh87", "metadata": {}, + "source": "```python\nimport torch\nfrom torchvision.models import resnet18\n\nfrom fasterai.misc.cpu_optimizer import optimize_for_cpu\n\nmodel = resnet18(pretrained=True)\nsample = torch.randn(1, 3, 224, 224)\n\n# Default: torch.compile\noptimized = optimize_for_cpu(model, sample)\n\n# Or JIT trace for mobile/static shapes\ntraced = optimize_for_cpu(model, sample, backend=\"trace\")\n```\n\n> **Note:** `sample` must be a 4-D `(N, C, H, W)` tensor for either backend: it is converted to `torch.channels_last`, which PyTorch defines only for 4-D tensors.\n\n> **Note:** `accelerate_model_for_cpu` is deprecated. Use `optimize_for_cpu` instead."
+ }, + { + "cell_type": "code", + "metadata": {}, "source": [ - "**Parameters:**\n", - "\n", - "- `model`: The PyTorch model to optimize\n", - "- `example_input`: A sample input tensor (used for tracing)\n", - "\n", - "**Returns:** An optimized TorchScript model\n", - "\n", - "---\n", + "#| hide\n", + "from fastcore.test import *\n", + "import torch, torch.nn as nn\n", "\n", - "## Usage Example\n", + "# optimize_for_cpu with trace backend\n", + "_m = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(), nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(16, 10))\n", + "_x = torch.randn(1, 3, 8, 8)\n", + "_traced = optimize_for_cpu(_m, _x, backend=\"trace\")\n", + "_out = _traced(_x.to(memory_format=torch.channels_last))\n", + "test_eq(_out.shape, (1, 10))\n", + "assert torch.isfinite(_out).all()\n", "\n", - "```python\n", - "from fasterai.misc.cpu_optimizer import accelerate_model_for_cpu\n", - "import torch\n", + "# Invalid backend raises ValueError\n", + "with ExceptionExpected(ValueError): optimize_for_cpu(_m, _x, backend=\"bad\")\n", "\n", - "# Create example input matching your model's expected shape\n", - "example_input = torch.randn(1, 3, 224, 224)\n", - "\n", - "# Optimize model for CPU inference\n", - "optimized_model = accelerate_model_for_cpu(model, example_input)\n", - "\n", - "# Use the optimized model\n", - "with torch.no_grad():\n", - " output = optimized_model(input_tensor)\n", - "```\n", - "\n", - "**Note:** The returned model is a TorchScript model. Some dynamic Python features may not be supported." 
- ] + "# Deprecated shim warns once; match on its message because torch.jit.trace may record warnings of its own\n", + "import warnings\n", + "with warnings.catch_warnings(record=True) as w:\n", + " warnings.simplefilter(\"always\")\n", + " accelerate_model_for_cpu(nn.Sequential(nn.Conv2d(3, 16, 3), nn.ReLU()), torch.randn(1, 3, 8, 8))\n", + " _dep = [x for x in w if \"accelerate_model_for_cpu is deprecated\" in str(x.message)]\n", + " assert len(_dep) == 1 and issubclass(_dep[0].category, DeprecationWarning)" + ], + "outputs": [], + "execution_count": null, + "id": "test_cpu_opt" }, { "cell_type": "markdown", @@ -190,4 +119,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/nbs/misc/fc_decomposer.ipynb b/nbs/misc/fc_decomposer.ipynb index 6a12fdb..7a3da06 100644 --- a/nbs/misc/fc_decomposer.ipynb +++ b/nbs/misc/fc_decomposer.ipynb @@ -101,7 +101,6 @@ "#| export\n", "import torch\n", "import torch.nn as nn\n", - "import torch.nn.functional as F\n", "import copy" ] }, @@ -328,4 +327,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/nbs/tutorials/misc/fc_decomposer.ipynb b/nbs/tutorials/misc/fc_decomposer.ipynb index 5a804d2..69ea24f 100644 --- a/nbs/tutorials/misc/fc_decomposer.ipynb +++ b/nbs/tutorials/misc/fc_decomposer.ipynb @@ -469,11 +469,11 @@ "\n", "| Parameter | Default | Description |\n", "|-----------|---------|-------------|\n", - "| `rank_ratio` | `0.5` | Fraction of singular values to keep (0-1). Lower = more compression, more accuracy loss |\n", + "| `percent_removed` | `0.5` | Fraction of singular values to keep (0-1). Lower = more compression, more accuracy loss |\n", "\n", - "### Choosing rank_ratio\n", + "### Choosing percent_removed\n", "\n", - "| rank_ratio | Compression | Accuracy Impact |\n", + "| percent_removed | Compression | Accuracy Impact |\n", "|------------|-------------|-----------------|\n", "| `0.8` | Low | Minimal |\n", "| `0.5` | Medium | Moderate |\n", @@ -496,7 +496,7 @@ "learn.fit_one_cycle(5)\n", "\n", "# 2. 
Decompose FC layers\n", - "fc = FC_Decomposer(rank_ratio=0.5)\n", + "fc = FC_Decomposer(percent_removed=0.5)\n", "new_model = fc.decompose(learn.model)\n", "\n", "# 3. Fine-tune to recover accuracy\n", @@ -527,4 +527,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file