Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion fasterai/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,9 @@
'fasterai.misc.conv_decomposer._unfold': ( 'misc/conv_decomposer.html#_unfold',
'fasterai/misc/conv_decomposer.py')},
'fasterai.misc.cpu_optimizer': { 'fasterai.misc.cpu_optimizer.accelerate_model_for_cpu': ( 'misc/cpu_optimizer.html#accelerate_model_for_cpu',
'fasterai/misc/cpu_optimizer.py')},
'fasterai/misc/cpu_optimizer.py'),
'fasterai.misc.cpu_optimizer.optimize_for_cpu': ( 'misc/cpu_optimizer.html#optimize_for_cpu',
'fasterai/misc/cpu_optimizer.py')},
'fasterai.misc.fc_decomposer': { 'fasterai.misc.fc_decomposer.FC_Decomposer': ( 'misc/fc_decomposer.html#fc_decomposer',
'fasterai/misc/fc_decomposer.py'),
'fasterai.misc.fc_decomposer.FC_Decomposer.SVD': ( 'misc/fc_decomposer.html#fc_decomposer.svd',
Expand Down
3 changes: 2 additions & 1 deletion fasterai/misc/all.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .bn_folding import *
from .fc_decomposer import *
from .conv_decomposer import *
from .conv_decomposer import *
from .cpu_optimizer import *
1 change: 0 additions & 1 deletion fasterai/misc/bn_folding.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# %% ../../nbs/misc/bn_folding.ipynb #productive-preparation
import torch
import torch.nn as nn
import torch.nn.functional as F
import copy

# %% ../../nbs/misc/bn_folding.ipynb #83000749
Expand Down
37 changes: 27 additions & 10 deletions fasterai/misc/cpu_optimizer.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,37 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/misc/cpu_optimizer.ipynb.

# %% auto #0
__all__ = ['accelerate_model_for_cpu']
__all__ = ['optimize_for_cpu', 'accelerate_model_for_cpu']

# %% ../../nbs/misc/cpu_optimizer.ipynb #fbbccd4a
import torch
import torch.nn as nn
from torch.utils.mobile_optimizer import optimize_for_mobile
import warnings

# %% ../../nbs/misc/cpu_optimizer.ipynb #6524ac31
def accelerate_model_for_cpu(model: nn.Module, example_input: torch.Tensor):
model.eval()
example_input = example_input.to(memory_format=torch.channels_last)

model = model.to(memory_format=torch.channels_last)
model = torch.jit.script(model)
model = optimize_for_mobile(model)
def optimize_for_cpu(
    model: nn.Module,               # The PyTorch model to optimize
    sample: torch.Tensor,           # Sample input for tracing (with batch dim)
    *,
    backend: str = "compile",       # "compile" (torch.compile) or "trace" (torch.jit.trace)
    compile_mode: str = "default",  # torch.compile mode
) -> nn.Module:
    """Optimize model for CPU inference via channels-last layout + compilation

    The model is switched to eval mode and converted to channels-last through
    `nn.Module.to`, which modifies the passed module in place. `sample` is only
    consumed by the "trace" backend.

    Raises `ValueError` for an unknown `backend`. The check runs before anything
    else so that a rejected call leaves `model` untouched.
    """
    # Validate up front: `eval()` and `to()` below mutate the caller's model.
    if backend not in ("compile", "trace"):
        raise ValueError(f"Unknown backend: {backend!r}. Use 'compile' or 'trace'.")

    model = model.eval().to(memory_format=torch.channels_last)

    if backend == "compile":
        return torch.compile(model, mode=compile_mode)

    # torch.channels_last is only defined for 4D tensors and `Tensor.to` raises
    # for any other rank, so non-4D samples are handed to the tracer unchanged.
    if sample.dim() == 4:
        sample = sample.to(memory_format=torch.channels_last)
    with torch.no_grad():
        return torch.jit.trace(model, sample)

return model
def accelerate_model_for_cpu(model: nn.Module, example_input: torch.Tensor):
    "Deprecated shim kept for old callers; forwards to optimize_for_cpu() with the trace backend"
    notice = "accelerate_model_for_cpu is deprecated, use optimize_for_cpu(model, sample) instead"
    # stacklevel=2 attributes the warning to the caller's line rather than to this shim.
    warnings.warn(notice, category=DeprecationWarning, stacklevel=2)
    traced = optimize_for_cpu(model, example_input, backend="trace")
    return traced
1 change: 0 additions & 1 deletion fasterai/misc/fc_decomposer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# %% ../../nbs/misc/fc_decomposer.ipynb #fbbccd4a
import torch
import torch.nn as nn
import torch.nn.functional as F
import copy

# %% ../../nbs/misc/fc_decomposer.ipynb #6524ac31
Expand Down
2 changes: 2 additions & 0 deletions nbs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ website:
contents:
- misc/bn_folding.ipynb
- misc/fc_decomposer.ipynb
- misc/conv_decomposer.ipynb
- misc/cpu_optimizer.ipynb
- section: Export
contents:
- export/onnx_exporter.ipynb
Expand Down
3 changes: 1 addition & 2 deletions nbs/misc/bn_folding.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
"#| export\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import copy"
]
},
Expand Down Expand Up @@ -388,4 +387,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
143 changes: 36 additions & 107 deletions nbs/misc/cpu_optimizer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@
"metadata": {},
"source": [
"---\n",
"description: Further optimize for CPU inference\n",
"description: Optimize models for CPU inference\n",
"output-file: cpu_optimizer.html\n",
"title: Further optimize for CPU inference\n",
"title: CPU Optimizer\n",
"skip_showdoc: true\n",
"skip_exec: true\n",
"---"
]
},
Expand Down Expand Up @@ -41,136 +40,66 @@
"id": "fbbccd4a",
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"import torch\n",
"import torch.nn as nn\n",
"from torch.utils.mobile_optimizer import optimize_for_mobile"
]
"source": "#| export\nimport torch\nimport torch.nn as nn\nimport warnings"
},
{
"cell_type": "markdown",
"id": "hbzsrd6sl1h",
"metadata": {},
"source": [
"## Overview\n",
"\n",
"The `accelerate_model_for_cpu` function applies optimizations to prepare a PyTorch model for efficient CPU inference. It combines several techniques:\n",
"\n",
"1. **Channels-last memory format**: Optimizes memory layout for CNN operations on CPU\n",
"2. **TorchScript compilation**: JIT compiles the model for faster execution\n",
"3. **Mobile optimization**: Applies `optimize_for_mobile` for operator fusion and other optimizations\n",
"\n",
"**When to use:**\n",
"- Deploying models on CPU-only servers\n",
"- Edge deployment without GPU\n",
"- After quantization for maximum CPU performance"
]
"source": "## Overview\n\n`optimize_for_cpu` prepares a model for efficient CPU inference by combining:\n\n1. **Channels-last memory format** — optimizes layout for CNN operations on CPU\n2. **Compilation** — `torch.compile` (default) or `torch.jit.trace` for operator fusion\n\n| Backend | Speed | Compatibility | Best For |\n|---------|-------|---------------|----------|\n| `\"compile\"` | Faster | Most models | Default choice |\n| `\"trace\"` | Good | Requires static shapes | Legacy / mobile |"
},
{
"cell_type": "code",
"execution_count": null,
"id": "6524ac31",
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"def accelerate_model_for_cpu(model: nn.Module, example_input: torch.Tensor):\n",
" model.eval()\n",
" example_input = example_input.to(memory_format=torch.channels_last)\n",
" \n",
" model = model.to(memory_format=torch.channels_last)\n",
" model = torch.jit.script(model)\n",
" model = optimize_for_mobile(model)\n",
"\n",
" return model"
]
"source": "#| export\ndef optimize_for_cpu(\n model: nn.Module, # The PyTorch model to optimize\n sample: torch.Tensor, # Sample input for tracing (with batch dim)\n *,\n backend: str = \"compile\", # \"compile\" (torch.compile) or \"trace\" (torch.jit.trace)\n compile_mode: str = \"default\", # torch.compile mode\n) -> nn.Module:\n \"Optimize model for CPU inference via channels-last layout + compilation\"\n model = model.eval().to(memory_format=torch.channels_last)\n sample = sample.to(memory_format=torch.channels_last)\n\n if backend == \"compile\":\n return torch.compile(model, mode=compile_mode)\n elif backend == \"trace\":\n with torch.no_grad():\n return torch.jit.trace(model, sample)\n else:\n raise ValueError(f\"Unknown backend: {backend!r}. Use 'compile' or 'trace'.\")\n\ndef accelerate_model_for_cpu(model: nn.Module, example_input: torch.Tensor):\n \"Deprecated: use optimize_for_cpu() instead\"\n warnings.warn(\n \"accelerate_model_for_cpu is deprecated, use optimize_for_cpu(model, sample) instead\",\n DeprecationWarning, stacklevel=2,\n )\n return optimize_for_cpu(model, example_input, backend=\"trace\")"
},
{
"cell_type": "code",
"execution_count": null,
"id": "50222d43",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found permutation search CUDA kernels\n",
"[ASP][Info] permutation_search_kernels can be imported.\n"
]
},
{
"data": {
"text/markdown": [
"---\n",
"\n",
"[source](https://github.com/FasterAI-Labs/fasterai/tree/master/blob/master/fasterai/misc/cpu_optimizer.py#L12){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"\n",
"### accelerate_model_for_cpu\n",
"\n",
"```python\n",
"\n",
"def accelerate_model_for_cpu(\n",
" model:Module, example_input:Tensor\n",
"):\n",
"\n",
"\n",
"```"
],
"text/plain": [
"```python\n",
"\n",
"def accelerate_model_for_cpu(\n",
" model:Module, example_input:Tensor\n",
"):\n",
"\n",
"\n",
"```"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"show_doc(accelerate_model_for_cpu)"
]
"outputs": [],
"source": "show_doc(optimize_for_cpu)"
},
{
"cell_type": "markdown",
"id": "78818w1gh87",
"metadata": {},
"source": "```python\nfrom fasterai.misc.cpu_optimizer import optimize_for_cpu\n\nmodel = resnet18(pretrained=True)\nsample = torch.randn(1, 3, 224, 224)\n\n# Default: torch.compile\noptimized = optimize_for_cpu(model, sample)\n\n# Or JIT trace for mobile/static shapes\ntraced = optimize_for_cpu(model, sample, backend=\"trace\")\n```\n\n> **Note:** `accelerate_model_for_cpu` is deprecated. Use `optimize_for_cpu` instead."
},
{
"cell_type": "code",
"metadata": {},
"source": [
"**Parameters:**\n",
"\n",
"- `model`: The PyTorch model to optimize\n",
"- `example_input`: A sample input tensor (used for tracing)\n",
"\n",
"**Returns:** An optimized TorchScript model\n",
"\n",
"---\n",
"#| hide\n",
"from fastcore.test import *\n",
"import torch, torch.nn as nn\n",
"\n",
"## Usage Example\n",
"# optimize_for_cpu with trace backend\n",
"_m = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(), nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(16, 10))\n",
"_x = torch.randn(1, 3, 8, 8)\n",
"_traced = optimize_for_cpu(_m, _x, backend=\"trace\")\n",
"_out = _traced(_x.to(memory_format=torch.channels_last))\n",
"test_eq(_out.shape, (1, 10))\n",
"assert torch.isfinite(_out).all()\n",
"\n",
"```python\n",
"from fasterai.misc.cpu_optimizer import accelerate_model_for_cpu\n",
"import torch\n",
"# Invalid backend raises ValueError\n",
"with ExceptionExpected(ValueError): optimize_for_cpu(_m, _x, backend=\"bad\")\n",
"\n",
"# Create example input matching your model's expected shape\n",
"example_input = torch.randn(1, 3, 224, 224)\n",
"\n",
"# Optimize model for CPU inference\n",
"optimized_model = accelerate_model_for_cpu(model, example_input)\n",
"\n",
"# Use the optimized model\n",
"with torch.no_grad():\n",
" output = optimized_model(input_tensor)\n",
"```\n",
"\n",
"**Note:** The returned model is a TorchScript model. Some dynamic Python features may not be supported."
]
"# Deprecated function emits warning\n",
"import warnings\n",
"with warnings.catch_warnings(record=True) as w:\n",
" warnings.simplefilter(\"always\")\n",
" accelerate_model_for_cpu(nn.Sequential(nn.Conv2d(3, 16, 3), nn.ReLU()), torch.randn(1, 3, 8, 8))\n",
" assert len(w) == 1\n",
" assert issubclass(w[0].category, DeprecationWarning)"
],
"outputs": [],
"execution_count": null,
"id": "test_cpu_opt"
},
{
"cell_type": "markdown",
Expand All @@ -190,4 +119,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
3 changes: 1 addition & 2 deletions nbs/misc/fc_decomposer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@
"#| export\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import copy"
]
},
Expand Down Expand Up @@ -328,4 +327,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
10 changes: 5 additions & 5 deletions nbs/tutorials/misc/fc_decomposer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -469,11 +469,11 @@
"\n",
"| Parameter | Default | Description |\n",
"|-----------|---------|-------------|\n",
"| `rank_ratio` | `0.5` | Fraction of singular values to keep (0-1). Lower = more compression, more accuracy loss |\n",
"| `percent_removed` | `0.5` | Fraction of singular values to remove (0-1). Higher = more compression, more accuracy loss |\n",
"\n",
"### Choosing rank_ratio\n",
"### Choosing percent_removed\n",
"\n",
"| rank_ratio | Compression | Accuracy Impact |\n",
"| percent_removed | Compression | Accuracy Impact |\n",
"|------------|-------------|-----------------|\n",
"| `0.8` | Low | Minimal |\n",
"| `0.5` | Medium | Moderate |\n",
Expand All @@ -496,7 +496,7 @@
"learn.fit_one_cycle(5)\n",
"\n",
"# 2. Decompose FC layers\n",
"fc = FC_Decomposer(rank_ratio=0.5)\n",
"fc = FC_Decomposer(percent_removed=0.5)\n",
"new_model = fc.decompose(learn.model)\n",
"\n",
"# 3. Fine-tune to recover accuracy\n",
Expand Down Expand Up @@ -527,4 +527,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
Loading