diff --git a/fasterai/_modidx.py b/fasterai/_modidx.py index 500ab14..7c9cee7 100644 --- a/fasterai/_modidx.py +++ b/fasterai/_modidx.py @@ -234,7 +234,9 @@ 'fasterai.misc.conv_decomposer._unfold': ( 'misc/conv_decomposer.html#_unfold', 'fasterai/misc/conv_decomposer.py')}, 'fasterai.misc.cpu_optimizer': { 'fasterai.misc.cpu_optimizer.accelerate_model_for_cpu': ( 'misc/cpu_optimizer.html#accelerate_model_for_cpu', - 'fasterai/misc/cpu_optimizer.py')}, + 'fasterai/misc/cpu_optimizer.py'), + 'fasterai.misc.cpu_optimizer.optimize_for_cpu': ( 'misc/cpu_optimizer.html#optimize_for_cpu', + 'fasterai/misc/cpu_optimizer.py')}, 'fasterai.misc.fc_decomposer': { 'fasterai.misc.fc_decomposer.FC_Decomposer': ( 'misc/fc_decomposer.html#fc_decomposer', 'fasterai/misc/fc_decomposer.py'), 'fasterai.misc.fc_decomposer.FC_Decomposer.SVD': ( 'misc/fc_decomposer.html#fc_decomposer.svd', @@ -242,7 +244,11 @@ 'fasterai.misc.fc_decomposer.FC_Decomposer.__init__': ( 'misc/fc_decomposer.html#fc_decomposer.__init__', 'fasterai/misc/fc_decomposer.py'), 'fasterai.misc.fc_decomposer.FC_Decomposer.decompose': ( 'misc/fc_decomposer.html#fc_decomposer.decompose', - 'fasterai/misc/fc_decomposer.py')}, + 'fasterai/misc/fc_decomposer.py'), + 'fasterai.misc.fc_decomposer._rank_from_energy': ( 'misc/fc_decomposer.html#_rank_from_energy', + 'fasterai/misc/fc_decomposer.py'), + 'fasterai.misc.fc_decomposer._should_decompose': ( 'misc/fc_decomposer.html#_should_decompose', + 'fasterai/misc/fc_decomposer.py')}, 'fasterai.prune.all': {}, 'fasterai.prune.prune_callback': { 'fasterai.prune.prune_callback.PruneCallback': ( 'prune/prune_callback.html#prunecallback', 'fasterai/prune/prune_callback.py'), diff --git a/fasterai/misc/all.py b/fasterai/misc/all.py index 545f64c..f071eec 100644 --- a/fasterai/misc/all.py +++ b/fasterai/misc/all.py @@ -1,3 +1,4 @@ from .bn_folding import * from .fc_decomposer import * -from .conv_decomposer import * \ No newline at end of file +from .conv_decomposer import * +from .cpu_optimizer import * \ No newline at end of file diff --git a/fasterai/misc/bn_folding.py b/fasterai/misc/bn_folding.py index 758b722..6334d33 100644 --- a/fasterai/misc/bn_folding.py +++ b/fasterai/misc/bn_folding.py @@ -6,7 +6,6 @@ # %% ../../nbs/misc/bn_folding.ipynb #productive-preparation import torch import torch.nn as nn -import torch.nn.functional as F import copy # %% ../../nbs/misc/bn_folding.ipynb #83000749 diff --git a/fasterai/misc/conv_decomposer.py b/fasterai/misc/conv_decomposer.py index 933f8c2..d8a5629 100644 --- a/fasterai/misc/conv_decomposer.py +++ b/fasterai/misc/conv_decomposer.py @@ -9,25 +9,28 @@ import copy # %% ../../nbs/misc/conv_decomposer.ipynb #conv-decomposer +from .fc_decomposer import _rank_from_energy, _should_decompose + def _unfold(tensor, mode): "Unfold a tensor along a mode into a matrix" return tensor.moveaxis(mode, 0).flatten(1) -def _partial_tucker(weight, ranks, n_iter=5): +def _partial_tucker(weight, ranks, n_iter=10, tol=1e-4): "Partial Tucker decomposition on modes [0, 1] via alternating SVD (HOOI)" - # Initialize factors from SVD of mode unfoldings U0 = torch.linalg.svd(_unfold(weight, 0), full_matrices=False)[0][:, :ranks[0]] U1 = torch.linalg.svd(_unfold(weight, 1), full_matrices=False)[0][:, :ranks[1]] for _ in range(n_iter): - # Project out mode 0 using U0, then update U1 + U0_prev, U1_prev = U0.clone(), U1.clone() + # Project out mode 0, update U1 proj = torch.einsum('oihw, or -> rihw', weight, U0) U1 = torch.linalg.svd(_unfold(proj, 1), full_matrices=False)[0][:, :ranks[1]] - # Project out mode 1 using U1, then update U0 + # Project out mode 1, update U0 proj = torch.einsum('oihw, is -> oshw', weight, U1) U0 = torch.linalg.svd(_unfold(proj, 0), full_matrices=False)[0][:, :ranks[0]] + # Early stopping on convergence + if (U0 - U0_prev).norm() + (U1 - U1_prev).norm() < tol: break - # Core = W ×₀ U0ᵀ ×₁ U1ᵀ core = torch.einsum('oihw, or, is -> rshw', weight, U0, U1) return core, [U0, U1] @@ -38,35 +41,51 @@ class Conv_Decomposer: def __init__(self): pass def decompose(self, - model: nn.Module, # The model to decompose - percent_removed: float = 0.5, # Fraction of rank to remove per mode [0, 1) + model: nn.Module, # The model to decompose + percent_removed: float = 0.5, # Fraction of rank to remove per mode [0, 1) + energy_threshold: float | None = None, # Auto rank: keep this fraction of energy (0-1) + layers: list[str] | None = None, # Layer names to decompose (None = all eligible) + exclude: list[str] | None = None, # Layer names to skip + n_iter: int = 10, # Max HOOI iterations + tol: float = 1e-4, # HOOI convergence tolerance ) -> nn.Module: - "Recursively decompose all eligible Conv2d layers in the model" - if not (0 <= percent_removed < 1): + "Decompose eligible Conv2d layers. Use energy_threshold for automatic rank selection." + if energy_threshold is None and not (0 <= percent_removed < 1): raise ValueError(f"percent_removed must be in range [0, 1), got {percent_removed}") + if energy_threshold is not None and not (0 < energy_threshold <= 1): + raise ValueError(f"energy_threshold must be in range (0, 1], got {energy_threshold}") new_model = copy.deepcopy(model) - for name in list(new_model._modules): - module = new_model._modules[name] - if len(list(module._modules)) > 0: - new_model._modules[name] = self.decompose(module, percent_removed) - elif isinstance(module, nn.Conv2d) and module.groups == 1 and min(module.kernel_size) > 1: - new_model._modules[name] = self.Tucker(module, percent_removed) + for name, module in list(new_model.named_modules()): + if (isinstance(module, nn.Conv2d) and module.groups == 1 + and min(module.kernel_size) > 1 + and _should_decompose(name, layers, exclude)): + parent_name, _, child_name = name.rpartition('.') + parent = new_model.get_submodule(parent_name) if parent_name else new_model + setattr(parent, child_name, self.Tucker(module, percent_removed, energy_threshold, n_iter, tol)) return new_model def Tucker(self, - layer: nn.Conv2d, # The Conv2d layer to decompose - percent_removed: float, # Fraction of rank to remove per mode + layer: nn.Conv2d, # The Conv2d layer to decompose + percent_removed: float = 0.5, # Fraction of rank to remove per mode + energy_threshold: float | None = None, # Auto rank via energy retention + n_iter: int = 10, # Max HOOI iterations + tol: float = 1e-4, # HOOI convergence tolerance ) -> nn.Sequential: "Perform Tucker decomposition on a single Conv2d layer" W = layer.weight.data C_out, C_in = W.shape[:2] - R_out = max(1, int((1 - percent_removed) * C_out)) - R_in = max(1, int((1 - percent_removed) * C_in)) + if energy_threshold is not None: + S0 = torch.linalg.svd(_unfold(W, 0), full_matrices=False)[1] + S1 = torch.linalg.svd(_unfold(W, 1), full_matrices=False)[1] + R_out = _rank_from_energy(S0, energy_threshold) + R_in = _rank_from_energy(S1, energy_threshold) + else: + R_out = max(1, int((1 - percent_removed) * C_out)) + R_in = max(1, int((1 - percent_removed) * C_in)) - core, (U_out, U_in) = _partial_tucker(W, [R_out, R_in]) - # core: (R_out, R_in, H, W), U_out: (C_out, R_out), U_in: (C_in, R_in) + core, (U_out, U_in) = _partial_tucker(W, [R_out, R_in], n_iter=n_iter, tol=tol) # 1. Pointwise input compression: (C_in → R_in) first = nn.Conv2d(C_in, R_in, 1, bias=False) diff --git a/fasterai/misc/cpu_optimizer.py b/fasterai/misc/cpu_optimizer.py index b5fd869..bf5e91c 100644 --- a/fasterai/misc/cpu_optimizer.py +++ b/fasterai/misc/cpu_optimizer.py @@ -1,20 +1,37 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/misc/cpu_optimizer.ipynb. # %% auto #0 -__all__ = ['accelerate_model_for_cpu'] +__all__ = ['optimize_for_cpu', 'accelerate_model_for_cpu'] # %% ../../nbs/misc/cpu_optimizer.ipynb #fbbccd4a import torch import torch.nn as nn -from torch.utils.mobile_optimizer import optimize_for_mobile +import warnings # %% ../../nbs/misc/cpu_optimizer.ipynb #6524ac31 -def accelerate_model_for_cpu(model: nn.Module, example_input: torch.Tensor): - model.eval() - example_input = example_input.to(memory_format=torch.channels_last) - - model = model.to(memory_format=torch.channels_last) - model = torch.jit.script(model) - model = optimize_for_mobile(model) +def optimize_for_cpu( + model: nn.Module, # The PyTorch model to optimize + sample: torch.Tensor, # Sample input for tracing (with batch dim) + *, + backend: str = "compile", # "compile" (torch.compile) or "trace" (torch.jit.trace) + compile_mode: str = "default", # torch.compile mode +) -> nn.Module: + "Optimize model for CPU inference via channels-last layout + compilation" + model = model.eval().to(memory_format=torch.channels_last) + sample = sample.to(memory_format=torch.channels_last) + + if backend == "compile": + return torch.compile(model, mode=compile_mode) + elif backend == "trace": + with torch.no_grad(): + return torch.jit.trace(model, sample) + else: + raise ValueError(f"Unknown backend: {backend!r}. Use 'compile' or 'trace'.") - return model +def accelerate_model_for_cpu(model: nn.Module, example_input: torch.Tensor): + "Deprecated: use optimize_for_cpu() instead" + warnings.warn( + "accelerate_model_for_cpu is deprecated, use optimize_for_cpu(model, sample) instead", + DeprecationWarning, stacklevel=2, + ) + return optimize_for_cpu(model, example_input, backend="trace") diff --git a/fasterai/misc/fc_decomposer.py b/fasterai/misc/fc_decomposer.py index c1fa113..ec5eaa7 100644 --- a/fasterai/misc/fc_decomposer.py +++ b/fasterai/misc/fc_decomposer.py @@ -6,45 +6,61 @@ # %% ../../nbs/misc/fc_decomposer.ipynb #fbbccd4a import torch import torch.nn as nn -import torch.nn.functional as F import copy # %% ../../nbs/misc/fc_decomposer.ipynb #6524ac31 +def _rank_from_energy(S, threshold): + "Find minimum rank to retain `threshold` fraction of singular value energy" + energy = S.pow(2).cumsum(0) / S.pow(2).sum() + idx = (energy >= threshold).nonzero(as_tuple=True)[0] + return max(1, int(idx[0].item()) + 1) if len(idx) > 0 else S.shape[0] + +def _should_decompose(name, layers=None, exclude=None): + "Check if a named layer should be decomposed" + if exclude and name in exclude: return False + if layers is not None: return name in layers + return True + class FC_Decomposer: "Decompose fully-connected layers using SVD to reduce parameters" - def __init__(self): - pass + def __init__(self): pass def decompose(self, - model: nn.Module, # The model to decompose - percent_removed: float = 0.5 # Fraction of singular values to remove [0, 1) + model: nn.Module, # The model to decompose + percent_removed: float = 0.5, # Fraction of singular values to remove [0, 1) + energy_threshold: float | None = None, # Auto rank: keep this fraction of energy (0-1) + layers: list[str] | None = None, # Layer names to decompose (None = all) + exclude: list[str] | None = None, # Layer names to skip ) -> nn.Module: - "Recursively decompose all Linear layers in the model using SVD" - if not (0 <= percent_removed < 1): + "Decompose Linear layers using SVD. Use energy_threshold for automatic rank selection." + if energy_threshold is None and not (0 <= percent_removed < 1): raise ValueError(f"percent_removed must be in range [0, 1), got {percent_removed}") + if energy_threshold is not None and not (0 < energy_threshold <= 1): + raise ValueError(f"energy_threshold must be in range (0, 1], got {energy_threshold}") new_model = copy.deepcopy(model) - module_names = list(new_model._modules) - - for k, name in enumerate(module_names): - if len(list(new_model._modules[name]._modules)) > 0: - new_model._modules[name] = self.decompose(new_model._modules[name], percent_removed) - else: - if isinstance(new_model._modules[name], nn.Linear): - layer = self.SVD(new_model._modules[name], percent_removed) - new_model._modules[name] = layer + for name, module in list(new_model.named_modules()): + if isinstance(module, nn.Linear) and _should_decompose(name, layers, exclude): + parent_name, _, child_name = name.rpartition('.') + parent = new_model.get_submodule(parent_name) if parent_name else new_model + setattr(parent, child_name, self.SVD(module, percent_removed, energy_threshold)) return new_model - def SVD(self, - layer: nn.Linear, # The Linear layer to decompose - percent_removed: float # Fraction of singular values to remove + layer: nn.Linear, # The Linear layer to decompose + percent_removed: float = 0.5, # Fraction of singular values to remove + energy_threshold: float | None = None, # Auto rank via energy retention ) -> nn.Sequential: "Perform SVD decomposition on a single Linear layer" W = layer.weight.data U, S, Vh = torch.linalg.svd(W, full_matrices=False) - L = max(1, int((1.-percent_removed) * S.shape[0])) + + if energy_threshold is not None: + L = _rank_from_energy(S, energy_threshold) + else: + L = max(1, int((1.-percent_removed) * S.shape[0])) + W1 = U[:,:L] W2 = torch.diag(S[:L]) @ Vh[:L] layer_1 = nn.Linear(in_features=layer.in_features, diff --git a/nbs/_quarto.yml b/nbs/_quarto.yml index 3490807..972f71b 100644 --- a/nbs/_quarto.yml +++ b/nbs/_quarto.yml @@ -114,6 +114,8 @@ website: contents: - misc/bn_folding.ipynb - misc/fc_decomposer.ipynb + - misc/conv_decomposer.ipynb + - misc/cpu_optimizer.ipynb - section: Export contents: - export/onnx_exporter.ipynb diff --git a/nbs/misc/bn_folding.ipynb b/nbs/misc/bn_folding.ipynb index ea50d7c..408e303 100644 --- a/nbs/misc/bn_folding.ipynb +++ b/nbs/misc/bn_folding.ipynb @@ -45,7 +45,6 @@ "#| export\n", "import torch\n", "import torch.nn as nn\n", - "import torch.nn.functional as F\n", "import copy" ] }, @@ -388,4 +387,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/nbs/misc/conv_decomposer.ipynb b/nbs/misc/conv_decomposer.ipynb index 5edb688..f4761ba 100644 --- a/nbs/misc/conv_decomposer.ipynb +++ b/nbs/misc/conv_decomposer.ipynb @@ -51,7 +51,7 @@ "id": "conv-decomposer", "metadata": {}, "outputs": [], - "source": "#| export\ndef _unfold(tensor, mode):\n \"Unfold a tensor along a mode into a matrix\"\n return tensor.moveaxis(mode, 0).flatten(1)\n\ndef _partial_tucker(weight, ranks, n_iter=5):\n \"Partial Tucker decomposition on modes [0, 1] via alternating SVD (HOOI)\"\n # Initialize factors from SVD of mode unfoldings\n U0 = torch.linalg.svd(_unfold(weight, 0), full_matrices=False)[0][:, :ranks[0]]\n U1 = torch.linalg.svd(_unfold(weight, 1), full_matrices=False)[0][:, :ranks[1]]\n\n for _ in range(n_iter):\n # Project out mode 0 using U0, then update U1\n proj = torch.einsum('oihw, or -> rihw', weight, U0)\n U1 = torch.linalg.svd(_unfold(proj, 1), full_matrices=False)[0][:, :ranks[1]]\n # Project out mode 1 using U1, then update U0\n proj = torch.einsum('oihw, is -> oshw', weight, U1)\n U0 = torch.linalg.svd(_unfold(proj, 0), full_matrices=False)[0][:, :ranks[0]]\n\n # Core = W ×₀ U0ᵀ ×₁ U1ᵀ\n core = torch.einsum('oihw, or, is -> rshw', weight, U0, U1)\n return core, [U0, U1]\n\n\nclass Conv_Decomposer:\n \"Decompose Conv2d layers using Tucker decomposition to reduce parameters and FLOPs\"\n\n def __init__(self): pass\n\n def decompose(self,\n model: nn.Module, # The model to decompose\n percent_removed: float = 0.5, # Fraction of rank to remove per mode [0, 1)\n ) -> nn.Module:\n \"Recursively decompose all eligible Conv2d layers in the model\"\n if not (0 <= percent_removed < 1):\n raise ValueError(f\"percent_removed must be in range [0, 1), got {percent_removed}\")\n\n new_model = copy.deepcopy(model)\n for name in list(new_model._modules):\n module = new_model._modules[name]\n if len(list(module._modules)) > 0:\n new_model._modules[name] = self.decompose(module, percent_removed)\n elif isinstance(module, nn.Conv2d) and module.groups == 1 and min(module.kernel_size) > 1:\n new_model._modules[name] = self.Tucker(module, percent_removed)\n return new_model\n\n def Tucker(self,\n layer: nn.Conv2d, # The Conv2d layer to decompose\n percent_removed: float, # Fraction of rank to remove per mode\n ) -> nn.Sequential:\n \"Perform Tucker decomposition on a single Conv2d layer\"\n W = layer.weight.data\n C_out, C_in = W.shape[:2]\n\n R_out = max(1, int((1 - percent_removed) * C_out))\n R_in = max(1, int((1 - percent_removed) * C_in))\n\n core, (U_out, U_in) = _partial_tucker(W, [R_out, R_in])\n # core: (R_out, R_in, H, W), U_out: (C_out, R_out), U_in: (C_in, R_in)\n\n # 1. Pointwise input compression: (C_in → R_in)\n first = nn.Conv2d(C_in, R_in, 1, bias=False)\n first.weight.data = U_in.t().unsqueeze(-1).unsqueeze(-1)\n\n # 2. Spatial convolution at reduced rank: (R_in → R_out)\n middle = nn.Conv2d(R_in, R_out, layer.kernel_size, stride=layer.stride,\n padding=layer.padding, dilation=layer.dilation, bias=False)\n middle.weight.data = core\n\n # 3. Pointwise output expansion: (R_out → C_out)\n last = nn.Conv2d(R_out, C_out, 1, bias=layer.bias is not None)\n last.weight.data = U_out.unsqueeze(-1).unsqueeze(-1)\n if layer.bias is not None:\n last.bias.data = layer.bias.data\n\n return nn.Sequential(first, middle, last)" + "source": "#| export\nfrom fasterai.misc.fc_decomposer import _rank_from_energy, _should_decompose\n\ndef _unfold(tensor, mode):\n \"Unfold a tensor along a mode into a matrix\"\n return tensor.moveaxis(mode, 0).flatten(1)\n\ndef _partial_tucker(weight, ranks, n_iter=10, tol=1e-4):\n \"Partial Tucker decomposition on modes [0, 1] via alternating SVD (HOOI)\"\n U0 = torch.linalg.svd(_unfold(weight, 0), full_matrices=False)[0][:, :ranks[0]]\n U1 = torch.linalg.svd(_unfold(weight, 1), full_matrices=False)[0][:, :ranks[1]]\n\n for _ in range(n_iter):\n U0_prev, U1_prev = U0.clone(), U1.clone()\n # Project out mode 0, update U1\n proj = torch.einsum('oihw, or -> rihw', weight, U0)\n U1 = torch.linalg.svd(_unfold(proj, 1), full_matrices=False)[0][:, :ranks[1]]\n # Project out mode 1, update U0\n proj = torch.einsum('oihw, is -> oshw', weight, U1)\n U0 = torch.linalg.svd(_unfold(proj, 0), full_matrices=False)[0][:, :ranks[0]]\n # Early stopping on convergence\n if (U0 - U0_prev).norm() + (U1 - U1_prev).norm() < tol: break\n\n core = torch.einsum('oihw, or, is -> rshw', weight, U0, U1)\n return core, [U0, U1]\n\n\nclass Conv_Decomposer:\n \"Decompose Conv2d layers using Tucker decomposition to reduce parameters and FLOPs\"\n\n def __init__(self): pass\n\n def decompose(self,\n model: nn.Module, # The model to decompose\n percent_removed: float = 0.5, # Fraction of rank to remove per mode [0, 1)\n energy_threshold: float | None = None, # Auto rank: keep this fraction of energy (0-1)\n layers: list[str] | None = None, # Layer names to decompose (None = all eligible)\n exclude: list[str] | None = None, # Layer names to skip\n n_iter: int = 10, # Max HOOI iterations\n tol: float = 1e-4, # HOOI convergence tolerance\n ) -> nn.Module:\n \"Decompose eligible Conv2d layers. Use energy_threshold for automatic rank selection.\"\n if energy_threshold is None and not (0 <= percent_removed < 1):\n raise ValueError(f\"percent_removed must be in range [0, 1), got {percent_removed}\")\n if energy_threshold is not None and not (0 < energy_threshold <= 1):\n raise ValueError(f\"energy_threshold must be in range (0, 1], got {energy_threshold}\")\n\n new_model = copy.deepcopy(model)\n for name, module in list(new_model.named_modules()):\n if (isinstance(module, nn.Conv2d) and module.groups == 1 \n and min(module.kernel_size) > 1\n and _should_decompose(name, layers, exclude)):\n parent_name, _, child_name = name.rpartition('.')\n parent = new_model.get_submodule(parent_name) if parent_name else new_model\n setattr(parent, child_name, self.Tucker(module, percent_removed, energy_threshold, n_iter, tol))\n return new_model\n\n def Tucker(self,\n layer: nn.Conv2d, # The Conv2d layer to decompose\n percent_removed: float = 0.5, # Fraction of rank to remove per mode\n energy_threshold: float | None = None, # Auto rank via energy retention\n n_iter: int = 10, # Max HOOI iterations\n tol: float = 1e-4, # HOOI convergence tolerance\n ) -> nn.Sequential:\n \"Perform Tucker decomposition on a single Conv2d layer\"\n W = layer.weight.data\n C_out, C_in = W.shape[:2]\n\n if energy_threshold is not None:\n S0 = torch.linalg.svd(_unfold(W, 0), full_matrices=False)[1]\n S1 = torch.linalg.svd(_unfold(W, 1), full_matrices=False)[1]\n R_out = _rank_from_energy(S0, energy_threshold)\n R_in = _rank_from_energy(S1, energy_threshold)\n else:\n R_out = max(1, int((1 - percent_removed) * C_out))\n R_in = max(1, int((1 - percent_removed) * C_in))\n\n core, (U_out, U_in) = _partial_tucker(W, [R_out, R_in], n_iter=n_iter, tol=tol)\n\n # 1. Pointwise input compression: (C_in → R_in)\n first = nn.Conv2d(C_in, R_in, 1, bias=False)\n first.weight.data = U_in.t().unsqueeze(-1).unsqueeze(-1)\n\n # 2. Spatial convolution at reduced rank: (R_in → R_out)\n middle = nn.Conv2d(R_in, R_out, layer.kernel_size, stride=layer.stride,\n padding=layer.padding, dilation=layer.dilation, bias=False)\n middle.weight.data = core\n\n # 3. Pointwise output expansion: (R_out → C_out)\n last = nn.Conv2d(R_out, C_out, 1, bias=layer.bias is not None)\n last.weight.data = U_out.unsqueeze(-1).unsqueeze(-1)\n if layer.bias is not None:\n last.bias.data = layer.bias.data\n\n return nn.Sequential(first, middle, last)" }, { "cell_type": "code", @@ -85,7 +85,7 @@ "id": "tests", "metadata": {}, "outputs": [], - "source": "#| hide\nfrom fastcore.test import *\n\ndecomposer = Conv_Decomposer()\n\n# --- Output shape preserved ---\n_m = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(), nn.Conv2d(16, 32, 3, padding=1))\n_x = torch.randn(2, 3, 8, 8)\n_m_dec = decomposer.decompose(_m, percent_removed=0.5)\ntest_eq(_m(_x).shape, _m_dec(_x).shape)\n\n# --- percent_removed=0.0 → close reconstruction (HOOI is iterative, not exact) ---\n_m2 = nn.Sequential(nn.Conv2d(16, 32, 3, padding=1))\n_x2 = torch.randn(2, 16, 8, 8)\n_m2_dec = decomposer.decompose(_m2, percent_removed=0.0)\ntest_close(_m2(_x2), _m2_dec(_x2), eps=0.01)\n\n# --- Decomposed structure: Conv2d becomes Sequential of 3 Conv2ds ---\nassert isinstance(_m_dec[0], nn.Sequential)\ntest_eq(len(_m_dec[0]), 3)\ntest_eq(_m_dec[0][0].kernel_size, (1, 1)) # pointwise in\ntest_eq(_m_dec[0][1].kernel_size, (3, 3)) # spatial\ntest_eq(_m_dec[0][2].kernel_size, (1, 1)) # pointwise out\n\n# --- 1x1 convolutions are skipped ---\n_m_pw = nn.Sequential(nn.Conv2d(16, 32, 1))\n_m_pw_dec = decomposer.decompose(_m_pw, percent_removed=0.5)\nassert isinstance(_m_pw_dec[0], nn.Conv2d) # unchanged, not Sequential\n\n# --- Grouped convolutions are skipped ---\n_m_dw = nn.Sequential(nn.Conv2d(16, 16, 3, padding=1, groups=16))\n_m_dw_dec = decomposer.decompose(_m_dw, percent_removed=0.5)\nassert isinstance(_m_dw_dec[0], nn.Conv2d) # unchanged\n\n# --- Minimum rank >= 1 even at extreme removal ---\n_m3 = nn.Sequential(nn.Conv2d(4, 8, 3, padding=1))\n_m3_dec = decomposer.decompose(_m3, percent_removed=0.95)\ntest_eq(_m3_dec[0][0].out_features if hasattr(_m3_dec[0][0], 'out_features') else _m3_dec[0][0].out_channels, max(1, int(0.05 * 4)))\n\n# --- Bias handling: original bias → last layer gets it ---\n_conv_bias = nn.Conv2d(16, 32, 3, padding=1, bias=True)\n_dec_bias = decomposer.Tucker(_conv_bias, 0.5)\nassert _dec_bias[0].bias is None # first: no bias\nassert _dec_bias[1].bias is None # middle: no bias\nassert _dec_bias[2].bias is not None # last: has bias\n\n_conv_nobias = nn.Conv2d(16, 32, 3, padding=1, bias=False)\n_dec_nobias = decomposer.Tucker(_conv_nobias, 0.5)\nassert _dec_nobias[2].bias is None # last: no bias\n\n# --- Stride/padding transfer to middle conv only ---\n_conv_stride = nn.Conv2d(16, 32, 3, stride=2, padding=1)\n_dec_stride = decomposer.Tucker(_conv_stride, 0.5)\ntest_eq(_dec_stride[0].stride, (1, 1)) # pointwise: default\ntest_eq(_dec_stride[1].stride, (2, 2)) # middle: from original\ntest_eq(_dec_stride[2].stride, (1, 1)) # pointwise: default\n\n# --- Validation ---\nwith ExceptionExpected(ValueError): decomposer.decompose(nn.Sequential(nn.Conv2d(3, 16, 3)), percent_removed=1.0)\nwith ExceptionExpected(ValueError): decomposer.decompose(nn.Sequential(nn.Conv2d(3, 16, 3)), percent_removed=-0.1)" + "source": "#| hide\nfrom fastcore.test import *\n\ndecomposer = Conv_Decomposer()\n\n# --- Output shape preserved ---\n_m = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(), nn.Conv2d(16, 32, 3, padding=1))\n_x = torch.randn(2, 3, 8, 8)\n_m_dec = decomposer.decompose(_m, percent_removed=0.5)\ntest_eq(_m(_x).shape, _m_dec(_x).shape)\n\n# --- percent_removed=0.0 → close reconstruction ---\n_m2 = nn.Sequential(nn.Conv2d(16, 32, 3, padding=1))\n_x2 = torch.randn(2, 16, 8, 8)\n_m2_dec = decomposer.decompose(_m2, percent_removed=0.0)\ntest_close(_m2(_x2), _m2_dec(_x2), eps=0.01)\n\n# --- Decomposed structure: 3 Conv2ds ---\nassert isinstance(_m_dec[0], nn.Sequential)\ntest_eq(len(_m_dec[0]), 3)\ntest_eq(_m_dec[0][0].kernel_size, (1, 1))\ntest_eq(_m_dec[0][1].kernel_size, (3, 3))\ntest_eq(_m_dec[0][2].kernel_size, (1, 1))\n\n# --- 1x1 and grouped skipped ---\nassert isinstance(decomposer.decompose(nn.Sequential(nn.Conv2d(16, 32, 1)), 0.5)[0], nn.Conv2d)\nassert isinstance(decomposer.decompose(nn.Sequential(nn.Conv2d(16, 16, 3, groups=16, padding=1)), 0.5)[0], nn.Conv2d)\n\n# --- Bias handling ---\n_dec_bias = decomposer.Tucker(nn.Conv2d(16, 32, 3, padding=1, bias=True), 0.5)\nassert _dec_bias[0].bias is None and _dec_bias[1].bias is None and _dec_bias[2].bias is not None\n\n# --- Stride/padding transfer ---\n_dec_stride = decomposer.Tucker(nn.Conv2d(16, 32, 3, stride=2, padding=1), 0.5)\ntest_eq(_dec_stride[1].stride, (2, 2))\n\n# --- Validation ---\nwith ExceptionExpected(ValueError): decomposer.decompose(nn.Sequential(nn.Conv2d(3, 16, 3)), percent_removed=1.0)\n\n# --- energy_threshold ---\n_m3 = nn.Sequential(nn.Conv2d(16, 32, 3, padding=1))\n_m3_99 = decomposer.decompose(_m3, energy_threshold=0.99)\n_m3_50 = decomposer.decompose(_m3, percent_removed=0.5)\n# 99% energy → more channels kept than 50% removal\nassert _m3_99[0][0].out_channels >= _m3_50[0][0].out_channels\n\n# --- layers / exclude ---\n_m4 = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(), nn.Conv2d(16, 32, 3, padding=1))\n_m4_sel = decomposer.decompose(_m4, 0.5, layers=['0'])\nassert isinstance(_m4_sel[0], nn.Sequential) # decomposed\nassert isinstance(_m4_sel[2], nn.Conv2d) # untouched\n\n_m5 = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(), nn.Conv2d(16, 32, 3, padding=1))\n_m5_exc = decomposer.decompose(_m5, 0.5, exclude=['2'])\nassert isinstance(_m5_exc[0], nn.Sequential)\nassert isinstance(_m5_exc[2], nn.Conv2d)\n\n# --- HOOI convergence: tol controls early stopping ---\n_m6 = nn.Sequential(nn.Conv2d(16, 32, 3, padding=1))\n_m6_strict = decomposer.decompose(_m6, 0.5, tol=1e-8, n_iter=50) # tight tol, more iters\n_m6_loose = decomposer.decompose(_m6, 0.5, tol=1.0, n_iter=50) # loose tol, stops early\n# Both produce valid output\n_x6 = torch.randn(2, 16, 8, 8)\nassert torch.isfinite(_m6_strict(_x6)).all()\nassert torch.isfinite(_m6_loose(_x6)).all()" }, { "cell_type": "code", @@ -111,4 +111,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/nbs/misc/cpu_optimizer.ipynb b/nbs/misc/cpu_optimizer.ipynb index b953d8d..fb41767 100644 --- a/nbs/misc/cpu_optimizer.ipynb +++ b/nbs/misc/cpu_optimizer.ipynb @@ -6,11 +6,10 @@ "metadata": {}, "source": [ "---\n", - "description: Further optimize for CPU inference\n", + "description: Optimize models for CPU inference\n", "output-file: cpu_optimizer.html\n", - "title: Further optimize for CPU inference\n", + "title: CPU Optimizer\n", "skip_showdoc: true\n", - "skip_exec: true\n", "---" ] }, @@ -41,31 +40,13 @@ "id": "fbbccd4a", "metadata": {}, "outputs": [], - "source": [ - "#| export\n", - "import torch\n", - "import torch.nn as nn\n", - "from torch.utils.mobile_optimizer import optimize_for_mobile" - ] + "source": "#| export\nimport torch\nimport torch.nn as nn\nimport warnings" }, { "cell_type": "markdown", "id": "hbzsrd6sl1h", "metadata": {}, - "source": [ - "## Overview\n", - "\n", - "The `accelerate_model_for_cpu` function applies optimizations to prepare a PyTorch model for efficient CPU inference. It combines several techniques:\n", - "\n", - "1. **Channels-last memory format**: Optimizes memory layout for CNN operations on CPU\n", - "2. **TorchScript compilation**: JIT compiles the model for faster execution\n", - "3. **Mobile optimization**: Applies `optimize_for_mobile` for operator fusion and other optimizations\n", - "\n", - "**When to use:**\n", - "- Deploying models on CPU-only servers\n", - "- Edge deployment without GPU\n", - "- After quantization for maximum CPU performance" - ] + "source": "## Overview\n\n`optimize_for_cpu` prepares a model for efficient CPU inference by combining:\n\n1. **Channels-last memory format** — optimizes layout for CNN operations on CPU\n2. **Compilation** — `torch.compile` (default) or `torch.jit.trace` for operator fusion\n\n| Backend | Speed | Compatibility | Best For |\n|---------|-------|---------------|----------|\n| `\"compile\"` | Faster | Most models | Default choice |\n| `\"trace\"` | Good | Requires static shapes | Legacy / mobile |" }, { "cell_type": "code", @@ -73,104 +54,52 @@ "id": "6524ac31", "metadata": {}, "outputs": [], - "source": [ - "#| export\n", - "def accelerate_model_for_cpu(model: nn.Module, example_input: torch.Tensor):\n", - " model.eval()\n", - " example_input = example_input.to(memory_format=torch.channels_last)\n", - " \n", - " model = model.to(memory_format=torch.channels_last)\n", - " model = torch.jit.script(model)\n", - " model = optimize_for_mobile(model)\n", - "\n", - " return model" - ] + "source": "#| export\ndef optimize_for_cpu(\n model: nn.Module, # The PyTorch model to optimize\n sample: torch.Tensor, # Sample input for tracing (with batch dim)\n *,\n backend: str = \"compile\", # \"compile\" (torch.compile) or \"trace\" (torch.jit.trace)\n compile_mode: str = \"default\", # torch.compile mode\n) -> nn.Module:\n \"Optimize model for CPU inference via channels-last layout + compilation\"\n model = model.eval().to(memory_format=torch.channels_last)\n sample = sample.to(memory_format=torch.channels_last)\n\n if backend == \"compile\":\n return torch.compile(model, mode=compile_mode)\n elif backend == \"trace\":\n with torch.no_grad():\n return torch.jit.trace(model, sample)\n else:\n raise ValueError(f\"Unknown backend: {backend!r}. Use 'compile' or 'trace'.\")\n\ndef accelerate_model_for_cpu(model: nn.Module, example_input: torch.Tensor):\n \"Deprecated: use optimize_for_cpu() instead\"\n warnings.warn(\n \"accelerate_model_for_cpu is deprecated, use optimize_for_cpu(model, sample) instead\",\n DeprecationWarning, stacklevel=2,\n )\n return optimize_for_cpu(model, example_input, backend=\"trace\")" }, { "cell_type": "code", "execution_count": null, "id": "50222d43", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found permutation search CUDA kernels\n", - "[ASP][Info] permutation_search_kernels can be imported.\n" - ] - }, - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/FasterAI-Labs/fasterai/tree/master/blob/master/fasterai/misc/cpu_optimizer.py#L12){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### accelerate_model_for_cpu\n", - "\n", - "```python\n", - "\n", - "def accelerate_model_for_cpu(\n", - " model:Module, example_input:Tensor\n", - "):\n", - "\n", - "\n", - "```" - ], - "text/plain": [ - "```python\n", - "\n", - "def accelerate_model_for_cpu(\n", - " model:Module, example_input:Tensor\n", - "):\n", - "\n", - "\n", - "```" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "show_doc(accelerate_model_for_cpu)" - ] + "outputs": [], + "source": "show_doc(optimize_for_cpu)" }, { "cell_type": "markdown", "id": "78818w1gh87", "metadata": {}, + "source": "```python\nfrom fasterai.misc.cpu_optimizer import optimize_for_cpu\n\nmodel = resnet18(pretrained=True)\nsample = torch.randn(1, 3, 224, 224)\n\n# Default: torch.compile\noptimized = optimize_for_cpu(model, sample)\n\n# Or JIT trace for mobile/static shapes\ntraced = optimize_for_cpu(model, sample, backend=\"trace\")\n```\n\n> **Note:** `accelerate_model_for_cpu` is deprecated. Use `optimize_for_cpu` instead." + }, + { + "cell_type": "code", + "metadata": {}, "source": [ - "**Parameters:**\n", - "\n", - "- `model`: The PyTorch model to optimize\n", - "- `example_input`: A sample input tensor (used for tracing)\n", - "\n", - "**Returns:** An optimized TorchScript model\n", - "\n", - "---\n", + "#| hide\n", + "from fastcore.test import *\n", + "import torch, torch.nn as nn\n", "\n", - "## Usage Example\n", + "# optimize_for_cpu with trace backend\n", + "_m = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(), nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(16, 10))\n", + "_x = torch.randn(1, 3, 8, 8)\n", + "_traced = optimize_for_cpu(_m, _x, backend=\"trace\")\n", + "_out = _traced(_x.to(memory_format=torch.channels_last))\n", + "test_eq(_out.shape, (1, 10))\n", + "assert torch.isfinite(_out).all()\n", "\n", - "```python\n", - "from fasterai.misc.cpu_optimizer import accelerate_model_for_cpu\n", - "import torch\n", + "# Invalid backend raises ValueError\n", + "with ExceptionExpected(ValueError): optimize_for_cpu(_m, _x, backend=\"bad\")\n", "\n", - "# Create example input matching your model's expected shape\n", - "example_input = torch.randn(1, 3, 224, 224)\n", - "\n", - "# Optimize model for CPU inference\n", - "optimized_model = accelerate_model_for_cpu(model, example_input)\n", - "\n", - "# Use the optimized model\n", - "with torch.no_grad():\n", - " output = optimized_model(input_tensor)\n", - "```\n", - "\n", - "**Note:** The returned model is a TorchScript model. Some dynamic Python features may not be supported." - ] + "# Deprecated function emits warning\n", + "import warnings\n", + "with warnings.catch_warnings(record=True) as w:\n", + " warnings.simplefilter(\"always\")\n", + " accelerate_model_for_cpu(nn.Sequential(nn.Conv2d(3, 16, 3), nn.ReLU()), torch.randn(1, 3, 8, 8))\n", + " assert len(w) == 1\n", + " assert issubclass(w[0].category, DeprecationWarning)" + ], + "outputs": [], + "execution_count": null, + "id": "test_cpu_opt" }, { "cell_type": "markdown", @@ -190,4 +119,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/nbs/misc/fc_decomposer.ipynb b/nbs/misc/fc_decomposer.ipynb index 6a12fdb..f983736 100644 --- a/nbs/misc/fc_decomposer.ipynb +++ b/nbs/misc/fc_decomposer.ipynb @@ -101,7 +101,6 @@ "#| export\n", "import torch\n", "import torch.nn as nn\n", - "import torch.nn.functional as F\n", "import copy" ] }, @@ -111,60 +110,7 @@ "id": "6524ac31", "metadata": {}, "outputs": [], - "source": [ - "#| export\n", - "class FC_Decomposer:\n", - " \"Decompose fully-connected layers using SVD to reduce parameters\"\n", - "\n", - " def __init__(self):\n", - " pass\n", - " \n", - " def decompose(self, \n", - " model: nn.Module, # The model to decompose\n", - " percent_removed: float = 0.5 # Fraction of singular values to remove [0, 1)\n", - " ) -> nn.Module:\n", - " \"Recursively decompose all Linear layers in the model using SVD\"\n", - " if not (0 <= percent_removed < 1):\n", - " raise ValueError(f\"percent_removed must be in range [0, 1), got {percent_removed}\")\n", - "\n", - " new_model = copy.deepcopy(model)\n", - " module_names = list(new_model._modules)\n", - "\n", - " for k, name in enumerate(module_names):\n", - " if len(list(new_model._modules[name]._modules)) > 0:\n", - " new_model._modules[name] = self.decompose(new_model._modules[name], percent_removed)\n", - " else:\n", - " if isinstance(new_model._modules[name], nn.Linear):\n", - " layer = self.SVD(new_model._modules[name], percent_removed)\n", - " new_model._modules[name] = layer\n", - " return new_model\n", - "\n", - "\n", - " def SVD(self, \n", - " layer: nn.Linear, # The Linear layer to decompose\n", - " percent_removed: float # Fraction of singular values to remove\n", - " ) -> nn.Sequential:\n", - " \"Perform SVD decomposition on a single Linear layer\"\n", - " W = layer.weight.data\n", - " U, S, Vh = torch.linalg.svd(W, full_matrices=False)\n", - " L = max(1, int((1.-percent_removed) * S.shape[0]))\n", - " W1 = U[:,:L]\n", - " W2 = torch.diag(S[:L]) @ Vh[:L]\n", - " layer_1 = nn.Linear(in_features=layer.in_features, \n", - " out_features=L, bias=False)\n", - " layer_1.weight.data = W2\n", - "\n", - " layer_2 = nn.Linear(in_features=L, \n", - " out_features=layer.out_features, bias=True)\n", - " layer_2.weight.data = W1\n", - "\n", - " if layer.bias is None: \n", - " layer_2.bias.data = torch.zeros(layer.out_features)\n", - " else:\n", - " layer_2.bias.data = layer.bias.data\n", - "\n", - " return nn.Sequential(layer_1, layer_2)" - ] + "source": "#| export\ndef _rank_from_energy(S, threshold):\n \"Find minimum rank to retain `threshold` fraction of singular value energy\"\n energy = S.pow(2).cumsum(0) / S.pow(2).sum()\n idx = (energy >= threshold).nonzero(as_tuple=True)[0]\n return max(1, int(idx[0].item()) + 1) if len(idx) > 0 else S.shape[0]\n\ndef _should_decompose(name, layers=None, exclude=None):\n \"Check if a named layer should be decomposed\"\n if exclude and name in exclude: return False\n if layers is not None: return name in layers\n return True\n\nclass FC_Decomposer:\n \"Decompose fully-connected layers using SVD to reduce parameters\"\n\n def __init__(self): pass\n \n def decompose(self, \n model: nn.Module, # The model to decompose\n percent_removed: float = 0.5, # Fraction of singular values to remove [0, 1)\n energy_threshold: float | None = None, # Auto rank: keep this fraction of energy (0-1)\n layers: list[str] | None = None, # Layer names to decompose (None = all)\n exclude: list[str] | None = None, # Layer names to skip\n ) -> nn.Module:\n \"Decompose Linear layers using SVD. Use energy_threshold for automatic rank selection.\"\n if energy_threshold is None and not (0 <= percent_removed < 1):\n raise ValueError(f\"percent_removed must be in range [0, 1), got {percent_removed}\")\n if energy_threshold is not None and not (0 < energy_threshold <= 1):\n raise ValueError(f\"energy_threshold must be in range (0, 1], got {energy_threshold}\")\n\n new_model = copy.deepcopy(model)\n for name, module in list(new_model.named_modules()):\n if isinstance(module, nn.Linear) and _should_decompose(name, layers, exclude):\n parent_name, _, child_name = name.rpartition('.')\n parent = new_model.get_submodule(parent_name) if parent_name else new_model\n setattr(parent, child_name, self.SVD(module, percent_removed, energy_threshold))\n return new_model\n\n def SVD(self, \n layer: nn.Linear, # The Linear layer to decompose\n percent_removed: float = 0.5, # Fraction of singular values to remove\n energy_threshold: float | None = None, # Auto rank via energy retention\n ) -> nn.Sequential:\n \"Perform SVD decomposition on a single Linear layer\"\n W = layer.weight.data\n U, S, Vh = torch.linalg.svd(W, full_matrices=False)\n\n if energy_threshold is not None:\n L = _rank_from_energy(S, energy_threshold)\n else:\n L = max(1, int((1.-percent_removed) * S.shape[0]))\n\n W1 = U[:,:L]\n W2 = torch.diag(S[:L]) @ Vh[:L]\n layer_1 = nn.Linear(in_features=layer.in_features, \n out_features=L, bias=False)\n layer_1.weight.data = W2\n\n layer_2 = nn.Linear(in_features=L, \n out_features=layer.out_features, bias=True)\n layer_2.weight.data = W1\n\n if layer.bias is None: \n layer_2.bias.data = torch.zeros(layer.out_features)\n else:\n layer_2.bias.data = layer.bias.data\n\n return nn.Sequential(layer_1, layer_2)" }, { "cell_type": "code", @@ -267,42 +213,7 @@ "id": "xwk977e4ia", "metadata": {}, "outputs": [], - "source": [ - "#| hide\n", - "from fastcore.test import *\n", - "\n", - "# SVD decomposition preserves output approximately\n", - "model = nn.Sequential(nn.Linear(32, 64), nn.ReLU(), nn.Linear(64, 10))\n", - "x = torch.randn(4, 32)\n", - "out_orig = model(x)\n", - "\n", - "decomposer = FC_Decomposer()\n", - "model_dec = decomposer.decompose(model, percent_removed=0.5)\n", - "out_dec = model_dec(x)\n", - "test_close(out_orig, out_dec, eps=1.0) # 50% SVD removal has significant reconstruction error\n", - "\n", - "# Decomposed structure: Linear → Sequential(Linear, Linear)\n", - "assert isinstance(model_dec[0], nn.Sequential)\n", - "assert len(model_dec[0]) == 2\n", - "\n", - "# percent_removed=0 → very close output\n", - "m2 = nn.Sequential(nn.Linear(32, 64))\n", - "x2 = torch.randn(4, 32)\n", - "out2 = m2(x2)\n", - "m2_dec = decomposer.decompose(m2, percent_removed=0.0)\n", - "test_close(out2, m2_dec(x2), eps=1e-4)\n", - "\n", - "# L >= 1 always (even at extreme removal)\n", - "m3 = nn.Sequential(nn.Linear(10, 20))\n", - "m3_dec = decomposer.decompose(m3, percent_removed=0.95)\n", - "assert m3_dec[0][0].out_features >= 1\n", - "\n", - "# Invalid percent_removed raises ValueError\n", - "with ExceptionExpected(ValueError):\n", - " decomposer.decompose(nn.Sequential(nn.Linear(10, 10)), percent_removed=1.0)\n", - "with ExceptionExpected(ValueError):\n", - " decomposer.decompose(nn.Sequential(nn.Linear(10, 10)), percent_removed=-0.1)" - ] + "source": "#| hide\nfrom fastcore.test import *\n\n# SVD decomposition preserves output approximately\nmodel = nn.Sequential(nn.Linear(32, 64), nn.ReLU(), nn.Linear(64, 10))\nx = torch.randn(4, 32)\nout_orig = model(x)\n\ndecomposer = FC_Decomposer()\nmodel_dec = decomposer.decompose(model, percent_removed=0.5)\nout_dec = model_dec(x)\ntest_close(out_orig, out_dec, eps=1.0)\n\n# Decomposed structure: Linear → Sequential(Linear, Linear)\nassert isinstance(model_dec[0], nn.Sequential)\nassert len(model_dec[0]) == 2\n\n# percent_removed=0 → very close output\nm2 = nn.Sequential(nn.Linear(32, 64))\nx2 = torch.randn(4, 32)\nout2 = m2(x2)\nm2_dec = decomposer.decompose(m2, percent_removed=0.0)\ntest_close(out2, m2_dec(x2), eps=1e-4)\n\n# L >= 1 always (even at extreme removal)\nm3 = nn.Sequential(nn.Linear(10, 20))\nm3_dec = decomposer.decompose(m3, percent_removed=0.95)\nassert m3_dec[0][0].out_features >= 1\n\n# Invalid percent_removed raises ValueError\nwith ExceptionExpected(ValueError):\n decomposer.decompose(nn.Sequential(nn.Linear(10, 10)), percent_removed=1.0)\n\n# --- energy_threshold ---\nm4 = nn.Sequential(nn.Linear(32, 64), nn.ReLU(), nn.Linear(64, 10))\nm4_99 = decomposer.decompose(m4, energy_threshold=0.99)\nm4_50 = decomposer.decompose(m4, percent_removed=0.5)\n# energy_threshold=0.99 should keep more singular values than 50% removal\nassert m4_99[0][0].out_features >= m4_50[0][0].out_features\n\n# energy_threshold=1.0 keeps all singular values\nm5 = nn.Sequential(nn.Linear(10, 20))\nm5_full = decomposer.decompose(m5, energy_threshold=1.0)\ntest_eq(m5_full[0][0].out_features, 10) # min(10, 20)\n\n# --- layers / exclude ---\nm6 = nn.Sequential(nn.Linear(32, 64), nn.ReLU(), nn.Linear(64, 10))\n# Only decompose first layer\nm6_sel = decomposer.decompose(m6, 0.5, layers=['0'])\nassert isinstance(m6_sel[0], nn.Sequential) # decomposed\nassert isinstance(m6_sel[2], nn.Linear) # untouched\n\n# Exclude last layer\nm7 = nn.Sequential(nn.Linear(32, 64), nn.ReLU(), nn.Linear(64, 10))\nm7_exc = decomposer.decompose(m7, 0.5, exclude=['2'])\nassert isinstance(m7_exc[0], nn.Sequential) # decomposed\nassert isinstance(m7_exc[2], nn.Linear) # excluded" }, { "cell_type": "markdown", @@ -328,4 +239,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/nbs/tutorials/misc/fc_decomposer.ipynb b/nbs/tutorials/misc/fc_decomposer.ipynb index 5a804d2..69ea24f 100644 --- a/nbs/tutorials/misc/fc_decomposer.ipynb +++ b/nbs/tutorials/misc/fc_decomposer.ipynb @@ -469,11 +469,11 @@ "\n", "| Parameter | Default | Description |\n", "|-----------|---------|-------------|\n", - "| `rank_ratio` | `0.5` | Fraction of singular values to keep (0-1). Lower = more compression, more accuracy loss |\n", + "| `percent_removed` | `0.5` | Fraction of singular values to keep (0-1). Lower = more compression, more accuracy loss |\n", "\n", - "### Choosing rank_ratio\n", + "### Choosing percent_removed\n", "\n", - "| rank_ratio | Compression | Accuracy Impact |\n", + "| percent_removed | Compression | Accuracy Impact |\n", "|------------|-------------|-----------------|\n", "| `0.8` | Low | Minimal |\n", "| `0.5` | Medium | Moderate |\n", @@ -496,7 +496,7 @@ "learn.fit_one_cycle(5)\n", "\n", "# 2. Decompose FC layers\n", - "fc = FC_Decomposer(rank_ratio=0.5)\n", + "fc = FC_Decomposer(percent_removed=0.5)\n", "new_model = fc.decompose(learn.model)\n", "\n", "# 3. Fine-tune to recover accuracy\n", @@ -527,4 +527,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file