Commit e9b1649

Fix Gemma3n inference without cache (#323)
Closes #322
1 parent b60cec8 commit e9b1649

File tree

2 files changed: +2 −2 lines

mlx_lm/_version.py

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 # Copyright © 2023-2025 Apple Inc.

-__version__ = "0.26.1"
+__version__ = "0.26.2"

mlx_lm/models/gemma3n.py

Lines changed: 1 addition & 1 deletion
@@ -476,7 +476,7 @@ def __call__(
         per_layer_inputs = self.project_per_layer_inputs(h, per_layer_inputs)

         if cache is None:
-            cache = [None] * len(self.layers)
+            cache = self.make_cache()

         if mask is None:
             full_mask = create_attention_mask(
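
The code path this commit fixes is the one where the model is called with no cache argument: previously the Gemma3n forward pass filled the cache with None placeholders, which its attention layers could not consume, so cache-less inference failed (issue #322). A minimal sketch of exercising that path is below; the checkpoint id is an assumption for illustration, not taken from the commit.

# Sketch: run the Gemma 3n model without passing a cache.
# The model repo id below is hypothetical; substitute any Gemma 3n MLX checkpoint.
import mlx.core as mx
from mlx_lm import load

model, tokenizer = load("mlx-community/gemma-3n-E2B-it-4bit")  # assumed checkpoint id
tokens = mx.array(tokenizer.encode("Hello"))[None]

# No cache argument: with this fix, the model builds proper per-layer caches
# via self.make_cache() instead of a list of None entries.
logits = model(tokens)
print(logits.shape)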
