diff --git a/.github/workflows/tag-dispatch-cloud.yml b/.github/workflows/tag-dispatch-cloud.yml
new file mode 100644
index 000000000000..53a0e91d6946
--- /dev/null
+++ b/.github/workflows/tag-dispatch-cloud.yml
@@ -0,0 +1,45 @@
+name: Tag Dispatch to Cloud
+# Forwards every pushed v* tag to Comfy-Org/cloud as a "comfyui_tag_pushed" repository_dispatch event.
+on:
+  push:
+    tags:
+      - 'v*'
+
+jobs:
+  dispatch-cloud:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Send repository dispatch to cloud
+        env:
+          DISPATCH_TOKEN: ${{ secrets.CLOUD_REPO_DISPATCH_TOKEN }}
+          RELEASE_TAG: ${{ github.ref_name }}
+        run: |
+          set -euo pipefail
+          # Stop with a visible error annotation rather than sending a request without credentials.
+          if [ -z "${DISPATCH_TOKEN:-}" ]; then
+            echo "::error::CLOUD_REPO_DISPATCH_TOKEN is required but not set."
+            exit 1
+          fi
+          # Use the runner-provided GITHUB_REPOSITORY variable (like RELEASE_TAG above) so no workflow expression is expanded into the script text.
+          RELEASE_URL="https://github.com/${GITHUB_REPOSITORY}/releases/tag/${RELEASE_TAG}"
+          # Let jq assemble the JSON so the tag and URL are escaped as proper JSON strings.
+          PAYLOAD="$(jq -n \
+            --arg release_tag "$RELEASE_TAG" \
+            --arg release_url "$RELEASE_URL" \
+            '{
+              event_type: "comfyui_tag_pushed",
+              client_payload: {
+                release_tag: $release_tag,
+                release_url: $release_url
+              }
+            }')"
+          # -f makes curl exit non-zero on an HTTP error status, which fails this step because of "set -e".
+          curl -fsSL \
+            -X POST \
+            -H "Accept: application/vnd.github+json" \
+            -H "Content-Type: application/json" \
+            -H "Authorization: Bearer ${DISPATCH_TOKEN}" \
+            https://api.github.com/repos/Comfy-Org/cloud/dispatches \
+            -d "$PAYLOAD"
+
+          echo "✅ Dispatched ComfyUI tag ${RELEASE_TAG} to Comfy-Org/cloud"
diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index ee56f8523cdd..e259aed6316e 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -31,6 +31,7 @@
 import comfy.hooks
 import comfy.lora
 import comfy.model_management
+import comfy.ops
 import comfy.patcher_extension
 import comfy.utils
 from comfy.comfy_types import UnetWrapperFunction
@@ -856,7 +857,9 @@ def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False
                     if m.comfy_patched_weights == True:
                         continue
 
-                for param in params:
+                for param, param_value in params.items():
+                    if hasattr(m, "comfy_cast_weights") and getattr(param_value, "is_meta", False):
+                        comfy.ops.disable_weight_init._zero_init_parameter(m, param)
                     key = key_param_name_to_key(n, param)
                     self.unpin_weight(key)
                     self.patch_weight_to_device(key, device_to=device_to)
diff --git a/comfy/ops.py b/comfy/ops.py
index 7a9b4b84c6bd..050f7cda0866 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -79,14 +79,21 @@ def cast_to_input(weight, input, non_blocking=False, copy=True):
     return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)
 
 
-def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant):
+def materialize_meta_param(s, param_keys):  # swap attributes of `s` that are flagged is_meta for real zero-filled parameters, in place
+    for param_key in param_keys:  # attribute names to check on `s` (callers in this file pass ["weight", "bias"])
+        param = getattr(s, param_key, None)  # an absent attribute reads as None and is skipped below
+        if param is not None and getattr(param, "is_meta", False):  # anything not flagged is_meta is left untouched
+            setattr(s, param_key, torch.nn.Parameter(torch.zeros(param.shape, dtype=param.dtype), requires_grad=param.requires_grad))  # keeps shape, dtype and requires_grad; no device is passed to torch.zeros
+
 
+def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant):
     #vbar doesn't support CPU weights, but some custom nodes have weird paths
     #that might switch the layer to the CPU and expect it to work. We have to take
     #a clone conservatively as we are mmapped and some SFT files are packed misaligned
     #If you are a custom node author reading this, please move your layer to the GPU
     #or declare your ModelPatcher as CPU in the first place.
     if comfy.model_management.is_device_cpu(device):
+        materialize_meta_param(s, ["weight", "bias"])
         weight = s.weight.to(dtype=dtype, copy=True)
         if isinstance(weight, QuantizedTensor):
             weight = weight.dequantize()
@@ -108,6 +115,7 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
             xfer_dest = comfy_aimdo.torch.aimdo_to_tensor(s._v, device)
 
     if not resident:
+        materialize_meta_param(s, ["weight", "bias"])
         cast_geometry = comfy.memory_management.tensors_to_geometries([ s.weight, s.bias ])
         cast_dest = None
 
@@ -306,6 +314,12 @@ class CastWeightBiasOp:
     bias_function = []
 
 class disable_weight_init:
+    @staticmethod
+    def _zero_init_parameter(module, name):  # replace module.<name> with a zero-filled Parameter that has requires_grad=False
+        param = getattr(module, name)  # no default given: raises AttributeError if the module lacks this attribute
+        device = None if getattr(param, "is_meta", False) else param.device  # for is_meta values pass device=None to torch.zeros instead of reusing param.device
+        setattr(module, name, torch.nn.Parameter(torch.zeros(param.shape, device=device, dtype=param.dtype), requires_grad=False))  # shape and dtype are copied from the value being replaced
+
     @staticmethod
     def _lazy_load_from_state_dict(module, state_dict, prefix, local_metadata,
                                    missing_keys, unexpected_keys, weight_shape,
diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py
index eb4d3701d54d..812b3eb30d66 100644
--- a/comfy_api/latest/_input_impl/video_types.py
+++ b/comfy_api/latest/_input_impl/video_types.py
@@ -12,6 +12,7 @@
 import math
 import torch
 from .._util import VideoContainer, VideoCodec, VideoComponents
+import logging
 
 
 def container_to_output_format(container_format: str | None) -> str | None:
@@ -238,32 +239,86 @@ def get_components_internal(self, container: InputContainer) -> VideoComponents:
             start_time = max(self._get_raw_duration() + self.__start_time, 0)
         else:
             start_time = self.__start_time
+
         # Get video frames
         frames = []
+        audio_frames = []
         alphas = None
         start_pts = int(start_time / video_stream.time_base)
         end_pts = int((start_time + self.__duration) / video_stream.time_base)
-        container.seek(start_pts, stream=video_stream)
+
+        if start_pts != 0:
+            container.seek(start_pts, stream=video_stream)
+
         image_format = 'gbrpf32le'
-        for frame in container.decode(video_stream):
-            if alphas is None:
-                for comp in frame.format.components:
-                    if comp.is_alpha:
-                        alphas = []
-                        image_format = 'gbrapf32le'
-                        break
+        audio = None
+
+        streams = [video_stream]
+        has_first_audio_frame = False
+        checked_alpha = False
 
-            if frame.pts < start_pts:
-                continue
-            if self.__duration and frame.pts >= end_pts:
+        # video_done starts False so we decode until EOF if duration is 0; audio_done starts True and is cleared below only when an audio stream exists
+        video_done = False
+        audio_done = True
+
+        if len(container.streams.audio):
+            audio_stream = container.streams.audio[-1]
+            streams += [audio_stream]
+            resampler = av.audio.resampler.AudioResampler(format='fltp')
+            audio_done = False
+
+        for packet in container.demux(*streams):
+            if video_done and audio_done:
                 break
 
-            img = frame.to_ndarray(format=image_format)  # shape: (H, W, 4)
-            if alphas is None:
-                frames.append(torch.from_numpy(img))
-            else:
-                frames.append(torch.from_numpy(img[..., :-1]))
-                alphas.append(torch.from_numpy(img[..., -1:]))
+            if packet.stream.type == "video":
+                if video_done:
+                    continue
+                try:
+                    for frame in packet.decode():
+                        if frame.pts < start_pts:
+                            continue
+                        if self.__duration and frame.pts >= end_pts:
+                            video_done = True
+                            break
+
+                        if not checked_alpha:
+                            for comp in frame.format.components:
+                                if comp.is_alpha:
+                                    alphas = []
+                                    image_format = 'gbrapf32le'
+                                    break
+                            checked_alpha = True
+
+                        img = frame.to_ndarray(format=image_format)  # shape: (H, W, 4)
+                        if alphas is None:
+                            frames.append(torch.from_numpy(img))
+                        else:
+                            frames.append(torch.from_numpy(img[..., :-1]))
+                            alphas.append(torch.from_numpy(img[..., -1:]))
+                except av.error.InvalidDataError:
+                    logging.info("pyav decode error")
+
+            elif packet.stream.type == "audio":
+                if audio_done:
+                    continue
+
+                aframes = itertools.chain.from_iterable(
+                    map(resampler.resample, packet.decode())
+                )
+                for frame in aframes:
+                    if self.__duration and frame.time > start_time + self.__duration:
+                        audio_done = True
+                        break
+
+                    if not has_first_audio_frame:
+                        offset_seconds = start_time - frame.pts * audio_stream.time_base
+                        to_skip = max(0, int(offset_seconds * audio_stream.sample_rate))
+                        if to_skip < frame.samples:
+                            has_first_audio_frame = True
+                            audio_frames.append(frame.to_ndarray()[..., to_skip:])
+                    else:
+                        audio_frames.append(frame.to_ndarray())
 
         images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 0, 0, 3)
         if alphas is not None:
@@ -272,42 +327,16 @@ def get_components_internal(self, container: InputContainer) -> VideoComponents:
         # Get frame rate
         frame_rate = Fraction(video_stream.average_rate) if video_stream.average_rate else Fraction(1)
 
-        # Get audio if available
-        audio = None
-        container.seek(start_pts, stream=video_stream)
-        # Use last stream for consistency
-        if len(container.streams.audio):
-            audio_stream = container.streams.audio[-1]
-            audio_frames = []
-            resample = av.audio.resampler.AudioResampler(format='fltp').resample
-            frames = itertools.chain.from_iterable(
-                map(resample, container.decode(audio_stream))
-            )
-
-            has_first_frame = False
-            for frame in frames:
-                offset_seconds = start_time - frame.pts * audio_stream.time_base
-                to_skip = max(0, int(offset_seconds * audio_stream.sample_rate))
-                if to_skip < frame.samples:
-                    has_first_frame = True
-                    break
-            if has_first_frame:
-                audio_frames.append(frame.to_ndarray()[..., to_skip:])
+        if len(audio_frames) > 0:
+            audio_data = np.concatenate(audio_frames, axis=1)  # shape: (channels, total_samples)
+            if self.__duration:
+                audio_data = audio_data[..., :int(self.__duration * audio_stream.sample_rate)]
 
-            for frame in frames:
-                if self.__duration and frame.time > start_time + self.__duration:
-                    break
-                audio_frames.append(frame.to_ndarray())  # shape: (channels, samples)
-            if len(audio_frames) > 0:
-                audio_data = np.concatenate(audio_frames, axis=1)  # shape: (channels, total_samples)
-                if self.__duration:
-                    audio_data = audio_data[..., :int(self.__duration * audio_stream.sample_rate)]
-
-                audio_tensor = torch.from_numpy(audio_data).unsqueeze(0)  # shape: (1, channels, total_samples)
-                audio = AudioInput({
-                    "waveform": audio_tensor,
-                    "sample_rate": int(audio_stream.sample_rate) if audio_stream.sample_rate else 1,
-                })
+            audio_tensor = torch.from_numpy(audio_data).unsqueeze(0)  # shape: (1, channels, total_samples)
+            audio = AudioInput({
+                "waveform": audio_tensor,
+                "sample_rate": int(audio_stream.sample_rate) if audio_stream.sample_rate else 1,
+            })
 
         metadata = container.metadata
         return VideoComponents(images=images, alpha=alphas, audio=audio, frame_rate=frame_rate, metadata=metadata)