diff --git a/.github/workflows/tag-dispatch-cloud.yml b/.github/workflows/tag-dispatch-cloud.yml new file mode 100644 index 000000000000..53a0e91d6946 --- /dev/null +++ b/.github/workflows/tag-dispatch-cloud.yml @@ -0,0 +1,45 @@ +name: Tag Dispatch to Cloud + +on: + push: + tags: + - 'v*' + +jobs: + dispatch-cloud: + runs-on: ubuntu-latest + steps: + - name: Send repository dispatch to cloud + env: + DISPATCH_TOKEN: ${{ secrets.CLOUD_REPO_DISPATCH_TOKEN }} + RELEASE_TAG: ${{ github.ref_name }} + run: | + set -euo pipefail + + if [ -z "${DISPATCH_TOKEN:-}" ]; then + echo "::error::CLOUD_REPO_DISPATCH_TOKEN is required but not set." + exit 1 + fi + + RELEASE_URL="https://github.com/${{ github.repository }}/releases/tag/${RELEASE_TAG}" + + PAYLOAD="$(jq -n \ + --arg release_tag "$RELEASE_TAG" \ + --arg release_url "$RELEASE_URL" \ + '{ + event_type: "comfyui_tag_pushed", + client_payload: { + release_tag: $release_tag, + release_url: $release_url + } + }')" + + curl -fsSL \ + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${DISPATCH_TOKEN}" \ + https://api.github.com/repos/Comfy-Org/cloud/dispatches \ + -d "$PAYLOAD" + + echo "✅ Dispatched ComfyUI tag ${RELEASE_TAG} to Comfy-Org/cloud" diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py index ee56f8523cdd..e259aed6316e 100644 --- a/comfy/model_patcher.py +++ b/comfy/model_patcher.py @@ -31,6 +31,7 @@ import comfy.hooks import comfy.lora import comfy.model_management +import comfy.ops import comfy.patcher_extension import comfy.utils from comfy.comfy_types import UnetWrapperFunction @@ -856,7 +857,9 @@ def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False if m.comfy_patched_weights == True: continue - for param in params: + for param, param_value in params.items(): + if hasattr(m, "comfy_cast_weights") and getattr(param_value, "is_meta", False): + comfy.ops.disable_weight_init._zero_init_parameter(m, param) key = key_param_name_to_key(n, param) self.unpin_weight(key) self.patch_weight_to_device(key, device_to=device_to) diff --git a/comfy/ops.py b/comfy/ops.py index 7a9b4b84c6bd..050f7cda0866 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -79,14 +79,21 @@ def cast_to_input(weight, input, non_blocking=False, copy=True): return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy) -def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant): +def materialize_meta_param(s, param_keys): + for param_key in param_keys: + param = getattr(s, param_key, None) + if param is not None and getattr(param, "is_meta", False): + setattr(s, param_key, torch.nn.Parameter(torch.zeros(param.shape, dtype=param.dtype), requires_grad=param.requires_grad)) + +def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant): #vbar doesn't support CPU weights, but some custom nodes have weird paths #that might switch the layer to the CPU and expect it to work. We have to take #a clone conservatively as we are mmapped and some SFT files are packed misaligned #If you are a custom node author reading this, please move your layer to the GPU #or declare your ModelPatcher as CPU in the first place. if comfy.model_management.is_device_cpu(device): + materialize_meta_param(s, ["weight", "bias"]) weight = s.weight.to(dtype=dtype, copy=True) if isinstance(weight, QuantizedTensor): weight = weight.dequantize() @@ -108,6 +115,7 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu xfer_dest = comfy_aimdo.torch.aimdo_to_tensor(s._v, device) if not resident: + materialize_meta_param(s, ["weight", "bias"]) cast_geometry = comfy.memory_management.tensors_to_geometries([ s.weight, s.bias ]) cast_dest = None @@ -306,6 +314,12 @@ class CastWeightBiasOp: bias_function = [] class disable_weight_init: + @staticmethod + def _zero_init_parameter(module, name): + param = getattr(module, name) + device = None if getattr(param, "is_meta", False) else param.device + setattr(module, name, torch.nn.Parameter(torch.zeros(param.shape, device=device, dtype=param.dtype), requires_grad=False)) + @staticmethod def _lazy_load_from_state_dict(module, state_dict, prefix, local_metadata, missing_keys, unexpected_keys, weight_shape, diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py index eb4d3701d54d..812b3eb30d66 100644 --- a/comfy_api/latest/_input_impl/video_types.py +++ b/comfy_api/latest/_input_impl/video_types.py @@ -12,6 +12,7 @@ import math import torch from .._util import VideoContainer, VideoCodec, VideoComponents +import logging def container_to_output_format(container_format: str | None) -> str | None: @@ -238,32 +239,86 @@ def get_components_internal(self, container: InputContainer) -> VideoComponents: start_time = max(self._get_raw_duration() + self.__start_time, 0) else: start_time = self.__start_time + # Get video frames frames = [] + audio_frames = [] alphas = None start_pts = int(start_time / video_stream.time_base) end_pts = int((start_time + self.__duration) / video_stream.time_base) - container.seek(start_pts, stream=video_stream) + + if start_pts != 0: + container.seek(start_pts, stream=video_stream) + image_format = 'gbrpf32le' - for frame in container.decode(video_stream): - if alphas is None: - for comp in frame.format.components: - if comp.is_alpha: - alphas = [] - image_format = 'gbrapf32le' - break + audio = None + + streams = [video_stream] + has_first_audio_frame = False + checked_alpha = False - if frame.pts < start_pts: - continue - if self.__duration and frame.pts >= end_pts: + # Default to False so we decode until EOF if duration is 0 + video_done = False + audio_done = True + + if len(container.streams.audio): + audio_stream = container.streams.audio[-1] + streams += [audio_stream] + resampler = av.audio.resampler.AudioResampler(format='fltp') + audio_done = False + + for packet in container.demux(*streams): + if video_done and audio_done: break - img = frame.to_ndarray(format=image_format) # shape: (H, W, 4) - if alphas is None: - frames.append(torch.from_numpy(img)) - else: - frames.append(torch.from_numpy(img[..., :-1])) - alphas.append(torch.from_numpy(img[..., -1:])) + if packet.stream.type == "video": + if video_done: + continue + try: + for frame in packet.decode(): + if frame.pts < start_pts: + continue + if self.__duration and frame.pts >= end_pts: + video_done = True + break + + if not checked_alpha: + for comp in frame.format.components: + if comp.is_alpha: + alphas = [] + image_format = 'gbrapf32le' + break + checked_alpha = True + + img = frame.to_ndarray(format=image_format) # shape: (H, W, 4) + if alphas is None: + frames.append(torch.from_numpy(img)) + else: + frames.append(torch.from_numpy(img[..., :-1])) + alphas.append(torch.from_numpy(img[..., -1:])) + except av.error.InvalidDataError: + logging.info("pyav decode error") + + elif packet.stream.type == "audio": + if audio_done: + continue + + aframes = itertools.chain.from_iterable( + map(resampler.resample, packet.decode()) + ) + for frame in aframes: + if self.__duration and frame.time > start_time + self.__duration: + audio_done = True + break + + if not has_first_audio_frame: + offset_seconds = start_time - frame.pts * audio_stream.time_base + to_skip = max(0, int(offset_seconds * audio_stream.sample_rate)) + if to_skip < frame.samples: + has_first_audio_frame = True + audio_frames.append(frame.to_ndarray()[..., to_skip:]) + else: + audio_frames.append(frame.to_ndarray()) images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 0, 0, 3) if alphas is not None: @@ -272,42 +327,16 @@ def get_components_internal(self, container: InputContainer) -> VideoComponents: # Get frame rate frame_rate = Fraction(video_stream.average_rate) if video_stream.average_rate else Fraction(1) - # Get audio if available - audio = None - container.seek(start_pts, stream=video_stream) - # Use last stream for consistency - if len(container.streams.audio): - audio_stream = container.streams.audio[-1] - audio_frames = [] - resample = av.audio.resampler.AudioResampler(format='fltp').resample - frames = itertools.chain.from_iterable( - map(resample, container.decode(audio_stream)) - ) - - has_first_frame = False - for frame in frames: - offset_seconds = start_time - frame.pts * audio_stream.time_base - to_skip = max(0, int(offset_seconds * audio_stream.sample_rate)) - if to_skip < frame.samples: - has_first_frame = True - break - if has_first_frame: - audio_frames.append(frame.to_ndarray()[..., to_skip:]) + if len(audio_frames) > 0: + audio_data = np.concatenate(audio_frames, axis=1) # shape: (channels, total_samples) + if self.__duration: + audio_data = audio_data[..., :int(self.__duration * audio_stream.sample_rate)] - for frame in frames: - if self.__duration and frame.time > start_time + self.__duration: - break - audio_frames.append(frame.to_ndarray()) # shape: (channels, samples) - if len(audio_frames) > 0: - audio_data = np.concatenate(audio_frames, axis=1) # shape: (channels, total_samples) - if self.__duration: - audio_data = audio_data[..., :int(self.__duration * audio_stream.sample_rate)] - - audio_tensor = torch.from_numpy(audio_data).unsqueeze(0) # shape: (1, channels, total_samples) - audio = AudioInput({ - "waveform": audio_tensor, - "sample_rate": int(audio_stream.sample_rate) if audio_stream.sample_rate else 1, - }) + audio_tensor = torch.from_numpy(audio_data).unsqueeze(0) # shape: (1, channels, total_samples) + audio = AudioInput({ + "waveform": audio_tensor, + "sample_rate": int(audio_stream.sample_rate) if audio_stream.sample_rate else 1, + }) metadata = container.metadata return VideoComponents(images=images, alpha=alphas, audio=audio, frame_rate=frame_rate, metadata=metadata)