From 244703046f1ce9ca6ed9dd64587ff37da2e6506c Mon Sep 17 00:00:00 2001 From: Jason Wong <86539564+Aintor@users.noreply.github.com> Date: Mon, 11 Nov 2024 00:13:05 +0800 Subject: [PATCH 1/4] Update torch2coreml.py --- python_coreml_stable_diffusion/torch2coreml.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python_coreml_stable_diffusion/torch2coreml.py b/python_coreml_stable_diffusion/torch2coreml.py index fc4e633e..5f685fcd 100644 --- a/python_coreml_stable_diffusion/torch2coreml.py +++ b/python_coreml_stable_diffusion/torch2coreml.py @@ -336,6 +336,7 @@ def patched_make_causal_mask(input_ids_shape, dtype, device, past_key_values_len return mask[None, None, :, :].expand(bsz, 1, tgt_len, tgt_len + past_key_values_length) modeling_clip._make_causal_mask = patched_make_causal_mask +modeling_clip._create_4d_causal_attention_mask = patched_make_causal_mask def convert_text_encoder(text_encoder, tokenizer, submodule_name, args): """ Converts the text encoder component of Stable Diffusion From 4ef83f13d0dff36af5ffd744e612d3f5e5e03080 Mon Sep 17 00:00:00 2001 From: Jason Wong <86539564+Aintor@users.noreply.github.com> Date: Tue, 12 Nov 2024 10:31:26 +0800 Subject: [PATCH 2/4] Update torch2coreml.py --- python_coreml_stable_diffusion/torch2coreml.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python_coreml_stable_diffusion/torch2coreml.py b/python_coreml_stable_diffusion/torch2coreml.py index 5f685fcd..54bb662e 100644 --- a/python_coreml_stable_diffusion/torch2coreml.py +++ b/python_coreml_stable_diffusion/torch2coreml.py @@ -335,8 +335,10 @@ def patched_make_causal_mask(input_ids_shape, dtype, device, past_key_values_len mask = torch.cat([torch.zeros(tgt_len, past_key_values_length, dtype=dtype, device=device), mask], dim=-1) return mask[None, None, :, :].expand(bsz, 1, tgt_len, tgt_len + past_key_values_length) -modeling_clip._make_causal_mask = patched_make_causal_mask -modeling_clip._create_4d_causal_attention_mask = patched_make_causal_mask +# Starting from transformers >= 4.35.0, the _make_causal_mask function is replaced by _create_4d_causal_attention_mask in modeling_clip. +# For backward compatibility with versions < 4.35.0, both functions are patched here. +modeling_clip._make_causal_mask = patched_make_causal_mask # For transformers < 4.35.0 +modeling_clip._create_4d_causal_attention_mask = patched_make_causal_mask # For transformers >= 4.35.0 def convert_text_encoder(text_encoder, tokenizer, submodule_name, args): """ Converts the text encoder component of Stable Diffusion From 7e3df29361ae887727105ba26a2238f9a252ae97 Mon Sep 17 00:00:00 2001 From: Jason Wong <86539564+Aintor@users.noreply.github.com> Date: Tue, 12 Nov 2024 10:36:17 +0800 Subject: [PATCH 3/4] Update torch2coreml.py --- python_coreml_stable_diffusion/torch2coreml.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python_coreml_stable_diffusion/torch2coreml.py b/python_coreml_stable_diffusion/torch2coreml.py index 54bb662e..95aab700 100644 --- a/python_coreml_stable_diffusion/torch2coreml.py +++ b/python_coreml_stable_diffusion/torch2coreml.py @@ -322,6 +322,8 @@ def bundle_resources_for_swift_cli(args): from transformers.models.clip import modeling_clip # Copied from https://github.com/huggingface/transformers/blob/v4.30.0/src/transformers/models/clip/modeling_clip.py#L677C1-L692C1 +# Starting from transformers >= 4.35.0, the _make_causal_mask function is replaced by _create_4d_causal_attention_mask in modeling_clip. +# For backward compatibility with versions < 4.35.0, both functions are patched here. def patched_make_causal_mask(input_ids_shape, dtype, device, past_key_values_length: int = 0): """ Patch to replace torch.finfo(dtype).min with -1e4 """ @@ -334,9 +336,7 @@ def patched_make_causal_mask(input_ids_shape, dtype, device, past_key_values_len if past_key_values_length > 0: mask = torch.cat([torch.zeros(tgt_len, past_key_values_length, dtype=dtype, device=device), mask], dim=-1) return mask[None, None, :, :].expand(bsz, 1, tgt_len, tgt_len + past_key_values_length) - -# Starting from transformers >= 4.35.0, the _make_causal_mask function is replaced by _create_4d_causal_attention_mask in modeling_clip. -# For backward compatibility with versions < 4.35.0, both functions are patched here. + modeling_clip._make_causal_mask = patched_make_causal_mask # For transformers < 4.35.0 modeling_clip._create_4d_causal_attention_mask = patched_make_causal_mask # For transformers >= 4.35.0 From 50bb4c43d0139f76c6b99bc1ee16b200eae20c50 Mon Sep 17 00:00:00 2001 From: Jason Wong <86539564+Aintor@users.noreply.github.com> Date: Tue, 12 Nov 2024 11:19:16 +0800 Subject: [PATCH 4/4] Update torch2coreml.py --- python_coreml_stable_diffusion/torch2coreml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python_coreml_stable_diffusion/torch2coreml.py b/python_coreml_stable_diffusion/torch2coreml.py index 95aab700..3bbf7ea5 100644 --- a/python_coreml_stable_diffusion/torch2coreml.py +++ b/python_coreml_stable_diffusion/torch2coreml.py @@ -337,7 +337,7 @@ def patched_make_causal_mask(input_ids_shape, dtype, device, past_key_values_len mask = torch.cat([torch.zeros(tgt_len, past_key_values_length, dtype=dtype, device=device), mask], dim=-1) return mask[None, None, :, :].expand(bsz, 1, tgt_len, tgt_len + past_key_values_length) -modeling_clip._make_causal_mask = patched_make_causal_mask # For transformers < 4.35.0 +modeling_clip._make_causal_mask = patched_make_causal_mask # For transformers >= 4.30.0 and transformers < 4.35.0 modeling_clip._create_4d_causal_attention_mask = patched_make_causal_mask # For transformers >= 4.35.0 def convert_text_encoder(text_encoder, tokenizer, submodule_name, args):