|
18 | 18 | import comfy.text_encoders.lumina2 |
19 | 19 | import comfy.text_encoders.wan |
20 | 20 | import comfy.text_encoders.ace |
| 21 | +import comfy.text_encoders.omnigen2 |
21 | 22 |
|
22 | 23 | from . import supported_models_base |
23 | 24 | from . import latent_formats |
@@ -1181,6 +1182,36 @@ def get_model(self, state_dict, prefix="", device=None): |
1181 | 1182 | def clip_target(self, state_dict={}): |
1182 | 1183 | return supported_models_base.ClipTarget(comfy.text_encoders.ace.AceT5Tokenizer, comfy.text_encoders.ace.AceT5Model) |
1183 | 1184 |
|
1184 | | -models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, Lumina2, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, Hunyuan3Dv2mini, Hunyuan3Dv2, HiDream, Chroma, ACEStep] |
class Omnigen2(supported_models_base.BASE):
    """Supported-model entry whose ``unet_config`` names the "omnigen2" image model.

    Holds the class-level configuration for this family and builds both the
    diffusion model object and the tokenizer/text-encoder pairing for it.
    """

    # Presumably matched against the detected model config by the base
    # class -- confirm in supported_models_base.
    unet_config = {"image_model": "omnigen2"}

    # Values handed to the sampling setup; their exact meaning is defined
    # outside this block.
    sampling_settings = {"multiplier": 1.0, "shift": 2.6}

    memory_usage_factor = 1.65  # TODO: flagged as unverified in the original

    unet_extra_config = {}
    latent_format = latent_formats.Flux

    supported_inference_dtypes = [torch.float16, torch.bfloat16, torch.float32]

    # Key prefixes for the VAE and text-encoder weights; only the first
    # text-encoder prefix is read in clip_target() below.
    vae_key_prefix = ["vae."]
    text_encoder_key_prefix = ["text_encoders."]

    def get_model(self, state_dict, prefix="", device=None):
        """Instantiate ``model_base.Omnigen2`` for this config on ``device``.

        ``state_dict`` and ``prefix`` are accepted but not used here.
        """
        return model_base.Omnigen2(self, device=device)

    def clip_target(self, state_dict={}):
        """Return the ClipTarget (tokenizer + text encoder) for this model.

        ``llama_detect`` inspects ``state_dict`` under the
        ``<prefix>qwen25_3b.transformer.`` key prefix, where ``<prefix>`` is
        the first entry of ``text_encoder_key_prefix``. Whatever it returns is
        forwarded as keyword arguments to ``comfy.text_encoders.omnigen2.te``.
        The default ``state_dict`` is not mutated in this method.
        """
        te_prefix = self.text_encoder_key_prefix[0]
        qwen_key_prefix = "{}qwen25_3b.transformer.".format(te_prefix)
        detected_kwargs = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, qwen_key_prefix)
        omnigen2_te = comfy.text_encoders.omnigen2
        return supported_models_base.ClipTarget(
            omnigen2_te.LuminaTokenizer,
            omnigen2_te.te(**detected_kwargs),
        )
| 1213 | + |
| 1214 | + |
# Registry of supported model classes, kept in the original order
# (presumably order-sensitive for detection -- confirm against the caller).
models = [
    LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20,
    SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL,
    SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler,
    Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3,
    StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT,
    HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi,
    LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V,
    CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, Lumina2, WAN21_T2V,
    WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, Hunyuan3Dv2mini,
    Hunyuan3Dv2, HiDream, Chroma, ACEStep, Omnigen2,
]

# SVD_img2vid is appended in a separate statement, so it ends up last.
models += [SVD_img2vid]
0 commit comments