@@ -97,15 +97,16 @@ struct SDParams {
97
97
98
98
std::string prompt;
99
99
std::string negative_prompt;
100
- float min_cfg = 1 .0f ;
101
- float cfg_scale = 7 .0f ;
102
- float guidance = 3 .5f ;
103
- float eta = 0 .f;
104
- float style_ratio = 20 .f;
105
- int clip_skip = -1 ; // <= 0 represents unspecified
106
- int width = 512 ;
107
- int height = 512 ;
108
- int batch_count = 1 ;
100
+ float min_cfg = 1 .0f ;
101
+ float cfg_scale = 7 .0f ;
102
+ float img_cfg_scale = INFINITY;
103
+ float guidance = 3 .5f ;
104
+ float eta = 0 .f;
105
+ float style_ratio = 20 .f;
106
+ int clip_skip = -1 ; // <= 0 represents unspecified
107
+ int width = 512 ;
108
+ int height = 512 ;
109
+ int batch_count = 1 ;
109
110
110
111
int video_frames = 6 ;
111
112
int motion_bucket_id = 127 ;
@@ -176,6 +177,7 @@ void print_params(SDParams params) {
176
177
printf (" negative_prompt: %s\n " , params.negative_prompt .c_str ());
177
178
printf (" min_cfg: %.2f\n " , params.min_cfg );
178
179
printf (" cfg_scale: %.2f\n " , params.cfg_scale );
180
+ printf (" img_cfg_scale: %.2f\n " , params.img_cfg_scale );
179
181
printf (" slg_scale: %.2f\n " , params.slg_scale );
180
182
printf (" guidance: %.2f\n " , params.guidance );
181
183
printf (" eta: %.2f\n " , params.eta );
@@ -234,7 +236,8 @@ void print_usage(int argc, const char* argv[]) {
234
236
printf (" -p, --prompt [PROMPT] the prompt to render\n " );
235
237
printf (" -n, --negative-prompt PROMPT the negative prompt (default: \"\" )\n " );
236
238
printf (" --cfg-scale SCALE unconditional guidance scale: (default: 7.0)\n " );
237
- printf (" --guidance SCALE guidance scale for img2img (default: 3.5)\n " );
239
+ printf (" --img-cfg-scale SCALE image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)\n " );
240
+ printf (" --guidance SCALE distilled guidance scale for models with guidance input (default: 3.5)\n " );
238
241
printf (" --slg-scale SCALE skip layer guidance (SLG) scale, only for DiT models: (default: 0)\n " );
239
242
printf (" 0 means disabled, a value of 2.5 is nice for sd3.5 medium\n " );
240
243
printf (" --eta SCALE eta in DDIM, only for DDIM and TCD: (default: 0)\n " );
@@ -470,6 +473,12 @@ void parse_args(int argc, const char** argv, SDParams& params) {
470
473
break ;
471
474
}
472
475
params.cfg_scale = std::stof (argv[i]);
476
+ } else if (arg == " --img-cfg-scale" ) {
477
+ if (++i >= argc) {
478
+ invalid_arg = true ;
479
+ break ;
480
+ }
481
+ params.img_cfg_scale = std::stof (argv[i]);
473
482
} else if (arg == " --guidance" ) {
474
483
if (++i >= argc) {
475
484
invalid_arg = true ;
@@ -755,6 +764,10 @@ void parse_args(int argc, const char** argv, SDParams& params) {
755
764
params.output_path = " output.gguf" ;
756
765
}
757
766
}
767
+
768
+ if (!isfinite (params.img_cfg_scale )) {
769
+ params.img_cfg_scale = params.cfg_scale ;
770
+ }
758
771
}
759
772
760
773
static std::string sd_basename (const std::string& path) {
@@ -849,6 +862,18 @@ int main(int argc, const char* argv[]) {
849
862
850
863
parse_args (argc, argv, params);
851
864
865
+ sd_guidance_params_t guidance_params = {params.cfg_scale ,
866
+ params.img_cfg_scale ,
867
+ params.min_cfg ,
868
+ params.guidance ,
869
+ {
870
+ params.skip_layers .data (),
871
+ params.skip_layers .size (),
872
+ params.skip_layer_start ,
873
+ params.skip_layer_end ,
874
+ params.slg_scale ,
875
+ }};
876
+
852
877
sd_set_log_callback (sd_log_cb, (void *)&params);
853
878
854
879
if (params.verbose ) {
@@ -1041,8 +1066,7 @@ int main(int argc, const char* argv[]) {
1041
1066
params.prompt .c_str (),
1042
1067
params.negative_prompt .c_str (),
1043
1068
params.clip_skip ,
1044
- params.cfg_scale ,
1045
- params.guidance ,
1069
+ guidance_params,
1046
1070
params.eta ,
1047
1071
params.width ,
1048
1072
params.height ,
@@ -1054,12 +1078,7 @@ int main(int argc, const char* argv[]) {
1054
1078
params.control_strength ,
1055
1079
params.style_ratio ,
1056
1080
params.normalize_input ,
1057
- params.input_id_images_path .c_str (),
1058
- params.skip_layers .data (),
1059
- params.skip_layers .size (),
1060
- params.slg_scale ,
1061
- params.skip_layer_start ,
1062
- params.skip_layer_end );
1081
+ params.input_id_images_path .c_str ());
1063
1082
} else if (params.mode == IMG2IMG || params.mode == IMG2VID) {
1064
1083
sd_image_t input_image = {(uint32_t )params.width ,
1065
1084
(uint32_t )params.height ,
@@ -1075,8 +1094,7 @@ int main(int argc, const char* argv[]) {
1075
1094
params.motion_bucket_id ,
1076
1095
params.fps ,
1077
1096
params.augmentation_level ,
1078
- params.min_cfg ,
1079
- params.cfg_scale ,
1097
+ guidance_params,
1080
1098
params.sample_method ,
1081
1099
params.sample_steps ,
1082
1100
params.strength ,
@@ -1109,8 +1127,7 @@ int main(int argc, const char* argv[]) {
1109
1127
params.prompt .c_str (),
1110
1128
params.negative_prompt .c_str (),
1111
1129
params.clip_skip ,
1112
- params.cfg_scale ,
1113
- params.guidance ,
1130
+ guidance_params,
1114
1131
params.eta ,
1115
1132
params.width ,
1116
1133
params.height ,
@@ -1123,12 +1140,7 @@ int main(int argc, const char* argv[]) {
1123
1140
params.control_strength ,
1124
1141
params.style_ratio ,
1125
1142
params.normalize_input ,
1126
- params.input_id_images_path .c_str (),
1127
- params.skip_layers .data (),
1128
- params.skip_layers .size (),
1129
- params.slg_scale ,
1130
- params.skip_layer_start ,
1131
- params.skip_layer_end );
1143
+ params.input_id_images_path .c_str ());
1132
1144
}
1133
1145
} else { // EDIT
1134
1146
results = edit (sd_ctx,
@@ -1137,25 +1149,19 @@ int main(int argc, const char* argv[]) {
1137
1149
params.prompt .c_str (),
1138
1150
params.negative_prompt .c_str (),
1139
1151
params.clip_skip ,
1140
- params.cfg_scale ,
1141
- params.guidance ,
1152
+ guidance_params,
1142
1153
params.eta ,
1143
1154
params.width ,
1144
1155
params.height ,
1145
1156
params.sample_method ,
1146
1157
params.sample_steps ,
1147
- params.strength ,
1148
1158
params.seed ,
1149
1159
params.batch_count ,
1150
1160
control_image,
1151
1161
params.control_strength ,
1152
1162
params.style_ratio ,
1153
1163
params.normalize_input ,
1154
- params.skip_layers .data (),
1155
- params.skip_layers .size (),
1156
- params.slg_scale ,
1157
- params.skip_layer_start ,
1158
- params.skip_layer_end );
1164
+ params.input_id_images_path .c_str ());
1159
1165
}
1160
1166
1161
1167
if (results == NULL ) {
0 commit comments