
Commit eaf04a1

Handle graph compute failures more gracefully
1 parent 340a38d commit eaf04a1

16 files changed, +100 −45 lines changed

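The common thread across the files below: GGMLRunner::compute() and the per-model compute() wrappers now return bool instead of void, the denoise callback signals failure by returning NULL, and sample_k_diffusion() returns false rather than calling abort(). A small self-contained toy of that overall pattern (illustrative names only, not code from this commit):

// Toy illustration of the commit's pattern (not code from the repo):
// compute() returns bool so callers can report and propagate failures
// instead of hitting an abort() deep inside the graph runner.
#include <cstdio>

static bool compute_graph(bool buffer_ok) {
    if (!buffer_ok) {
        return false;  // e.g. compute buffer allocation failed
    }
    // ... run the ggml graph ...
    return true;
}

static bool generate() {
    if (!compute_graph(/*buffer_ok=*/false)) {
        std::fprintf(stderr, "graph compute failed\n");
        return false;  // bubbled up to the caller rather than aborting
    }
    return true;
}

int main() {
    return generate() ? 0 : 1;
}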
clip.hpp

Lines changed: 2 additions & 2 deletions

@@ -936,7 +936,7 @@ struct CLIPTextModelRunner : public GGMLRunner {
         return gf;
     }
 
-    void compute(const int n_threads,
+    bool compute(const int n_threads,
                  struct ggml_tensor* input_ids,
                  int num_custom_embeddings,
                  void* custom_embeddings_data,
@@ -947,7 +947,7 @@ struct CLIPTextModelRunner : public GGMLRunner {
         auto get_graph = [&]() -> struct ggml_cgraph* {
             return build_graph(input_ids, num_custom_embeddings, custom_embeddings_data, max_token_idx, return_pooled);
         };
-        GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
+        return GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
     }
 };

conditioner.hpp

Lines changed: 2 additions & 2 deletions

@@ -642,14 +642,14 @@ struct FrozenCLIPVisionEmbedder : public GGMLRunner {
         return gf;
     }
 
-    void compute(const int n_threads,
+    bool compute(const int n_threads,
                  ggml_tensor* pixel_values,
                  ggml_tensor** output,
                  ggml_context* output_ctx) {
         auto get_graph = [&]() -> struct ggml_cgraph* {
             return build_graph(pixel_values);
         };
-        GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
+        return GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
     }
 };

control.hpp

Lines changed: 5 additions & 2 deletions

@@ -410,7 +410,7 @@ struct ControlNet : public GGMLRunner {
         return gf;
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  struct ggml_tensor* x,
                  struct ggml_tensor* hint,
                  struct ggml_tensor* timesteps,
@@ -426,8 +426,11 @@ struct ControlNet : public GGMLRunner {
             return build_graph(x, hint, timesteps, context, y);
         };
 
-        GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
+        if (!GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx)) {
+            return false;
+        }
         guided_hint_cached = true;
+        return true;
     }
 
     bool load_from_file(const std::string& file_path) {

denoiser.hpp

Lines changed: 49 additions & 6 deletions

@@ -352,7 +352,7 @@ struct EDMVDenoiser : public CompVisVDenoiser {
     }
 
     float t_to_sigma(float t) {
-        return std::exp(t * 4/(float)TIMESTEPS);
+        return std::exp(t * 4 / (float)TIMESTEPS);
     }
 
     float sigma_to_t(float s) {
@@ -491,7 +491,7 @@ struct FluxFlowDenoiser : public Denoiser {
 typedef std::function<ggml_tensor*(ggml_tensor*, float, int)> denoise_cb_t;
 
 // k diffusion reverse ODE: dx = (x - D(x;\sigma)) / \sigma dt; \sigma(t) = t
-static void sample_k_diffusion(sample_method_t method,
+static bool sample_k_diffusion(sample_method_t method,
                                denoise_cb_t model,
                                ggml_context* work_ctx,
                                ggml_tensor* x,
@@ -510,6 +510,9 @@ static void sample_k_diffusion(sample_method_t method,
 
                 // denoise
                 ggml_tensor* denoised = model(x, sigma, i + 1);
+                if (denoised == NULL) {
+                    return false;
+                }
 
                 // d = (x - denoised) / sigma
                 {
@@ -563,6 +566,9 @@ static void sample_k_diffusion(sample_method_t method,
 
                 // denoise
                 ggml_tensor* denoised = model(x, sigma, i + 1);
+                if (denoised == NULL) {
+                    return false;
+                }
 
                 // d = (x - denoised) / sigma
                 {
@@ -594,6 +600,9 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // denoise
                 ggml_tensor* denoised = model(x, sigmas[i], -(i + 1));
+                if (denoised == NULL) {
+                    return false;
+                }
 
                 // d = (x - denoised) / sigma
                 {
@@ -628,7 +637,10 @@ static void sample_k_diffusion(sample_method_t method,
                 }
 
                 ggml_tensor* denoised = model(x2, sigmas[i + 1], i + 1);
-                float* vec_denoised = (float*)denoised->data;
+                if (denoised == NULL) {
+                    return false;
+                }
+                float* vec_denoised = (float*)denoised->data;
                 for (int j = 0; j < ggml_nelements(x); j++) {
                     float d2 = (vec_x2[j] - vec_denoised[j]) / sigmas[i + 1];
                     vec_d[j] = (vec_d[j] + d2) / 2;
@@ -644,6 +656,9 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // denoise
                 ggml_tensor* denoised = model(x, sigmas[i], i + 1);
+                if (denoised == NULL) {
+                    return false;
+                }
 
                 // d = (x - denoised) / sigma
                 {
@@ -680,7 +695,10 @@ static void sample_k_diffusion(sample_method_t method,
                 }
 
                 ggml_tensor* denoised = model(x2, sigma_mid, i + 1);
-                float* vec_denoised = (float*)denoised->data;
+                if (denoised == NULL) {
+                    return false;
+                }
+                float* vec_denoised = (float*)denoised->data;
                 for (int j = 0; j < ggml_nelements(x); j++) {
                     float d2 = (vec_x2[j] - vec_denoised[j]) / sigma_mid;
                     vec_x[j] = vec_x[j] + d2 * dt_2;
@@ -697,6 +715,9 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // denoise
                 ggml_tensor* denoised = model(x, sigmas[i], i + 1);
+                if (denoised == NULL) {
+                    return false;
+                }
 
                 // get_ancestral_step
                 float sigma_up = std::min(sigmas[i + 1],
@@ -741,6 +762,9 @@ static void sample_k_diffusion(sample_method_t method,
                 }
 
                 ggml_tensor* denoised = model(x2, sigmas[i + 1], i + 1);
+                if (denoised == NULL) {
+                    return false;
+                }
 
                 // Second half-step
                 for (int j = 0; j < ggml_nelements(x); j++) {
@@ -771,6 +795,9 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // denoise
                 ggml_tensor* denoised = model(x, sigmas[i], i + 1);
+                if (denoised == NULL) {
+                    return false;
+                }
 
                 float t = t_fn(sigmas[i]);
                 float t_next = t_fn(sigmas[i + 1]);
@@ -810,6 +837,9 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // denoise
                 ggml_tensor* denoised = model(x, sigmas[i], i + 1);
+                if (denoised == NULL) {
+                    return false;
+                }
 
                 float t = t_fn(sigmas[i]);
                 float t_next = t_fn(sigmas[i + 1]);
@@ -860,7 +890,10 @@ static void sample_k_diffusion(sample_method_t method,
 
                 // Denoising step
                 ggml_tensor* denoised = model(x_cur, sigma, i + 1);
-                float* vec_denoised = (float*)denoised->data;
+                if (denoised == NULL) {
+                    return false;
+                }
+                float* vec_denoised = (float*)denoised->data;
                 // d_cur = (x_cur - denoised) / sigma
                 struct ggml_tensor* d_cur = ggml_dup_tensor(work_ctx, x_cur);
                 float* vec_d_cur = (float*)d_cur->data;
@@ -1003,6 +1036,9 @@ static void sample_k_diffusion(sample_method_t method,
 
                 // denoise
                 ggml_tensor* denoised = model(x, sigma, i + 1);
+                if (denoised == NULL) {
+                    return false;
+                }
 
                 // x = denoised
                 {
@@ -1129,6 +1165,9 @@ static void sample_k_diffusion(sample_method_t method,
                 // p. 8 (7), compare also p. 38 (226) therein.
                 struct ggml_tensor* model_output =
                     model(x, sigma, i + 1);
+                if (model_output == NULL) {
+                    return false;
+                }
                 // Here model_output is still the k-diffusion denoiser
                 // output, not the U-net output F_theta(c_in(sigma) x;
                 // ...) in Karras et al. (2022), whereas Diffusers'
@@ -1288,6 +1327,9 @@ static void sample_k_diffusion(sample_method_t method,
                 }
                 struct ggml_tensor* model_output =
                     model(x, sigma, i + 1);
+                if (model_output == NULL) {
+                    return false;
+                }
                 {
                     float* vec_x = (float*)x->data;
                     float* vec_model_output =
@@ -1395,8 +1437,9 @@ static void sample_k_diffusion(sample_method_t method,
 
         default:
             LOG_ERROR("Attempting to sample with nonexisting sample method %i", method);
-            abort();
+            return false;
     }
+    return true;
 }
 
 #endif  // __DENOISER_HPP__

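The recurring edit inside sample_k_diffusion(): every model(...) call is now followed by a NULL check that turns a failed graph compute into a false return, and the default branch returns false instead of abort(). A self-contained toy of that control flow (the types and names below are stand-ins, not the repository's API):

#include <cstdio>
#include <functional>

// Stand-in for denoise_cb_t: returns nullptr when the underlying graph
// compute fails, mirroring how the real callback behaves now that the
// model's compute() returns bool.
using denoise_fn = std::function<float*(float* x, float sigma, int step)>;

static bool sample_loop(denoise_fn model, float* x, int steps) {
    for (int i = 0; i < steps; i++) {
        float* denoised = model(x, /*sigma=*/1.0f, i + 1);
        if (denoised == nullptr) {
            return false;  // propagate the failure instead of dereferencing NULL
        }
        // ... perform the actual ODE / ancestral update on x here ...
    }
    return true;
}

int main() {
    float x[4] = {0.0f};
    denoise_fn always_fails = [](float*, float, int) -> float* { return nullptr; };
    if (!sample_loop(always_fails, x, /*steps=*/10)) {
        std::fprintf(stderr, "sampling stopped cleanly after a compute failure\n");
    }
    return 0;
}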
diffusion_model.hpp

Lines changed: 4 additions & 4 deletions

@@ -6,7 +6,7 @@
 #include "unet.hpp"
 
 struct DiffusionModel {
-    virtual void compute(int n_threads,
+    virtual bool compute(int n_threads,
                          struct ggml_tensor* x,
                          struct ggml_tensor* timesteps,
                          struct ggml_tensor* context,
@@ -61,7 +61,7 @@ struct UNetModel : public DiffusionModel {
         return unet.unet.adm_in_channels;
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  struct ggml_tensor* x,
                  struct ggml_tensor* timesteps,
                  struct ggml_tensor* context,
@@ -111,7 +111,7 @@ struct MMDiTModel : public DiffusionModel {
         return 768 + 1280;
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  struct ggml_tensor* x,
                  struct ggml_tensor* timesteps,
                  struct ggml_tensor* context,
@@ -162,7 +162,7 @@ struct FluxModel : public DiffusionModel {
         return 768;
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  struct ggml_tensor* x,
                  struct ggml_tensor* timesteps,
                  struct ggml_tensor* context,

esrgan.hpp

Lines changed: 2 additions & 2 deletions

@@ -183,14 +183,14 @@ struct ESRGAN : public GGMLRunner {
         return gf;
     }
 
-    void compute(const int n_threads,
+    bool compute(const int n_threads,
                  struct ggml_tensor* x,
                  ggml_tensor** output,
                  ggml_context* output_ctx = NULL) {
         auto get_graph = [&]() -> struct ggml_cgraph* {
             return build_graph(x);
         };
-        GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
+        return GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
     }
 };

flux.hpp

Lines changed: 2 additions & 2 deletions

@@ -1163,7 +1163,7 @@ namespace Flux {
         return gf;
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  struct ggml_tensor* x,
                  struct ggml_tensor* timesteps,
                  struct ggml_tensor* context,
@@ -1182,7 +1182,7 @@ namespace Flux {
             return build_graph(x, timesteps, context, c_concat, y, guidance, skip_layers);
         };
 
-        GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
+        return GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
     }
 
     void test() {

ggml_extend.hpp

Lines changed: 10 additions & 7 deletions

@@ -383,19 +383,19 @@ __STATIC_INLINE__ void sd_apply_mask(struct ggml_tensor* image_data,
             ggml_tensor_set_f32(mask, m, ix, iy);
         }
     }
-    float rescale_mx = mask->ne[0]/output->ne[0];
-    float rescale_my = mask->ne[1]/output->ne[1];
+    float rescale_mx = mask->ne[0] / output->ne[0];
+    float rescale_my = mask->ne[1] / output->ne[1];
     GGML_ASSERT(output->type == GGML_TYPE_F32);
     for (int ix = 0; ix < width; ix++) {
         for (int iy = 0; iy < height; iy++) {
-            int mx = (int)(ix * rescale_mx);
-            int my = (int)(iy * rescale_my);
+            int mx = (int)(ix * rescale_mx);
+            int my = (int)(iy * rescale_my);
             float m = ggml_tensor_get_f32(mask, mx, my);
             m = round(m); // inpaint models need binary masks
             ggml_tensor_set_f32(mask, m, mx, my);
             for (int k = 0; k < channels; k++) {
                 float value = ggml_tensor_get_f32(image_data, ix, iy, k);
-                value = (1 - m) * (value - masked_value) + masked_value;
+                value = (1 - m) * (value - masked_value) + masked_value;
                 ggml_tensor_set_f32(output, value, ix, iy, k);
             }
         }
@@ -1319,12 +1319,14 @@ struct GGMLRunner {
         }
     }
 
-    void compute(get_graph_cb_t get_graph,
+    bool compute(get_graph_cb_t get_graph,
                  int n_threads,
                  bool free_compute_buffer_immediately = true,
                  struct ggml_tensor** output          = NULL,
                  struct ggml_context* output_ctx      = NULL) {
-        alloc_compute_buffer(get_graph);
+        if (!alloc_compute_buffer(get_graph)) {
+            return false;
+        }
         reset_compute_ctx();
         struct ggml_cgraph* gf = get_graph();
         GGML_ASSERT(ggml_gallocr_alloc_graph(compute_allocr, gf));
@@ -1382,6 +1384,7 @@ struct GGMLRunner {
         if (free_compute_buffer_immediately) {
             free_compute_buffer();
         }
+        return true;
     }
 };

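After this change, false from GGMLRunner::compute() means the compute buffer could not be allocated or the graph did not run; true is returned only once the graph has executed. A sketch of how a new runner would follow the same convention as the wrappers above (a hypothetical subclass mirroring the diffs, not code copied from the repo; constructor and members omitted):

// Hypothetical runner following the bool-returning convention above.
// GGMLRunner, ggml_cgraph and ggml_tensor are the repository's / ggml's types.
struct MyRunner : public GGMLRunner {
    // constructor and members omitted for brevity
    struct ggml_cgraph* build_graph(struct ggml_tensor* x);  // defined elsewhere

    bool compute(int n_threads, struct ggml_tensor* x,
                 struct ggml_tensor** output, struct ggml_context* output_ctx) {
        auto get_graph = [&]() -> struct ggml_cgraph* {
            return build_graph(x);
        };
        // Forward the base-class result so failures reach the top-level API.
        return GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
    }
};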
lora.hpp

Lines changed: 2 additions & 2 deletions

@@ -835,11 +835,11 @@ struct LoraModel : public GGMLRunner {
         return gf;
     }
 
-    void apply(std::map<std::string, struct ggml_tensor*> model_tensors, SDVersion version, int n_threads) {
+    bool apply(std::map<std::string, struct ggml_tensor*> model_tensors, SDVersion version, int n_threads) {
         auto get_graph = [&]() -> struct ggml_cgraph* {
             return build_lora_graph(model_tensors, version);
         };
-        GGMLRunner::compute(get_graph, n_threads, true);
+        return GGMLRunner::compute(get_graph, n_threads, true);
     }
 };

mmdit.hpp

Lines changed: 2 additions & 2 deletions

@@ -910,7 +910,7 @@ struct MMDiTRunner : public GGMLRunner {
         return gf;
     }
 
-    void compute(int n_threads,
+    bool compute(int n_threads,
                  struct ggml_tensor* x,
                  struct ggml_tensor* timesteps,
                  struct ggml_tensor* context,
@@ -926,7 +926,7 @@ struct MMDiTRunner : public GGMLRunner {
             return build_graph(x, timesteps, context, y, skip_layers);
         };
 
-        GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
+        return GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
     }
 
     void test() {
