@@ -635,7 +635,9 @@ sd_tiling_calc_tiles(int &num_tiles_dim, float& tile_overlap_factor_dim, int sma
635
635
}
636
636
637
637
// Tiling
638
- __STATIC_INLINE__ void sd_tiling (ggml_tensor* input, ggml_tensor* output, const int scale, const int tile_size, const float tile_overlap_factor, on_tile_process on_processing) {
638
+ __STATIC_INLINE__ void sd_tiling_non_square (ggml_tensor* input, ggml_tensor* output, const int scale,
639
+ const int p_tile_size_x, const int p_tile_size_y,
640
+ const float tile_overlap_factor, on_tile_process on_processing) {
639
641
int input_width = (int )input->ne [0 ];
640
642
int input_height = (int )input->ne [1 ];
641
643
int output_width = (int )output->ne [0 ];
@@ -656,25 +658,25 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
656
658
657
659
int num_tiles_x;
658
660
float tile_overlap_factor_x;
659
- sd_tiling_calc_tiles (num_tiles_x, tile_overlap_factor_x, small_width, tile_size , tile_overlap_factor);
661
+ sd_tiling_calc_tiles (num_tiles_x, tile_overlap_factor_x, small_width, p_tile_size_x , tile_overlap_factor);
660
662
661
663
int num_tiles_y;
662
664
float tile_overlap_factor_y;
663
- sd_tiling_calc_tiles (num_tiles_y, tile_overlap_factor_y, small_height, tile_size , tile_overlap_factor);
665
+ sd_tiling_calc_tiles (num_tiles_y, tile_overlap_factor_y, small_height, p_tile_size_y , tile_overlap_factor);
664
666
665
667
LOG_DEBUG (" num tiles : %d, %d " , num_tiles_x, num_tiles_y);
666
668
LOG_DEBUG (" optimal overlap : %f, %f (targeting %f)" , tile_overlap_factor_x, tile_overlap_factor_y, tile_overlap_factor);
667
669
668
670
GGML_ASSERT (input_width % 2 == 0 && input_height % 2 == 0 && output_width % 2 == 0 && output_height % 2 == 0 ); // should be multiple of 2
669
671
670
- int tile_overlap_x = (int32_t )(tile_size * tile_overlap_factor_x);
671
- int non_tile_overlap_x = tile_size - tile_overlap_x;
672
+ int tile_overlap_x = (int32_t )(p_tile_size_x * tile_overlap_factor_x);
673
+ int non_tile_overlap_x = p_tile_size_x - tile_overlap_x;
672
674
673
- int tile_overlap_y = (int32_t )(tile_size * tile_overlap_factor_y);
674
- int non_tile_overlap_y = tile_size - tile_overlap_y;
675
+ int tile_overlap_y = (int32_t )(p_tile_size_y * tile_overlap_factor_y);
676
+ int non_tile_overlap_y = p_tile_size_y - tile_overlap_y;
675
677
676
- int tile_size_x = tile_size < small_width ? tile_size : small_width;
677
- int tile_size_y = tile_size < small_height ? tile_size : small_height;
678
+ int tile_size_x = p_tile_size_x < small_width ? p_tile_size_x : small_width;
679
+ int tile_size_y = p_tile_size_y < small_height ? p_tile_size_y : small_height;
678
680
679
681
int input_tile_size_x = tile_size_x;
680
682
int input_tile_size_y = tile_size_y;
@@ -763,6 +765,11 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
763
765
ggml_free (tiles_ctx);
764
766
}
765
767
768
// Square-tile entry point: forwards every argument unchanged to
// sd_tiling_non_square, supplying tile_size as both the x and the y tile size.
+ __STATIC_INLINE__ void sd_tiling (ggml_tensor* input, ggml_tensor* output, const int scale,
769
+ const int tile_size, const float tile_overlap_factor, on_tile_process on_processing) {
770
+ sd_tiling_non_square (input, output, scale, tile_size, tile_size, tile_overlap_factor, on_processing); // tile_size is passed twice: once per axis
771
+ }
772
+
766
773
__STATIC_INLINE__ struct ggml_tensor * ggml_group_norm_32 (struct ggml_context * ctx,
767
774
struct ggml_tensor * a) {
768
775
const float eps = 1e-6f ; // default eps parameter
0 commit comments