Skip to content

Commit cf6ddd6

Browse files
committed
support non-square tiles
1 parent 61b3ba6 commit cf6ddd6

File tree

2 files changed

+41
-13
lines changed

2 files changed

+41
-13
lines changed

ggml_extend.hpp

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -635,7 +635,9 @@ sd_tiling_calc_tiles(int &num_tiles_dim, float& tile_overlap_factor_dim, int sma
635635
}
636636

637637
// Tiling
638-
__STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const int scale, const int tile_size, const float tile_overlap_factor, on_tile_process on_processing) {
638+
__STATIC_INLINE__ void sd_tiling_non_square(ggml_tensor* input, ggml_tensor* output, const int scale,
639+
const int p_tile_size_x, const int p_tile_size_y,
640+
const float tile_overlap_factor, on_tile_process on_processing) {
639641
int input_width = (int)input->ne[0];
640642
int input_height = (int)input->ne[1];
641643
int output_width = (int)output->ne[0];
@@ -656,25 +658,25 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
656658

657659
int num_tiles_x;
658660
float tile_overlap_factor_x;
659-
sd_tiling_calc_tiles(num_tiles_x, tile_overlap_factor_x, small_width, tile_size, tile_overlap_factor);
661+
sd_tiling_calc_tiles(num_tiles_x, tile_overlap_factor_x, small_width, p_tile_size_x, tile_overlap_factor);
660662

661663
int num_tiles_y;
662664
float tile_overlap_factor_y;
663-
sd_tiling_calc_tiles(num_tiles_y, tile_overlap_factor_y, small_height, tile_size, tile_overlap_factor);
665+
sd_tiling_calc_tiles(num_tiles_y, tile_overlap_factor_y, small_height, p_tile_size_y, tile_overlap_factor);
664666

665667
LOG_DEBUG("num tiles : %d, %d ", num_tiles_x, num_tiles_y);
666668
LOG_DEBUG("optimal overlap : %f, %f (targeting %f)", tile_overlap_factor_x, tile_overlap_factor_y, tile_overlap_factor);
667669

668670
GGML_ASSERT(input_width % 2 == 0 && input_height % 2 == 0 && output_width % 2 == 0 && output_height % 2 == 0); // should be multiple of 2
669671

670-
int tile_overlap_x = (int32_t)(tile_size * tile_overlap_factor_x);
671-
int non_tile_overlap_x = tile_size - tile_overlap_x;
672+
int tile_overlap_x = (int32_t)(p_tile_size_x * tile_overlap_factor_x);
673+
int non_tile_overlap_x = p_tile_size_x - tile_overlap_x;
672674

673-
int tile_overlap_y = (int32_t)(tile_size * tile_overlap_factor_y);
674-
int non_tile_overlap_y = tile_size - tile_overlap_y;
675+
int tile_overlap_y = (int32_t)(p_tile_size_y * tile_overlap_factor_y);
676+
int non_tile_overlap_y = p_tile_size_y - tile_overlap_y;
675677

676-
int tile_size_x = tile_size < small_width ? tile_size : small_width;
677-
int tile_size_y = tile_size < small_height ? tile_size : small_height;
678+
int tile_size_x = p_tile_size_x < small_width ? p_tile_size_x : small_width;
679+
int tile_size_y = p_tile_size_y < small_height ? p_tile_size_y : small_height;
678680

679681
int input_tile_size_x = tile_size_x;
680682
int input_tile_size_y = tile_size_y;
@@ -763,6 +765,11 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
763765
ggml_free(tiles_ctx);
764766
}
765767

768+
// Square-tile entry point: forwards to sd_tiling_non_square, passing tile_size as both the x and y tile size; all other arguments are passed through unchanged.
__STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const int scale,
769+
const int tile_size, const float tile_overlap_factor, on_tile_process on_processing) {
770+
sd_tiling_non_square(input, output, scale, tile_size, tile_size, tile_overlap_factor, on_processing);
771+
}
772+
766773
__STATIC_INLINE__ struct ggml_tensor* ggml_group_norm_32(struct ggml_context* ctx,
767774
struct ggml_tensor* a) {
768775
const float eps = 1e-6f; // default eps parameter

stable-diffusion.cpp

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1427,13 +1427,30 @@ class StableDiffusionGGML {
14271427
x->ne[3]); // channels
14281428
int64_t t0 = ggml_time_ms();
14291429

1430-
int tile_size = 32;
1430+
int tile_size_x = 32;
1431+
int tile_size_y = 32;
14311432
// TODO: arg instead of env?
14321433
const char* SD_TILE_SIZE = getenv("SD_TILE_SIZE");
14331434
if (SD_TILE_SIZE != nullptr) {
14341435
std::string sd_tile_size_str = SD_TILE_SIZE;
1436+
size_t dot_pos = sd_tile_size_str.find('.');
1437+
size_t x_pos = sd_tile_size_str.find('x');
14351438
try {
1436-
tile_size = std::stoi(sd_tile_size_str);
1439+
if (dot_pos != std::string::npos) {
1440+
float factor = std::stof(sd_tile_size_str);
1441+
tile_size_x = W * (decode ? 8 : 1) / factor;
1442+
tile_size_y = H * (decode ? 8 : 1) / factor;
1443+
}
1444+
else {
1445+
int tmp = std::stoi(sd_tile_size_str.substr(0, x_pos));
1446+
if (x_pos != std::string::npos) {
1447+
tile_size_y = std::stoi(sd_tile_size_str.substr(x_pos + 1));
1448+
}
1449+
else {
1450+
tile_size_y = tmp;
1451+
}
1452+
tile_size_x = tmp;
1453+
}
14371454
} catch (const std::invalid_argument&) {
14381455
LOG_WARN("Invalid");
14391456
} catch (const std::out_of_range&) {
@@ -1443,7 +1460,8 @@ class StableDiffusionGGML {
14431460
if(!decode){
14441461
// TODO: also use an arg for this one?
14451462
// to keep the compute buffer size consistent
1446-
tile_size*=1.30539;
1463+
tile_size_x*=1.30539;
1464+
tile_size_y*=1.30539;
14471465
}
14481466
if (!use_tiny_autoencoder) {
14491467
if (decode) {
@@ -1452,11 +1470,14 @@ class StableDiffusionGGML {
14521470
ggml_tensor_scale_input(x);
14531471
}
14541472
if (vae_tiling) {
1473+
if (SD_TILE_SIZE != nullptr) {
1474+
LOG_INFO("VAE Tile size: %dx%d", tile_size_x, tile_size_y);
1475+
}
14551476
// split the latent into tile_size_x by tile_size_y tiles and compute in several steps
14561477
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
14571478
first_stage_model->compute(n_threads, in, decode, &out);
14581479
};
1459-
sd_tiling(x, result, 8, tile_size, 0.5f, on_tiling);
1480+
sd_tiling_non_square(x, result, 8, tile_size_x, tile_size_y, 0.5f, on_tiling);
14601481
} else {
14611482
first_stage_model->compute(n_threads, x, decode, &result);
14621483
}

0 commit comments

Comments
 (0)