@@ -1457,6 +1457,18 @@ class StableDiffusionGGML {
1457
1457
LOG_WARN (" OOR" );
1458
1458
}
1459
1459
}
1460
+ float tile_overlap = 0 .5f ;
1461
+ const char * SD_TILE_OVERLAP = getenv (" SD_TILE_OVERLAP" );
1462
+ if (SD_TILE_OVERLAP != nullptr ) {
1463
+ std::string sd_tile_overlap_str = SD_TILE_OVERLAP;
1464
+ try {
1465
+ tile_overlap = std::stof (sd_tile_overlap_str);
1466
+ } catch (const std::invalid_argument&) {
1467
+ LOG_WARN (" Invalid" );
1468
+ } catch (const std::out_of_range&) {
1469
+ LOG_WARN (" OOR" );
1470
+ }
1471
+ }
1460
1472
if (!decode){
1461
1473
// TODO: also use an arg for this one?
1462
1474
// to keep the compute buffer size consistent
@@ -1473,11 +1485,14 @@ class StableDiffusionGGML {
1473
1485
if (SD_TILE_SIZE != nullptr ) {
1474
1486
LOG_INFO (" VAE Tile size: %dx%d" , tile_size_x, tile_size_y);
1475
1487
}
1488
+ if (SD_TILE_OVERLAP != nullptr ) {
1489
+ LOG_INFO (" VAE Tile overlap: %.2f" , tile_overlap);
1490
+ }
1476
1491
// split latent in 32x32 tiles and compute in several steps
1477
1492
// Per-tile callback passed to sd_tiling_non_square: forwards the tile `in` to
// first_stage_model->compute together with n_threads and decode, passing `&out`
// as the last argument. Everything is captured by reference ([&]); the `init`
// parameter is accepted but not used in this body.
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
1478
1493
first_stage_model->compute (n_threads, in, decode, &out);
1479
1494
};
1480
- sd_tiling_non_square (x, result, 8 , tile_size_x, tile_size_y, 0 . 5f , on_tiling);
1495
+ sd_tiling_non_square (x, result, 8 , tile_size_x, tile_size_y, tile_overlap , on_tiling);
1481
1496
} else {
1482
1497
first_stage_model->compute (n_threads, x, decode, &result);
1483
1498
}
0 commit comments