
Commit 8325688

turn off ddp_overlap for non-O2 usage and add synthetic data argument (#396)

1 parent 6a76374
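The commit title says ddp_overlap is being turned off for non-O2 usage: overlapped DDP gradient communication in NeMo is only expected to work together with Megatron AMP O2. The diff itself only flips config defaults; the guard below is a hypothetical sketch of that constraint (the megatron_amp_O2 key name mirrors other NeMo configs, but the function and its placement are illustrative, not part of this commit):

from omegaconf import DictConfig


def validate_ddp_overlap(model_cfg: DictConfig) -> None:
    # Hypothetical guard implied by the commit title: overlapped DDP
    # gradient communication is only supported together with Megatron
    # AMP O2. Key names mirror NeMo configs; the function itself is
    # illustrative, not from this commit.
    if model_cfg.get("ddp_overlap", False) and not model_cfg.get("megatron_amp_O2", False):
        raise ValueError(
            "ddp_overlap=True requires megatron_amp_O2=True; "
            "use ddp_overlap=False (the new default) for non-O2 runs."
        )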

File tree: 6 files changed, +28 −32 lines

launcher_scripts/conf/training/stable_diffusion/860m_res_256_pretrain.yaml

Lines changed: 2 additions & 1 deletion

@@ -165,7 +165,7 @@ model:
   resume_from_checkpoint: null # manually set the checkpoint file to load from
   apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this
   gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
-  ddp_overlap: True # True for using PyTorch DDP overlap.
+  ddp_overlap: False # True for using PyTorch DDP overlap.

   optim:
     name: fused_adam
@@ -189,6 +189,7 @@ model:

   data:
     num_workers: 16
+    synthetic_data: False
     train:
       dataset_path:
         - ${data_dir}/your_dataset/wdinfo.pkl
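The new synthetic_data key defaults to False, so existing configs keep reading real webdataset shards. The diff does not show the consumer of this flag; below is a minimal sketch of the kind of stand-in dataset such a flag typically gates, useful for benchmarking throughput without touching storage (the class name, tensor shapes, and vocabulary size are all assumptions):

import torch
from torch.utils.data import DataLoader, Dataset


class SyntheticTextToImageDataset(Dataset):
    # Hypothetical stand-in for the webdataset pipeline: random images and
    # CLIP-sized token ids let the training loop run with no shards on disk.
    def __init__(self, resolution: int = 256, length: int = 100_000):
        self.resolution = resolution
        self.length = length

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        return {
            "images": torch.randn(3, self.resolution, self.resolution),
            "captions": torch.randint(0, 49408, (77,)),  # assumed token shape
        }


# Drop-in replacement for the real loader while profiling.
loader = DataLoader(SyntheticTextToImageDataset(), batch_size=8, num_workers=16)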

launcher_scripts/conf/training/stable_diffusion/860m_res_256_v2_0_pretrain.yaml

Lines changed: 10 additions & 14 deletions

@@ -148,26 +148,21 @@ model:
       target: torch.nn.Identity

   cond_stage_config:
-    _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenMegatronCLIPEmbedder
-    restore_from_path: /path/to/nemo_clip.nemo
-    device: cuda
-    freeze: True
-    layer: "penultimate"
-    # For compatibility of history version that uses open clip model
-    # _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenOpenCLIPEmbedder
-    # arch: ViT-H-14
-    # version: laion2b_s32b_b79k
-    # device: cuda
-    # max_length: 77
-    # freeze: True
-    # layer: "penultimate"
+    # For compatibility of history version that uses open clip model
+    _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenOpenCLIPEmbedder
+    arch: ViT-H-14
+    version: laion2b_s32b_b79k
+    device: cuda
+    max_length: 77
+    freeze: True
+    layer: "penultimate"

   # miscellaneous
   seed: 666
   resume_from_checkpoint: null # manually set the checkpoint file to load from
   apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this
   gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
-  ddp_overlap: True # True for using PyTorch DDP overlap.
+  ddp_overlap: False # True for using PyTorch DDP overlap.

   optim:
     name: fused_adam
@@ -191,6 +186,7 @@ model:

   data:
     num_workers: 16
+    synthetic_data: False
     train:
       dataset_path:
         - ${data_dir}/your_dataset/wdinfo.pkl
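For the v2.0 configs the change also swaps the conditioning encoder back from FrozenMegatronCLIPEmbedder to FrozenOpenCLIPEmbedder. Keys beginning with _target_ follow the Hydra instantiation convention: the dotted path is resolved to a class and the sibling keys become constructor kwargs. A sketch of that mechanism, assuming NeMo builds this block the standard Hydra way (the diff does not confirm it, and running this requires nemo installed plus a GPU):

from hydra.utils import instantiate
from omegaconf import OmegaConf

# Hydra resolves _target_ to the named class and passes the remaining
# keys as constructor keyword arguments.
cfg = OmegaConf.create({
    "_target_": "nemo.collections.multimodal.modules.stable_diffusion."
                "encoders.modules.FrozenOpenCLIPEmbedder",
    "arch": "ViT-H-14",
    "version": "laion2b_s32b_b79k",
    "device": "cuda",
    "max_length": 77,
    "freeze": True,
    "layer": "penultimate",
})
cond_stage_model = instantiate(cfg)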

launcher_scripts/conf/training/stable_diffusion/860m_res_512_v1_1.yaml

Lines changed: 2 additions & 1 deletion

@@ -164,7 +164,7 @@ model:
   resume_from_checkpoint: null # manually set the checkpoint file to load from
   apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this
   gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
-  ddp_overlap: True # True for using PyTorch DDP overlap.
+  ddp_overlap: False # True for using PyTorch DDP overlap.

   optim:
     name: fused_adam
@@ -188,6 +188,7 @@ model:

   data:
     num_workers: 16
+    synthetic_data: False
     train:
       dataset_path:
         - ${data_dir}/your_dataset/wdinfo.pkl

launcher_scripts/conf/training/stable_diffusion/860m_res_512_v1_2.yaml

Lines changed: 2 additions & 1 deletion

@@ -164,7 +164,7 @@ model:
   resume_from_checkpoint: null # manually set the checkpoint file to load from
   apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this
   gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
-  ddp_overlap: True # True for using PyTorch DDP overlap.
+  ddp_overlap: False # True for using PyTorch DDP overlap.

   optim:
     name: fused_adam
@@ -188,6 +188,7 @@ model:

   data:
     num_workers: 16
+    synthetic_data: False
     train:
       dataset_path:
         - ${data_dir}/your_dataset/wdinfo.pkl

launcher_scripts/conf/training/stable_diffusion/860m_res_512_v1_5.yaml

Lines changed: 2 additions & 1 deletion

@@ -164,7 +164,7 @@ model:
   resume_from_checkpoint: null # manually set the checkpoint file to load from
   apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this
   gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
-  ddp_overlap: True # True for using PyTorch DDP overlap.
+  ddp_overlap: False # True for using PyTorch DDP overlap.

   optim:
     name: fused_adam
@@ -188,6 +188,7 @@ model:

   data:
     num_workers: 16
+    synthetic_data: False
     train:
       dataset_path:
         - ${data_dir}/your_dataset/wdinfo.pkl

launcher_scripts/conf/training/stable_diffusion/860m_res_512_v2_0_base.yaml

Lines changed: 10 additions & 14 deletions

@@ -148,26 +148,21 @@ model:
       target: torch.nn.Identity

   cond_stage_config:
-    _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenMegatronCLIPEmbedder
-    restore_from_path: /path/to/nemo_clip.nemo
-    device: cuda
-    freeze: True
-    layer: "penultimate"
-    # For compatibility of history version that uses open clip model
-    # _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenOpenCLIPEmbedder
-    # arch: ViT-H-14
-    # version: laion2b_s32b_b79k
-    # device: cuda
-    # max_length: 77
-    # freeze: True
-    # layer: "penultimate"
+    # For compatibility of history version that uses open clip model
+    _target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenOpenCLIPEmbedder
+    arch: ViT-H-14
+    version: laion2b_s32b_b79k
+    device: cuda
+    max_length: 77
+    freeze: True
+    layer: "penultimate"

   # miscellaneous
   seed: 666
   resume_from_checkpoint: null # manually set the checkpoint file to load from
   apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this
   gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
-  ddp_overlap: True # True for using PyTorch DDP overlap.
+  ddp_overlap: False # True for using PyTorch DDP overlap.

   optim:
     name: fused_adam
@@ -191,6 +186,7 @@ model:

   data:
     num_workers: 16
+    synthetic_data: False
     train:
       dataset_path:
         - ${data_dir}/your_dataset/wdinfo.pkl
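Unchanged context worth noting: gradient_as_bucket_view: True stays on in every file, and it maps onto a real torch.nn.parallel.DistributedDataParallel constructor argument. A single-process illustration follows; the tiny model, gloo backend, and env-var scaffolding exist only to make the example self-contained:

import os

import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP

# Minimal one-rank process group so DDP can be constructed locally.
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
dist.init_process_group("gloo", rank=0, world_size=1)

model = torch.nn.Linear(8, 8)
# grads alias the communication buckets instead of separate tensors,
# which is the memory saving the YAML comment describes
ddp_model = DDP(model, gradient_as_bucket_view=True)

ddp_model(torch.randn(4, 8)).sum().backward()
dist.destroy_process_group()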
