@@ -73,6 +73,7 @@ def test_checkpoint_save_and_load_single_process_ddp(recipe_path, tmp_path):
7373 overrides = [
7474 f"checkpoint.ckpt_dir={ temp_dir } " ,
7575 f"+wandb_init_args.dir={ tmp_path } " ,
76+ f"hydra.run.dir={ tmp_path } " ,
7677 "num_train_steps=10" ,
7778 "checkpoint.save_every_n_steps=5" ,
7879 "checkpoint.resume_from_checkpoint=false" , # Start fresh
@@ -121,6 +122,7 @@ def test_checkpoint_save_and_load_single_process_ddp(recipe_path, tmp_path):
121122 overrides = [
122123 f"checkpoint.ckpt_dir={ temp_dir } " ,
123124 f"+wandb_init_args.dir={ tmp_path } " ,
125+ f"hydra.run.dir={ tmp_path } " ,
124126 "num_train_steps=15" ,
125127 "checkpoint.save_every_n_steps=5" ,
126128 "checkpoint.resume_from_checkpoint=true" , # Resume from checkpoint
@@ -205,6 +207,7 @@ def test_checkpoint_save_and_load_two_processes_ddp(recipe_path, tmp_path):
205207 "checkpoint.save_every_n_steps=5" ,
206208 "checkpoint.resume_from_checkpoint=false" , # Start fresh
207209 "dataset.use_stateful_dataloader=true" ,
210+ f"hydra.run.dir={ tmp_path } " ,
208211 ]
209212
210213 result1 = subprocess .run (cmd_phase1 , check = False , capture_output = True , text = True , env = env )
@@ -268,6 +271,7 @@ def test_checkpoint_save_and_load_two_processes_ddp(recipe_path, tmp_path):
268271 "checkpoint.save_every_n_steps=5" ,
269272 "checkpoint.resume_from_checkpoint=true" , # Resume from checkpoint
270273 "dataset.use_stateful_dataloader=true" ,
274+ f"hydra.run.dir={ tmp_path } " ,
271275 ]
272276
273277 result2 = subprocess .run (cmd_phase2 , check = False , capture_output = True , text = True , env = env )
@@ -346,6 +350,7 @@ def test_checkpoint_save_and_load_single_process_mfsdp(recipe_path, tmp_path):
346350 overrides = [
347351 f"checkpoint.ckpt_dir={ temp_dir } " ,
348352 f"+wandb_init_args.dir={ tmp_path } " ,
353+ f"hydra.run.dir={ tmp_path } " ,
349354 "num_train_steps=10" ,
350355 "checkpoint.save_every_n_steps=5" ,
351356 "checkpoint.resume_from_checkpoint=false" , # Start fresh
@@ -390,6 +395,7 @@ def test_checkpoint_save_and_load_single_process_mfsdp(recipe_path, tmp_path):
390395 overrides = [
391396 f"checkpoint.ckpt_dir={ temp_dir } " ,
392397 f"+wandb_init_args.dir={ tmp_path } " ,
398+ f"hydra.run.dir={ tmp_path } " ,
393399 "num_train_steps=15" ,
394400 "checkpoint.save_every_n_steps=5" ,
395401 "checkpoint.resume_from_checkpoint=true" , # Resume from checkpoint
@@ -457,6 +463,7 @@ def test_checkpoint_save_and_load_two_processes_mfsdp(recipe_path, tmp_path):
457463 "checkpoint.save_every_n_steps=5" ,
458464 "checkpoint.resume_from_checkpoint=false" , # Start fresh
459465 "dataset.use_stateful_dataloader=true" ,
466+ f"hydra.run.dir={ tmp_path } " ,
460467 ]
461468
462469 result1 = subprocess .run (cmd_phase1 , check = False , capture_output = True , text = True , env = env )
@@ -503,6 +510,7 @@ def test_checkpoint_save_and_load_two_processes_mfsdp(recipe_path, tmp_path):
503510 "checkpoint.save_every_n_steps=5" ,
504511 "checkpoint.resume_from_checkpoint=true" , # Resume from checkpoint
505512 "dataset.use_stateful_dataloader=true" ,
513+ f"hydra.run.dir={ tmp_path } " ,
506514 ]
507515
508516 result2 = subprocess .run (cmd_phase2 , check = False , capture_output = True , text = True , env = env )
@@ -559,6 +567,7 @@ def test_checkpoint_save_and_load_single_process_fsdp2(recipe_path, tmp_path):
559567 overrides = [
560568 f"checkpoint.ckpt_dir={ temp_dir } " ,
561569 f"+wandb_init_args.dir={ tmp_path } " ,
570+ f"hydra.run.dir={ tmp_path } " ,
562571 "num_train_steps=10" ,
563572 "checkpoint.save_every_n_steps=5" ,
564573 "checkpoint.resume_from_checkpoint=false" , # Start fresh
@@ -668,6 +677,7 @@ def test_checkpoint_save_and_load_two_processes_fsdp2(recipe_path, tmp_path):
668677 "num_train_steps=10" ,
669678 "checkpoint.save_every_n_steps=5" ,
670679 "dataset.use_stateful_dataloader=true" ,
680+ f"hydra.run.dir={ tmp_path } " ,
671681 ]
672682
673683 result1 = subprocess .run (cmd_phase1 , check = False , capture_output = True , text = True , env = env )
@@ -714,6 +724,7 @@ def test_checkpoint_save_and_load_two_processes_fsdp2(recipe_path, tmp_path):
714724 "checkpoint.save_every_n_steps=5" ,
715725 "checkpoint.resume_from_checkpoint=true" , # Resume from checkpoint
716726 "dataset.use_stateful_dataloader=true" ,
727+ f"hydra.run.dir={ tmp_path } " ,
717728 ]
718729
719730 result2 = subprocess .run (cmd_phase2 , check = False , capture_output = True , text = True , env = env )
@@ -797,6 +808,7 @@ def test_final_model_save_mfsdp(recipe_path, tmp_path):
797808 overrides = [
798809 f"checkpoint.ckpt_dir={ temp_dir } " ,
799810 f"+wandb_init_args.dir={ tmp_path } " ,
811+ f"hydra.run.dir={ tmp_path } " ,
800812 "num_train_steps=3" ,
801813 "checkpoint.save_final_model=true" ,
802814 ],
@@ -831,6 +843,7 @@ def test_final_model_save_fsdp2(recipe_path, tmp_path):
831843 overrides = [
832844 f"checkpoint.ckpt_dir={ temp_dir } " ,
833845 f"+wandb_init_args.dir={ tmp_path } " ,
846+ f"hydra.run.dir={ tmp_path } " ,
834847 "checkpoint.save_final_model=true" ,
835848 "num_train_steps=3" ,
836849 ],
@@ -874,6 +887,7 @@ def test_scheduler_resume_single_gpu(recipe_path, tmp_path):
874887 overrides = [
875888 f"checkpoint.ckpt_dir={ temp_dir } " ,
876889 f"+wandb_init_args.dir={ tmp_path } " ,
890+ f"hydra.run.dir={ tmp_path } " ,
877891 "num_train_steps=10" ,
878892 "checkpoint.save_every_n_steps=5" ,
879893 "checkpoint.resume_from_checkpoint=false" , # Start fresh, don't look for checkpoints
@@ -891,6 +905,7 @@ def test_scheduler_resume_single_gpu(recipe_path, tmp_path):
891905 overrides = [
892906 f"checkpoint.ckpt_dir={ temp_dir } " ,
893907 f"+wandb_init_args.dir={ tmp_path } " ,
908+ f"hydra.run.dir={ tmp_path } " ,
894909 "num_train_steps=15" ,
895910 "checkpoint.save_every_n_steps=5" ,
896911 "checkpoint.resume_from_checkpoint=true" , # Resume from checkpoint
@@ -951,6 +966,7 @@ def test_scheduler_resume_two_gpu(recipe_path, tmp_path):
951966 "checkpoint.resume_from_checkpoint=false" , # Start fresh, don't look for checkpoints
952967 "lr_scheduler_kwargs.num_warmup_steps=20" ,
953968 "lr_scheduler_kwargs.num_training_steps=100" ,
969+ f"hydra.run.dir={ tmp_path } " ,
954970 ]
955971
956972 result1 = subprocess .run (cmd_phase1 , check = False , capture_output = True , text = True , env = env )
@@ -974,6 +990,7 @@ def test_scheduler_resume_two_gpu(recipe_path, tmp_path):
974990 "checkpoint.resume_from_checkpoint=true" , # Resume from checkpoint
975991 "lr_scheduler_kwargs.num_warmup_steps=20" ,
976992 "lr_scheduler_kwargs.num_training_steps=100" ,
993+ f"hydra.run.dir={ tmp_path } " ,
977994 ]
978995
979996 result2 = subprocess .run (cmd_phase2 , check = False , capture_output = True , text = True , env = env )
0 commit comments