diff --git a/tests/recipes/test_eleuther_eval.py b/tests/recipes/test_eleuther_eval.py
index ea612bf381..0be74081ca 100644
--- a/tests/recipes/test_eleuther_eval.py
+++ b/tests/recipes/test_eleuther_eval.py
@@ -15,11 +15,10 @@
 from tests.common import TUNE_PATH
 from tests.recipes.utils import (
     llama3_2_vision_test_config,
-    llama3_test_config,
-    write_hf_ckpt_config,
+    MODEL_TEST_CONFIGS,
     write_hf_vision_ckpt_config,
 )
-from tests.test_utils import CKPT_MODEL_PATHS, gpu_test
+from tests.test_utils import CKPT_MODEL_PATHS, gpu_test, TOKENIZER_PATHS
 
 
 class TestEleutherEval:
@@ -48,20 +47,19 @@ def expected_vision_acc(self):
         }
 
     @pytest.mark.parametrize(
-        "eval_name, expected_acc, bsz",
+        "model_ckpt, eval_name, expected_acc, bsz",
         [
-            ("truthfulqa_gen", 0.1818, 4),
-            ("truthfulqa_mc2", 0.3015, 4),
+            ("llama3_hf_138m", "truthfulqa_gen", 0.1818, 4),
+            ("llama3_hf_138m", "truthfulqa_mc2", 0.3015, 4),
         ],
     )
     @pytest.mark.integration_test
     @gpu_test(gpu_count=1)
     def test_torchtune_checkpoint_eval_results(
-        self, caplog, monkeypatch, tmpdir, eval_name, expected_acc, bsz
+        self, caplog, monkeypatch, tmpdir, eval_name, expected_acc, bsz, model_ckpt
     ):
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
 
         # explicitly setting limit to an odd number here to ensure generation tasks
         # work with KV-cacheing + bsz > 1 - we'll receive batches of size 4, 4, 3
@@ -69,13 +67,10 @@ def test_torchtune_checkpoint_eval_results(
         tune run eleuther_eval \
             --config eleuther_evaluation \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}'\
             tokenizer.prompt_template=null \
             limit=11 \
             dtype=fp32 \
@@ -83,7 +78,7 @@ def test_torchtune_checkpoint_eval_results(
             batch_size={bsz} \
         """.split()
 
-        model_config = llama3_test_config()
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
         cmd = cmd + model_config
 
         monkeypatch.setattr(sys, "argv", cmd)
@@ -108,28 +103,30 @@ def test_torchtune_checkpoint_eval_results(
     @pytest.mark.integration_test
     @pytest.mark.usefixtures("hide_correct_version_number")
     @gpu_test(gpu_count=1)
-    def test_eval_recipe_errors_without_lm_eval(self, monkeypatch, tmpdir):
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
+    def test_eval_recipe_errors_without_lm_eval(self, monkeypatch, tmpdir, model_ckpt):
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
 
         cmd = f"""
         tune run eleuther_eval \
             --config eleuther_evaluation \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             limit=1 \
             dtype=fp32 \
         """.split()
 
-        model_config = llama3_test_config()
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
         cmd = cmd + model_config
 
         monkeypatch.setattr(sys, "argv", cmd)
@@ -142,27 +139,26 @@ def test_eval_recipe_errors_without_lm_eval(self, monkeypatch, tmpdir):
 
     @pytest.mark.integration_test
     @gpu_test(gpu_count=1)
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
     def test_eval_recipe_errors_with_quantization_hf_checkpointer(
-        self, monkeypatch, tmpdir
+        self, monkeypatch, tmpdir, model_ckpt
     ):
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
-
-        # Config file needed for model conversion.
-        write_hf_ckpt_config(ckpt_dir)
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
 
         cmd = f"""
         tune run eleuther_eval \
             --config eleuther_evaluation \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelHFCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             limit=1 \
             dtype=fp32 \
@@ -170,7 +166,7 @@ def test_eval_recipe_errors_with_quantization_hf_checkpointer(
             quantizer.groupsize=256 \
         """.split()
 
-        model_config = llama3_test_config()
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
         cmd = cmd + model_config
 
         monkeypatch.setattr(sys, "argv", cmd)
@@ -183,22 +179,26 @@ def test_eval_recipe_errors_with_quantization_hf_checkpointer(
 
     @pytest.mark.integration_test
     @gpu_test(gpu_count=1)
-    def test_eval_recipe_errors_with_qat_quantizer(self, monkeypatch, tmpdir):
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
+    def test_eval_recipe_errors_with_qat_quantizer(
+        self, monkeypatch, tmpdir, model_ckpt
+    ):
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
 
         cmd = f"""
         tune run eleuther_eval \
             --config eleuther_evaluation \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             limit=1 \
             dtype=fp32 \
@@ -206,7 +206,7 @@ def test_eval_recipe_errors_with_qat_quantizer(self, monkeypatch, tmpdir):
             quantizer.groupsize=32\
         """.split()
 
-        model_config = llama3_test_config()
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
         cmd = cmd + model_config
 
         monkeypatch.setattr(sys, "argv", cmd)
@@ -223,6 +223,9 @@ def test_meta_eval_vision(self, caplog, monkeypatch, tmpdir, expected_vision_acc
         ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
         ckpt_dir = ckpt_path.parent
 
+        # Config file needed for model conversion.
+        write_hf_vision_ckpt_config(ckpt_dir)
+
         cmd = f"""
         tune run eleuther_eval \
             --config llama3_2_vision/11B_evaluation \
diff --git a/tests/recipes/test_full_dpo_distributed.py b/tests/recipes/test_full_dpo_distributed.py
index f20533c934..4534695d05 100644
--- a/tests/recipes/test_full_dpo_distributed.py
+++ b/tests/recipes/test_full_dpo_distributed.py
@@ -11,11 +11,7 @@
 import pytest
 import torch
 from tests.common import TUNE_PATH
-from tests.recipes.utils import (
-    dummy_stack_exchange_dataset_config,
-    MODEL_TEST_CONFIGS,
-    write_hf_ckpt_config,
-)
+from tests.recipes.utils import dummy_stack_exchange_dataset_config, MODEL_TEST_CONFIGS
 from tests.test_utils import (
     CKPT_MODEL_PATHS,
     gen_log_file_name,
@@ -48,8 +44,14 @@ def _get_test_config_overrides(self, dtype_str: str = "fp32", epochs: int = 2):
         ] + dummy_stack_exchange_dataset_config()
 
     @pytest.mark.integration_test
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
     @gpu_test(gpu_count=2)
-    def test_training_state_on_resume(self, tmpdir, monkeypatch):
+    def test_training_state_on_resume(self, tmpdir, monkeypatch, model_ckpt):
         """Test whether the recipe state is correctly updated on resume. Since this
         is model agnostic, we should run this on the small model only. The test
         consists of three stages:
@@ -58,37 +60,26 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
             - Make sure final loss matches the expected value of a model successfully resumed from a ckpt
         """
 
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
-        tokenizer_path = Path(TOKENIZER_PATHS["llama3"])
-
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_hf_ckpt_config(ckpt_dir)
-        write_hf_ckpt_config(tmpdir)
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
 
         # Train for two epochs
         cmd_1 = f"""
         tune run --nnodes 1 --nproc_per_node 2 full_dpo_distributed \
             --config llama3_1/8B_full_dpo \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            ref_checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             ref_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            ref_checkpointer.checkpoint_files=[{ckpt_path}]\
+            ref_checkpointer.checkpoint_files=[model.safetensors]\
             ref_checkpointer.output_dir={tmpdir} \
-            ref_checkpointer.model_type=LLAMA3 \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             metric_logger.filename={log_file} \
         """.split()
-        model_config = MODEL_TEST_CONFIGS["llama3"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
         cmd_1 = cmd_1 + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd_1)
         runpy.run_path(TUNE_PATH, run_name="__main__")
@@ -100,7 +91,7 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
         )
 
         # We rename the model and we want to resume from epoch 0 (which trained for 1 epoch)
-        ckpt_to_resume_from = "epoch_0/model-00001-of-00001.bin"
+        ckpt_to_resume_from = "epoch_0/model-00001-of-00001.safetensors"
 
         # Now we resume training from epoch 1
         resumed_log_dir = (tmpdir / "resumed/").mkdir()
@@ -109,16 +100,12 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
         tune run --nnodes 1 --nproc_per_node 2 full_dpo_distributed \
             --config llama3_1/8B_full_dpo \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
             checkpointer.checkpoint_files=[{ckpt_to_resume_from}]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            ref_checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             ref_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            ref_checkpointer.checkpoint_files=[{ckpt_path}]\
+            ref_checkpointer.checkpoint_files=[model.safetensors]\
             ref_checkpointer.output_dir={tmpdir} \
-            ref_checkpointer.model_type=LLAMA3 \
             resume_from_checkpoint=True \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
@@ -135,44 +122,39 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
         )
 
     @pytest.mark.integration_test
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
     @gpu_test(gpu_count=2)
     def test_training_state_on_resume_with_async_checkpointing(
-        self, tmpdir, monkeypatch
+        self, tmpdir, monkeypatch, model_ckpt
     ):
         """Same as above test but with async checkpointing."""
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
-        tokenizer_path = Path(TOKENIZER_PATHS["llama3"])
-
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_hf_ckpt_config(ckpt_dir)
-        write_hf_ckpt_config(tmpdir)
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
 
         # Train for two epochs
         cmd_1 = f"""
         tune run --nnodes 1 --nproc_per_node 2 full_dpo_distributed \
             --config llama3_1/8B_full_dpo \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            ref_checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             ref_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            ref_checkpointer.checkpoint_files=[{ckpt_path}]\
+            ref_checkpointer.checkpoint_files=[model.safetensors]\
             ref_checkpointer.output_dir={tmpdir} \
-            ref_checkpointer.model_type=LLAMA3 \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             metric_logger.filename={log_file} \
             enable_async_checkpointing=True \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
 
         cmd_1 = cmd_1 + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd_1)
@@ -191,16 +173,12 @@ def test_training_state_on_resume_with_async_checkpointing(
         tune run --nnodes 1 --nproc_per_node 2 full_dpo_distributed \
             --config llama3_1/8B_full_dpo \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            ref_checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             ref_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            ref_checkpointer.checkpoint_files=[{ckpt_path}]\
+            ref_checkpointer.checkpoint_files=[model.safetensors]\
             ref_checkpointer.output_dir={tmpdir} \
-            ref_checkpointer.model_type=LLAMA3 \
             resume_from_checkpoint=True \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
diff --git a/tests/recipes/test_knowledge_distillation_distributed.py b/tests/recipes/test_knowledge_distillation_distributed.py
index c3fd00ca3d..e037ba6370 100644
--- a/tests/recipes/test_knowledge_distillation_distributed.py
+++ b/tests/recipes/test_knowledge_distillation_distributed.py
@@ -13,12 +13,7 @@
 import torch
 from omegaconf import OmegaConf
 from tests.common import TUNE_PATH
-from tests.recipes.utils import (
-    CKPT_COMPONENT_MAP,
-    dummy_alpaca_dataset_config,
-    MODEL_TEST_CONFIGS,
-    write_hf_ckpt_config,
-)
+from tests.recipes.utils import dummy_alpaca_dataset_config, MODEL_TEST_CONFIGS
 from tests.test_utils import (
     CKPT_MODEL_PATHS,
     gen_log_file_name,
@@ -53,46 +48,49 @@ def _get_test_config_overrides(self, epochs: int = 2):
             "compile=False",
         ] + dummy_alpaca_dataset_config()
 
-    def _fetch_expected_loss_values(self, model_type):
+    def _fetch_expected_loss_values(self, model_ckpt):
         loss_values_map = {
-            "llama3": [
-                11.777642250061035,
-                11.760451793670654,
-                11.755887508392334,
-                11.76237678527832,
+            "llama3_hf_138m": [
+                # TODO
+                # 11.777642250061035,
+                # 11.760451793670654,
+                # 11.755887508392334,
+                # 11.76237678527832,
             ],
         }
-        return loss_values_map[model_type]
+        return loss_values_map[model_ckpt]
 
     @pytest.mark.integration_test
     @gpu_test(gpu_count=4)
-    def test_loss(self, tmpdir, monkeypatch):
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
+    def test_loss(self, tmpdir, monkeypatch, model_ckpt):
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
-        tokenizer_path = Path(TOKENIZER_PATHS["llama3"])
 
         cmd = f"""
         tune run --nnodes 1 --nproc_per_node 4 knowledge_distillation_distributed \
             --config llama3_2/8B_to_1B_KD_lora_distributed \
             output_dir={tmpdir} \
-            checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}] \
+            checkpointer.checkpoint_files=[model.safetensors] \
             checkpointer.output_dir={tmpdir} \
-            teacher_checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
             teacher_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            teacher_checkpointer.checkpoint_files=[{ckpt_path}] \
+            teacher_checkpointer.checkpoint_files=[model.safetensors] \
             teacher_checkpointer.output_dir={tmpdir} \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             metric_logger.filename={log_file} \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
         teacher_config = [
-            "teacher_" + config for config in MODEL_TEST_CONFIGS["llama3"]
+            "teacher_" + config for config in MODEL_TEST_CONFIGS[model_ckpt]
         ]
 
         cmd = cmd + self._get_test_config_overrides() + model_config + teacher_config
@@ -103,15 +101,20 @@ def test_loss(self, tmpdir, monkeypatch):
         # only take the first loss
         num_losses = int(len(loss_values) / 4)  # 2 steps per epoch, 2 epochs
         loss_values = loss_values[0::num_losses]
-        expected_loss_values = self._fetch_expected_loss_values("llama3")
-
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)
         torch.testing.assert_close(
             loss_values, expected_loss_values, rtol=1e-5, atol=1e-5
         )
 
     @pytest.mark.integration_test
     @gpu_test(gpu_count=4)
-    def test_training_state_on_resume(self, tmpdir, monkeypatch):
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
+    def test_training_state_on_resume(self, tmpdir, monkeypatch, model_ckpt):
         """Test whether the recipe state is correctly updated on resume. Since this
         is model agnostic, we should run this on the small model only. The test
         consists of three stages:
@@ -120,37 +123,28 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
             - Make sure final loss matches the expected value of a model successfully resumed from a ckpt
         """
 
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
-        tokenizer_path = Path(TOKENIZER_PATHS["llama3"])
-
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_hf_ckpt_config(ckpt_dir)
-        write_hf_ckpt_config(tmpdir)
 
         # Train for two epochs
         cmd_1 = f"""
         tune run --nnodes 1 --nproc_per_node 4 knowledge_distillation_distributed \
             --config llama3_2/8B_to_1B_KD_lora_distributed \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            teacher_checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
             teacher_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            teacher_checkpointer.checkpoint_files=[{ckpt_path}] \
+            teacher_checkpointer.checkpoint_files=[model.safetensors] \
             teacher_checkpointer.output_dir={tmpdir} \
             tokenizer.path={tokenizer_path} \
             tokenizer.prompt_template=null \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
         teacher_config = [
-            "teacher_" + config for config in MODEL_TEST_CONFIGS["llama3"]
+            "teacher_" + config for config in MODEL_TEST_CONFIGS[model_ckpt]
         ]
 
         cmd_1 = (
@@ -166,15 +160,13 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
         tune run --nnodes 1 --nproc_per_node 4 knowledge_distillation_distributed \
             --config llama3_2/8B_to_1B_KD_lora_distributed \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
-            checkpointer.checkpoint_dir={ckpt_dir} \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
-            checkpointer.adapter_checkpoint={os.path.join(epoch_folder_minus_one, f"{ADAPTER_MODEL_FNAME}.pt")}
-            checkpointer.recipe_checkpoint={os.path.join(RECIPE_STATE_DIRNAME, "recipe_state.pt")}
+            checkpointer.checkpoint_dir='{ckpt_dir}' \
+            checkpointer.checkpoint_files=[model.safetensors]\
+            checkpointer.adapter_checkpoint={os.path.join(epoch_folder_minus_one, f"{ADAPTER_MODEL_FNAME}.pt")} \
+            checkpointer.recipe_checkpoint={os.path.join(RECIPE_STATE_DIRNAME, "recipe_state.pt")} \
             checkpointer.output_dir={tmpdir} \
-            teacher_checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
             teacher_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            teacher_checkpointer.checkpoint_files=[{ckpt_path}] \
+            teacher_checkpointer.checkpoint_files=[model.safetensors] \
             teacher_checkpointer.output_dir={tmpdir} \
             resume_from_checkpoint=True \
             metric_logger.filename={log_file} \
@@ -191,7 +183,7 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
         runpy.run_path(TUNE_PATH, run_name="__main__")
 
         # Second epoch only
-        expected_loss_values = self._fetch_expected_loss_values("llama3")[2:]
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)[2:]
         loss_values = get_loss_values_from_metric_logger(log_file)
         # only take the first loss
         num_losses = int(len(loss_values) / 4)  # 2 steps per epoch, 2 epochs
@@ -203,8 +195,14 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
 
     @pytest.mark.integration_test
     @gpu_test(gpu_count=4)
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
     def test_training_state_on_resume_with_async_checkpointing(
-        self, tmpdir, monkeypatch
+        self, tmpdir, monkeypatch, model_ckpt
     ):
         """Test whether the recipe state is correctly updated on resume with async checkpointing. Since this
         is model agnostic, we should run this on the small model only. The test
@@ -214,38 +212,29 @@ def test_training_state_on_resume_with_async_checkpointing(
             - Make sure final loss matches the expected value of a model successfully resumed from a ckpt
         """
 
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
-        tokenizer_path = Path(TOKENIZER_PATHS["llama3"])
-
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_hf_ckpt_config(ckpt_dir)
-        write_hf_ckpt_config(tmpdir)
 
         # Train for two epochs
         cmd_1 = f"""
         tune run --nnodes 1 --nproc_per_node 4 knowledge_distillation_distributed \
             --config llama3_2/8B_to_1B_KD_lora_distributed \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            teacher_checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
             teacher_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            teacher_checkpointer.checkpoint_files=[{ckpt_path}] \
+            teacher_checkpointer.checkpoint_files=[model.safetensors] \
             teacher_checkpointer.output_dir={tmpdir} \
             enable_async_checkpointing=True \
             tokenizer.path={tokenizer_path} \
             tokenizer.prompt_template=null \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
         teacher_config = [
-            "teacher_" + config for config in MODEL_TEST_CONFIGS["llama3"]
+            "teacher_" + config for config in MODEL_TEST_CONFIGS[model_ckpt]
         ]
 
         cmd_1 = (
@@ -254,18 +243,15 @@ def test_training_state_on_resume_with_async_checkpointing(
         monkeypatch.setattr(sys, "argv", cmd_1)
         runpy.run_path(TUNE_PATH, run_name="__main__")
 
-        # Resume training
         cmd_2 = f"""
         tune run --nnodes 1 --nproc_per_node 4 knowledge_distillation_distributed \
             --config llama3_2/8B_to_1B_KD_lora_distributed \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir={ckpt_dir} \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            teacher_checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
             teacher_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            teacher_checkpointer.checkpoint_files=[{ckpt_path}] \
+            teacher_checkpointer.checkpoint_files=[model.safetensors] \
             teacher_checkpointer.output_dir={tmpdir} \
             resume_from_checkpoint=True \
             enable_async_checkpointing=True \
@@ -283,7 +269,7 @@ def test_training_state_on_resume_with_async_checkpointing(
         runpy.run_path(TUNE_PATH, run_name="__main__")
 
         # Second epoch only
-        expected_loss_values = self._fetch_expected_loss_values("llama3")[2:]
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)[2:]
         loss_values = get_loss_values_from_metric_logger(log_file)
         # only take the first loss
         num_losses = int(len(loss_values) / 4)  # 2 steps per epoch, 2 epochs
@@ -295,36 +281,35 @@ def test_training_state_on_resume_with_async_checkpointing(
 
     @pytest.mark.integration_test
     @gpu_test(gpu_count=4)
-    def test_save_and_load_merged_weights(self, tmpdir, monkeypatch):
-        ckpt_type = "tune"
-        model_type = "llama3"
-        ckpt_component = CKPT_COMPONENT_MAP[ckpt_type]
-        ckpt = model_type + "_" + ckpt_type
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        tokenizer_path = Path(TOKENIZER_PATHS[model_type])
-        ckpt_dir = ckpt_path.parent
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
+    def test_save_and_load_merged_weights(self, tmpdir, monkeypatch, model_ckpt):
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
 
         cmd = f"""
         tune run --nnodes 1 --nproc_per_node 4 knowledge_distillation_distributed \
             --config llama3_2/8B_to_1B_KD_lora_distributed \
             output_dir={tmpdir} \
-            checkpointer._component_={ckpt_component} \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}] \
+            checkpointer.checkpoint_files=[model.safetensors] \
             checkpointer.output_dir={tmpdir} \
-            teacher_checkpointer._component_={ckpt_component} \
             teacher_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            teacher_checkpointer.checkpoint_files=[{ckpt_path}] \
+            teacher_checkpointer.checkpoint_files=[model.safetensors] \
             teacher_checkpointer.output_dir={tmpdir} \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             metric_logger.filename={log_file} \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS[model_type + "_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
         teacher_config = [
-            "teacher_" + config for config in MODEL_TEST_CONFIGS[model_type]
+            "teacher_" + config for config in MODEL_TEST_CONFIGS[model_ckpt]
         ]
 
         cmd = cmd + self._get_test_config_overrides() + model_config + teacher_config
@@ -340,7 +325,7 @@ def test_save_and_load_merged_weights(self, tmpdir, monkeypatch):
         lora_model = config.instantiate(OmegaConf.from_dotlist(model_config).model)
 
         # Build base llama3 model for loading merged weights
-        base_llama3_config = MODEL_TEST_CONFIGS[model_type]
+        base_llama3_config = MODEL_TEST_CONFIGS[model_ckpt]
         llama3_model = config.instantiate(
             OmegaConf.from_dotlist(base_llama3_config).model
         )
@@ -350,16 +335,18 @@ def test_save_and_load_merged_weights(self, tmpdir, monkeypatch):
         adpt_path = os.path.join(tmpdir, epoch_folder, f"{ADAPTER_MODEL_FNAME}.pt")
         lora_sd = safe_torch_load(adpt_path, weights_only=True)
 
-        with open(ckpt_path, "rb") as f:
-            base_model_sd = torch.load(f, weights_only=True)
+        # Load base model from HF checkpoint
+        base_model_path = os.path.join(ckpt_dir, "model.safetensors")
+        base_model_sd = safe_torch_load(base_model_path, weights_only=True)
+
         lora_model.load_state_dict(lora_sd, strict=False)
         lora_model.load_state_dict(base_model_sd, strict=False)
         baseline_out = lora_model(inputs)
 
-        # Load merged final ckpt directly into 3 and call fwd
-        suffix = ".safetensors" if ckpt_type == "hf" else ".bin"
+        # Load merged final ckpt directly into llama3 and call fwd
         model_ckpt_fname = (
-            SHARD_FNAME.format(cpt_idx="1".zfill(5), num_shards="1".zfill(5)) + suffix
+            SHARD_FNAME.format(cpt_idx="1".zfill(5), num_shards="1".zfill(5))
+            + ".safetensors"
         )
         model_path = os.path.join(tmpdir, epoch_folder, model_ckpt_fname)
         sd = safe_torch_load(model_path, weights_only=True)
diff --git a/tests/recipes/test_knowledge_distillation_single_device.py b/tests/recipes/test_knowledge_distillation_single_device.py
index b95aea8306..1ad35bd610 100644
--- a/tests/recipes/test_knowledge_distillation_single_device.py
+++ b/tests/recipes/test_knowledge_distillation_single_device.py
@@ -13,12 +13,7 @@
 import torch
 from omegaconf import OmegaConf
 from tests.common import TUNE_PATH
-from tests.recipes.utils import (
-    CKPT_COMPONENT_MAP,
-    dummy_alpaca_dataset_config,
-    MODEL_TEST_CONFIGS,
-    write_hf_ckpt_config,
-)
+from tests.recipes.utils import dummy_alpaca_dataset_config, MODEL_TEST_CONFIGS
 from tests.test_utils import (
     CKPT_MODEL_PATHS,
     gen_log_file_name,
@@ -54,17 +49,21 @@ def _get_test_config_overrides(self, dtype_str: str = "fp32", epochs: int = 2):
             "clip_grad_norm=100",
         ] + dummy_alpaca_dataset_config()
 
-    def _fetch_expected_loss_values(self, model_type):
+    def _fetch_expected_loss_values(self, model_ckpt):
         loss_values_map = {
-            "llama3": [11.7898, 11.7825, 11.7788, 11.7671],
+            # "llama3_hf_138m": [11.7898, 11.7825, 11.7788, 11.7671],
         }
-        return loss_values_map[model_type]
+        return loss_values_map[model_ckpt]
 
     @pytest.mark.integration_test
     @pytest.mark.parametrize(
         "micro_batch_size, gradient_accumulation_steps, compile",
         [(8, 1, False), (2, 4, True), (2, 4, False)],
     )
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [("llama3_hf_138m")],
+    )
     @gpu_test(gpu_count=1)
     def test_loss(
         self,
@@ -73,15 +72,11 @@ def test_loss(
         compile,
         tmpdir,
         monkeypatch,
+        model_ckpt,
     ):
         config = "qwen2/1.5_to_0.5B_KD_lora_single_device"
-        model_type = "llama3"
-        ckpt_type = "tune"
-        ckpt_component = CKPT_COMPONENT_MAP[ckpt_type]
-        ckpt = model_type + "_" + ckpt_type
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        tokenizer_path = Path(TOKENIZER_PATHS[model_type])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
 
         cmd = f"""
@@ -90,28 +85,22 @@ def test_loss(
             output_dir={tmpdir} \
             batch_size={micro_batch_size} \
             gradient_accumulation_steps={gradient_accumulation_steps} \
-            checkpointer._component_={ckpt_component} \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}] \
+            checkpointer.checkpoint_files=[model.safetensors] \
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type={model_type.upper()} \
-            teacher_checkpointer._component_={ckpt_component} \
             teacher_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            teacher_checkpointer.checkpoint_files=[{ckpt_path}] \
+            teacher_checkpointer.checkpoint_files=[model.safetensors] \
             teacher_checkpointer.output_dir={tmpdir} \
-            teacher_checkpointer.model_type={model_type.upper()} \
-            tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             ~tokenizer.merges_file \
-            metric_logger._component_=torchtune.training.metric_logging.DiskLogger \
             metric_logger.filename={log_file} \
             compile={compile} \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS[model_type + "_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
         teacher_config = [
-            "teacher_" + config for config in MODEL_TEST_CONFIGS[model_type]
+            "teacher_" + config for config in MODEL_TEST_CONFIGS[model_ckpt]
         ]
 
         cmd = (
@@ -132,14 +121,20 @@ def test_loss(
         # only take the first loss
         num_losses = int(len(loss_values) / 4)  # 2 steps per epoch, 2 epochs
         loss_values = loss_values[0::num_losses]
-        expected_loss_values = self._fetch_expected_loss_values(model_type)
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)
         torch.testing.assert_close(
             loss_values, expected_loss_values, rtol=1e-5, atol=1e-5
         )
 
     @pytest.mark.integration_test
     @gpu_test(gpu_count=1)
-    def test_training_state_on_resume(self, tmpdir, monkeypatch):
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
+    def test_training_state_on_resume(self, tmpdir, monkeypatch, model_ckpt):
         """Test whether the recipe state is correctly updated on resume. Since this
         is model agnostic, we should run this on the small model only. The test
         consists of three stages:
@@ -148,42 +143,29 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
             - Make sure final loss matches the expected value of a model successfully resumed from a ckpt
         """
 
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
-        tokenizer_path = Path(TOKENIZER_PATHS["llama3"])
-
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_hf_ckpt_config(ckpt_dir)
-        write_hf_ckpt_config(tmpdir)
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
 
         # Train for two epochs
         cmd_1 = f"""
         tune run knowledge_distillation_single_device \
             --config qwen2/1.5_to_0.5B_KD_lora_single_device \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            teacher_checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
             teacher_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            teacher_checkpointer.checkpoint_files=[{ckpt_path}] \
+            teacher_checkpointer.checkpoint_files=[model.safetensors] \
             teacher_checkpointer.output_dir={tmpdir} \
-            teacher_checkpointer.model_type=LLAMA3 \
-            tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
             tokenizer.path={tokenizer_path} \
             tokenizer.prompt_template=null \
             ~tokenizer.merges_file \
-            metric_logger._component_=torchtune.training.metric_logging.DiskLogger \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
         teacher_config = [
-            "teacher_" + config for config in MODEL_TEST_CONFIGS["llama3"]
+            "teacher_" + config for config in MODEL_TEST_CONFIGS[model_ckpt]
         ]
 
         cmd_1 = (
@@ -200,22 +182,16 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
         tune run knowledge_distillation_single_device \
             --config qwen2/1.5_to_0.5B_KD_lora_single_device \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir={ckpt_dir} \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
-            checkpointer.adapter_checkpoint={os.path.join(epoch_folder_minus_one, f"{ADAPTER_MODEL_FNAME}.pt")}
-            checkpointer.recipe_checkpoint={os.path.join(RECIPE_STATE_DIRNAME, "recipe_state.pt")}
+            checkpointer.checkpoint_files=[model.safetensors]\
+            checkpointer.adapter_checkpoint={os.path.join(epoch_folder_minus_one, f"{ADAPTER_MODEL_FNAME}.pt")}\
+            checkpointer.recipe_checkpoint={os.path.join(RECIPE_STATE_DIRNAME, "recipe_state.pt")}\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            teacher_checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
             teacher_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            teacher_checkpointer.checkpoint_files=[{ckpt_path}] \
+            teacher_checkpointer.checkpoint_files=[model.safetensors] \
             teacher_checkpointer.output_dir={tmpdir} \
-            teacher_checkpointer.model_type=LLAMA3 \
             resume_from_checkpoint=True \
-            metric_logger._component_=torchtune.training.metric_logging.DiskLogger \
             metric_logger.filename={log_file} \
-            tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
             tokenizer.path={tokenizer_path} \
             tokenizer.prompt_template=null \
             ~tokenizer.merges_file \
@@ -231,7 +207,7 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
             runpy.run_path(TUNE_PATH, run_name="__main__")
 
         # Second epoch only
-        expected_loss_values = self._fetch_expected_loss_values("llama3")[2:]
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)[2:]
         loss_values = get_loss_values_from_metric_logger(log_file)
         # only take the first loss
         num_losses = int(len(loss_values) / 4)  # 2 steps per epoch, 2 epochs
@@ -243,8 +219,14 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
 
     @pytest.mark.integration_test
     @gpu_test(gpu_count=1)
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
     def test_training_state_on_resume_with_async_checkpointing(
-        self, tmpdir, monkeypatch
+        self, tmpdir, monkeypatch, model_ckpt
     ):
         """Test whether the recipe state is correctly updated on resume. Since this
         is model agnostic, we should run this on the small model only. The test
@@ -254,42 +236,29 @@ def test_training_state_on_resume_with_async_checkpointing(
             - Make sure final loss matches the expected value of a model successfully resumed from a ckpt
         """
 
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
-        tokenizer_path = Path(TOKENIZER_PATHS["llama3"])
-
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_hf_ckpt_config(ckpt_dir)
-        write_hf_ckpt_config(tmpdir)
 
         # Train for two epochs
         cmd_1 = f"""
         tune run knowledge_distillation_single_device \
             --config qwen2/1.5_to_0.5B_KD_lora_single_device \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            teacher_checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
             teacher_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            teacher_checkpointer.checkpoint_files=[{ckpt_path}] \
+            teacher_checkpointer.checkpoint_files=[model.safetensors] \
             teacher_checkpointer.output_dir={tmpdir} \
-            teacher_checkpointer.model_type=LLAMA3 \
-            tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
             tokenizer.path={tokenizer_path} \
             tokenizer.prompt_template=null \
             ~tokenizer.merges_file \
-            metric_logger._component_=torchtune.training.metric_logging.DiskLogger \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
         teacher_config = [
-            "teacher_" + config for config in MODEL_TEST_CONFIGS["llama3"]
+            "teacher_" + config for config in MODEL_TEST_CONFIGS[model_ckpt]
         ]
 
         cmd_1 = (
@@ -306,22 +275,16 @@ def test_training_state_on_resume_with_async_checkpointing(
         tune run knowledge_distillation_single_device \
             --config qwen2/1.5_to_0.5B_KD_lora_single_device \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir={ckpt_dir} \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
-            checkpointer.adapter_checkpoint={os.path.join(epoch_folder_minus_one, f"{ADAPTER_MODEL_FNAME}.pt")}
-            checkpointer.recipe_checkpoint={os.path.join(RECIPE_STATE_DIRNAME, "recipe_state.pt")}
+            checkpointer.checkpoint_files=[model.safetensors]\
+            checkpointer.adapter_checkpoint={os.path.join(epoch_folder_minus_one, f"{ADAPTER_MODEL_FNAME}.pt")}\
+            checkpointer.recipe_checkpoint={os.path.join(RECIPE_STATE_DIRNAME, "recipe_state.pt")}\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            teacher_checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
             teacher_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            teacher_checkpointer.checkpoint_files=[{ckpt_path}] \
+            teacher_checkpointer.checkpoint_files=[model.safetensors] \
             teacher_checkpointer.output_dir={tmpdir} \
-            teacher_checkpointer.model_type=LLAMA3 \
             resume_from_checkpoint=True \
-            metric_logger._component_=torchtune.training.metric_logging.DiskLogger \
             metric_logger.filename={log_file} \
-            tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
             tokenizer.path={tokenizer_path} \
             tokenizer.prompt_template=null \
             ~tokenizer.merges_file \
@@ -337,7 +300,7 @@ def test_training_state_on_resume_with_async_checkpointing(
             runpy.run_path(TUNE_PATH, run_name="__main__")
 
         # Second epoch only
-        expected_loss_values = self._fetch_expected_loss_values("llama3")[2:]
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)[2:]
         loss_values = get_loss_values_from_metric_logger(log_file)
         # only take the first loss
         num_losses = int(len(loss_values) / 4)  # 2 steps per epoch, 2 epochs
@@ -348,41 +311,37 @@ def test_training_state_on_resume_with_async_checkpointing(
         )
 
     @pytest.mark.integration_test
-    def test_save_and_load_merged_weights(self, tmpdir, monkeypatch):
-        ckpt_type = "tune"
-        model_type = "llama3"
-        ckpt_component = CKPT_COMPONENT_MAP[ckpt_type]
-        ckpt = model_type + "_" + ckpt_type
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        tokenizer_path = Path(TOKENIZER_PATHS[model_type])
-        ckpt_dir = ckpt_path.parent
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
+    @gpu_test(gpu_count=1)
+    def test_save_and_load_merged_weights(self, tmpdir, monkeypatch, model_ckpt):
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
 
         cmd = f"""
         tune run knowledge_distillation_single_device \
             --config qwen2/1.5_to_0.5B_KD_lora_single_device \
             output_dir={tmpdir} \
-            checkpointer._component_={ckpt_component} \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}] \
+            checkpointer.checkpoint_files=[model.safetensors] \
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type={model_type.upper()} \
-            teacher_checkpointer._component_={ckpt_component} \
             teacher_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            teacher_checkpointer.checkpoint_files=[{ckpt_path}] \
+            teacher_checkpointer.checkpoint_files=[model.safetensors] \
             teacher_checkpointer.output_dir={tmpdir} \
-            teacher_checkpointer.model_type={model_type.upper()} \
-            tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             ~tokenizer.merges_file \
-            metric_logger._component_=torchtune.training.metric_logging.DiskLogger \
             metric_logger.filename={log_file} \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS[model_type + "_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
         teacher_config = [
-            "teacher_" + config for config in MODEL_TEST_CONFIGS[model_type]
+            "teacher_" + config for config in MODEL_TEST_CONFIGS[model_ckpt]
         ]
 
         cmd = (
@@ -404,7 +363,7 @@ def test_save_and_load_merged_weights(self, tmpdir, monkeypatch):
         lora_model = config.instantiate(OmegaConf.from_dotlist(model_config).model)
 
         # Build base llama3 model for loading merged weights
-        base_llama3_config = MODEL_TEST_CONFIGS[model_type]
+        base_llama3_config = MODEL_TEST_CONFIGS[model_ckpt]
         llama3_model = config.instantiate(
             OmegaConf.from_dotlist(base_llama3_config).model
         )
@@ -414,16 +373,18 @@ def test_save_and_load_merged_weights(self, tmpdir, monkeypatch):
         adpt_path = os.path.join(tmpdir, epoch_folder, f"{ADAPTER_MODEL_FNAME}.pt")
         lora_sd = safe_torch_load(adpt_path, weights_only=True)
 
-        with open(ckpt_path, "rb") as f:
-            base_model_sd = torch.load(f, weights_only=True)
+        # Load base model from HF checkpoint
+        base_model_path = os.path.join(ckpt_dir, "model.safetensors")
+        base_model_sd = safe_torch_load(base_model_path, weights_only=True)
+
         lora_model.load_state_dict(lora_sd, strict=False)
         lora_model.load_state_dict(base_model_sd, strict=False)
         baseline_out = lora_model(inputs)
 
         # Load merged final ckpt directly into 3 and call fwd
-        suffix = ".safetensors" if ckpt_type == "hf" else ".bin"
         model_ckpt_fname = (
-            SHARD_FNAME.format(cpt_idx="1".zfill(5), num_shards="1".zfill(5)) + suffix
+            SHARD_FNAME.format(cpt_idx="1".zfill(5), num_shards="1".zfill(5))
+            + ".safetensors"
         )
         model_path = os.path.join(tmpdir, epoch_folder, model_ckpt_fname)
         sd = safe_torch_load(model_path, weights_only=True)
diff --git a/tests/recipes/test_lora_dpo_distributed.py b/tests/recipes/test_lora_dpo_distributed.py
index abc1cddc07..ae361cbdbb 100644
--- a/tests/recipes/test_lora_dpo_distributed.py
+++ b/tests/recipes/test_lora_dpo_distributed.py
@@ -13,11 +13,7 @@
 import torch
 from omegaconf import OmegaConf
 from tests.common import TUNE_PATH
-from tests.recipes.utils import (
-    dummy_stack_exchange_dataset_config,
-    MODEL_TEST_CONFIGS,
-    write_hf_ckpt_config,
-)
+from tests.recipes.utils import dummy_stack_exchange_dataset_config, MODEL_TEST_CONFIGS
 from tests.test_utils import (
     CKPT_MODEL_PATHS,
     gen_log_file_name,
diff --git a/tests/recipes/test_lora_dpo_single_device.py b/tests/recipes/test_lora_dpo_single_device.py
index e7766d6330..a4bae256cd 100644
--- a/tests/recipes/test_lora_dpo_single_device.py
+++ b/tests/recipes/test_lora_dpo_single_device.py
@@ -13,16 +13,13 @@
 import torch
 from omegaconf import OmegaConf
 from tests.common import TUNE_PATH
-from tests.recipes.utils import (
-    dummy_stack_exchange_dataset_config,
-    MODEL_TEST_CONFIGS,
-    write_hf_ckpt_config,
-)
+from tests.recipes.utils import dummy_stack_exchange_dataset_config, MODEL_TEST_CONFIGS
 from tests.test_utils import (
     CKPT_MODEL_PATHS,
     gen_log_file_name,
     get_loss_values_from_metric_logger,
     gpu_test,
+    TOKENIZER_PATHS,
 )
 from torchtune import config
 
@@ -54,8 +51,14 @@ def _get_test_config_overrides(self, dtype_str: str = "fp32", epochs: int = 2):
     @pytest.mark.parametrize("save_adapter_weights_only", [False, True])
     @pytest.mark.integration_test
     @gpu_test(gpu_count=1)
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
     def test_training_state_on_resume(
-        self, tmpdir, monkeypatch, save_adapter_weights_only
+        self, tmpdir, monkeypatch, save_adapter_weights_only, model_ckpt
     ):
         """Test whether the recipe state is correctly updated on resume. Since this
         is model agnostic, we should run this on the small model only. The test
@@ -66,16 +69,10 @@ def test_training_state_on_resume(
         Unlike `tests.recipes.test_lora_finetune_single_device`, this test does not use pre-computed loss
         values to benchmark against. This test just ensures the loss values are identical when resuming.
         """
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
 
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_hf_ckpt_config(ckpt_dir)
-        write_hf_ckpt_config(tmpdir)
-
         # Train for two epochs
         cmd_1 = f"""
         tune run lora_dpo_single_device \
@@ -83,12 +80,10 @@ def test_training_state_on_resume(
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj'] \
             model.apply_lora_to_mlp=False \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             save_adapter_weights_only={save_adapter_weights_only} \
             metric_logger.filename={log_file} \
@@ -96,7 +91,7 @@ def test_training_state_on_resume(
             enable_activation_offloading=False \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
 
         cmd_1 = cmd_1 + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd_1)
@@ -117,16 +112,13 @@ def test_training_state_on_resume(
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj'] \
             model.apply_lora_to_mlp=False \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir={ckpt_dir} \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.adapter_checkpoint={os.path.join(tmpdir, epoch_folder_minus_one, f"{ADAPTER_MODEL_FNAME}.pt")}
             checkpointer.recipe_checkpoint={os.path.join(tmpdir, RECIPE_STATE_DIRNAME, "recipe_state.pt")}
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
             resume_from_checkpoint=True \
             metric_logger.filename={resumed_log_file} \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
             tokenizer.prompt_template=null \
             enable_activation_checkpointing=True \
             enable_activation_offloading=False \
@@ -145,8 +137,14 @@ def test_training_state_on_resume(
 
     @pytest.mark.parametrize("save_adapter_weights_only", [False, True])
     @pytest.mark.integration_test
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
     def test_training_state_on_resume_with_async_checkpointing(
-        self, tmpdir, monkeypatch, save_adapter_weights_only
+        self, tmpdir, monkeypatch, save_adapter_weights_only, model_ckpt
     ):
         """Test whether the recipe state is correctly updated on resume. Since this
         is model agnostic, we should run this on the small model only. The test
@@ -157,16 +155,10 @@ def test_training_state_on_resume_with_async_checkpointing(
         Unlike `tests.recipes.test_lora_finetune_single_device`, this test does not use pre-computed loss
         values to benchmark against. This test just ensures the loss values are identical when resuming.
         """
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
 
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_hf_ckpt_config(ckpt_dir)
-        write_hf_ckpt_config(tmpdir)
-
         # Train for two epochs
         cmd_1 = f"""
         tune run lora_dpo_single_device \
@@ -174,12 +166,10 @@ def test_training_state_on_resume_with_async_checkpointing(
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj'] \
             model.apply_lora_to_mlp=False \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             save_adapter_weights_only={save_adapter_weights_only} \
             metric_logger.filename={log_file} \
@@ -188,7 +178,7 @@ def test_training_state_on_resume_with_async_checkpointing(
             enable_async_checkpointing=True \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
 
         cmd_1 = cmd_1 + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd_1)
@@ -209,16 +199,14 @@ def test_training_state_on_resume_with_async_checkpointing(
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj'] \
             model.apply_lora_to_mlp=False \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir={ckpt_dir} \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.adapter_checkpoint={os.path.join(tmpdir, epoch_folder_minus_one, f"{ADAPTER_MODEL_FNAME}.pt")}
             checkpointer.recipe_checkpoint={os.path.join(tmpdir, RECIPE_STATE_DIRNAME, "recipe_state.pt")}
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
             resume_from_checkpoint=True \
             metric_logger.filename={resumed_log_file} \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             enable_activation_checkpointing=True \
             enable_activation_offloading=False \
@@ -238,10 +226,15 @@ def test_training_state_on_resume_with_async_checkpointing(
 
     @pytest.mark.integration_test
     @gpu_test(gpu_count=1)
-    def test_save_and_load_merged_weights(self, tmpdir, monkeypatch):
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
+    def test_save_and_load_merged_weights(self, tmpdir, monkeypatch, model_ckpt):
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
 
         cmd = f"""
         tune run lora_dpo_single_device \
@@ -249,18 +242,16 @@ def test_save_and_load_merged_weights(self, tmpdir, monkeypatch):
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj'] \
             model.apply_lora_to_mlp=False \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             enable_activation_checkpointing=False \
             enable_activation_offloading=False \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
 
         cmd = cmd + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd)
@@ -276,7 +267,7 @@ def test_save_and_load_merged_weights(self, tmpdir, monkeypatch):
         lora_model = config.instantiate(OmegaConf.from_dotlist(model_config).model)
 
         # Build base llama3 model for loading merged weights
-        base_llama3_config = MODEL_TEST_CONFIGS["llama3"]
+        base_llama3_config = MODEL_TEST_CONFIGS[model_ckpt]
         llama3_model = config.instantiate(
             OmegaConf.from_dotlist(base_llama3_config).model
         )
@@ -286,14 +277,16 @@ def test_save_and_load_merged_weights(self, tmpdir, monkeypatch):
         adpt_path = os.path.join(tmpdir, epoch_folder, f"{ADAPTER_MODEL_FNAME}.pt")
         lora_sd = safe_torch_load(adpt_path, weights_only=True)
 
-        with open(ckpt_path, "rb") as f:
-            base_model_sd = torch.load(f, weights_only=True)
+        # Load base model from HF checkpoint
+        base_model_path = os.path.join(ckpt_dir, "model.safetensors")
+        base_model_sd = safe_torch_load(base_model_path, weights_only=True)
+
         lora_model.load_state_dict(lora_sd, strict=False)
         lora_model.load_state_dict(base_model_sd, strict=False)
         baseline_out = lora_model(inputs)
 
         # Load merged final ckpt directly into llama3 and call fwd
-        suffix = ".bin"
+        suffix = ".safetensors"
         model_ckpt_fname = (
             SHARD_FNAME.format(cpt_idx="1".zfill(5), num_shards="1".zfill(5)) + suffix
         )
diff --git a/tests/recipes/test_lora_finetune_distributed.py b/tests/recipes/test_lora_finetune_distributed.py
index 7e82d31c9c..6b7411300b 100644
--- a/tests/recipes/test_lora_finetune_distributed.py
+++ b/tests/recipes/test_lora_finetune_distributed.py
@@ -13,12 +13,7 @@
 import torch
 from omegaconf import OmegaConf
 from tests.common import TUNE_PATH
-from tests.recipes.utils import (
-    CKPT_COMPONENT_MAP,
-    dummy_alpaca_dataset_config,
-    MODEL_TEST_CONFIGS,
-    write_hf_ckpt_config,
-)
+from tests.recipes.utils import dummy_alpaca_dataset_config, MODEL_TEST_CONFIGS
 from tests.test_utils import (
     CKPT_MODEL_PATHS,
     gen_log_file_name,
@@ -50,19 +45,20 @@ def _get_test_config_overrides(self):
             "compile=False",
         ] + dummy_alpaca_dataset_config()
 
-    def _fetch_expected_loss_values(self, model_type):
+    def _fetch_expected_loss_values(self, model_ckpt):
         # These values have been validated against single device recipe test via
         # https://gist.github.com/ebsmothers/f1c3db7c66655a23a91e0290360960c4
+        # TODO
         loss_values_map = {
-            "llama3": [11.9839, 11.9691, 11.9617, 11.9383],
+            # "llama3": [11.9839, 11.9691, 11.9617, 11.9383],
         }
-        return loss_values_map[model_type]
+        return loss_values_map[model_ckpt]
 
     @pytest.mark.integration_test
     @gpu_test(gpu_count=2)
     @pytest.mark.parametrize(
-        "micro_batch_size, gradient_accumulation_steps, reshard_after_forward",
-        [(4, 1, True), (1, 4, False)],
+        "model_ckpt, micro_batch_size, gradient_accumulation_steps, reshard_after_forward",
+        [("llama3_hf_138m", 4, 1, True), ("llama3_hf_138m", 1, 4, False)],
     )
     def test_loss(
         self,
@@ -71,11 +67,12 @@ def test_loss(
         reshard_after_forward,
         tmpdir,
         monkeypatch,
+        model_ckpt,
     ):
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
+
         cmd = f"""
         tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed
             --config llama3/8B_lora \
@@ -84,26 +81,24 @@ def test_loss(
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj'] \
             model.apply_lora_to_mlp=False \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
             metric_logger.filename={log_file} \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             reshard_after_forward={reshard_after_forward} \
             enable_activation_checkpointing=False \
             enable_activation_offloading=False \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
 
         cmd = cmd + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd)
         runpy.run_path(TUNE_PATH, run_name="__main__")
         loss_values = get_loss_values_from_metric_logger(log_file)
-        expected_loss_values = self._fetch_expected_loss_values("llama3")
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)
         torch.testing.assert_close(
             loss_values, expected_loss_values, rtol=1e-5, atol=1e-5
         )
@@ -111,19 +106,13 @@ def test_loss(
     @pytest.mark.integration_test
     @gpu_test(gpu_count=2)
     @pytest.mark.parametrize(
-        "config, model_type, ckpt_type, save_adapter_weights_only",
+        "config, model_ckpt, save_adapter_weights_only",
         [
-            ("llama3/8B_lora", "llama3", "tune", False),
+            ("llama3/8B_lora", "llama3_hf_138m", False),
         ],
     )
     def test_training_state_on_resume(
-        self,
-        config,
-        model_type,
-        ckpt_type,
-        tmpdir,
-        monkeypatch,
-        save_adapter_weights_only,
+        self, config, tmpdir, monkeypatch, save_adapter_weights_only, model_ckpt
     ):
         """Test whether the recipe state is correctly updated on resume. Since this
         is model agnostic, we should run this on the small model only. The test
@@ -132,19 +121,11 @@ def test_training_state_on_resume(
             - Resume training after epoch 1
             - Make sure final loss matches the expected value of a model successfully resumed from a ckpt
         """
-        ckpt_component = CKPT_COMPONENT_MAP[ckpt_type]
-        ckpt = model_type + "_" + ckpt_type
 
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        tokenizer_path = Path(TOKENIZER_PATHS[model_type])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
 
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_hf_ckpt_config(ckpt_dir)
-        write_hf_ckpt_config(tmpdir)
-
         # Train for two epochs
         cmd_1 = f"""
         tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed \
@@ -154,11 +135,9 @@ def test_training_state_on_resume(
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj'] \
             model.apply_lora_to_mlp=False \
-            checkpointer._component_={ckpt_component} \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type={model_type.upper()} \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             save_adapter_weights_only={save_adapter_weights_only} \
@@ -166,7 +145,7 @@ def test_training_state_on_resume(
             enable_activation_offloading=True \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS[model_type + "_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
 
         cmd_1 = cmd_1 + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd_1)
@@ -183,13 +162,11 @@ def test_training_state_on_resume(
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj'] \
             model.apply_lora_to_mlp=False \
-            checkpointer._component_={ckpt_component} \
             checkpointer.checkpoint_dir={ckpt_dir} \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
-            checkpointer.adapter_checkpoint={os.path.join(epoch_folder_minus_one, f"{ADAPTER_MODEL_FNAME}.pt")}
-            checkpointer.recipe_checkpoint={os.path.join(RECIPE_STATE_DIRNAME, "recipe_state.pt")}
+            checkpointer.checkpoint_files=[model.safetensors]\
+            checkpointer.adapter_checkpoint={os.path.join(epoch_folder_minus_one, f"{ADAPTER_MODEL_FNAME}.pt")}\
+            checkpointer.recipe_checkpoint={os.path.join(RECIPE_STATE_DIRNAME, "recipe_state.pt")}\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type={model_type.upper()} \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             resume_from_checkpoint=True \
@@ -202,7 +179,7 @@ def test_training_state_on_resume(
         monkeypatch.setattr(sys, "argv", cmd_2)
         runpy.run_path(TUNE_PATH, run_name="__main__")
 
-        expected_loss_values = self._fetch_expected_loss_values(model_type)[2:]
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)[2:]
 
         loss_values = get_loss_values_from_metric_logger(log_file)
         torch.testing.assert_close(
@@ -212,19 +189,13 @@ def test_training_state_on_resume(
     @pytest.mark.integration_test
     @gpu_test(gpu_count=2)
     @pytest.mark.parametrize(
-        "config, model_type, ckpt_type, save_adapter_weights_only",
+        "config, model_ckpt, save_adapter_weights_only",
         [
-            ("llama3/8B_lora", "llama3", "tune", False),
+            ("llama3/8B_lora", "llama3_hf_138m", False),
         ],
     )
     def test_training_state_on_resume_with_async_checkpointing(
-        self,
-        config,
-        model_type,
-        ckpt_type,
-        tmpdir,
-        monkeypatch,
-        save_adapter_weights_only,
+        self, config, tmpdir, monkeypatch, save_adapter_weights_only, model_ckpt
     ):
         """Test whether the recipe state is correctly updated on resume. Since this
         is model agnostic, we should run this on the small model only. The test
@@ -233,19 +204,10 @@ def test_training_state_on_resume_with_async_checkpointing(
             - Resume training after epoch 1
             - Make sure final loss matches the expected value of a model successfully resumed from a ckpt
         """
-        ckpt_component = CKPT_COMPONENT_MAP[ckpt_type]
-        ckpt = model_type + "_" + ckpt_type
-
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        tokenizer_path = Path(TOKENIZER_PATHS[model_type])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
 
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_hf_ckpt_config(ckpt_dir)
-        write_hf_ckpt_config(tmpdir)
-
         # Train for two epochs
         cmd_1 = f"""
         tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed \
@@ -255,11 +217,9 @@ def test_training_state_on_resume_with_async_checkpointing(
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj'] \
             model.apply_lora_to_mlp=False \
-            checkpointer._component_={ckpt_component} \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type={model_type.upper()} \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             save_adapter_weights_only={save_adapter_weights_only} \
@@ -268,7 +228,7 @@ def test_training_state_on_resume_with_async_checkpointing(
             enable_async_checkpointing=True \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS[model_type + "_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
 
         cmd_1 = cmd_1 + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd_1)
@@ -283,11 +243,9 @@ def test_training_state_on_resume_with_async_checkpointing(
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj'] \
             model.apply_lora_to_mlp=False \
-            checkpointer._component_={ckpt_component} \
             checkpointer.checkpoint_dir={ckpt_dir} \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type={model_type.upper()} \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             resume_from_checkpoint=True \
@@ -301,7 +259,7 @@ def test_training_state_on_resume_with_async_checkpointing(
         monkeypatch.setattr(sys, "argv", cmd_2)
         runpy.run_path(TUNE_PATH, run_name="__main__")
 
-        expected_loss_values = self._fetch_expected_loss_values(model_type)[2:]
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)[2:]
 
         loss_values = get_loss_values_from_metric_logger(log_file)
         torch.testing.assert_close(
@@ -310,20 +268,17 @@ def test_training_state_on_resume_with_async_checkpointing(
 
     @pytest.mark.integration_test
     @pytest.mark.parametrize(
-        "recipe_config, model_type, ckpt_type, use_dora",
+        "recipe_config, use_dora, model_ckpt",
         [
-            ("llama3/8B_lora", "llama3", "tune", False),
+            ("llama3/8B_lora", False, "llama3_hf_138m"),
         ],
     )
     @gpu_test(gpu_count=2)
     def test_save_and_load_merged_weights(
-        self, recipe_config, model_type, ckpt_type, use_dora, tmpdir, monkeypatch
+        self, recipe_config, use_dora, tmpdir, monkeypatch, model_ckpt
     ):
-        ckpt_component = CKPT_COMPONENT_MAP[ckpt_type]
-        ckpt = model_type + "_" + ckpt_type
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        tokenizer_path = Path(TOKENIZER_PATHS[model_type])
-        ckpt_dir = ckpt_path.parent
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
         cmd = f"""
         tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed \
             --config {recipe_config} \
@@ -332,19 +287,16 @@ def test_save_and_load_merged_weights(
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj'] \
             model.apply_lora_to_mlp=False \
-            model=torchtune.models.lora_small_test_model \
-            checkpointer._component_={ckpt_component} \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type={model_type.upper()} \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             enable_activation_checkpointing=True \
             enable_activation_offloading=True \
         """.split()
         model_config = MODEL_TEST_CONFIGS[
-            model_type + ("_dora" if use_dora else "_lora")
+            model_ckpt + ("_dora" if use_dora else "_lora")
         ]
         cmd = cmd + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd)
@@ -359,7 +311,7 @@ def test_save_and_load_merged_weights(
         lora_model = config.instantiate(OmegaConf.from_dotlist(model_config).model)
 
         # Build base model for loading merged weights
-        base_config = MODEL_TEST_CONFIGS[model_type]
+        base_config = MODEL_TEST_CONFIGS[model_ckpt]
         model = config.instantiate(OmegaConf.from_dotlist(base_config).model)
 
         # Load base model and trained adapter weights into LoRA model and call fwd
@@ -367,15 +319,16 @@ def test_save_and_load_merged_weights(
         adpt_path = os.path.join(tmpdir, epoch_folder, f"{ADAPTER_MODEL_FNAME}.pt")
         lora_sd = safe_torch_load(adpt_path, weights_only=True)
 
-        with open(ckpt_path, "rb") as f:
-            base_model_sd = torch.load(f, weights_only=True)
+        # Load base model from HF checkpoint
+        base_model_path = os.path.join(ckpt_dir, "model.safetensors")
+        base_model_sd = safe_torch_load(base_model_path, weights_only=True)
 
         lora_model.load_state_dict(lora_sd, strict=False)
         lora_model.load_state_dict(base_model_sd, strict=False)
         baseline_out = lora_model(inputs)
 
         # Load merged final ckpt directly into model and call fwd
-        suffix = ".safetensors" if ckpt_type == "hf" else ".bin"
+        suffix = ".safetensors"
         model_ckpt_fname = (
             SHARD_FNAME.format(cpt_idx="1".zfill(5), num_shards="1".zfill(5)) + suffix
         )
diff --git a/tests/recipes/test_lora_finetune_single_device.py b/tests/recipes/test_lora_finetune_single_device.py
index a42c298466..de9d675555 100644
--- a/tests/recipes/test_lora_finetune_single_device.py
+++ b/tests/recipes/test_lora_finetune_single_device.py
@@ -13,12 +13,7 @@
 import torch
 from omegaconf import OmegaConf
 from tests.common import TUNE_PATH
-from tests.recipes.utils import (
-    CKPT_COMPONENT_MAP,
-    dummy_alpaca_dataset_config,
-    MODEL_TEST_CONFIGS,
-    write_hf_ckpt_config,
-)
+from tests.recipes.utils import dummy_alpaca_dataset_config, MODEL_TEST_CONFIGS
 from tests.test_utils import (
     CKPT_MODEL_PATHS,
     gen_log_file_name,
@@ -50,23 +45,25 @@ def _get_test_config_overrides(self, dtype_str: str = "fp32", epochs: int = 2):
             "clip_grad_norm=100",
         ] + dummy_alpaca_dataset_config()
 
-    def _fetch_expected_loss_values(self, model_type):
+    def _fetch_expected_loss_values(self, model_ckpt):
+        # TODO
         loss_values_map = {
-            "llama3": [11.9838, 11.9691, 11.9616, 11.9383],
+            # "llama3": [11.9838, 11.9691, 11.9616, 11.9383],
         }
-        return loss_values_map[model_type]
+        return loss_values_map[model_ckpt]
 
     def _fetch_qlora_expected_loss_values(self, dtype):
+        # TODO
         if dtype == "bf16":
             return [11.9857, 11.9711, 11.9619, 11.9407]
         return [11.9857, 11.9712, 11.9613, 11.9408]
 
     @pytest.mark.integration_test
     @pytest.mark.parametrize(
-        "config, model_type, ckpt_type, micro_batch_size, gradient_accumulation_steps, compile",
+        "config, model_ckpt, micro_batch_size, gradient_accumulation_steps, compile",
         [
-            ("llama3/8B_lora_single_device", "llama3", "tune", 2, 4, True),
-            ("llama3/8B_lora_single_device", "llama3", "tune", 2, 4, False),
+            ("llama3/8B_lora_single_device", "llama3_hf_138m", 2, 4, True),
+            ("llama3/8B_lora_single_device", "llama3_hf_138m", 2, 4, False),
         ],
     )
     @gpu_test(gpu_count=1)
@@ -76,16 +73,12 @@ def test_loss(
         micro_batch_size,
         gradient_accumulation_steps,
         config,
-        model_type,
-        ckpt_type,
+        model_ckpt,
         tmpdir,
         monkeypatch,
     ):
-        ckpt_component = CKPT_COMPONENT_MAP[ckpt_type]
-        ckpt = model_type + "_" + ckpt_type
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        tokenizer_path = Path(TOKENIZER_PATHS[model_type])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
 
         cmd = f"""
@@ -96,18 +89,16 @@ def test_loss(
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj'] \
             model.apply_lora_to_mlp=False \
-            checkpointer._component_={ckpt_component} \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}] \
+            checkpointer.checkpoint_files=[model.safetensors] \
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type={model_type.upper()} \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             metric_logger.filename={log_file} \
             compile={compile} \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS[model_type + "_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
 
         cmd = cmd + self._get_test_config_overrides(dtype_str="fp32") + model_config
         monkeypatch.setattr(sys, "argv", cmd)
@@ -119,19 +110,19 @@ def test_loss(
             torch._dynamo.reset()
 
         loss_values = get_loss_values_from_metric_logger(log_file)
-        expected_loss_values = self._fetch_expected_loss_values(model_type)
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)
         torch.testing.assert_close(
             loss_values, expected_loss_values, rtol=1e-5, atol=1e-5
         )
 
     @pytest.mark.integration_test
     @pytest.mark.parametrize(
-        "dtype, compile, micro_batch_size, gradient_accumulation_steps",
+        "model_ckpt, dtype, compile, micro_batch_size, gradient_accumulation_steps",
         [
-            ("fp32", True, 8, 1),
-            ("bf16", True, 2, 4),
-            ("fp32", False, 4, 2),
-            ("bf16", False, 8, 1),
+            ("llama3_hf_138m", "fp32", True, 8, 1),
+            ("llama3_hf_138m", "bf16", True, 2, 4),
+            ("llama3_hf_138m", "fp32", False, 4, 2),
+            ("llama3_hf_138m", "bf16", False, 8, 1),
         ],
     )
     @gpu_test(gpu_count=1)
@@ -143,10 +134,10 @@ def test_loss_qlora(
         gradient_accumulation_steps,
         tmpdir,
         monkeypatch,
+        model_ckpt,
     ):
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
 
         cmd = f"""
@@ -157,20 +148,18 @@ def test_loss_qlora(
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj','k_proj','output_proj'] \
             model.apply_lora_to_mlp=True \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
             metric_logger.filename={log_file} \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             compile={compile} \
             enable_activation_checkpointing=False \
             enable_activation_offloading=False \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3_qlora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_qlora"]
 
         cmd = cmd + self._get_test_config_overrides(dtype_str=dtype) + model_config
         monkeypatch.setattr(sys, "argv", cmd)
@@ -190,8 +179,14 @@ def test_loss_qlora(
     @pytest.mark.parametrize("save_adapter_weights_only", [False, True])
     @pytest.mark.integration_test
     @gpu_test(gpu_count=1)
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
     def test_training_state_on_resume(
-        self, tmpdir, monkeypatch, save_adapter_weights_only
+        self, tmpdir, monkeypatch, save_adapter_weights_only, model_ckpt
     ):
         """Test whether the recipe state is correctly updated on resume. Since this
         is model agnostic, we should run this on the small model only. The test
@@ -201,16 +196,10 @@ def test_training_state_on_resume(
             - Make sure final loss matches the expected value of a model successfully resumed from a ckpt
         """
 
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
 
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_hf_ckpt_config(ckpt_dir)
-        write_hf_ckpt_config(tmpdir)
-
         # Train for two epochs
         cmd_1 = f"""
         tune run lora_finetune_single_device \
@@ -220,19 +209,17 @@ def test_training_state_on_resume(
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj','k_proj','output_proj'] \
             model.apply_lora_to_mlp=True \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             save_adapter_weights_only={save_adapter_weights_only} \
             enable_activation_checkpointing=True \
             enable_activation_offloading=False \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
 
         cmd_1 = cmd_1 + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd_1)
@@ -250,16 +237,14 @@ def test_training_state_on_resume(
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj','k_proj','output_proj'] \
             model.apply_lora_to_mlp=True \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir={ckpt_dir} \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
-            checkpointer.adapter_checkpoint={os.path.join(epoch_folder_minus_one, f"{ADAPTER_MODEL_FNAME}.pt")}
-            checkpointer.recipe_checkpoint={os.path.join(RECIPE_STATE_DIRNAME, "recipe_state.pt")}
+            checkpointer.checkpoint_files=[model.safetensors]\
+            checkpointer.adapter_checkpoint={os.path.join(epoch_folder_minus_one, f"{ADAPTER_MODEL_FNAME}.pt")} \
+            checkpointer.recipe_checkpoint={os.path.join(RECIPE_STATE_DIRNAME, "recipe_state.pt")} \
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
             resume_from_checkpoint=True \
             metric_logger.filename={log_file} \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             enable_activation_checkpointing=True \
             enable_activation_offloading=False \
@@ -270,7 +255,7 @@ def test_training_state_on_resume(
             runpy.run_path(TUNE_PATH, run_name="__main__")
 
         # Second epoch only
-        expected_loss_values = self._fetch_expected_loss_values("llama3")[2:]
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)[2:]
         loss_values = get_loss_values_from_metric_logger(log_file)[:2]
 
         torch.testing.assert_close(
@@ -279,8 +264,15 @@ def test_training_state_on_resume(
 
     @pytest.mark.parametrize("save_adapter_weights_only", [False, True])
     @pytest.mark.integration_test
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
+    @gpu_test(gpu_count=1)
     def test_training_state_on_resume_with_async_checkpointing(
-        self, tmpdir, monkeypatch, save_adapter_weights_only
+        self, tmpdir, monkeypatch, save_adapter_weights_only, model_ckpt
     ):
         """Test whether the recipe state is correctly updated on resume. Since this
         is model agnostic, we should run this on the small model only. The test
@@ -290,16 +282,10 @@ def test_training_state_on_resume_with_async_checkpointing(
             - Make sure final loss matches the expected value of a model successfully resumed from a ckpt
         """
 
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
 
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_hf_ckpt_config(ckpt_dir)
-        write_hf_ckpt_config(tmpdir)
-
         # Train for two epochs
         cmd_1 = f"""
         tune run lora_finetune_single_device \
@@ -309,12 +295,10 @@ def test_training_state_on_resume_with_async_checkpointing(
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj','k_proj','output_proj'] \
             model.apply_lora_to_mlp=True \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             save_adapter_weights_only={save_adapter_weights_only} \
             enable_activation_checkpointing=True \
@@ -322,7 +306,7 @@ def test_training_state_on_resume_with_async_checkpointing(
             enable_async_checkpointing=True \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
 
         cmd_1 = cmd_1 + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd_1)
@@ -338,14 +322,12 @@ def test_training_state_on_resume_with_async_checkpointing(
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj','k_proj','output_proj'] \
             model.apply_lora_to_mlp=True \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir={ckpt_dir} \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
             resume_from_checkpoint=True \
             metric_logger.filename={log_file} \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             enable_activation_checkpointing=True \
             enable_activation_offloading=False \
@@ -357,7 +339,7 @@ def test_training_state_on_resume_with_async_checkpointing(
             runpy.run_path(TUNE_PATH, run_name="__main__")
 
         # Second epoch only
-        expected_loss_values = self._fetch_expected_loss_values("llama3")[2:]
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)[2:]
         loss_values = get_loss_values_from_metric_logger(log_file)[:2]
 
         torch.testing.assert_close(
@@ -367,10 +349,17 @@ def test_training_state_on_resume_with_async_checkpointing(
     @pytest.mark.parametrize("use_dora", [False, True])
     @pytest.mark.integration_test
     @gpu_test(gpu_count=1)
-    def test_save_and_load_merged_weights(self, tmpdir, monkeypatch, use_dora):
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
+    def test_save_and_load_merged_weights(
+        self, tmpdir, monkeypatch, use_dora, model_ckpt
+    ):
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
 
         cmd = f"""
         tune run lora_finetune_single_device \
@@ -378,21 +367,19 @@ def test_save_and_load_merged_weights(self, tmpdir, monkeypatch, use_dora):
             output_dir={tmpdir} \
             model.lora_attn_modules=['q_proj','v_proj','k_proj','output_proj'] \
             model.apply_lora_to_mlp=True \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             enable_activation_checkpointing=True \
             enable_activation_offloading=False \
         """.split()
 
         if use_dora:
-            model_config = MODEL_TEST_CONFIGS["llama3_dora"]
+            model_config = MODEL_TEST_CONFIGS[model_ckpt + "_dora"]
         else:
-            model_config = MODEL_TEST_CONFIGS["llama3_lora"]
+            model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
 
         cmd = cmd + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd)
@@ -408,7 +395,7 @@ def test_save_and_load_merged_weights(self, tmpdir, monkeypatch, use_dora):
         lora_model = config.instantiate(OmegaConf.from_dotlist(model_config).model)
 
         # Build base llama3 model for loading merged weights
-        base_llama3_config = MODEL_TEST_CONFIGS["llama3"]
+        base_llama3_config = MODEL_TEST_CONFIGS[model_ckpt]
         llama3_model = config.instantiate(
             OmegaConf.from_dotlist(base_llama3_config).model
         )
@@ -418,15 +405,18 @@ def test_save_and_load_merged_weights(self, tmpdir, monkeypatch, use_dora):
         adpt_path = os.path.join(tmpdir, epoch_folder, f"{ADAPTER_MODEL_FNAME}.pt")
         lora_sd = safe_torch_load(adpt_path, weights_only=True)
 
-        with open(ckpt_path, "rb") as f:
-            base_model_sd = torch.load(f, weights_only=True)
+        # Load base model from HF checkpoint
+        base_model_path = os.path.join(ckpt_dir, "model.safetensors")
+        base_model_sd = safe_torch_load(base_model_path, weights_only=True)
+
         lora_model.load_state_dict(lora_sd, strict=False)
         lora_model.load_state_dict(base_model_sd, strict=False)
         baseline_out = lora_model(inputs)
 
         # Load merged final ckpt directly into llama3 and call fwd
         model_ckpt_fname = (
-            SHARD_FNAME.format(cpt_idx="1".zfill(5), num_shards="1".zfill(5)) + ".bin"
+            SHARD_FNAME.format(cpt_idx="1".zfill(5), num_shards="1".zfill(5))
+            + ".safetensors"
         )
         model_path = os.path.join(tmpdir, epoch_folder, model_ckpt_fname)
         sd = safe_torch_load(model_path, weights_only=True)
diff --git a/tests/recipes/test_ppo_full_finetune_single_device.py b/tests/recipes/test_ppo_full_finetune_single_device.py
index d2afae4e4f..7ed1c3db75 100644
--- a/tests/recipes/test_ppo_full_finetune_single_device.py
+++ b/tests/recipes/test_ppo_full_finetune_single_device.py
@@ -16,7 +16,6 @@
 from tests.recipes.utils import (
     dummy_text_completion_alpaca_dataset_config,
     MODEL_TEST_CONFIGS,
-    write_llama3_hf_ckpt_config,
 )
 from tests.test_utils import (
     CKPT_MODEL_PATHS,
@@ -47,7 +46,6 @@ def _get_test_config_overrides(self):
             "enable_activation_checkpointing=False",
             "enable_activation_offloading=False",
             f"tokenizer.path={TOKENIZER_PATHS['llama3']}",
-            "tokenizer._component_=torchtune.models.llama3.llama3_tokenizer",
             "tokenizer.prompt_template=null",
             "tokenizer.max_seq_len=64",
             "seed=9",
@@ -82,44 +80,41 @@ def _get_expected_loss_values(self):
         or torch.cuda.get_device_capability() not in ((8, 6)),
         reason="Unexpected device type",
     )
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [("llama3_hf_138m")],
+    )
     @gpu_test(gpu_count=1)
-    def test_loss(self, tmpdir, monkeypatch):
+    def test_loss(self, tmpdir, monkeypatch, model_ckpt):
         reward_ckpt_path = Path(CKPT_MODEL_PATHS["llama3_reward_hf"])
-        policy_ckpt_path = Path(CKPT_MODEL_PATHS["llama3_tune"])
+        policy_ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
 
-        ckpt_dir = policy_ckpt_path.parent
         log_file = gen_log_file_name(tmpdir)
         policy_tmpdir = (tmpdir / "policy").mkdir()
         value_tmpdir = (tmpdir / "value").mkdir()
 
-        write_llama3_hf_ckpt_config(ckpt_dir)
         cmd_1 = f"""
         tune run ppo_full_finetune_single_device \
             --config mistral/7B_full_ppo_low_memory \
             output_dir={tmpdir} \
-            checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
-            checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{policy_ckpt_path}]\
+            checkpointer.checkpoint_dir='{policy_ckpt_dir}' \
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={policy_tmpdir} \
-            checkpointer.model_type=LLAMA3 \
 
-            ref_policy_checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
-            ref_policy_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            ref_policy_checkpointer.checkpoint_files=[{policy_ckpt_path}]\
-            ref_policy_checkpointer.model_type=LLAMA3 \
+            ref_policy_checkpointer.checkpoint_dir='{policy_ckpt_dir}' \
+            ref_policy_checkpointer.checkpoint_files=[model.safetensors]\
 
-            value_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            value_checkpointer.checkpoint_files=[{reward_ckpt_path}]\
+            value_checkpointer.checkpoint_dir='{reward_ckpt_path.parent}' \
+            value_checkpointer.checkpoint_files=[{reward_ckpt_path.name}]\
             value_checkpointer.output_dir={value_tmpdir} \
 
-            reward_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            reward_checkpointer.checkpoint_files=[{reward_ckpt_path}]\
+            reward_checkpointer.checkpoint_dir='{reward_ckpt_path.parent}' \
+            reward_checkpointer.checkpoint_files=[{reward_ckpt_path.name}]\
 
-            metric_logger._component_=torchtune.training.metric_logging.DiskLogger \
             metric_logger.filename={log_file} \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
         model_config = [k.replace("model.", "policy_model.") for k in model_config]
 
         reward_and_value_model_config = MODEL_TEST_CONFIGS["llama3_classifier"]
@@ -146,24 +141,21 @@ def test_loss(self, tmpdir, monkeypatch):
         )
 
     @pytest.mark.integration_test
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [("llama3_hf_138m")],
+    )
     @gpu_test(gpu_count=1)
-    def test_training_state_on_resume(self, tmpdir, monkeypatch):
+    def test_training_state_on_resume(self, tmpdir, monkeypatch, model_ckpt):
         """Test whether the recipe state correctly saved and restored after training."""
 
         reward_ckpt_path = Path(CKPT_MODEL_PATHS["llama3_reward_hf"])
-        policy_ckpt_path = Path(CKPT_MODEL_PATHS["llama3_tune"])
+        policy_ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
 
-        ckpt_dir = policy_ckpt_path.parent
         log_file = gen_log_file_name(tmpdir)
         policy_tmpdir = (tmpdir / "policy").mkdir()
         value_tmpdir = (tmpdir / "value").mkdir()
 
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_llama3_hf_ckpt_config(ckpt_dir)
-        write_llama3_hf_ckpt_config(policy_tmpdir)
-        write_llama3_hf_ckpt_config(value_tmpdir)
-
         # There are 4 steps in total (num_steps / batch size)
         # and the dataset has 8 samples, so each epoch will be 2 batches
         # a single step is a single batch update, and we checkpoint at every epoch (2 steps)
@@ -173,29 +165,24 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
         tune run ppo_full_finetune_single_device \
             --config mistral/7B_full_ppo_low_memory \
             output_dir={tmpdir} \
-            checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
-            checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{policy_ckpt_path}]\
+            checkpointer.checkpoint_dir='{policy_ckpt_dir}' \
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={policy_tmpdir} \
-            checkpointer.model_type=LLAMA3 \
 
-            ref_policy_checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
-            ref_policy_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            ref_policy_checkpointer.checkpoint_files=[{policy_ckpt_path}]\
-            ref_policy_checkpointer.model_type=LLAMA3 \
+            ref_policy_checkpointer.checkpoint_dir='{policy_ckpt_dir}' \
+            ref_policy_checkpointer.checkpoint_files=[model.safetensors]\
 
-            value_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            value_checkpointer.checkpoint_files=[{reward_ckpt_path}]\
+            value_checkpointer.checkpoint_dir='{reward_ckpt_path.parent}' \
+            value_checkpointer.checkpoint_files=[{reward_ckpt_path.name}]\
             value_checkpointer.output_dir={value_tmpdir} \
 
-            reward_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            reward_checkpointer.checkpoint_files=[{reward_ckpt_path}]\
+            reward_checkpointer.checkpoint_dir='{reward_ckpt_path.parent}' \
+            reward_checkpointer.checkpoint_files=[{reward_ckpt_path.name}]\
 
-            metric_logger._component_=torchtune.training.metric_logging.DiskLogger \
             metric_logger.filename={log_file} \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
         model_config = [k.replace("model.", "policy_model.") for k in model_config]
 
         reward_and_value_model_config = MODEL_TEST_CONFIGS["llama3_classifier"]
@@ -222,7 +209,7 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
 
         epoch_folder = get_largest_iter_folder(value_tmpdir)
         epoch_folder_minus_one = f"epoch_{int(epoch_folder.split('_')[-1]) - 1}"
-        policy_suffix = ".bin"
+        policy_suffix = ".safetensors"
         value_suffix = ".safetensors"
         policy_model_ckpt_fname = (
             SHARD_FNAME.format(cpt_idx="1".zfill(5), num_shards="1".zfill(5))
@@ -236,27 +223,22 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
         tune run ppo_full_finetune_single_device \
             --config mistral/7B_full_ppo_low_memory \
             output_dir={tmpdir} \
-            checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
-            checkpointer.checkpoint_dir='{ckpt_dir}' \
+            checkpointer.checkpoint_dir='{policy_tmpdir}' \
             checkpointer.checkpoint_files=[{os.path.join(epoch_folder_minus_one, policy_model_ckpt_fname)}]\
             checkpointer.recipe_checkpoint={os.path.join(RECIPE_STATE_DIRNAME, "recipe_state.pt")}\
             checkpointer.output_dir={policy_tmpdir} \
-            checkpointer.model_type=LLAMA3 \
 
-            ref_policy_checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
-            ref_policy_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            ref_policy_checkpointer.checkpoint_files=[{policy_ckpt_path}]\
-            ref_policy_checkpointer.model_type=LLAMA3 \
+            ref_policy_checkpointer.checkpoint_dir='{policy_ckpt_dir}' \
+            ref_policy_checkpointer.checkpoint_files=[model.safetensors]\
 
-            value_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            value_checkpointer.checkpoint_files=[{os.path.join(value_tmpdir, epoch_folder_minus_one, value_model_ckpt_fname)}]\
+            value_checkpointer.checkpoint_dir='{value_tmpdir}' \
+            value_checkpointer.checkpoint_files=[{os.path.join(epoch_folder_minus_one, value_model_ckpt_fname)}]\
             value_checkpointer.output_dir={value_tmpdir} \
 
-            reward_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            reward_checkpointer.checkpoint_files=[{reward_ckpt_path}]\
+            reward_checkpointer.checkpoint_dir='{reward_ckpt_path.parent}' \
+            reward_checkpointer.checkpoint_files=[{reward_ckpt_path.name}]\
 
             resume_from_checkpoint=True \
-            metric_logger._component_=torchtune.training.metric_logging.DiskLogger \
             metric_logger.filename={resumed_log_file} \
         """.split()
 
@@ -279,8 +261,14 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
         )
 
     @pytest.mark.integration_test
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [("llama3_hf_138m")],
+    )
     @gpu_test(gpu_count=1)
-    def test_training_state_on_resume_with_optimizer_in_bwd(self, tmpdir, monkeypatch):
+    def test_training_state_on_resume_with_optimizer_in_bwd(
+        self, tmpdir, monkeypatch, model_ckpt
+    ):
         """Test whether the recipe state correctly saves and restores optimizer state
         when using ``optimizer_in_bwd``, since the optimizer checkpoint dict will include
         parameters for two models.
@@ -289,47 +277,36 @@ def test_training_state_on_resume_with_optimizer_in_bwd(self, tmpdir, monkeypatc
         """
 
         reward_ckpt_path = Path(CKPT_MODEL_PATHS["llama3_reward_hf"])
-        policy_ckpt_path = Path(CKPT_MODEL_PATHS["llama3_tune"])
+        policy_ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
 
-        ckpt_dir = policy_ckpt_path.parent
         log_file = gen_log_file_name(tmpdir)
         policy_tmpdir = (tmpdir / "policy").mkdir()
         value_tmpdir = (tmpdir / "value").mkdir()
 
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_llama3_hf_ckpt_config(ckpt_dir)
-        write_llama3_hf_ckpt_config(policy_tmpdir)
-        write_llama3_hf_ckpt_config(value_tmpdir)
         cmd_1 = f"""
         tune run ppo_full_finetune_single_device \
             --config mistral/7B_full_ppo_low_memory \
             output_dir={tmpdir} \
-            checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
-            checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{policy_ckpt_path}]\
+            checkpointer.checkpoint_dir='{policy_ckpt_dir}' \
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={policy_tmpdir} \
-            checkpointer.model_type=LLAMA3 \
 
-            ref_policy_checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
-            ref_policy_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            ref_policy_checkpointer.checkpoint_files=[{policy_ckpt_path}]\
-            ref_policy_checkpointer.model_type=LLAMA3 \
+            ref_policy_checkpointer.checkpoint_dir='{policy_ckpt_dir}' \
+            ref_policy_checkpointer.checkpoint_files=[model.safetensors]\
 
-            value_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            value_checkpointer.checkpoint_files=[{reward_ckpt_path}]\
+            value_checkpointer.checkpoint_dir='{reward_ckpt_path.parent}' \
+            value_checkpointer.checkpoint_files=[{reward_ckpt_path.name}]\
             value_checkpointer.output_dir={value_tmpdir} \
 
-            reward_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            reward_checkpointer.checkpoint_files=[{reward_ckpt_path}]\
+            reward_checkpointer.checkpoint_dir='{reward_ckpt_path.parent}' \
+            reward_checkpointer.checkpoint_files=[{reward_ckpt_path.name}]\
 
-            metric_logger._component_=torchtune.training.metric_logging.DiskLogger \
             metric_logger.filename={log_file} \
 
             optimizer_in_bwd=True
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
         model_config = [k.replace("model.", "policy_model.") for k in model_config]
 
         reward_and_value_model_config = MODEL_TEST_CONFIGS["llama3_classifier"]
@@ -357,7 +334,7 @@ def test_training_state_on_resume_with_optimizer_in_bwd(self, tmpdir, monkeypatc
 
         epoch_folder = get_largest_iter_folder(value_tmpdir)
         epoch_folder_minus_one = f"epoch_{int(epoch_folder.split('_')[-1]) - 1}"
-        policy_suffix = ".bin"
+        policy_suffix = ".safetensors"
         value_suffix = ".safetensors"
         policy_model_ckpt_fname = (
             SHARD_FNAME.format(cpt_idx="1".zfill(5), num_shards="1".zfill(5))
@@ -371,27 +348,22 @@ def test_training_state_on_resume_with_optimizer_in_bwd(self, tmpdir, monkeypatc
         tune run ppo_full_finetune_single_device \
             --config mistral/7B_full_ppo_low_memory \
             output_dir={tmpdir} \
-            checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
-            checkpointer.checkpoint_dir='{ckpt_dir}' \
+            checkpointer.checkpoint_dir='{policy_tmpdir}' \
             checkpointer.checkpoint_files=[{os.path.join(epoch_folder_minus_one, policy_model_ckpt_fname)}]\
             checkpointer.recipe_checkpoint={os.path.join(RECIPE_STATE_DIRNAME, "recipe_state.pt")}\
             checkpointer.output_dir={policy_tmpdir} \
-            checkpointer.model_type=LLAMA3 \
 
-            ref_policy_checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
-            ref_policy_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            ref_policy_checkpointer.checkpoint_files=[{policy_ckpt_path}]\
-            ref_policy_checkpointer.model_type=LLAMA3 \
+            ref_policy_checkpointer.checkpoint_dir='{policy_ckpt_dir}' \
+            ref_policy_checkpointer.checkpoint_files=[model.safetensors]\
 
-            value_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            value_checkpointer.checkpoint_files=[{os.path.join(value_tmpdir, epoch_folder_minus_one, value_model_ckpt_fname)}]\
+            value_checkpointer.checkpoint_dir='{value_tmpdir}' \
+            value_checkpointer.checkpoint_files=[{os.path.join(epoch_folder_minus_one, value_model_ckpt_fname)}]\
             value_checkpointer.output_dir={value_tmpdir} \
 
-            reward_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            reward_checkpointer.checkpoint_files=[{reward_ckpt_path}]\
+            reward_checkpointer.checkpoint_dir='{reward_ckpt_path.parent}' \
+            reward_checkpointer.checkpoint_files=[{reward_ckpt_path.name}]\
 
             resume_from_checkpoint=True \
-            metric_logger._component_=torchtune.training.metric_logging.DiskLogger \
             metric_logger.filename={resumed_log_file} \
 
             optimizer_in_bwd=True
diff --git a/tests/recipes/test_qat_distributed.py b/tests/recipes/test_qat_distributed.py
index 1ea8544fd9..083c82175b 100644
--- a/tests/recipes/test_qat_distributed.py
+++ b/tests/recipes/test_qat_distributed.py
@@ -3,7 +3,6 @@
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
-
 import runpy
 
 import sys
@@ -13,12 +12,7 @@
 import torch
 from tests.common import TUNE_PATH
 
-from tests.recipes.utils import (
-    CKPT_COMPONENT_MAP,
-    dummy_alpaca_dataset_config,
-    MODEL_TEST_CONFIGS,
-    write_hf_ckpt_config,
-)
+from tests.recipes.utils import dummy_alpaca_dataset_config, MODEL_TEST_CONFIGS
 from tests.test_utils import (
     CKPT_MODEL_PATHS,
     gen_log_file_name,
@@ -43,68 +37,60 @@ def _get_test_config_overrides(self):
             "log_every_n_steps=1",
         ] + dummy_alpaca_dataset_config()
 
-    def _fetch_expected_loss_values(self, model_type):
+    def _fetch_expected_loss_values(self, model_ckpt):
         loss_values_map = {
-            "llama3": [
+            "llama3_hf_138m": [
+                # TODO
                 11.977460861206055,
                 11.978384017944336,
                 11.946539878845215,
                 11.909686088562012,
             ],
         }
-        return loss_values_map[model_type]
+        return loss_values_map[model_ckpt]
 
     @pytest.mark.integration_test
     @pytest.mark.parametrize(
-        "config, model_type, ckpt_type, micro_batch_size, gradient_accumulation_steps",
+        "config, micro_batch_size, model_ckpt, gradient_accumulation_steps",
         [
-            ("llama3/8B_qat_full", "llama3", "tune", 4, 1),
-            ("llama3/8B_qat_full", "llama3", "tune", 1, 4),
+            ("llama3/8B_qat_full", "llama3_hf_138m", 4, 1),
+            ("llama3/8B_qat_full", "llama3_hf_138m", 1, 4),
         ],
     )
     @gpu_test(gpu_count=4)
     def test_loss(
         self,
         config,
-        model_type,
-        ckpt_type,
+        model_ckpt,
         micro_batch_size,
         gradient_accumulation_steps,
         tmpdir,
         monkeypatch,
     ):
-        ckpt_component = CKPT_COMPONENT_MAP[ckpt_type]
-        ckpt = model_type + "_" + ckpt_type
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        tokenizer_path = Path(TOKENIZER_PATHS[model_type])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
 
-        # Config file needed for model conversion.
-        write_hf_ckpt_config(ckpt_dir)
-
         cmd = f"""
         tune run --nnodes 1 --nproc_per_node 4 qat_distributed \
             --config {config} \
             output_dir={tmpdir} \
             batch_size={micro_batch_size} \
             gradient_accumulation_steps={gradient_accumulation_steps} \
-            checkpointer._component_={ckpt_component} \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type={model_type.upper()} \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             metric_logger.filename={log_file} \
         """.split()
-        model_config = MODEL_TEST_CONFIGS[model_type]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
         cmd = cmd + self._get_test_config_overrides() + model_config
 
         monkeypatch.setattr(sys, "argv", cmd)
         runpy.run_path(TUNE_PATH, run_name="__main__")
         loss_values = get_loss_values_from_metric_logger(log_file)
-        expected_loss_values = self._fetch_expected_loss_values(model_type)
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)
 
         torch.testing.assert_close(
             loss_values, expected_loss_values, rtol=1e-3, atol=1e-3
diff --git a/tests/recipes/test_qat_lora_finetune_distributed.py b/tests/recipes/test_qat_lora_finetune_distributed.py
index 39f7bf9ed9..052ed7e358 100644
--- a/tests/recipes/test_qat_lora_finetune_distributed.py
+++ b/tests/recipes/test_qat_lora_finetune_distributed.py
@@ -13,12 +13,7 @@
 import torch
 from omegaconf import OmegaConf
 from tests.common import TUNE_PATH
-from tests.recipes.utils import (
-    CKPT_COMPONENT_MAP,
-    dummy_alpaca_dataset_config,
-    MODEL_TEST_CONFIGS,
-    write_hf_ckpt_config,
-)
+from tests.recipes.utils import dummy_alpaca_dataset_config, MODEL_TEST_CONFIGS
 from tests.test_utils import (
     CKPT_MODEL_PATHS,
     gen_log_file_name,
@@ -50,22 +45,23 @@ def _get_test_config_overrides(self):
             "compile=False",
         ] + dummy_alpaca_dataset_config()
 
-    def _fetch_expected_loss_values(self, model_type):
+    def _fetch_expected_loss_values(self, model_ckpt):
         loss_values_map = {
-            "llama3": [
+            "llama3_hf_138m": [
+                # TODO
                 11.977421760559082,
                 11.979637145996094,
                 11.948746681213379,
                 11.912514686584473,
             ],
         }
-        return loss_values_map[model_type]
+        return loss_values_map[model_ckpt]
 
     @pytest.mark.integration_test
     @gpu_test(gpu_count=4)
     @pytest.mark.parametrize(
-        "micro_batch_size, gradient_accumulation_steps, should_compile",
-        [(4, 1, True), (1, 4, False)],
+        "model_ckpt, micro_batch_size, gradient_accumulation_steps, should_compile",
+        [("llama3_hf_138m", 4, 1, True), ("llama3_hf_138m", 1, 4, False)],
     )
     def test_loss(
         self,
@@ -74,11 +70,10 @@ def test_loss(
         should_compile,
         tmpdir,
         monkeypatch,
+        model_ckpt,
     ):
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        tokenizer_path = Path(TOKENIZER_PATHS["llama3"])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
         cmd = f"""
         tune run --nnodes 1 --nproc_per_node 4 qat_lora_finetune_distributed
@@ -86,13 +81,11 @@ def test_loss(
             batch_size={micro_batch_size} \
             gradient_accumulation_steps={gradient_accumulation_steps} \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
             metric_logger.filename={log_file} \
-            tokenizer.path={tokenizer_path} \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             compile={should_compile} \
             enable_activation_checkpointing=False \
@@ -100,14 +93,14 @@ def test_loss(
             quantizer.groupsize=32 \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
 
         cmd = cmd + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd)
         runpy.run_path(TUNE_PATH, run_name="__main__")
         loss_values = get_loss_values_from_metric_logger(log_file)
 
-        expected_loss_values = self._fetch_expected_loss_values("llama3")
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)
         torch.testing.assert_close(
             loss_values, expected_loss_values, rtol=1e-5, atol=1e-5
         )
@@ -115,16 +108,15 @@ def test_loss(
     @pytest.mark.integration_test
     @gpu_test(gpu_count=4)
     @pytest.mark.parametrize(
-        "config, model_type, ckpt_type, save_adapter_weights_only",
+        "config, model_ckpt, save_adapter_weights_only",
         [
-            ("llama3/8B_qat_lora", "llama3", "tune", False),
+            ("llama3/8B_qat_lora", "llama3_hf_138m", False),
         ],
     )
     def test_training_state_on_resume(
         self,
         config,
-        model_type,
-        ckpt_type,
+        model_ckpt,
         tmpdir,
         monkeypatch,
         save_adapter_weights_only,
@@ -136,20 +128,12 @@ def test_training_state_on_resume(
             - Resume training after epoch 1
             - Make sure final loss matches the expected value of a model successfully resumed from a ckpt
         """
-        ckpt_component = CKPT_COMPONENT_MAP[ckpt_type]
-        ckpt = model_type + "_" + ckpt_type
-        expected_loss_values = self._fetch_expected_loss_values(model_type)
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)
 
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        tokenizer_path = Path(TOKENIZER_PATHS[model_type])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
 
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_hf_ckpt_config(ckpt_dir)
-        write_hf_ckpt_config(tmpdir)
-
         # Train for two epochs
         cmd_1 = f"""
         tune run --nnodes 1 --nproc_per_node 4 qat_lora_finetune_distributed \
@@ -157,11 +141,9 @@ def test_training_state_on_resume(
             batch_size=4 \
             gradient_accumulation_steps=1 \
             output_dir={tmpdir} \
-            checkpointer._component_={ckpt_component} \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type={model_type.upper()} \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             save_adapter_weights_only={save_adapter_weights_only} \
@@ -170,7 +152,7 @@ def test_training_state_on_resume(
             quantizer.groupsize=32 \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS[model_type + "_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
 
         cmd_1 = cmd_1 + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd_1)
@@ -185,13 +167,11 @@ def test_training_state_on_resume(
             batch_size=4 \
             gradient_accumulation_steps=1 \
             output_dir={tmpdir} \
-            checkpointer._component_={ckpt_component} \
             checkpointer.checkpoint_dir={ckpt_dir} \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
-            checkpointer.adapter_checkpoint={os.path.join(epoch_folder_minus_one, f"{ADAPTER_MODEL_FNAME}.pt")}
-            checkpointer.recipe_checkpoint={os.path.join(RECIPE_STATE_DIRNAME, "recipe_state.pt")}
+            checkpointer.checkpoint_files=[model.safetensors]\
+            checkpointer.adapter_checkpoint={os.path.join(epoch_folder_minus_one, f"{ADAPTER_MODEL_FNAME}.pt")} \
+            checkpointer.recipe_checkpoint={os.path.join(RECIPE_STATE_DIRNAME, "recipe_state.pt")} \
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type={model_type.upper()} \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             resume_from_checkpoint=True \
@@ -205,7 +185,7 @@ def test_training_state_on_resume(
         monkeypatch.setattr(sys, "argv", cmd_2)
         runpy.run_path(TUNE_PATH, run_name="__main__")
 
-        expected_loss_values = self._fetch_expected_loss_values(model_type)[2:]
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)[2:]
 
         loss_values = get_loss_values_from_metric_logger(log_file)
         torch.testing.assert_close(
@@ -215,16 +195,15 @@ def test_training_state_on_resume(
     @pytest.mark.integration_test
     @gpu_test(gpu_count=2)
     @pytest.mark.parametrize(
-        "config, model_type, ckpt_type, save_adapter_weights_only",
+        "config, model_ckpt, save_adapter_weights_only",
         [
-            ("llama3/8B_qat_lora", "llama3", "tune", False),
+            ("llama3/8B_qat_lora", "llama3_hf_138m", False),
         ],
     )
     def test_training_state_on_resume_with_async_checkpointing(
         self,
         config,
-        model_type,
-        ckpt_type,
+        model_ckpt,
         tmpdir,
         monkeypatch,
         save_adapter_weights_only,
@@ -236,20 +215,12 @@ def test_training_state_on_resume_with_async_checkpointing(
             - Resume training after epoch 1
             - Make sure final loss matches the expected value of a model successfully resumed from a ckpt
         """
-        ckpt_component = CKPT_COMPONENT_MAP[ckpt_type]
-        ckpt = model_type + "_" + ckpt_type
-        expected_loss_values = self._fetch_expected_loss_values(model_type)
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)
 
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        tokenizer_path = Path(TOKENIZER_PATHS[model_type])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
 
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_hf_ckpt_config(ckpt_dir)
-        write_hf_ckpt_config(tmpdir)
-
         # Train for two epochs
         cmd_1 = f"""
         tune run --nnodes 1 --nproc_per_node 2 qat_lora_finetune_distributed \
@@ -257,11 +228,9 @@ def test_training_state_on_resume_with_async_checkpointing(
             batch_size=8 \
             gradient_accumulation_steps=1 \
             output_dir={tmpdir} \
-            checkpointer._component_={ckpt_component} \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type={model_type.upper()} \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             save_adapter_weights_only={save_adapter_weights_only} \
@@ -271,7 +240,7 @@ def test_training_state_on_resume_with_async_checkpointing(
             quantizer.groupsize=32 \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS[model_type + "_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
 
         cmd_1 = cmd_1 + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd_1)
@@ -286,13 +255,11 @@ def test_training_state_on_resume_with_async_checkpointing(
             batch_size=8 \
             gradient_accumulation_steps=1 \
             output_dir={tmpdir} \
-            checkpointer._component_={ckpt_component} \
             checkpointer.checkpoint_dir={ckpt_dir} \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
-            checkpointer.adapter_checkpoint={os.path.join(epoch_folder_minus_one, f"{ADAPTER_MODEL_FNAME}.pt")}
-            checkpointer.recipe_checkpoint={os.path.join(RECIPE_STATE_DIRNAME, "recipe_state.pt")}
+            checkpointer.checkpoint_files=[model.safetensors]\
+            checkpointer.adapter_checkpoint={os.path.join(epoch_folder_minus_one, f"{ADAPTER_MODEL_FNAME}.pt")} \
+            checkpointer.recipe_checkpoint={os.path.join(RECIPE_STATE_DIRNAME, "recipe_state.pt")} \
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type={model_type.upper()} \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             resume_from_checkpoint=True \
@@ -307,7 +274,7 @@ def test_training_state_on_resume_with_async_checkpointing(
         monkeypatch.setattr(sys, "argv", cmd_2)
         runpy.run_path(TUNE_PATH, run_name="__main__")
 
-        expected_loss_values = self._fetch_expected_loss_values(model_type)[2:]
+        expected_loss_values = self._fetch_expected_loss_values(model_ckpt)[2:]
 
         loss_values = get_loss_values_from_metric_logger(log_file)
         torch.testing.assert_close(
@@ -316,32 +283,26 @@ def test_training_state_on_resume_with_async_checkpointing(
 
     @pytest.mark.integration_test
     @pytest.mark.parametrize(
-        "recipe_config, model_type, ckpt_type",
+        "recipe_config, model_ckpt",
         [
-            ("llama3/8B_qat_lora", "llama3", "tune"),
+            ("llama3/8B_qat_lora", "llama3_hf_138m"),
         ],
     )
     @gpu_test(gpu_count=4)
     def test_save_and_load_merged_weights(
-        self, recipe_config, model_type, ckpt_type, tmpdir, monkeypatch
+        self, recipe_config, model_ckpt, tmpdir, monkeypatch
     ):
-        ckpt_component = CKPT_COMPONENT_MAP[ckpt_type]
-        ckpt = model_type + "_" + ckpt_type
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        tokenizer_path = Path(TOKENIZER_PATHS[model_type])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         cmd = f"""
         tune run --nnodes 1 --nproc_per_node 4 qat_lora_finetune_distributed \
             --config {recipe_config} \
             batch_size=4 \
             gradient_accumulation_steps=1 \
             output_dir={tmpdir} \
-            model=torchtune.models.lora_small_test_model \
-            checkpointer._component_={ckpt_component} \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type={model_type.upper()} \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             enable_activation_checkpointing=True \
@@ -349,7 +310,7 @@ def test_save_and_load_merged_weights(
             quantizer.groupsize=32 \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS[model_type + "_lora"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt + "_lora"]
 
         cmd = cmd + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd)
@@ -364,7 +325,7 @@ def test_save_and_load_merged_weights(
         lora_model = config.instantiate(OmegaConf.from_dotlist(model_config).model)
 
         # Build base model for loading merged weights
-        base_config = MODEL_TEST_CONFIGS[model_type]
+        base_config = MODEL_TEST_CONFIGS[model_ckpt]
         model = config.instantiate(OmegaConf.from_dotlist(base_config).model)
 
         # Load base model and trained adapter weights into LoRA model and call fwd
@@ -372,14 +333,16 @@ def test_save_and_load_merged_weights(
         adpt_path = os.path.join(tmpdir, epoch_folder, f"{ADAPTER_MODEL_FNAME}.pt")
         lora_sd = safe_torch_load(adpt_path, weights_only=True)
 
-        with open(ckpt_path, "rb") as f:
-            base_model_sd = torch.load(f, weights_only=True)
+        # Load base model from HF checkpoint
+        base_model_path = os.path.join(ckpt_dir, "model.safetensors")
+        base_model_sd = safe_torch_load(base_model_path, weights_only=True)
+
         lora_model.load_state_dict(lora_sd, strict=False)
         lora_model.load_state_dict(base_model_sd, strict=False)
         baseline_out = lora_model(inputs)
 
         # Load merged final ckpt directly into model and call fwd
-        suffix = ".safetensors" if ckpt_type == "hf" else ".bin"
+        suffix = ".safetensors"
         model_ckpt_fname = (
             SHARD_FNAME.format(cpt_idx="1".zfill(5), num_shards="1".zfill(5)) + suffix
         )
diff --git a/tests/recipes/test_qat_single_device.py b/tests/recipes/test_qat_single_device.py
index e9c6fe52fc..0244a22b6d 100644
--- a/tests/recipes/test_qat_single_device.py
+++ b/tests/recipes/test_qat_single_device.py
@@ -12,11 +12,7 @@
 import torch
 from tests.common import TUNE_PATH
 
-from tests.recipes.utils import (
-    dummy_alpaca_dataset_config,
-    MODEL_TEST_CONFIGS,
-    write_hf_ckpt_config,
-)
+from tests.recipes.utils import dummy_alpaca_dataset_config, MODEL_TEST_CONFIGS
 from tests.test_utils import (
     CKPT_MODEL_PATHS,
     gen_log_file_name,
@@ -40,53 +36,47 @@ def _get_test_config_overrides(self):
             "log_every_n_steps=1",
         ] + dummy_alpaca_dataset_config()
 
-    def _fetch_expected_loss_values(self, model_type, ckpt_type):
-        # logic here may need to be adjusted in the future
-        return [12.0118, 11.9262, 11.8976, 11.9700]
+    def _fetch_expected_loss_values(self, model_ckpt):
+        expected_losses = {
+            "llama3": [12.0118, 11.9262, 11.8976, 11.9700],
+            "llama3_hf_138m": [],
+        }
+        return expected_losses[model_ckpt]
 
     @pytest.mark.integration_test
     @gpu_test(gpu_count=1)
     @pytest.mark.parametrize(
-        "model_type, ckpt_type, micro_batch_size, gradient_accumulation_steps",
+        "model_ckpt, micro_batch_size, gradient_accumulation_steps",
         [
-            ("llama3", "tune", 1, 1),
+            ("llama3_hf_138m", 1, 1),
         ],
     )
     def test_loss(
         self,
-        model_type,
-        ckpt_type,
+        model_ckpt,
         micro_batch_size,
         gradient_accumulation_steps,
         tmpdir,
         monkeypatch,
     ):
-        ckpt = model_type + "_" + ckpt_type
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        tokenizer_path = Path(TOKENIZER_PATHS[model_type])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
 
-        # Config file needed for model conversion.
-        write_hf_ckpt_config(ckpt_dir)
-
         cmd = f"""
         tune run qat_single_device \
             --config llama2/1B_qat_single_device \
             output_dir={tmpdir} \
             batch_size={micro_batch_size} \
             gradient_accumulation_steps={gradient_accumulation_steps} \
-            checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}] \
+            checkpointer.checkpoint_files=[model.safetensors] \
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type={model_type.upper()} \
-            tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             metric_logger.filename={log_file} \
         """.split()
-        model_config = MODEL_TEST_CONFIGS[model_type]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
         cmd = cmd + self._get_test_config_overrides() + model_config
 
         monkeypatch.setattr(sys, "argv", cmd)
@@ -94,5 +84,5 @@ def test_loss(
             runpy.run_path(TUNE_PATH, run_name="__main__")
 
         loss_values = get_loss_values_from_metric_logger(log_file)
-        expected_losses = self._fetch_expected_loss_values(model_type, ckpt_type)
+        expected_losses = self._fetch_expected_loss_values(model_ckpt)
         torch.testing.assert_close(loss_values, expected_losses, rtol=1e-3, atol=1e-3)
diff --git a/tests/recipes/utils.py b/tests/recipes/utils.py
index 2303a8be4a..209e0124cc 100644
--- a/tests/recipes/utils.py
+++ b/tests/recipes/utils.py
@@ -281,6 +281,37 @@ def lora_llama3_test_config(
     return config_overrides
 
 
+def lora_llama3_test_config_138m(
+    apply_lora_to_mlp=False,
+    apply_lora_to_output=False,
+    lora_rank=8,
+    lora_alpha=16,
+    quantize_base: bool = False,
+    use_dora: bool = False,
+) -> list[str]:
+    """
+    Test config with slightly larger embed dim to be paged and flex attention friendly
+    """
+    return [
+        f"model.lora_rank={lora_rank}",
+        f"model.lora_alpha={lora_alpha}",
+        f"model.lora_attn_modules={lora_attn_modules}",
+        f"model.apply_lora_to_mlp={apply_lora_to_mlp}",
+        f"model.apply_lora_to_output={apply_lora_to_output}",
+        "model._component_=torchtune.models.llama3.lora_llama3",
+        "model.vocab_size=128_256",
+        "model.num_layers=2",
+        "model.num_heads=16",
+        "model.embed_dim=512",
+        "model.max_seq_len=1024",
+        "model.norm_eps=1e-5",
+        "model.num_kv_heads=8",
+        "model.lora_dropout=0.0",
+        f"model.quantize_base={quantize_base}",
+        f"model.use_dora={use_dora}",
+    ]
+
+
 def write_hf_ckpt_config(ckpt_dir: Union[str, Path]):
     config = {
         "hidden_size": 256,
@@ -376,4 +407,27 @@ def write_hf_vision_ckpt_config(ckpt_dir: str):
         use_dora=True,
     ),
     "llama3_hf_138m": llama3_test_config_138m(),
+    "llama3_hf_138m_lora": lora_llama3_test_config_138m(
+        lora_attn_modules=["q_proj", "k_proj", "v_proj", "output_proj"],
+        apply_lora_to_mlp=False,
+        apply_lora_to_output=False,
+        lora_rank=8,
+        lora_alpha=16,
+    ),
+    "llama3_hf_138m_qlora": lora_llama3_test_config_138m(
+        lora_attn_modules=["q_proj", "k_proj", "v_proj", "output_proj"],
+        apply_lora_to_mlp=False,
+        apply_lora_to_output=False,
+        lora_rank=8,
+        lora_alpha=16,
+        quantize_base=True,
+    ),
+    "llama3_hf_138m_dora": lora_llama3_test_config_138m(
+        lora_attn_modules=["q_proj", "k_proj", "v_proj", "output_proj"],
+        apply_lora_to_mlp=False,
+        apply_lora_to_output=False,
+        lora_rank=8,
+        lora_alpha=16,
+        use_dora=True,
+    ),
 }