recipes/configs/llama3_2/3B_full.yaml (48 changes: 29 additions & 19 deletions)
@@ -26,21 +26,30 @@ tokenizer:
   path: /tmp/Llama-3.2-3B-Instruct/original/tokenizer.model
   max_seq_len: null
 
-# Dataset and Sampler
+# Dataloader
+dataloader:
+  batch_size: 16
+  # num_workers and pin_memory can be added here if needed
+
+# Dataset - now a list to support multiple weighted sources
 dataset:
-  _component_: torchtune.datasets.alpaca_cleaned_dataset
-  packed: False # True increases speed
-  split: train[:95%]
-seed: null
-shuffle: True
-batch_size: 4
-
-# Validation
-run_val_every_n_steps: null # Change to an integer to enable validation every N steps
-dataset_val:
-  _component_: torchtune.datasets.alpaca_cleaned_dataset
-  split: train[95%:]
-batch_size_val: ${batch_size}
+  - _component_: torchtune.datasets.slimorca_iterable_dataset
+    shuffle_buffer_size: 1000
+    weight: 0.8
+    split: train[:5%] # simulate 1 epoch quickly
+  - _component_: torchtune.datasets.alpaca_iterable_dataset
+    shuffle_buffer_size: 1000
+    weight: 0.2
+    split: train[:5%] # simulate 1 epoch quickly
+
+# Packing (TBD by follow-up PR)
+# packing:
+#   _component_: torchtune.datasets.packing.SFTPacking
+#   max_seq_len: 8192
+
+seed: 42
+
+# Validation not supported yet with iterable datasets
 
 # Model Arguments
 model:
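The key change in this hunk: `dataset` is now a list of iterable datasets, each carrying a `weight` that controls how often it is sampled when the streams are interleaved. A minimal sketch of weighted interleaving, under stated assumptions (the `interleave` function and the idea that each source streams indefinitely are mine, not torchtune's internals; the weights and seed mirror the config above):

```python
import random
from typing import Any, Iterator

def interleave(sources: list[Iterator[Any]], weights: list[float],
               seed: int = 42) -> Iterator[Any]:
    """Yield samples from `sources`, picking each source with probability
    proportional to its weight (e.g. 0.8 SlimOrca / 0.2 Alpaca above)."""
    rng = random.Random(seed)             # matches `seed: 42` in the config
    total = sum(weights)
    probs = [w / total for w in weights]  # normalize in case weights don't sum to 1
    while True:
        # Choose one source per sample; assumes each iterator streams
        # indefinitely, as iterable datasets typically loop over shuffled shards.
        (src,) = rng.choices(sources, weights=probs, k=1)
        yield next(src)

# Hypothetical usage: mixed = interleave([iter(slimorca), iter(alpaca)], [0.8, 0.2])
```

Sampling per-example by weight, rather than concatenating datasets epoch by epoch, is what keeps the 80/20 mix roughly constant at any point in the stream.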
@@ -65,10 +74,11 @@ optimizer:
 loss:
   _component_: torchtune.modules.loss.LinearCrossEntropyLoss
 
-# Training
-epochs: 1
-max_steps_per_epoch: null
-gradient_accumulation_steps: 8 # Use to increase effective batch size
+# Training - now step-based
+num_training_steps: 100 # Total number of training steps to run
+save_every_n_steps: 200 # Save a checkpoint every N steps. 200 > num_training_steps, so no intermediate checkpoints are saved.
+gradient_accumulation_steps: 1
+dataset_metrics_log_freq: 5 # Log dataset-specific metrics every N steps
 
 # Environment
 device: cuda
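Step-based training replaces `epochs`/`max_steps_per_epoch`: the recipe pulls batches from an effectively endless iterable dataloader and stops after a fixed number of optimizer steps. A rough sketch of the implied control flow (function and helper names are illustrative, not torchtune's recipe API; only the config keys are taken from the YAML):

```python
def train(model, optimizer, loss_fn, dataloader,
          num_training_steps=100, save_every_n_steps=200,
          gradient_accumulation_steps=1):
    data_iter = iter(dataloader)  # iterable dataset: no epoch boundary
    for step in range(1, num_training_steps + 1):
        optimizer.zero_grad()
        for _ in range(gradient_accumulation_steps):
            batch = next(data_iter)
            loss = loss_fn(model(batch["tokens"]), batch["labels"])
            # Scale so the accumulated gradient matches one large batch
            (loss / gradient_accumulation_steps).backward()
        optimizer.step()
        if step % save_every_n_steps == 0:
            # Never fires with 200 > 100, which is the point of the
            # config comment about skipping intermediate checkpoints.
            save_checkpoint(model, optimizer, step)  # hypothetical helper
```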
@@ -83,7 +93,7 @@ optimizer_in_bwd: False # True saves memory. Requires gradient_accumulation_ste
 
 # Logging
 metric_logger:
-  _component_: torchtune.training.metric_logging.DiskLogger
+  _component_: torchtune.training.metric_logging.WandBLogger
   log_dir: ${output_dir}/logs
 log_every_n_steps: 1
 log_peak_memory_stats: True
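Swapping `DiskLogger` for `WandBLogger` routes the same metric stream to Weights & Biases; torchtune's metric loggers share one interface, so the recipe code is unchanged. A hedged usage sketch (constructor kwargs beyond `log_dir` vary by version, and a wandb login is assumed):

```python
from torchtune.training.metric_logging import WandBLogger

logger = WandBLogger(log_dir="/tmp/logs")  # placeholder for ${output_dir}/logs
logger.log("loss", 1.23, step=1)           # scalar metric at a given step
logger.close()
```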