recipes/configs/llama3_2/3B_full.yaml (48 changes: 29 additions & 19 deletions)
@@ -26,21 +26,30 @@ tokenizer:
   path: /tmp/Llama-3.2-3B-Instruct/original/tokenizer.model
   max_seq_len: null
 
-# Dataset and Sampler
+# Dataloader
+dataloader:
+  batch_size: 16
+  # num_workers and pin_memory can be added here if needed
+
+# Dataset - now a list to support multiple weighted sources
 dataset:
-  _component_: torchtune.datasets.alpaca_cleaned_dataset
-  packed: False # True increases speed
-  split: train[:95%]
-seed: null
-shuffle: True
-batch_size: 4
-
-# Validation
-run_val_every_n_steps: null # Change to an integer to enable validation every N steps
-dataset_val:
-  _component_: torchtune.datasets.alpaca_cleaned_dataset
-  split: train[95%:]
-batch_size_val: ${batch_size}
+  - _component_: torchtune.datasets.slimorca_iterable_dataset
+    shuffle_buffer_size: 1000
+    weight: 0.8
+    split: train[:5%] # simulate 1 epoch quickly
+  - _component_: torchtune.datasets.alpaca_iterable_dataset
+    shuffle_buffer_size: 1000
+    weight: 0.2
+    split: train[:5%] # simulate 1 epoch quickly
+
+# Packing (TBD by follow-up PR)
+# packing:
+#   _component_: torchtune.datasets.packing.SFTPacking
+#   max_seq_len: 8192
+
+seed: 42
+
+# Validation not supported yet with iterable datasets
 
 # Model Arguments
 model:
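The key change in this hunk: `dataset` is now a list of iterable datasets, each carrying a `weight` that controls how often it is sampled when the streams are interleaved. A minimal sketch of weighted interleaving, under stated assumptions (the `interleave` function and the idea that each source streams indefinitely are mine, not torchtune's internals; the weights and seed mirror the config above):

```python
import random
from typing import Any, Iterator

def interleave(sources: list[Iterator[Any]], weights: list[float],
               seed: int = 42) -> Iterator[Any]:
    """Yield samples from `sources`, picking each source with probability
    proportional to its weight (e.g. 0.8 SlimOrca / 0.2 Alpaca above)."""
    rng = random.Random(seed)             # matches `seed: 42` in the config
    total = sum(weights)
    probs = [w / total for w in weights]  # normalize in case weights don't sum to 1
    while True:
        # Choose one source per sample; assumes each iterator streams
        # indefinitely, as iterable datasets typically loop over shuffled shards.
        (src,) = rng.choices(sources, weights=probs, k=1)
        yield next(src)

# Hypothetical usage: mixed = interleave([iter(slimorca), iter(alpaca)], [0.8, 0.2])
```

Sampling per-example by weight, rather than concatenating datasets epoch by epoch, is what keeps the 80/20 mix roughly constant at any point in the stream.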
@@ -65,10 +74,11 @@ optimizer:
 loss:
   _component_: torchtune.modules.loss.LinearCrossEntropyLoss
 
-# Training
-epochs: 1
-max_steps_per_epoch: null
-gradient_accumulation_steps: 8 # Use to increase effective batch size
+# Training - now step-based
+num_training_steps: 100 # Total number of training steps to run
+save_every_n_steps: 200 # Save a checkpoint every N steps. 200 > num_training_steps, so no intermediate checkpoints are saved.
+gradient_accumulation_steps: 1
+dataset_metrics_log_freq: 5 # Log dataset-specific metrics every N steps
 
 # Environment
 device: cuda
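Step-based training replaces `epochs`/`max_steps_per_epoch`: the recipe pulls batches from an effectively endless iterable dataloader and stops after a fixed number of optimizer steps. A rough sketch of the implied control flow (function and helper names are illustrative, not torchtune's recipe API; only the config keys are taken from the YAML):

```python
def train(model, optimizer, loss_fn, dataloader,
          num_training_steps=100, save_every_n_steps=200,
          gradient_accumulation_steps=1):
    data_iter = iter(dataloader)  # iterable dataset: no epoch boundary
    for step in range(1, num_training_steps + 1):
        optimizer.zero_grad()
        for _ in range(gradient_accumulation_steps):
            batch = next(data_iter)
            loss = loss_fn(model(batch["tokens"]), batch["labels"])
            # Scale so the accumulated gradient matches one large batch
            (loss / gradient_accumulation_steps).backward()
        optimizer.step()
        if step % save_every_n_steps == 0:
            # Never fires with 200 > 100, which is the point of the
            # config comment about skipping intermediate checkpoints.
            save_checkpoint(model, optimizer, step)  # hypothetical helper
```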
@@ -83,7 +93,7 @@ optimizer_in_bwd: False # True saves memory. Requires gradient_accumulation_ste
 
 # Logging
 metric_logger:
-  _component_: torchtune.training.metric_logging.DiskLogger
+  _component_: torchtune.training.metric_logging.WandBLogger
   log_dir: ${output_dir}/logs
 log_every_n_steps: 1
 log_peak_memory_stats: True
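Swapping `DiskLogger` for `WandBLogger` routes the same metric stream to Weights & Biases; torchtune's metric loggers share one interface, so the recipe code is unchanged. A hedged usage sketch (constructor kwargs beyond `log_dir` vary by version, and a wandb login is assumed):

```python
from torchtune.training.metric_logging import WandBLogger

logger = WandBLogger(log_dir="/tmp/logs")  # placeholder for ${output_dir}/logs
logger.log("loss", 1.23, step=1)           # scalar metric at a given step
logger.close()
```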