diff --git a/examples/llm_qad/configs/qwen3-30b-a3b-instruct-2507-moe_template.conf b/examples/llm_qad/configs/qwen3-30b-a3b-instruct-2507-moe_template.conf
index c30af0f989..6d2c52cc8e 100644
--- a/examples/llm_qad/configs/qwen3-30b-a3b-instruct-2507-moe_template.conf
+++ b/examples/llm_qad/configs/qwen3-30b-a3b-instruct-2507-moe_template.conf
@@ -61,7 +61,7 @@ export DATACACHE_DIR="" # path to data cache directory
 
 ########################################################
 # CONTAINER
 ########################################################
-export CONTAINER_IMAGE="" # path to container image, e.g., nvcr.io/nvidia/pytorch:26.01-py3
+export CONTAINER_IMAGE="nvcr.io/nvidia/pytorch:26.01-py3" # path to container image or .sqsh file
 export CONTAINER_MOUNTS="" # container mounts, e.g., "/lustre/fs1:/lustre/fs1"
 export CONTAINER_WORKDIR="" # container work directory, e.g., "/Model-Optimizer/examples/llm_qad"
diff --git a/examples/llm_qad/configs/qwen3-8b_template.conf b/examples/llm_qad/configs/qwen3-8b_template.conf
index 0def8dc8c2..24beec5d63 100644
--- a/examples/llm_qad/configs/qwen3-8b_template.conf
+++ b/examples/llm_qad/configs/qwen3-8b_template.conf
@@ -59,7 +59,7 @@ export DATACACHE_DIR="" # path to data cache directory
 
 ########################################################
 # CONTAINER
 ########################################################
-export CONTAINER_IMAGE="" # path to container image, e.g., nvcr.io/nvidia/pytorch:26.01-py3
+export CONTAINER_IMAGE="nvcr.io/nvidia/pytorch:26.01-py3" # path to container image or .sqsh file
 export CONTAINER_MOUNTS="" # container mounts, e.g., "/lustre/fs1:/lustre/fs1"
 export CONTAINER_WORKDIR="" # container work directory
diff --git a/examples/llm_qad/sbatch_qad.sh b/examples/llm_qad/sbatch_qad.sh
index 613b9bc272..7ecc01281e 100755
--- a/examples/llm_qad/sbatch_qad.sh
+++ b/examples/llm_qad/sbatch_qad.sh
@@ -58,19 +58,8 @@ if [[ -n "$CONFIG_FILE" ]]; then
     fi
 fi
 
-# === Default Paths (override in config) === 
-MLM_DIR="${MLM_DIR:-/lustre/fs1/portfolios/coreai/projects/coreai_dlalgo_modelopt/users/weimingc/workspace/Megatron-LM}"
-MODELOPT_DIR="${MODELOPT_DIR:-/lustre/fs1/portfolios/coreai/projects/coreai_dlalgo_modelopt/users/weimingc/workspace/TensorRT-Model-Optimizer}"
-MODELS_ROOT="${MODELS_ROOT:-/lustre/fs1/portfolios/coreai/projects/coreai_dlalgo_modelopt/users/weimingc/models}"
-QAD_CHECKPOINT_ROOT="${QAD_CHECKPOINT_ROOT:-/lustre/fs1/portfolios/coreai/projects/coreai_dlalgo_modelopt/users/weimingc/checkpoints}"
-DATACACHE_DIR="${DATACACHE_DIR:-/lustre/fs1/portfolios/coreai/projects/coreai_dlalgo_modelopt/users/weimingc/data_cache}"
 LOG_DIR="${LOG_DIR:-${QAD_CHECKPOINT_ROOT}/logs_slurm}"
 
-# Container settings
-CONTAINER_IMAGE="${CONTAINER_IMAGE:-/lustre/fs1/portfolios/coreai/projects/coreai_dlalgo_modelopt/users/weimingc/containers/pytorch_25.06-py3.sqsh}"
-CONTAINER_MOUNTS="${CONTAINER_MOUNTS:-/lustre/fs1:/lustre/fs1}"
-CONTAINER_WORKDIR="${CONTAINER_WORKDIR:-/lustre/fs1/portfolios/coreai/projects/coreai_dlalgo_modelopt/users/weimingc/workspace/TensorRT-Model-Optimizer/examples/llm_qad}"
-
 # Parallelism (required from config)
 TP_SIZE="${TP_SIZE:?ERROR: TP_SIZE must be set in config}"
 MBS="${MBS:?ERROR: MBS must be set in config}"
diff --git a/examples/windows/torch_onnx/diffusers/qad_example/ltx2_qad.yaml b/examples/windows/torch_onnx/diffusers/qad_example/ltx2_qad.yaml
index da6e5d74ed..1314deb753 100644
--- a/examples/windows/torch_onnx/diffusers/qad_example/ltx2_qad.yaml
+++ b/examples/windows/torch_onnx/diffusers/qad_example/ltx2_qad.yaml
@@ -1,9 +1,9 @@
 # LTX-2 QAD Training Configuration
 
 model:
-  model_path: "/lustre/fsw/portfolios/adlr/projects/adlr_psx_numerics/users/ynankani/ComfyUI/models/checkpoints/ltx-av-step-1933500-split-new-vae.safetensors"
+  model_path: "/path/to/ltx2/checkpoint.safetensors" # TODO: Set your LTX-2 checkpoint path
   training_mode: "full"
 load_checkpoint:
-  text_encoder_path: "/lustre/fsw/portfolios/adlr/users/dhutchins/models/gemma"
+  text_encoder_path: "/path/to/gemma" # TODO: Set your Gemma text encoder path
 training_strategy:
   name: "text_to_video"
@@ -26,7 +26,7 @@ acceleration:
   load_text_encoder_in_8bit: true
 
 data:
-  preprocessed_data_root: "/lustre/fsw/portfolios/adlr/users/scavallari/ltx-qad/qad-dataset"
+  preprocessed_data_root: "/path/to/preprocessed" # TODO: Set your preprocessed dataset path
   num_dataloader_workers: 2
 
 validation: