diff --git a/configs/xdit/text-to-image/benchmark.yaml b/configs/xdit/text-to-image/benchmark.yaml index 5aca2a6..97e38c9 100644 --- a/configs/xdit/text-to-image/benchmark.yaml +++ b/configs/xdit/text-to-image/benchmark.yaml @@ -12,33 +12,33 @@ command_template: | --workload.ring_degree {ring_degree} sweep_defaults: - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [1] ring_degree: [1] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [1] ring_degree: [2] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [2] ring_degree: [1] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [1] ring_degree: [4] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [2] ring_degree: [2] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [4] ring_degree: [1] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [1] ring_degree: [8] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [2] ring_degree: [4] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [4] ring_degree: [2] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [8] ring_degree: [1] diff --git a/configs/xdit/text-to-video/benchmark.yaml b/configs/xdit/text-to-video/benchmark.yaml index 242a50f..e0b610c 100644 --- a/configs/xdit/text-to-video/benchmark.yaml +++ b/configs/xdit/text-to-video/benchmark.yaml @@ -12,34 +12,34 @@ command_template: | --workload.ring_degree {ring_degree} sweep_defaults: - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [1] ring_degree: [1] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [1] ring_degree: [2] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [2] ring_degree: [1] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [1] ring_degree: [4] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [2] ring_degree: [2] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [4] ring_degree: [1] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [1] ring_degree: [8] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [2] ring_degree: [4] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [4] ring_degree: [2] - - batch_size: [1, 2, 4, 8, 16, 32] + - batch_size: [1, 2, 4, 8, 16] ulysses_degree: [8] ring_degree: [1] diff --git a/scripts/generate_jobs.py b/scripts/generate_jobs.py index 8bc2c60..b12ec6c 100644 --- a/scripts/generate_jobs.py +++ b/scripts/generate_jobs.py @@ -815,18 +815,16 @@ def generate_xdit_slurm_script( f"#SBATCH --output=logs/{dataset}_{gpu_model}_{num_gpus}gpu_{model_slug}_%j.out", f"#SBATCH --error=logs/{dataset}_{gpu_model}_{num_gpus}gpu_{model_slug}_%j.err", "", - "set -e", + "set -v", "", "# Change to submission directory", "cd $SLURM_SUBMIT_DIR", "", "# Load Python, CUDA, and GCC", "module load python/3.12.1 && \\", - "module load cuda/12.6.3 && \\", + "module load cuda/12.8.1 && \\", "module load gcc", "", - "source .venv/bin/activate", - "", "# Ensure required environment variables are set", 'if [[ -z "$HF_HOME" ]]; then', ' echo "ERROR: HF_HOME environment variable is not set. Please export HF_HOME before running sbatch." >&2',