pytorch
diff --git a/benchmarks/bench_galore_fused_kernels.py b/benchmarks/bench_galore_fused_kernels.py
Lines changed: 0 additions & 65 deletions
diff --git a/benchmarks/float8/training/README.md b/benchmarks/float8/training/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/benchmarks/float8/training/torchtitan_benchmark.sh b/benchmarks/float8/training/torchtitan_benchmark.sh
Lines changed: 3 additions & 3 deletions
diff --git a/benchmarks/fused_benchmark_utils.py b/benchmarks/fused_benchmark_utils.py
Lines changed: 0 additions & 261 deletions
diff --git a/docs/source/api_ref_quantization.rst b/docs/source/api_ref_quantization.rst
Lines changed: 0 additions & 6 deletions
@@ -12,7 +12,7 @@ Training parameters can be configured via environment variables.
     - `TORCHTITAN_ROOT`: Root directory of torchtitan in your local filesystem
 - Optional:
     - `FLOAT8_RECIPE_WITH_BEST_SETTINGS`: "rowwise" or "tensorwise". Applies float8 training with the specified scaling recipe, as well as additional training configs which are optimal for that scaling recipe. See `torchtitan_benchmark.sh` for more details.
-    - `BATCH_SIZE`: Defaults to 1.
+    - `LOCAL_BATCH_SIZE`: Defaults to 1.
     - `STEPS`: Defaults to 100.
     - `EXTRA_ARGS`: Extra arguments to pass to torchtitan training script. See [torchtitan](https://github.com/pytorch/torchtitan) docs for the full list of options.
 
 
@@ -8,7 +8,7 @@
 # with the given parameters,
 
 # script arguments
-BATCH_SIZE=${BATCH_SIZE:-1}
+LOCAL_BATCH_SIZE=${LOCAL_BATCH_SIZE:-1}
 STEPS=${STEPS:-100}
 
 # temporary log file which is deleted after performance data is parsed out and metrics are calculated.
@@ -20,7 +20,7 @@ if [ -z "${TORCHTITAN_ROOT}" ]; then
   echo "Usage: TORCHTITAN_ROOT=<directory> ./float8_training_benchmark.sh"
   echo "Optional parameters configurable via environment variables:"
   echo " * FLOAT8_RECIPE_WITH_BEST_SETTINGS: "rowwise" or "tensorwise". if set, use float8 training in torchtitan with the specified recipe, including the additional settings which are optimal for that recipe. otherwise, use bf16 mixed precision training."
-  echo " * BATCH_SIZE: defaults to 1."
+  echo " * LOCAL_BATCH_SIZE: defaults to 1."
   echo " * STEPS: defaults to 100."
   echo " * EXTRA_ARGS: additional arguments to pass to the torchtitan training script."
   exit 1
@@ -45,7 +45,7 @@ cd ${TORCHTITAN_ROOT}
 echo "float8 args: ${FLOAT8_ARGS}"
 
 # run the command with the specified arguments
-CONFIG_FILE="./torchtitan/models/llama3/train_configs/llama3_8b.toml" ${TORCHTITAN_ROOT}/run_train.sh --training.steps=${STEPS} --training.batch_size=${BATCH_SIZE} --training.compile ${FLOAT8_ARGS} ${EXTRA_ARGS} 2>&1 | tee ${LOG_FILE}
+CONFIG_FILE="./torchtitan/models/llama3/train_configs/llama3_8b.toml" ${TORCHTITAN_ROOT}/run_train.sh --training.steps=${STEPS} --training.local-batch-size=${LOCAL_BATCH_SIZE} --training.compile ${FLOAT8_ARGS} ${EXTRA_ARGS} 2>&1 | tee ${LOG_FILE}
 
 # return to original working directory
 cd $original_dir
 
@@ -63,14 +63,8 @@ Quantization Primitives
 
     choose_qparams_affine
     choose_qparams_affine_with_min_max
-    choose_qparams_affine_floatx
     quantize_affine
-    quantize_affine_floatx
     dequantize_affine
-    dequantize_affine_floatx
-    choose_qparams_and_quantize_affine_hqq
-    fake_quantize_affine
-    fake_quantize_affine_cachemask
     safe_int_mm
     int_scaled_matmul
     MappingType