diff --git a/.github/actions/submit-delete-k8s-job/action.yml b/.github/actions/submit-delete-k8s-job/action.yml index 678e22439..45ee34f4c 100644 --- a/.github/actions/submit-delete-k8s-job/action.yml +++ b/.github/actions/submit-delete-k8s-job/action.yml @@ -24,7 +24,6 @@ runs: echo "Submit K8s job" kubectl apply -f "${{ inputs.job-config-file }}" kubectl get event | grep ${{ inputs.job-name }} - # Wait for job to be created kubectl wait --for=create job/${{ inputs.job-name }} --timeout=$TIMEOUT_JOB_CREATION diff --git a/.github/container/Dockerfile.axlearn b/.github/container/Dockerfile.axlearn index 276a0bda2..4be031003 100644 --- a/.github/container/Dockerfile.axlearn +++ b/.github/container/Dockerfile.axlearn @@ -25,6 +25,8 @@ seqio==0.0.18 protobuf==3.20.3 pytest>=7.4.3 tensorflow==2.18.1 +pytest-xdist +pytest-reportlog REQUIREMENTS EOF diff --git a/.github/container/test-axlearn.sh b/.github/container/test-axlearn.sh index d1993cc03..9cefa64ce 100755 --- a/.github/container/test-axlearn.sh +++ b/.github/container/test-axlearn.sh @@ -2,7 +2,9 @@ set -uo pipefail +# HELPER FUNCTIONS usage() { + # Function to handle all the inputs echo "Run tests in axlearn with specified options." 
echo "" echo "Usage: $0 [OPTIONS]" @@ -18,12 +20,30 @@ usage() { exit 1 } -# Default values -DIR='axlearn/axlearn/common' +run_tests() { + # Function to run tests for AXLearn + local env_spec=$1 + local marker=$2 + local suffix=$3 + shift 3 + local -a test_files=("$@") + + local junit="log_${suffix}.xml" + local log="log_${suffix}.log" + + cmd="${env_spec:+${env_spec} }pytest -m \"${marker}\" ${test_files[@]}\ + --capture=tee-sys -v \ + --junit-xml=${LOG_DIRECTORY}/${junit} | tee ${LOG_DIRECTORY}/${log}" + echo "Running command ${cmd}" + eval "${cmd}" +} + +# DEFAULT VALUES +DIR='/opt/axlearn/axlearn/common' TEST_FILES=() OUTPUT_DIRECTORY='' -# Parse args manually +# INPUT PARSING while [[ $# -gt 0 ]]; do key="$1" case $key in @@ -66,19 +86,15 @@ while [[ $# -gt 0 ]]; do ;; esac done - - +cd "$DIR" if [ -z "$OUTPUT_DIRECTORY" ]; then timestamp=$(date +%Y%m%d_%H%M%S) - OUTPUT_DIRECTORY="test_runs/${timestamp}" + OUTPUT_DIRECTORY="output/${timestamp}" fi LOG_DIRECTORY="${OUTPUT_DIRECTORY}/logs" mkdir -p "${LOG_DIRECTORY}" -# Print out config for sanity check -echo "Configuration:" -echo " Directory: $DIR" if [ "${#TEST_FILES[@]}" -gt 0 ]; then echo " Test Files:" for f in "${TEST_FILES[@]}"; do @@ -87,16 +103,18 @@ if [ "${#TEST_FILES[@]}" -gt 0 ]; then else echo " Test Files Pattern: '*_test.py' (default)" fi -echo " Output Directory: $OUTPUT_DIRECTORY" - -cd "$DIR" || exit 1 - -echo "Running tests..." +# DEPENDENCIES pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu -pip install timm transformers scikit-learn - - +pip install timm transformers scikit-learn grain evaluate prefixed wandb +echo "Downloading input data..." 
+mkdir -p /opt/axlearn/axlearn/data/tokenizers/sentencepiece +mkdir -p /opt/axlearn/axlearn/data/tokenizers/bpe +curl --fail -L https://huggingface.co/t5-base/resolve/main/spiece.model -o /opt/axlearn/axlearn/data/tokenizers/sentencepiece/t5-base +curl --fail -L https://huggingface.co/FacebookAI/roberta-base/raw/main/merges.txt -o /opt/axlearn/axlearn/data/tokenizers/bpe/roberta-base-merges.txt +curl --fail -L https://huggingface.co/FacebookAI/roberta-base/raw/main/vocab.json -o /opt/axlearn/axlearn/data/tokenizers/bpe/roberta-base-vocab.json + +# RETRIEVE TEST FILES if [ "${#TEST_FILES[@]}" -eq 0 ]; then TEST_FILES=("*_test.py") fi @@ -117,53 +135,110 @@ if [ "${#expanded_test_files[@]}" -eq 0 ]; then exit 1 fi -# in case we have the exclusion list file -EXCLUDE_LIST_FILE="$DIR/exclusion_list.txt" -EXCLUDE_PATTERNS=() - -if [ -f "$EXCLUDE_LIST_FILE" ]; then - echo "Reading exclusion list from '$EXCLUDE_LIST_FILE'" - mapfile -t EXCLUDE_PATTERNS < "$EXCLUDE_LIST_FILE" -else - echo "Exclusion list file not found at '$EXCLUDE_LIST_FILE'" -fi - +EXCLUDE_PATTERNS=("array_serialization_test.py" + "t5_test.py" # tensorflow bug + "input_t5_test.py" + "layers_test.py" # tensorflow bug + "checkpointer_orbax_test.py" + "checkpointer_orbax_emergency_test.py" + "checkpointer_test.py" + "input_glue_test.py" + "deberta_test.py" + "orbax_checkpointer" + "loss_test.py" # optax bug + "quantizer_test.py" + "test_utils_test.py" + "update_transformation_test.py" + "env_test.py" + "causal_lm_test.py" + "gradient_accumulation_test.py" + "file_system_test.py" + "compiler_options_test.py" # tpu only + "metrics_correlation_test.py" # manual only + "metrics_glue_test.py" + "ssm_test.py" # test on ssm + "summary_test.py" # wandb test + "param_converter_test.py" + "attention_test.py" # assertion errors to fix + # run these as part of the for_8_devices: + "gda_test.py" + "input_base_test.py" + "input_dispatch_test.py" + "trainer_test.py" + "utils_test.py" + ) final_test_files=() -for test_file in 
"${expanded_test_files[@]}"; do - exclude=false - for pattern in "${EXCLUDE_PATTERNS[@]}"; do +for test_file in "${expanded_test_files[@]}"; do + exclude=false + for pattern in "${EXCLUDE_PATTERNS[@]}"; do if [[ "$(basename "$test_file")" == "$(basename "$pattern")" ]]; then - exclude=true - break - fi - done - if [ "$exclude" = false ]; then + exclude=true + break + fi + done + if [ "$exclude" = false ]; then final_test_files+=("$test_file") - fi + fi done -# Initialize counters for test -failures=0 -passed=0 -SUMMARY_FILE="${OUTPUT_DIRECTORY}/summary.txt" - - -for test_file in "${final_test_files[@]}"; do - echo "Running: ${test_file}" - log_file_name=$(echo "${test_file%.py}" | sed 's/\//__/g').log - log_file="${LOG_DIRECTORY}/${log_file_name}" - # run the tests and save them as *.log - pytest "${test_file}" --capture=tee-sys | tee "${log_file}" - exit_code=${PIPESTATUS[0]} - echo $exit_code - # write number of tests passed and failed - if [ $exit_code -eq 0 ]; then - echo "${test_file}: PASSED" >> "${SUMMARY_FILE}" - ((passed++)) + +# RUN TESTS +TEST_8_DEVICES_FILES=("gda_test.py" + "input_base_test.py" + "input_dispatch_test.py" + "trainer_test.py" + "utils_test.py" +) +TEST_8_DEVICES_WITH_PATHS=() +for file in "${TEST_8_DEVICES_FILES[@]}"; do + found_files=$(find . 
-name "$file" -type f 2>/dev/null) + if [[ -n "$found_files" ]]; then + while IFS= read -r found_file; do + TEST_8_DEVICES_WITH_PATHS+=("$found_file") + done <<< "$found_files" else - echo "${test_file}: FAILED (Exit code: $exit_code)" >> "${SUMMARY_FILE}" - ((failures++)) + echo "Warning: Test file $file not found in current directory structure" fi - echo "" done + +run_tests "" "for_8_devices" "8_dev" "${TEST_8_DEVICES_WITH_PATHS[@]}" +# All the other tests +runs=( + "|not (gs_login or tpu or high_cpu or fp64 or for_8_devices)|base" + "JAX_ENABLE_X64=1|fp64|fp64" +) +for spec in "${runs[@]}"; do + IFS='|' read -r env_spec marker suffix <<< "${spec}" + echo "Running tests with ${env_spec}, ${marker}, ${suffix}" + run_tests "${env_spec}" "${marker}" "${suffix}" "${final_test_files[@]}" + echo "Test run" +done + +# SUMMARY STATUS +passed=0 +failed=0 +# NOTE: pytest "error" outcomes are counted into "failed" below +skipped=0 +for log in "${LOG_DIRECTORY}"/log_*.log; do + count_pass=$(grep -Eo '[0-9]+ passed' "${log}" | awk '{print $1}' || true) + count_fail=$(grep -Eo '[0-9]+ failed' "${log}" | awk '{print $1}' || true) + count_error=$(grep -Eo '[0-9]+ error' "${log}" | awk '{print $1}' || true) + count_skipped=$(grep -Eo '[0-9]+ skipped' "${log}" | awk '{print $1}' || true) + # in case of None + count_pass=${count_pass:-0} + count_fail=${count_fail:-0} + count_error=${count_error:-0} + count_skipped=${count_skipped:-0} + # count all the tests + (( passed += count_pass )) + (( failed += count_fail )) + (( failed += count_error )) + (( skipped += count_skipped )) +done + +echo "Total number of passed tests ${passed}" +echo "Total number of failed tests ${failed}" +echo "Total number of skipped tests ${skipped}" +# add those to summary.txt and we're using it for extracting values +echo "PASSED: ${passed} FAILED: ${failed} SKIPPED: ${skipped}" >> "${LOG_DIRECTORY}/summary.txt" diff --git a/.github/eks-workflow-files/axlearn/axlearn-job.yml b/.github/eks-workflow-files/axlearn/axlearn-job.yml index e027a434e..24e7ca8e7 100644 --- 
a/.github/eks-workflow-files/axlearn/axlearn-job.yml +++ b/.github/eks-workflow-files/axlearn/axlearn-job.yml @@ -21,12 +21,8 @@ spec: LOG_DIR="/output/${RUN_ID}" mkdir -p ${LOG_DIR} - - # Start MPS daemon - nvidia-cuda-mps-control -d - # Run tests - pytest-xdist.sh 1 6 ${LOG_DIR}/axlearn-unittests.jsonl test-axlearn.sh --directory "." --output ${LOG_DIR} --test-files "/opt/axlearn/axlearn/common/*_test.py" | tee -a ${LOG_DIR}/pytest_stdout.log - + # test on JAX, make sure 8 devices are visible + pytest-xdist.sh 8 4 ${LOG_DIR}/axlearn-unittests.jsonl test-axlearn.sh --directory "." --output ${LOG_DIR} --test-files "/opt/axlearn/axlearn/common/*_test.py" env: - name: RUN_ID value: PLACEHOLDER diff --git a/.github/workflows/_ci.yaml b/.github/workflows/_ci.yaml index 6f9b9f4d9..86517a405 100644 --- a/.github/workflows/_ci.yaml +++ b/.github/workflows/_ci.yaml @@ -26,6 +26,11 @@ on: description: 'A JSON object containing git url+refs for softwares to be built' required: false default: '{}' + MODE: + type: string + description: 'Mode selection for running specific tests only' + required: false + default: full outputs: DOCKER_TAGS: description: 'JSON object containing tags of all docker images built' @@ -203,7 +208,12 @@ jobs: test-jax: needs: build-jax - if: inputs.ARCHITECTURE == 'amd64' # arm64 runners n/a + if: >- + inputs.ARCHITECTURE == 'amd64' && + ( + inputs.MODE == 'full' || + inputs.MODE == 'jax' + ) uses: ./.github/workflows/_test_unit.yaml with: TEST_NAME: jax @@ -242,7 +252,12 @@ jobs: test-nsys-jax: needs: build-jax - if: inputs.ARCHITECTURE == 'amd64' # arm64 runners n/a + if: >- + inputs.ARCHITECTURE == 'amd64' && + ( + inputs.MODE == 'full' || + inputs.MODE == 'jax' + ) uses: ./.github/workflows/_test_unit.yaml with: TEST_NAME: nsys-jax @@ -345,7 +360,12 @@ jobs: # not already have nsys-jax installed test-nsys-jax-archive: needs: test-nsys-jax - if: inputs.ARCHITECTURE == 'amd64' # arm64 runners n/a + if: >- + inputs.ARCHITECTURE == 'amd64' && + ( + 
inputs.MODE == 'full' || + inputs.MODE == 'jax' + ) strategy: matrix: os: [ubuntu-22.04, ubuntu-24.04, macOS-latest] @@ -373,7 +393,12 @@ jobs: test-nsys-jax-eks: needs: build-jax - if: inputs.ARCHITECTURE == 'amd64' # arm64 runners n/a + if: >- + inputs.ARCHITECTURE == 'amd64' && + ( + inputs.MODE == 'full' || + inputs.MODE == 'jax' + ) runs-on: eks env: JAX_DOCKER_IMAGE: ${{ needs.build-jax.outputs.DOCKER_TAG_FINAL }} @@ -451,7 +476,12 @@ jobs: test-te-h100: needs: build-jax - if: inputs.ARCHITECTURE == 'amd64' + if: >- + inputs.ARCHITECTURE == 'amd64' && + ( + inputs.MODE == 'full' || + inputs.MODE == 'te' + ) uses: ./.github/workflows/_transformer_engine_eks.yaml with: JAX_IMAGE: ${{ needs.build-jax.outputs.DOCKER_TAG_FINAL }} @@ -463,7 +493,12 @@ jobs: test-te-unit-a100: needs: build-jax secrets: inherit - if: inputs.ARCHITECTURE == 'amd64' # arm64 runners n/a + if: >- + inputs.ARCHITECTURE == 'amd64' && + ( + inputs.MODE == 'full' || + inputs.MODE == 'te' + ) uses: ./.github/workflows/_test_unit.yaml with: TEST_NAME: te @@ -511,9 +546,15 @@ jobs: pytest-report-L0-unittest.jsonl pytest-report-L0-distributed-unittest.jsonl pytest-report-L1-distributed-unittest.jsonl + test-rosetta-t5x: needs: build-rosetta-t5x - if: inputs.ARCHITECTURE == 'amd64' # no images for arm64 + if: >- + inputs.ARCHITECTURE == 'amd64' && + ( + inputs.MODE == 'full' || + inputs.MODE == 't5x' + ) uses: ./.github/workflows/_test_t5x_rosetta.yaml with: T5X_IMAGE: ${{ needs.build-rosetta-t5x.outputs.DOCKER_TAG_FINAL }} @@ -521,7 +562,12 @@ jobs: test-maxtext: needs: build-maxtext - if: inputs.ARCHITECTURE == 'amd64' # no arm64 gpu runners + if: >- + inputs.ARCHITECTURE == 'amd64' && + ( + inputs.MODE == 'full' || + inputs.MODE == 'maxtext' + ) uses: ./.github/workflows/_test_maxtext.yaml with: MAXTEXT_IMAGE: ${{ needs.build-maxtext.outputs.DOCKER_TAG_FINAL }} @@ -529,7 +575,12 @@ jobs: test-axlearn-eks: needs: build-axlearn - if: inputs.ARCHITECTURE == 'amd64' + if: >- + inputs.ARCHITECTURE 
== 'amd64' && + ( + inputs.MODE == 'full' || + inputs.MODE == 'axlearn' + ) runs-on: eks env: AXLEARN_DOCKER_IMAGE: ${{ needs.build-axlearn.outputs.DOCKER_TAG_FINAL }} @@ -565,16 +616,19 @@ jobs: id: log-s3 run: | mkdir -p axlearn-output - aws s3 cp s3://jax-toolbox-eks-output/axlearn/${{ github.run_id }}/summary.txt axlearn-output/ - aws s3 cp s3://jax-toolbox-eks-output/axlearn/${{ github.run_id }}/ axlearn-output/ --recursive --exclude "*" --include "*.log" - aws s3 cp s3://jax-toolbox-eks-output/axlearn/${{ github.run_id }}/axlearn-unittests.jsonl axlearn-output/ + aws s3 cp s3://jax-toolbox-eks-output/axlearn/${{ github.run_id }}/logs/summary.txt axlearn-output/ + aws s3 cp s3://jax-toolbox-eks-output/axlearn/${{ github.run_id }}/logs/ axlearn-output/ --recursive --exclude "*" --include "*.log" + aws s3 cp s3://jax-toolbox-eks-output/axlearn/${{ github.run_id }}/logs/ axlearn-output/ --recursive --exclude "*" --include "*.xml" - passed_tests=$(grep -c ": PASSED" axlearn-output/summary.txt || true) - failed_tests=$(grep -c ": FAILED" axlearn-output/summary.txt || true) - total_tests=$((failed_tests + passed_tests)) + + passed_tests=$(grep -Eo 'PASSED:[[:space:]]*[0-9]+' axlearn-output/summary.txt | grep -Eo '[0-9]+' | tail -n1 || echo 0) + failed_tests=$(grep -Eo 'FAILED:[[:space:]]*[0-9]+' axlearn-output/summary.txt | grep -Eo '[0-9]+' | tail -n1 || echo 0) + skipped_tests=$(grep -Eo 'SKIPPED:[[:space:]]*[0-9]+' axlearn-output/summary.txt | grep -Eo '[0-9]+' | tail -n1 || echo 0) + total_tests=$((failed_tests + passed_tests + skipped_tests)) echo "Passed tests: $passed_tests" echo "Failed tests: $failed_tests" + echo "Skipped tests: $skipped_tests" echo "Total tests: $total_tests" echo "PASSED_TESTS=$passed_tests" >> $GITHUB_OUTPUT echo "FAILED_TESTS=$failed_tests" >> $GITHUB_OUTPUT @@ -626,7 +680,12 @@ jobs: test-axlearn-fuji-models-eks: needs: build-axlearn - if: inputs.ARCHITECTURE == 'amd64' + if: >- + inputs.ARCHITECTURE == 'amd64' && + ( + inputs.MODE == 'full' || + inputs.MODE == 'axlearn' + ) runs-on: eks env: 
AXLEARN_DOCKER_IMAGE: ${{ needs.build-axlearn.outputs.DOCKER_TAG_FINAL }} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 004cacc59..a2294fc9c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -40,6 +40,18 @@ on: PACKAGE∊{JAX,XLA,Flax,transformer-engine,airio,axlearn,equinox,T5X,maxtext,google-jetstream} (case-insensitive) default: '' required: false + MODE: + type: choice + description: | + This option is to run just specific part in the _ci step. + - full - everything will be run (default) + - jax - run all the tests related to jax system + - te - run all the tests related to TE + - t5x - run build rosetta + - maxtext - run only the tests for maxtext + - axlearn - run only the tests for axlearn + options: [full, jax, te, t5x, maxtext, axlearn] + default: full concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} @@ -197,6 +209,7 @@ jobs: CUDA_IMAGE: ${{ needs.metadata.outputs.CUDA_IMAGE }} MANIFEST_ARTIFACT_NAME: ${{ needs.metadata.outputs.MANIFEST_ARTIFACT_NAME }} SOURCE_URLREFS: ${{ needs.bump-manifest.outputs.SOURCE_URLREFS }} + MODE: ${{ github.event_name == 'workflow_dispatch' && inputs.MODE || 'full' }} secrets: inherit arm64: @@ -208,6 +221,7 @@ jobs: CUDA_IMAGE: ${{ needs.metadata.outputs.CUDA_IMAGE }} MANIFEST_ARTIFACT_NAME: ${{ needs.metadata.outputs.MANIFEST_ARTIFACT_NAME }} SOURCE_URLREFS: ${{ needs.bump-manifest.outputs.SOURCE_URLREFS }} + MODE: ${{ github.event_name == 'workflow_dispatch' && inputs.MODE || 'full' }} secrets: inherit # Only merge if everything succeeds