Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,41 @@ jobs:
run: |
  source ./env-setup/12.8_env_setup.sh
  rm -rf ./gpu-simulator/gpgpu-sim

  # Clone upstream gpgpu-sim, then prefer a same-named branch from the
  # matching owner's gpgpu-sim repository; otherwise fall back to upstream dev.
  echo "Cloning gpgpu-sim with fork-aware branch selection..."
  sim_dir=./gpu-simulator/gpgpu-sim
  git clone --quiet git@github.com:accel-sim/gpgpu-sim_distribution.git "$sim_dir"

  # Single fallback path shared by every "could not use the fork" case.
  checkout_upstream_dev() {
    git -C "$sim_dir" checkout -B dev origin/dev
  }

  # For pull requests the owner comes from the PR head repository (the fork);
  # for every other event it is the owner of the repository running the workflow.
  # The expressions are single-quoted so the expanded text is never word-split.
  if [[ '${{ github.event_name }}' == 'pull_request' ]]; then
    head_repo='${{ github.event.pull_request.head.repo.full_name }}'
  else
    head_repo='${{ github.repository }}'
  fi
  base_repo='${{ github.repository }}'
  current_owner=${head_repo%%/*}
  current_repo=${base_repo#*/}

  # Map the framework repository name onto the simulator repository name.
  gpgpusim_repo=${current_repo/accel-sim-framework/gpgpu-sim_distribution}

  # NOTE(review): BRANCH_NAME is not set in this step; presumably it comes from
  # a workflow-level env block outside this hunk - confirm.
  echo "Attempting to checkout branch '$BRANCH_NAME' from '$current_owner/$gpgpusim_repo'"

  # First, try to add the fork owner's repository as a remote and check if the branch exists
  if git -C "$sim_dir" remote add fork-owner "git@github.com:$current_owner/$gpgpusim_repo.git" 2>/dev/null; then
    # Ask the remote for exactly refs/heads/<branch>. --exit-code makes git
    # return non-zero when nothing matches, so a branch such as "dev-foo" can
    # no longer satisfy a lookup for "dev" (the old grep was a substring match).
    if [[ -n "$BRANCH_NAME" ]] &&
       git -C "$sim_dir" ls-remote --exit-code --heads fork-owner "refs/heads/$BRANCH_NAME" >/dev/null; then
      echo "Found branch '$BRANCH_NAME' in '$current_owner/$gpgpusim_repo' repository, checking it out"
      git -C "$sim_dir" fetch fork-owner
      git -C "$sim_dir" checkout -B "$BRANCH_NAME" "fork-owner/$BRANCH_NAME"
    else
      echo "Branch '$BRANCH_NAME' not found in '$current_owner/$gpgpusim_repo' repository, falling back to upstream dev branch"
      checkout_upstream_dev
    fi
    # Remove the temporary remote
    git -C "$sim_dir" remote remove fork-owner
  else
    echo "Could not add '$current_owner/$gpgpusim_repo' remote, falling back to upstream dev branch"
    checkout_upstream_dev
  fi

  source ./gpu-simulator/setup_environment.sh
  make clean -C gpu-simulator
  make -j20 -C gpu-simulator
Expand Down
86 changes: 68 additions & 18 deletions .github/workflows/weekly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ on:
# push:
schedule:
- cron: '0 20 * * FRI' # 8:00 PM every Friday

# Workflow-wide environment.
# BRANCH_NAME uses github.head_ref when it is non-empty and otherwise falls back
# to github.ref_name (the `||` picks the first truthy operand). The gpgpu-sim
# build step below reads it to choose which simulator branch to check out.
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
jobs:
Tracer-Weekly:
timeout-minutes: 720
if: ${{ github.repository == 'accel-sim/accel-sim-framework' || github.event_name == 'workflow_dispatch' }}
runs-on: tgrogers-gpu01
defaults:
Expand Down Expand Up @@ -38,6 +40,7 @@ jobs:
git -C ./gpu-app-collection/ submodule update --init -- ./src/cuda/cuda-samples
source ./gpu-app-collection/src/setup_environment
ln -s /home/tgrogers-raid/a/common/data_dirs ./gpu-app-collection/
make -j8 -C ./gpu-app-collection/src rodinia_2.0-ft
make -j8 -C ./gpu-app-collection/src rodinia-3.1
make -j8 -C ./gpu-app-collection/src GPU_Microbenchmark
# make -j8 -C ./gpu-app-collection/src Deepbench_nvidia
Expand All @@ -49,30 +52,22 @@ jobs:
source ./env-setup/12.8_env_setup.sh
source ./gpu-app-collection/src/setup_environment
rm -rf ./hw_run/
./util/tracer_nvbit/run_hw_trace.py -B rodinia_2.0-ft,rodinia-3.1,GPU_Microbenchmark -D 7
rm -rf /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces
mkdir -p /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces
ln -s /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces ./hw_run
./util/tracer_nvbit/run_hw_trace.py -B rodinia-3.1,GPU_Microbenchmark -D 7
# ./util/tracer_nvbit/run_hw_trace.py -B rodinia-3.1,GPU_Microbenchmark,parboil,polybench,cutlass_5_trace,Deepbench_nvidia_tencore,Deepbench_nvidia_normal -D 7
mv ./hw_run /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run
# ./util/tracer_nvbit/run_hw_trace.py -B rodinia_2.0-ft,rodinia-3.1,GPU_Microbenchmark,parboil,polybench,cutlass_5_trace,Deepbench_nvidia_tencore,Deepbench_nvidia_normal -D 7
- name: generate-spinlock-traces-spinlock_handling
run: |
source ./env-setup/12.8_env_setup.sh
source ./gpu-app-collection/src/setup_environment
rm -rf ./hw_run/
./util/tracer_nvbit/run_hw_trace.py -B Spinlock -D 7 --spinlock_handling fast_forward
mv ./hw_run ./hw_run_fast_forward
mv ./hw_run /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run_fast_forward
./util/tracer_nvbit/run_hw_trace.py -B Spinlock -D 7 --spinlock_handling none
mv ./hw_run ./hw_run_none
- name: test-new-traces-spinlock_handling
# Test only fast-forwarded traces as the none one takes too long to run (~2-3 hr)
run: |
source ./env-setup/12.8_env_setup.sh
source ./gpu-simulator/setup_environment.sh
./util/job_launching/run_simulations.py -B Spinlock -C QV100-SASS -T ./hw_run_fast_forward/traces/device-7/ -N spinlock-microbenchmark-$$-fast_forward
./util/job_launching/monitor_func_test.py -I -v -s spinlock-stats-per-app.csv -N spinlock-microbenchmark-$$-fast_forward
# ./util/job_launching/run_simulations.py -B Spinlock -C QV100-SASS -T ./hw_run_none/traces/device-7/ -N spinlock-microbenchmark-$$-none
# ./util/job_launching/monitor_func_test.py -I -v -s spinlock-stats-per-app.csv -N spinlock-microbenchmark-$$-none
mv ./hw_run /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run_none
SASS-Weekly:
timeout-minutes: 720
needs: [Tracer-Weekly]
if: ${{ github.repository == 'accel-sim/accel-sim-framework' || github.event_name == 'workflow_dispatch' }}
runs-on: tgrogers-raid
Expand All @@ -93,14 +88,69 @@ jobs:
run: |
  source ./env-setup/12.8_env_setup.sh
  rm -rf ./gpu-simulator/gpgpu-sim

  # Clone upstream gpgpu-sim, then prefer a same-named branch from the
  # repository owner's gpgpu-sim repository; otherwise fall back to upstream dev.
  echo "Cloning gpgpu-sim with fork-aware branch selection..."
  sim_dir=./gpu-simulator/gpgpu-sim
  git clone --quiet git@github.com:accel-sim/gpgpu-sim_distribution.git "$sim_dir"

  # Single fallback path shared by every "could not use the fork" case.
  checkout_upstream_dev() {
    git -C "$sim_dir" checkout -B dev origin/dev
  }

  # GITHUB_REPOSITORY is "<owner>/<repo>"; split it with parameter expansion.
  current_owner=${GITHUB_REPOSITORY%%/*}
  current_repo=${GITHUB_REPOSITORY#*/}

  # Map the framework repository name onto the simulator repository name.
  gpgpusim_repo=${current_repo/accel-sim-framework/gpgpu-sim_distribution}

  # BRANCH_NAME comes from the workflow-level env block.
  echo "Attempting to checkout branch '$BRANCH_NAME' from '$current_owner/$gpgpusim_repo'"

  # First, try to add the fork owner's repository as a remote and check if the branch exists
  if git -C "$sim_dir" remote add fork-owner "git@github.com:$current_owner/$gpgpusim_repo.git" 2>/dev/null; then
    # Ask the remote for exactly refs/heads/<branch>. --exit-code makes git
    # return non-zero when nothing matches, so a branch such as "dev-foo" can
    # no longer satisfy a lookup for "dev" (the old grep was a substring match).
    if [[ -n "$BRANCH_NAME" ]] &&
       git -C "$sim_dir" ls-remote --exit-code --heads fork-owner "refs/heads/$BRANCH_NAME" >/dev/null; then
      echo "Found branch '$BRANCH_NAME' in '$current_owner/$gpgpusim_repo' repository, checking it out"
      git -C "$sim_dir" fetch fork-owner
      git -C "$sim_dir" checkout -B "$BRANCH_NAME" "fork-owner/$BRANCH_NAME"
    else
      echo "Branch '$BRANCH_NAME' not found in '$current_owner/$gpgpusim_repo' repository, falling back to accel-sim dev branch"
      checkout_upstream_dev
    fi
    # Remove the temporary remote
    git -C "$sim_dir" remote remove fork-owner
  else
    echo "Could not add '$current_owner/$gpgpusim_repo' remote, falling back to upstream dev branch"
    checkout_upstream_dev
  fi

  source ./gpu-simulator/setup_environment.sh
  make clean -C gpu-simulator
  make -j -C gpu-simulator
- name: run SASS
run: |
source ./env-setup/12.8_env_setup.sh
source ./gpu-simulator/setup_environment.sh
ln -s /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces ./hw_run
# ./util/job_launching/run_simulations.py -B rodinia-3.1,GPU_Microbenchmark,sdk-4.2-scaled,parboil,polybench,cutlass_5_trace,Deepbench_nvidia_tencore,Deepbench_nvidia_normal -C QV100-SASS-5B_INSN -T ./hw_run/traces/device-7/12.8 -N weekly-$$ -M 70G
./util/job_launching/run_simulations.py -B rodinia-3.1,GPU_Microbenchmark -C QV100-SASS-5B_INSN -T ./hw_run/traces/device-7/12.8 -N weekly-$$ -M 70G
ln -s /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run ./hw_run
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,rodinia-3.1,GPU_Microbenchmark -C QV100-SASS -T ./hw_run/traces/device-7/12.8 -N weekly-$$ -M 70G
./util/job_launching/monitor_func_test.py -T 12 -S 1800 -I -v -s weekly-stats-per-app.csv -N weekly-$$
- name: test-new-traces-spinlock_handling
  # Only the fast_forward spinlock traces are simulated here; the "none"
  # variant is left commented out because it takes too long to run (~2-3 hr).
  run: |
    source ./env-setup/12.8_env_setup.sh
    source ./gpu-simulator/setup_environment.sh

    # Shared location the tracer job moved its spinlock traces to.
    shared_traces=/scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces
    # $$ (the shell PID) keeps the run name unique per invocation of this step.
    ff_run=spinlock-microbenchmark-$$-fast_forward

    ./util/job_launching/run_simulations.py -B Spinlock -C QV100-SASS -T $shared_traces/hw_run_fast_forward/traces/device-7/ -N $ff_run
    ./util/job_launching/monitor_func_test.py -I -v -s spinlock-stats-per-app.csv -N $ff_run
    # ./util/job_launching/run_simulations.py -B Spinlock -C QV100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run_none/traces/device-7/ -N spinlock-microbenchmark-$$-none
    # ./util/job_launching/monitor_func_test.py -I -v -s spinlock-stats-per-app.csv -N spinlock-microbenchmark-$$-none
# Failure-notification job: depends on both weekly jobs and runs only when the
# failure() status check is true, i.e. when one of the jobs it needs has failed.
failures:
  if: failure()
  env:
    # Direct link to this workflow run, built from the github context.
    ACTION_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
    # Intentionally empty here.
    # NOTE(review): presumably read by send_ci_email.py - confirm.
    REPORT_URL: ""
  runs-on: tgrogers-raid
  needs: [Tracer-Weekly, SASS-Weekly]
  steps:
    - uses: actions/checkout@v4
    - name: Notify Failure
      run: |
        # Setup envs
        git clone --quiet --branch cluster-ubuntu git@github.com:purdue-aalp/env-setup.git
        source ./env-setup/common/common_inc.sh
        # Overrides the workflow-level BRANCH_NAME for this step only.
        # NOTE(review): presumably used as the label in the e-mail - confirm.
        export BRANCH_NAME="Weekly Tests"
        python3 .github/scripts/send_ci_email.py -t failure
Loading