7 changes: 4 additions & 3 deletions containers/singularity/get_interactive_fugaku_node.sh
@@ -3,8 +3,9 @@ rm -rf hosts master_success
NPROC=2
PJSUB_ARGS=(
    -L elapse=1:30:00
    -L node=${NPROC}
    --mpi proc=${NPROC}
    -L node="${NPROC}"
    # -L freq=2200 -L throttling_state=0 -L issue_state=0 -L ex_pipe_state=0 -L eco_state=0 -L retention_state=0
    --mpi proc="${NPROC}"
    --interact
    --sparam wait-time=60
    -x PJM_LLIO_GFSCACHE=/vol0004
@@ -13,4 +14,4 @@ PJSUB_ARGS=(
    --llio localtmp-size=20Gi
    -L jobenv=singularity
)
pjsub ${PJSUB_ARGS[@]}
pjsub "${PJSUB_ARGS[@]}"
84 changes: 84 additions & 0 deletions containers/singularity/multi_level.sh
@@ -0,0 +1,84 @@
#!/bin/bash

# set -x


NGROUP=${1:-1}
PART_SIZE=$(( $PJM_MPI_PROC / $NGROUP ))
GROUP_RANK=$(( $PMIX_RANK / $PART_SIZE ))
SUB_RANK=$(( $PMIX_RANK % $PART_SIZE ))

OUTFILES=outfiles/${PJM_JOBID}
mkdir -p ${OUTFILES}
TMPFILES=tmpfile/${PJM_JOBID}
mkdir -p ${TMPFILES}
OUTLOGDIR=${OUTFILES}/log
mkdir -p ${OUTLOGDIR}
OUTWORKDIR=${OUTFILES}/work
mkdir -p ${OUTWORKDIR}
OUTRUNDIR=${OUTFILES}/run
mkdir -p ${OUTRUNDIR}

HOSTS_FILE=${TMPFILES}/hosts-${PJM_JOBID}-${GROUP_RANK}
MASTER_FILE=${TMPFILES}/master-${PJM_JOBID}-${GROUP_RANK}

start_master() {
    SINGULARITY_ARGS=(
        --bind $(mktemp -d ${OUTRUNDIR}/$(hostname)_XXXX):/run
        --bind log/:/opt/spark-2.2.0-bin-hadoop2.6/logs
        --bind work/:/opt/spark-2.2.0-bin-hadoop2.6/work
        --bind ${HOSTS_FILE}:/etc/hosts
        --bind data:/tmp/data
        --disable-cache
    )
    singularity \
        instance start ${SINGULARITY_ARGS[@]} \
        sparkleblast_latest.sif spark-process

    echo "starting master on node: $(hostname)"
    singularity exec --env SPARK_MASTER_HOST=$(hostname) \
        instance://spark-process \
        /opt/spark-2.2.0-bin-hadoop2.6/sbin/start-master.sh

    sleep 5

    echo $(hostname) > ${MASTER_FILE}
}

start_worker () {
    echo WORKER: $NGROUP
    while [ ! -e ${MASTER_FILE} ]; do
        sleep 1
    done
    singularity instance start ${SINGULARITY_ARGS[@]} sparkleblast_latest.sif spark-process
    sleep 5
    master_url="spark://$(head -n1 ${MASTER_FILE}):7077"
    echo "Starting worker on node: $(hostname)"
    singularity exec instance://spark-process /opt/spark-2.2.0-bin-hadoop2.6/sbin/start-slave.sh ${master_url}

    while [ -e ${MASTER_FILE} ]; do
        sleep 1
    done
}

start_cluster () {
    if [ ${SUB_RANK} -eq "0" ]; then
        start_master
    else
        start_worker
    fi
}

split -n$NGROUP -d -a1 hosts hosts-${PJM_JOBID}-

OF_PROC=${OUTPUT_DIR}/${PJM_JOBID}-${NAME}/mpi

mkdir -p log run work $(dirname ${OF_PROC})

mpiexec -of-proc ${OF_PROC} ./gatherhosts_ips hosts-${PJM_JOBID}
mpiexec -of-proc ${OF_PROC} ./multi_level.sh &
bash -x ./run_spark_jobs.sh ${DBFILE} ${QUERYFILE}
# mpiexec -of-proc \${OF_PROC} ./stop_spark_cluster.sh &
rm -rf master_success-${PJM_JOBID}
echo FSUB IS DONE
EOF
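multi_level.sh splits the PJM_MPI_PROC ranks of one job into NGROUP independent Spark clusters: integer division of PMIX_RANK by PART_SIZE picks the group, and the remainder decides whether a rank becomes that group's master (sub-rank 0) or a worker. A minimal sketch of that arithmetic with illustrative values (8 ranks, 2 groups; numbers are not from the repository):

PJM_MPI_PROC=8
NGROUP=2
PART_SIZE=$(( PJM_MPI_PROC / NGROUP ))          # 4 ranks per group
for PMIX_RANK in $(seq 0 $(( PJM_MPI_PROC - 1 ))); do
    GROUP_RANK=$(( PMIX_RANK / PART_SIZE ))     # which Spark cluster the rank joins
    SUB_RANK=$(( PMIX_RANK % PART_SIZE ))       # 0 starts the master, others start workers
    echo "rank ${PMIX_RANK}: group ${GROUP_RANK}, sub-rank ${SUB_RANK}"
done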
12 changes: 0 additions & 12 deletions containers/singularity/split.sh

This file was deleted.

45 changes: 41 additions & 4 deletions containers/singularity/start_spark_cluster.sh
@@ -1,9 +1,46 @@
#!/bin/bash

rm -f master_success-${PJM_JOBID}
if [ ${PMIX_RANK} -eq "0" ]; then
bash ./start_spark_master.sh &
master_node=$(hostname)

OUT_DIR=$(realpath "${PJM_STDOUT_PATH%.*}")
LOG_DIR=${OUT_DIR}/$PMIX_RANK/log
RUN_DIR=${OUT_DIR}/$PMIX_RANK/run
WORK_DIR=${OUT_DIR}/$PMIX_RANK/work
mkdir -p "$LOG_DIR" "$RUN_DIR" "$WORK_DIR"

SINGULARITY_ARGS=(
    --bind "$RUN_DIR:/run"
    --bind "$LOG_DIR:/opt/spark-2.2.0-bin-hadoop2.6/logs"
    --bind "$WORK_DIR:/opt/spark-2.2.0-bin-hadoop2.6/work"
    --bind "hosts-${PJM_JOBID}:/etc/hosts"
    --bind data:/tmp/data
    --disable-cache
    --cleanenv
)

start_master () {
    singularity instance start "${SINGULARITY_ARGS[@]}" sparkleblast_latest.sif spark-process
    echo "starting master on node: ${master_node}"
    singularity exec --env SPARK_MASTER_HOST="${master_node}" instance://spark-process /opt/spark-2.2.0-bin-hadoop2.6/sbin/start-master.sh
    sleep 5
    echo "${master_node}" > "master_success-${PJM_JOBID}"
}

start_worker () {
    while [ ! -e "master_success-${PJM_JOBID}" ]; do sleep 1; done
    master_node=$(head -n 1 "master_success-${PJM_JOBID}")
    singularity instance start "${SINGULARITY_ARGS[@]}" sparkleblast_latest.sif spark-process
    sleep 5
    master_url="spark://${master_node}:7077"
    echo "Starting worker on node: $(hostname)"
    singularity exec instance://spark-process /opt/spark-2.2.0-bin-hadoop2.6/sbin/start-slave.sh "${master_url}"
    while [ -e "master_success-${PJM_JOBID}" ]; do sleep 1; done
}

rm -f "master_success-${PJM_JOBID}"
if [ "${PMIX_RANK}" -eq "0" ]; then
    start_master &
else
    bash ./start_spark_workers.sh &
    start_worker &
fi
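The rewritten script folds the old start_spark_master.sh/start_spark_workers.sh pair into two functions that rendezvous through the master_success-${PJM_JOBID} file: rank 0 writes the master hostname once start-master.sh has run, the other ranks poll for that file, read the hostname, and attach to spark://<master>:7077, and removing the file later releases the workers' final wait loop. A stripped-down sketch of that file-based handshake, with a hypothetical SYNC_FILE path and RANK variable (not from the repository):

SYNC_FILE=/tmp/spark-master-demo
if [ "${RANK:-0}" -eq 0 ]; then
    hostname > "$SYNC_FILE"                        # master side: publish the hostname
else
    while [ ! -e "$SYNC_FILE" ]; do sleep 1; done  # worker side: wait for the master
    master_node=$(head -n 1 "$SYNC_FILE")
    echo "worker would connect to spark://${master_node}:7077"
fi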

22 changes: 0 additions & 22 deletions containers/singularity/start_spark_master.sh

This file was deleted.

29 changes: 0 additions & 29 deletions containers/singularity/start_spark_workers.sh

This file was deleted.

8 changes: 4 additions & 4 deletions containers/singularity/test_bigrun.sh
@@ -1,5 +1,5 @@
CLEARALL=no ./fsub.sh nt non-rRNA-reads.fa 16 12:00:00
CLEARALL=no ./fsub.sh nt non-rRNA-reads.fa 32 12:00:00
CLEARALL=no ./fsub.sh nt non-rRNA-reads.fa 64 12:00:00
CLEARALL=no ./fsub.sh nt non-rRNA-reads.fa 128 12:00:00
CLEARALL=no ./fsub.sh nt non-rRNA-reads.fa 16 24:00:00
CLEARALL=no ./fsub.sh nt non-rRNA-reads.fa 32 24:00:00
CLEARALL=no ./fsub.sh nt non-rRNA-reads.fa 64 24:00:00
CLEARALL=no ./fsub.sh nt non-rRNA-reads.fa 128 24:00:00

2 changes: 1 addition & 1 deletion containers/singularity/test_run.sh
@@ -1 +1 @@
CLEARALL=yes ./fsub.sh non-rRNA-reads.fa sample_text.fa 4
CLEARALL=no ./fsub.sh non-rRNA-reads.fa sample_text.fa 4
136 changes: 136 additions & 0 deletions containers/singularity/tiny.sh
@@ -0,0 +1,136 @@
#!/bin/bash
#PJM -g ra000012
#PJM -x PJM_LLIO_GFSCACHE=/vol0004
#PJM -L elapse=10:00
#PJM -L node=2
#PJM --mpi proc=8
#PJM -o ttiny/%j.stdout
#PJM -e ttiny/%j.stderr

# shellcheck disable=2034
# echo PJM_STDOUT_PATH $PJM_STDOUT_PATH
# echo PJM_STDERR_PATH $PJM_STDERR_PATH

set -x

# $PJM_NODE $PJM_MPI_PROC
if [[ "$PJM_ENVIRONMENT" = "BATCH" ]]; then
PLE_MPI_STD_EMPTYFILE="off"
NUM_SEGMENTS=${PJM_NODE}
OUT_DIR=$(realpath "${PJM_STDOUT_PATH%.*}") # ttiny/123
else
NUM_SEGMENTS=3
OUT_DIR=$(realpath ./foobar)
fi
DB_FILE=$(realpath data/non-rRNA-reads.fa)
QUERY_FILE=$(realpath data/g50.fasta)

seg_dir() { echo "$(realpath "${OUT_DIR}")/seg$1"; }
seg_tmp_dir() { echo "$(seg_dir "$1")/tmp"; }
seg_mpi_dir() { echo "$(seg_dir "$1")/out"; }
seg_of_proc() { echo "$(seg_mpi_dir "$1")/mpi"; }
seg_hosts_file() { echo "$(seg_tmp_dir "$1")/hosts"; }
seg_vcoord_file() { echo "$(seg_tmp_dir "$SEG")/vcoord"; }

# seg_master_file() { echo "$(seg_tmp_dir "$SEG")/master"; }
# seg_run_dir() { echo "$(seg_tmp_dir "$SEG")/run"; }
# seg_log_dir() { echo "$(seg_tmp_dir "$SEG")/log"; }
# seg_work_dir() { echo "$(seg_tmp_dir "$SEG")/work"; }

mkdirs () {
    DIRS=(
        "$(seg_tmp_dir "$SEG")"
        "$(seg_mpi_dir "$SEG")"
        # "$(seg_run_dir "$SEG")"
        # "$(seg_log_dir "$SEG")"
        # "$(seg_work_dir "$SEG")"
    )
    mkdir -p "${DIRS[@]}"
}

mkvcoord () {
    yes "(${SEG})" | head -n $group_size > "$(seg_vcoord_file "${SEG}")"
}

mpi_with_args () {
    MPI_ARGS=(
        -of-proc "$(seg_of_proc "$SEG")"
        --vcoordfile "$(seg_vcoord_file "${SEG}")"
    )
    mpiexec "${MPI_ARGS[@]}" "$@"
}

split_query () {
    pushd "$OUT_DIR" || exit
    num_lines=$(wc -l "$QUERY_FILE" | cut -f1 -d \ )
    # num_lines=CEIL(num_lines / NUM_SEGMENTS)
    num_lines=$(( ( num_lines + NUM_SEGMENTS - 1) / NUM_SEGMENTS ))
    if [ $(( num_lines % 2 )) -eq 1 ]; then num_lines=$(( num_lines + 1 )); fi
    suffix="-$(basename "$QUERY_FILE")-numseg${NUM_SEGMENTS}"
    split -l $num_lines "$QUERY_FILE" --additional-suffix="$suffix"
    mapfile -t < <(ls ./*"${suffix}"*)
    echo "num files: ${#MAPFILE[@]}"
    for i in "${!MAPFILE[@]}"; do
        echo "$i ${MAPFILE[$i]}"
        seg_query_file[i]="$(seg_dir "$i")/$(basename "${MAPFILE[$i]}")"
        mv "${MAPFILE[$i]}" "${seg_query_file[$i]}"
    done
    popd || exit
}

singularity_instance_start () {
    SINGULARITY_ARGS=(
        --bind "$(seg_run_dir "$1"):/run"
        --bind "$(seg_log_dir "$1"):/opt/spark-2.2.0-bin-hadoop2.6/logs"
        --bind "$(seg_work_dir "$1"):/opt/spark-2.2.0-bin-hadoop2.6/work"
        --bind "$(seg_hosts_file "$1"):/etc/hosts"
        --bind data:/tmp/data
        --disable-cache
        --cleanenv
        sparkleblast_latest.sif
        spark-process
    )
    singularity instance start "${SINGULARITY_ARGS[@]}"
}

start_master () {
    singularity_instance_start "$1"
    singularity exec --env SPARK_MASTER_HOST="$(hostname)" instance://spark-process /opt/spark-2.2.0-bin-hadoop2.6/sbin/start-master.sh
    sleep 5
    hostname > "$(seg_master_file "$1")"
}

start_worker () {
    while [ ! -e "$(seg_master_file "$1")" ]; do sleep 1; done
    master_node=$(head -n 1 "$(seg_master_file "$1")")
    singularity_instance_start "$1"
    sleep 5
    master_url="spark://${master_node}:7077"
    singularity exec instance://spark-process /opt/spark-2.2.0-bin-hadoop2.6/sbin/start-slave.sh "${master_url}"
    while [ -e "$(seg_master_file "$1")" ]; do sleep 1; done
}

start_cluster () {
    if [ "${PMIX_RANK}" -eq "0" ]; then
        start_master "$1"
    else
        start_worker "$1"
    fi
}

group_size=$(( PJM_MPI_PROC / PJM_NODE ))
for SEG in $(seq 0 $(( NUM_SEGMENTS - 1 ))); do
    mkdirs
    mkvcoord
done
split_query
for SEG in $(seq 0 $(( NUM_SEGMENTS - 1 ))); do
    mpi_with_args ./gatherhosts_ips "$(seg_hosts_file "$SEG")"
done
for SEG in $(seq 0 $(( NUM_SEGMENTS - 1 ))); do
    mpi_with_args multi_start.sh "$(seg_tmp_dir "$SEG")"
    # bash -x ./run_spark_jobs.sh ${DBFILE} ${QUERYFILE}
    # # mpiexec -of-proc \${OF_PROC} ./stop_spark_cluster.sh &
    # rm -rf master_success-\${PJM_JOBID}
done
wait
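In tiny.sh, split_query sizes each per-segment query file with a ceiling division and then rounds the line count up to an even number, which keeps two-line FASTA records (header plus one sequence line) intact across split boundaries, assuming the query file uses that layout. A worked example of the arithmetic with illustrative numbers (a 998-line query split over 3 segments; not from the repository):

num_lines=998
NUM_SEGMENTS=3
num_lines=$(( (num_lines + NUM_SEGMENTS - 1) / NUM_SEGMENTS ))               # ceiling(998/3) = 333
if [ $(( num_lines % 2 )) -eq 1 ]; then num_lines=$(( num_lines + 1 )); fi   # 333 is odd -> 334
echo "$num_lines lines per split file"                                       # split -l 334 keeps record pairs together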