#!/bin/bash --login
#SBATCH --job-name=1GPUSharedNode
#SBATCH --partition=gpu
#SBATCH --nodes=1 #1 node in this example
#SBATCH --gres=gpu:1 #1 GPU per node (1 "allocation-pack" in total for the job)
#SBATCH --time=00:05:00
#SBATCH --account=<yourProject>-gpu #IMPORTANT: use your own project and the -gpu suffix
#(Note that there is no request for exclusive access to the node)
#----
#Loading needed modules (adapt this for your own purposes):
module load PrgEnv-cray
module load rocm craype-accel-amd-gfx90a
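#(Optional: rocm-smi, provided by the rocm module, can be uncommented to list the GPU(s)
# visible to this allocation:)
#rocm-smi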
echo -e "\n\n#------------------------#"
module list
#----
#Printing the status of the given allocation
echo -e "\n\n#------------------------#"
echo "Printing from scontrol:"
scontrol show job ${SLURM_JOBID}
#----
#Definition of the executable (we assume the example code has been compiled and is available in $MYSCRATCH):
exeDir=$MYSCRATCH/hello_jobstep
exeName=hello_jobstep
theExe=$exeDir/$exeName
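#(Optional guard, a minimal sketch assuming the paths above: abort early if the binary is missing:)
if [ ! -x "${theExe}" ]; then
   echo "Error: ${theExe} not found or not executable; compile the hello_jobstep example first" >&2
   exit 1
fi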
#----
#MPI & OpenMP settings
#export MPICH_GPU_SUPPORT_ENABLED=1 #Not needed for 1 GPU: this allows for GPU-aware MPI communication among GPUs
export OMP_NUM_THREADS=1 #This controls the number of CPU cores per task actually used by the executable
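#(Optional: runtimes supporting OMP_DISPLAY_AFFINITY (OpenMP 5.0+) can print each thread's
# placement at startup; uncomment to verify the CPU binding:)
#export OMP_DISPLAY_AFFINITY=TRUE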
#----
#Execution
#Note: srun needs the explicit indication of the full set of parameters for the use of resources in the job step.
#      These are independent of the allocation parameters (which are not inherited by srun).
#      For optimal GPU binding using Slurm options,
#      "--gpus-per-task=1" and "--gpu-bind=closest" bind each task to its closest GPU.
#      (Although in this case this can be omitted, as only 1 "allocation-pack" has been requested.)
# (The "-l" option is for displaying, at the beginning of each line, the taskID that generates the output.)
# (The "-u" option is for unbuffered output, so that output is displayed as soon as it's generated.)
# (If the output needs to be sorted for clarity, then add "| sort -n" at the end of the command, as done below.)
echo -e "\n\n#------------------------#"
echo "Test code execution:"
srun -l -u -N 1 -n 1 -c 8 --gres=gpu:1 --gpus-per-task=1 --gpu-bind=closest ${theExe} | sort -n
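#(Optional: report the exit status of the job step; with the pipe above, "${PIPESTATUS[0]}"
# holds the status of srun itself rather than that of sort:)
echo "srun exit status: ${PIPESTATUS[0]}"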
#----
#Printing information of finished job steps:
echo -e "\n\n#------------------------#"
echo "Printing information of finished job steps using sacct:"
sacct -j ${SLURM_JOBID} -o jobid%20,Start%20,elapsed%20
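#(Optional: the AllocTRES field of sacct can be added to also list the resources, including
# GPUs, allocated to each step; uncomment if useful:)
#sacct -j ${SLURM_JOBID} -o jobid%20,Start%20,elapsed%20,AllocTRES%60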
#----
#Done
echo -e "\n\n#------------------------#"
echo "Done"