#!/bin/bash --login
#SBATCH --account=project-gpu
#SBATCH --partition=gpu
#SBATCH --ntasks=8
#SBATCH --ntasks-per-node=8
#SBATCH --cpus-per-task=8
#SBATCH --sockets-per-node=8
#SBATCH --gpus-per-node=8
#SBATCH --time=00:05:00
#SBATCH --exclusive
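#(The request above asks for 1 exclusive node running 8 tasks, each task getting
# 8 cores and one of the node's 8 GPUs via the wrapper defined further below.)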
#----
#Loading needed modules (adapt this for your own purposes):
module load PrgEnv-cray
module load rocm craype-accel-amd-gfx90a
module list
#----
#MPI & OpenMP settings
export OMP_NUM_THREADS=1 #This controls the actual number of OpenMP threads per task
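#(Note: --cpus-per-task=8 above still reserves a full 8-core group per task, even though
# only 1 thread is actually used; increase OMP_NUM_THREADS up to 8 to use the whole group.)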
#----
#First preliminary "hack": create a selectGPU wrapper to be used for
# binding only 1 GPU to each task spawned by srun
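#(SLURM_LOCALID is the node-local rank of each task, 0-7 here, so every task gets
# ROCR_VISIBLE_DEVICES set to a different GPU index and therefore sees exactly one GPU.)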
wrapper="selectGPU_${SLURM_JOBID}.sh"
cat << EOF > $wrapper
#!/bin/bash
export ROCR_VISIBLE_DEVICES=\$SLURM_LOCALID
exec "\$@"
EOF
chmod +x ./$wrapper
#----
#Second preliminary "hack": define an ordered list of CPU cores (each on a different slurm-socket)
# to be matched with the correct GPU in the srun command via the --cpu-bind option.
CPU_BIND="map_cpu:48,56,16,24,0,8,32,40"
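#(Assumed topology: on these gfx90a nodes each GPU is closest to a different 8-core chiplet (CCD),
# and the list gives the first core of the CCD matching GPUs 0,1,...,7 in that order. Check your
# own node topology, e.g. with "rocm-smi --showtopo", before reusing this map.)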
#----
#Execution
srun -c 8 --cpu-bind=${CPU_BIND} ./$wrapper ./program
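#(Optional sanity check, a sketch assuming taskset is available on the compute nodes:
# uncomment to print each task's bound core and visible GPU.)
#srun -c 8 --cpu-bind=${CPU_BIND} ./$wrapper bash -c 'echo "task ${SLURM_PROCID}: $(taskset -cp $$) ROCR_VISIBLE_DEVICES=${ROCR_VISIBLE_DEVICES}"'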
#----
#Deleting the wrapper
rm -f ./$wrapper