$ salloc -N 1 --gres=gpu:1 -p gpu-dev -A <project>-gpu -t 0:10:00
$ module load rocm craype-accel-amd-gfx90a
$ hipify-perl hello.cu > hello.hip.cpp
$ hipcc --offload-arch=gfx90a hello.hip.cpp -o helloHIP
$ export OMP_NUM_THREADS=1
$ srun -N 1 -n 1 -c 8 --gres=gpu:1 --gpus-per-task=1 --gpu-bind=closest ./helloHIP
Hello from GPU thread 0 in block 0
Hello from GPU thread 1 in block 0
Hello from GPU thread 2 in block 0
Hello from GPU thread 3 in block 0
Hello from GPU thread 4 in block 0
Hello from GPU thread 0 in block 1
Hello from GPU thread 1 in block 1
Hello from GPU thread 2 in block 1
Hello from GPU thread 3 in block 1
Hello from GPU thread 4 in block 1
Hello from GPU thread 0 in block 3
Hello from GPU thread 1 in block 3
Hello from GPU thread 2 in block 3
Hello from GPU thread 3 in block 3
Hello from GPU thread 4 in block 3
Hello from GPU thread 0 in block 2
Hello from GPU thread 1 in block 2
Hello from GPU thread 2 in block 2
Hello from GPU thread 3 in block 2
Hello from GPU thread 4 in block 2
|