Commit 0ae01c3e authored by Papatheodore, Thomas's avatar Papatheodore, Thomas
Browse files

added more Makefiles and updated the README

parent 588d5341
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
# Compiler driver used for both compiling and linking
# (presumably the Cray PE C++ wrapper -- confirm for your system).
COMP := CC

# C++11 with OpenMP enabled, plus HIP-related preprocessor defines.
# Alternative that additionally defines __HIP_ARCH_GFX908__=1:
#FLAGS := -std=c++11 -fopenmp -D__HIP_PLATFORM_HCC__ -D__HIP_ROCclr__ -D__HIP_ARCH_GFX908__=1
FLAGS := -std=c++11 -fopenmp -D__HIP_PLATFORM_HCC__ -D__HIP_ROCclr__

# HIP headers and runtime library, located through HIP_PATH. HIP_PATH is not
# assigned in this file; it must come from the environment or the make
# command line.
INCLUDES  := -I$(HIP_PATH)/include
LIBRARIES := -L$(HIP_PATH)/lib -lamdhip64

# Link the executable from its object file.
# Libraries are placed AFTER the objects: linkers resolve symbols left to
# right, so a library listed before the object that needs it can be skipped
# (static archives, or shared libraries when --as-needed is in effect).
# $^ is the prerequisite list (hello_jobstep.o) and $@ is the target name.
hello_jobstep: hello_jobstep.o
	$(COMP) $(FLAGS) $^ -o $@ $(LIBRARIES)

# Compile the single source file into its object file.
# $< is the source (hello_jobstep.cpp); $@ is the object being produced.
hello_jobstep.o: hello_jobstep.cpp
	$(COMP) $(FLAGS) $(INCLUDES) -c $< -o $@

# clean is a command, not a file: declare it phony so that a stray file named
# "clean" cannot make the target look up to date.
.PHONY: clean

# Remove the executable and every object file in this directory.
# $(RM) is make's built-in "rm -f".
clean:
	$(RM) hello_jobstep *.o

Makefile.crayMPI.hipcc

0 → 100644
+16 −0
Original line number Diff line number Diff line
# Compiler driver used for both compiling and linking.
COMP  = hipcc

# GPU code is generated for gfx906 and gfx908; OpenMP is enabled.
FLAGS = --amdgpu-target=gfx906,gfx908 -fopenmp

# Root of the MPI installation. Kept in one place so the include and library
# paths cannot drift apart; override for another system or version with
#   make MPI_PREFIX=/path/to/mpi
MPI_PREFIX = /opt/cray/pe/cray-mvapich2_nogpu/2.3.4/infiniband/cray/10.0

# MPI headers and library, both derived from MPI_PREFIX.
INCLUDES  = -I$(MPI_PREFIX)/include
LIBRARIES = -L$(MPI_PREFIX)/lib -lmpich

# Link the executable from its object file.
# Libraries are placed AFTER the objects: linkers resolve symbols left to
# right, so a library listed before the object that needs it can be skipped
# (static archives, or shared libraries when --as-needed is in effect).
# $^ is the prerequisite list (hello_jobstep.o) and $@ is the target name.
hello_jobstep: hello_jobstep.o
	$(COMP) $(FLAGS) $^ -o $@ $(LIBRARIES)

# Compile the single source file into its object file.
# $< is the source (hello_jobstep.cpp); $@ is the object being produced.
hello_jobstep.o: hello_jobstep.cpp
	$(COMP) $(FLAGS) $(INCLUDES) -c $< -o $@

# clean is a command, not a file: declare it phony so that a stray file named
# "clean" cannot make the target look up to date.
.PHONY: clean

# Remove the executable and every object file in this directory.
# $(RM) is make's built-in "rm -f".
clean:
	$(RM) hello_jobstep *.o
+0 −0

File moved.

+11 −2
Original line number Diff line number Diff line
@@ -4,7 +4,16 @@ For each job step launched with a job launcher, this program prints the hardware

## Compiling

To compile, you'll need to have HIP and MPI installed, and use a OpenMP-capable compiler. Modify the Makefile accordingly.
To compile, you'll need to have HIP and MPI installed, and you'll need to use an OpenMP-capable compiler. Modify the Makefile accordingly.

### MPI + Compiler + HIP Combinations

<b>CrayMPI + Cray Clang + ROCm  --> Makefile.crayMPI.crayClang</b>
* Requires ROCm <= v3.8 due to incompatibilities with the latest Cray compilers

<b>CrayMPI + hipcc + ROCm       --> Makefile.crayMPI.hipcc</b>

<b>OpenMPI + hipcc + ROCm       --> Makefile.openMPI.hipcc</b>

## Usage

@@ -12,4 +21,4 @@ To run, simply launch the executable with your favorite job launcher.

> NOTE: `HIP_VISIBLE_DEVICES` must be set.

> [OPTIONAL]: On Lyra, the current Slurm doesn't easily allow for fine-grained process/thread placement so an example mapping script is also included in this repo. It can be modified and called "in front of" `hello_jobstep` (or any other executable really). The script uses `numactl` to map hardware threads and GPUs to node-local MPI ranks. NOTE: You will need to use the `srun` argument `--ntasks_per_gpu` with this script.
> [OPTIONAL] An example mapping script is also included in this repo for an optional heavy-handed approach to process/thread mapping. It can be modified and called "in front of" `hello_jobstep` (or any other executable really). The script uses `numactl` to map hardware threads and GPUs to node-local MPI ranks. NOTE: You will need to use the `srun` argument `--ntasks-per-node` with this script, because the script reads `SLURM_NTASKS_PER_NODE`.
+22 −9
Original line number Diff line number Diff line
#!/bin/bash

#------------------------------------------------------
# You'll need to read in more command line args if your
# executable takes arguments
# Set the executable name from the first command line 
# argument to this script
#
# NOTE: You'll need to read in more command line args 
# if your executable takes arguments
#------------------------------------------------------
# Executable to launch: the first positional argument of this script.
APP="${1}"

#------------------------------------------------------
# The number of node-local MPI ranks
# The `--ntasks_per_node` flag to srun should be used
# Set the number of node-local MPI ranks
#
# NOTE: The `--ntasks-per-node` flag to srun must be 
# used to set SLURM_NTASKS_PER_NODE.
#------------------------------------------------------
# Fail early with a clear message when either Slurm variable is missing,
# rather than letting the arithmetic below die with a cryptic
# "operand expected" syntax error.
: "${SLURM_PROCID:?SLURM_PROCID is not set; launch this script through srun}"
: "${SLURM_NTASKS_PER_NODE:?SLURM_NTASKS_PER_NODE is not set; pass --ntasks-per-node to srun}"

# Node-local rank index: the global rank modulo the number of ranks per node.
lrank=$(( SLURM_PROCID % SLURM_NTASKS_PER_NODE ))

#------------------------------------------------------
# Ideally, the number of hardware threads set below
# for each rank with numactl should be the same as
# OMP_NUM_THREADS
# OpenMP environment variables
#
# NOTE: If you change the number of OpenMP threads, 
# you will also need to change the --physcpubind
# values below. The values given are hardware thread
# IDs, so if you want 1 OpenMP thread per physical
# core, look at the Lyra node diagram and make sure 
# to use only 1 hw thread per physical core for each 
# comma-separated value.
#------------------------------------------------------
# OpenMP runtime settings exported to the launched executable:
# 4 threads per rank, with cores as the OpenMP places.
export OMP_NUM_THREADS=4 OMP_PLACES=cores

#------------------------------------------------------
# Set hardware threads and GPUs for each node-local
# MPI rank. NOTE: For more than 4 MPI ranks per node, 
# Set hardware thread IDs and GPUs for each node-local
# MPI rank. 
#
# NOTE: For more than 4 MPI ranks per node, 
# additional cases would need to be added.
#------------------------------------------------------
case ${lrank} in
Loading