Commit 99634760 authored by Papatheodore, Thomas's avatar Papatheodore, Thomas
Browse files

initial commit

parents
Loading
Loading
Loading
Loading

.gitignore

0 → 100644
+2 −0
Original line number Diff line number Diff line
*.o
hello_srun

Makefile

0 → 100644
+16 −0
Original line number Diff line number Diff line
# Build hello_jobstep with hipcc (HIP + OpenMP) on an OLCF system.
COMP  = hipcc
# NOTE(review): --amdgpu-target is deprecated in newer ROCm; the
# replacement is --offload-arch -- confirm against the installed hipcc.
FLAGS = --amdgpu-target=gfx906,gfx908 -fopenmp

# MPI and ROCm-SMI paths come from environment modules (OLCF_OPENMPI_ROOT,
# ROCM_PATH); loading the corresponding modules is required before building.
INCLUDES  = -I$(OLCF_OPENMPI_ROOT)/include -I$(ROCM_PATH)/rocm_smi/include
LIBRARIES = -L$(OLCF_OPENMPI_ROOT)/lib -lmpi -L$(ROCM_PATH)/lib -lrocm_smi64

# Link rule: object files must precede the libraries so that single-pass
# linkers can resolve their symbols against -lmpi / -lrocm_smi64.
hello_jobstep: hello_jobstep.o
	$(COMP) $(FLAGS) hello_jobstep.o $(LIBRARIES) -o $@

# Compile rule: $< is the first prerequisite (the .cpp file).
hello_jobstep.o: hello_jobstep.cpp
	$(COMP) $(FLAGS) $(INCLUDES) -c $< -o $@

.PHONY: clean

clean:
	rm -f hello_jobstep *.o

README.md

0 → 100644
+13 −0
Original line number Diff line number Diff line
# hello_jobstep

For each job step launched with a job launcher, this program prints the hardware thread IDs that each MPI rank and OpenMP thread runs on, and the GPU IDs that each rank/thread has access to.

## Compiling

To compile, you'll need to have HIP and MPI installed, and use an OpenMP-capable compiler.

## Usage

To run, simply launch the code with your favorite job launcher.

> OPTIONAL: There is an `example_map.sh` script that can be modified and called "in front of" `hello_jobstep` (or any other executable really). The script uses `numactl` to map hardware threads and GPUs to node-local MPI ranks.

example_map.sh

0 → 100755
+19 −0
Original line number Diff line number Diff line
#!/bin/bash

# Wrapper placed "in front of" an application by the job launcher, e.g.
#   srun ./example_map.sh ./hello_jobstep
# Binds each node-local MPI rank to a fixed set of hardware threads
# (via numactl) and GPUs (via HIP_VISIBLE_DEVICES).

# The application to launch is the first (and only) argument.
export APP=$1

# Node-local rank, assuming 4 MPI ranks per node (SLURM_PROCID mod 4).
lrank=$(($SLURM_PROCID % 4))
export OMP_NUM_THREADS=4
export OMP_PLACES=cores

# Per-local-rank mapping: 2 GPUs and 4 contiguous hardware threads each.
# NOTE(review): only local ranks 0 and 1 are handled here; ranks 2 and 3
# (possible given the mod-4 above) match no case and launch nothing --
# confirm the intended ranks-per-node, or add cases, before reusing.
case ${lrank} in
[0])
export HIP_VISIBLE_DEVICES=0,1
numactl --physcpubind=64,65,66,67 $APP
  ;;

[1])
export HIP_VISIBLE_DEVICES=2,3
numactl --physcpubind=68,69,70,71 $APP
  ;;
esac

hello_jobstep.cpp

0 → 100644
+111 −0
Original line number Diff line number Diff line
/**********************************************************

"Hello World"-type program to test different srun layouts.

Written by Tom Papatheodore

**********************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iomanip>
#include <iostream>
#include <string>
#include <mpi.h>
#include <sched.h>
#include <hip/hip_runtime.h>
#include <omp.h>

// Macro for checking errors in HIP API calls.
// On failure, prints the failing file/line and HIP's error string, then
// terminates with a NONZERO status: the original exit(0) reported success
// to the shell and job scheduler even when a HIP call had failed.
#define hipErrorCheck(call)                                                                 \
do{                                                                                         \
    hipError_t hipErr = call;                                                               \
    if(hipSuccess != hipErr){                                                               \
        printf("HIP Error - %s:%d: '%s'\n", __FILE__, __LINE__, hipGetErrorString(hipErr)); \
        exit(1);                                                                            \
    }                                                                                       \
}while(0)

// For each MPI rank (and each OpenMP thread within it), print the hardware
// thread it runs on, the node name, and -- when GPUs are present -- the
// runtime GPU IDs, HIP_VISIBLE_DEVICES contents, and PCI bus IDs it can see.
int main(int argc, char *argv[]){

	MPI_Init(&argc, &argv);

	int size;
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	int rank;
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);

	// Name of the node this rank is running on.
	char name[MPI_MAX_PROCESSOR_NAME];
	int resultlength;
	MPI_Get_processor_name(name, &resultlength);

	// HIP_VISIBLE_DEVICES may be unset, in which case getenv returns NULL;
	// streaming a null char* into std::cout is undefined behavior, so
	// substitute a placeholder instead.
	const char* gpu_id_env = getenv("HIP_VISIBLE_DEVICES");
	const char* gpu_id_list = (gpu_id_env != NULL) ? gpu_id_env : "N/A";

	// Find how many GPUs HIP runtime says are available
	int num_devices = 0;
	hipErrorCheck( hipGetDeviceCount(&num_devices) );

	int hwthread;
	int thread_id = 0;

	if(num_devices == 0){
		// CPU-only report: one line per OpenMP thread. The critical section
		// keeps each thread's chained << insertions from interleaving.
		#pragma omp parallel default(shared) private(hwthread, thread_id)
		{
			#pragma omp critical
			{
				thread_id = omp_get_thread_num();
				hwthread = sched_getcpu();

				std::cout << "MPI " << rank << " - OMP " << thread_id << " - HWT "
				          << hwthread << " - Node " << name << std::endl;
			}
		}
	}
	else{

		char busid[64];

		std::string busid_list = "";
		std::string rt_gpu_id_list = "";

		// Loop over the GPUs available to each MPI rank, collecting the
		// runtime device indices and PCI bus numbers as comma-separated lists.
		for(int i=0; i<num_devices; i++){

			hipErrorCheck( hipSetDevice(i) );

			// Get the PCIBusId string for device i (e.g. "0000:03:00.0").
			hipErrorCheck( hipDeviceGetPCIBusId(busid, 64, i) );

			if(i > 0) rt_gpu_id_list.append(",");
			rt_gpu_id_list.append(std::to_string(i));

			std::string temp_busid(busid);

			// Characters 5-6 of the PCIBusId string are the PCI bus number.
			if(i > 0) busid_list.append(",");
			busid_list.append(temp_busid.substr(5,2));

		}

		// GPU report: one line per OpenMP thread, serialized by the critical
		// section so each line stays intact.
		#pragma omp parallel default(shared) private(hwthread, thread_id)
		{
			#pragma omp critical
			{
				thread_id = omp_get_thread_num();
				hwthread = sched_getcpu();

				std::cout << "MPI " << rank << " - OMP " << thread_id << " - HWT "
				          << hwthread << " - Node " << name << " - RT_GPU_ID " << rt_gpu_id_list
				          << " - GPU_ID " << gpu_id_list << " - Bus_ID " << busid_list << std::endl;
			}
		}
	}

	MPI_Finalize();

	return 0;
}