Commit acc08cda authored by Pillai, Himanshu's avatar Pillai, Himanshu
Browse files

HIP port: converts the CUDA sources to portable C++ so they can target AMD GPUs

parent 72adcbe9
#include <array>
#include <sstream>
#include <iterator>
#include <exception>
#include <string>
#include <stdlib.h>
#include <cstring>
#include <vector>
#include <iostream>
#include <iomanip>
#include <numeric>
#include <algorithm>
#include <fstream>
#include <time.h>
#include <cuda.h>
#include "utils.hh"
#include "readers.hh"
#include "CanopyHydrology_decl.hh"
namespace ELM {
namespace Utils {
// Compile-time problem dimensions for the multi-grid-cell interception test.
static const int n_months = 12;    // months of phenology data per site
static const int n_pfts = 17;      // plant functional types per grid cell
static const int n_max_times = 31 * 24 * 2; // max days per month times hours per
// day * half hour timestep
static const int n_grid_cells = 24;
// State is laid out per (grid cell, PFT); forcing per (time, grid cell).
using MatrixState = MatrixStatic<n_grid_cells, n_pfts>;
using MatrixForc = MatrixStatic<n_max_times,n_grid_cells>;
} // namespace
} // namespace
// Driver: runs the CanopyHydrology interception kernel over all grid cells
// and PFTs for every forcing timestep, writing per-timestep canopy-water
// statistics to a .soln file and reporting kernel wall time via CUDA events.
int main(int argc, char ** argv)
{
  using ELM::Utils::n_months;
  using ELM::Utils::n_pfts;
  using ELM::Utils::n_grid_cells;
  using ELM::Utils::n_max_times;

  // fixed magic parameters for now
  const int ctype = 1;
  const int ltype = 1;
  const bool urbpoi = false;
  const bool do_capsnow = false;
  const int frac_veg_nosno = 1;
  int n_irrig_steps_left = 0;
  const double dewmx = 0.1;
  const double dtime = 1800.0;  // half-hour timestep, seconds

  // phenology state, loaded for two sites back-to-back (second site is
  // appended at month offset n_months)
  ELM::Utils::MatrixState elai;
  ELM::Utils::MatrixState esai;
  ELM::Utils::read_phenology("../links/surfacedataWBW.nc", n_months, n_pfts, 0, elai, esai);
  ELM::Utils::read_phenology("../links/surfacedataBRW.nc", n_months, n_pfts, n_months, elai, esai);

  // forcing state
  ELM::Utils::MatrixForc forc_rain;
  ELM::Utils::MatrixForc forc_snow;
  ELM::Utils::MatrixForc forc_air_temp;
  const int n_times = ELM::Utils::read_forcing("../links/forcing", n_max_times, 0,
          n_grid_cells, forc_rain, forc_snow, forc_air_temp);
  ELM::Utils::MatrixForc forc_irrig; forc_irrig = 0.;

  // output state by the grid cell / PFT
  auto qflx_prec_intr = ELM::Utils::MatrixState();
  auto qflx_irrig = ELM::Utils::MatrixState();
  auto qflx_prec_grnd = ELM::Utils::MatrixState();
  auto qflx_snwcp_liq = ELM::Utils::MatrixState();
  auto qflx_snwcp_ice = ELM::Utils::MatrixState();
  auto qflx_snow_grnd_patch = ELM::Utils::MatrixState();
  auto qflx_rain_grnd = ELM::Utils::MatrixState();

  // output state by the pft
  auto h2o_can = ELM::Utils::MatrixState(); h2o_can = 0.;

  cudaEvent_t start, stop;
  float time;
  cudaEventCreate(&start);
  cudaEventCreate(&stop);
  // Wait for any prior GPU work to finish before timing begins.
  cudaDeviceSynchronize();

  std::ofstream soln_file;
  soln_file.open("test_CanopyHydrology_kern1_multiple.soln");
  soln_file << "Time\t Total Canopy Water\t Min Water\t Max Water" << std::endl;
  auto min_max = std::minmax_element(h2o_can.begin(), h2o_can.end());
  soln_file << std::setprecision(16)
            << 0 << "\t" << std::accumulate(h2o_can.begin(), h2o_can.end(), 0.)
            << "\t" << *min_max.first
            << "\t" << *min_max.second << std::endl;

  cudaEventRecord(start, 0);

  // main loop
  // -- the timestep loop cannot/should not be parallelized
  for (size_t t = 0; t < static_cast<size_t>(n_times); ++t) {
    // grid cell and/or pft loop can be parallelized
    for (size_t g = 0; g != n_grid_cells; ++g) {
      for (size_t p = 0; p != n_pfts; ++p) {
        // NOTE: this currently punts on what to do with the qflx variables!
        // Surely they should be either accumulated or stored on PFTs as well.
        // --etc
        // NOTE(review): the launch passes references to host-resident matrix
        // entries; this only works if the kernel is actually built as host
        // code (cf. -DNATURE in the build) -- confirm.
        ELM::CanopyHydrology_Interception<<<1, 256>>>(dtime,
                forc_rain(t,g), forc_snow(t,g), forc_irrig(t,g),
                ltype, ctype, urbpoi, do_capsnow,
                elai(g,p), esai(g,p), dewmx, frac_veg_nosno,
                h2o_can(g,p), n_irrig_steps_left,
                qflx_prec_intr(g,p), qflx_irrig(g,p), qflx_prec_grnd(g,p),
                qflx_snwcp_liq(g,p), qflx_snwcp_ice(g,p),
                qflx_snow_grnd_patch(g,p), qflx_rain_grnd(g,p));
      }
    }

    // Kernel launches are asynchronous: make sure this timestep's work is
    // done before the host reads h2o_can for the statistics below.
    cudaDeviceSynchronize();

    auto min_max = std::minmax_element(h2o_can.begin(), h2o_can.end());
    soln_file << std::setprecision(16)
              << t+1 << "\t" << std::accumulate(h2o_can.begin(), h2o_can.end(), 0.)
              << "\t" << *min_max.first
              << "\t" << *min_max.second << std::endl;
  }

  // cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is the
  // supported equivalent.
  cudaDeviceSynchronize();
  cudaEventRecord(stop, 0);
  cudaEventSynchronize(stop);
  cudaEventElapsedTime(&time, start, stop);
  std::cout << "Time for the kernel: " << time << std::endl;

  // Surface any launch/execution error instead of exiting silently.
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    std::cerr << "CUDA error: " << cudaGetErrorString(err) << std::endl;
    return 1;
  }

  cudaEventDestroy(start);
  cudaEventDestroy(stop);
  return 0;
}
#include <netcdf.h>
#include <array>
#include <sstream>
#include <iterator>
#include <exception>
#include <string>
#include <stdlib.h>
#include <cstring>
#include <vector>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <time.h>
#include <cuda.h>
#include "utils.hh"
#include "readers.hh"
#include "CanopyHydrology_decl.hh"
namespace ELM {
namespace Utils {
// Compile-time problem dimensions for the single-cell interception test.
static const int n_months = 12;  // months of phenology data
static const int n_pfts = 17;    // plant functional types
// Phenology state is laid out per (month, PFT) in this single-cell test.
using MatrixState = MatrixStatic<n_months, n_pfts>;
static const int n_max_times = 31 * 24 * 2; // max days per month times hours per
// day * half hour timestep
// Forcing for a single grid cell, indexed by time.
using MatrixForc = MatrixStatic<n_max_times,1>;
} // namespace
} // namespace
// Driver: runs the CanopyHydrology interception kernel for a single grid
// cell / single PFT over every forcing timestep, logging the evolving canopy
// water state to a .soln file and reporting kernel wall time via CUDA events.
int main(int argc, char ** argv)
{
  // dimensions
  const int n_months = 12;
  const int n_pfts = 17;
  const int n_max_times = 31 * 24 * 2; // max days per month times hours per
                                       // day * half hour timestep

  // fixed magic parameters for now
  const int ctype = 1;
  const int ltype = 1;
  const bool urbpoi = false;
  const bool do_capsnow = false;
  const int frac_veg_nosno = 1;
  const double irrig_rate = 0.;
  int n_irrig_steps_left = 0;
  const double dewmx = 0.1;
  const double dtime = 1800.0;  // half-hour timestep, seconds

  // phenology state
  ELM::Utils::MatrixState elai;
  ELM::Utils::MatrixState esai;
  ELM::Utils::read_phenology("../links/surfacedataWBW.nc", n_months, n_pfts, 0, elai, esai);

  // forcing state (single column, starting at month offset 6)
  ELM::Utils::MatrixForc forc_rain;
  ELM::Utils::MatrixForc forc_snow;
  ELM::Utils::MatrixForc forc_air_temp;
  const int n_times = ELM::Utils::read_forcing("../links/forcing", n_max_times, 6, 1,
          forc_rain, forc_snow, forc_air_temp);

  // evolving state and output fluxes for the single patch
  double h2ocan = 0.0;
  double qflx_prec_intr = 0.;
  double qflx_irrig = 0.;
  double qflx_prec_grnd = 0.;
  double qflx_snwcp_liq = 0.;
  double qflx_snwcp_ice = 0.;
  double qflx_snow_grnd_patch = 0.;
  double qflx_rain_grnd = 0.;

  cudaEvent_t start, stop;
  float time;
  cudaEventCreate(&start);
  cudaEventCreate(&stop);
  // Wait for any prior GPU work to finish before timing begins.
  cudaDeviceSynchronize();

  std::ofstream soln_file;
  soln_file.open("test_CanopyHydrology_kern1_single.soln");
  soln_file << "Timestep, forc_rain, h2ocan, qflx_prec_grnd, qflx_prec_intr" << std::endl;

  cudaEventRecord(start, 0);
  for (size_t itime = 0; itime < static_cast<size_t>(n_times); itime += 1) {
    // note this call puts all precip as rain for testing
    double total_precip = forc_rain[itime][0] + forc_snow[itime][0];
    // NOTE(review): the launch passes references to host stack variables;
    // this only works if the kernel is actually built as host code
    // (cf. -DNATURE in the build) -- confirm.
    ELM::CanopyHydrology_Interception<<<1, 256>>>(dtime, total_precip, 0., irrig_rate,
            ltype, ctype, urbpoi, do_capsnow,
            elai[5][7], esai[5][7], dewmx, frac_veg_nosno,
            h2ocan, n_irrig_steps_left,
            qflx_prec_intr, qflx_irrig, qflx_prec_grnd,
            qflx_snwcp_liq, qflx_snwcp_ice,
            qflx_snow_grnd_patch, qflx_rain_grnd);
    // Launches are asynchronous: finish before the host reads the outputs.
    cudaDeviceSynchronize();
    soln_file << std::setprecision(16) << itime+1 << "\t" << total_precip << "\t" << h2ocan
              << "\t" << qflx_prec_grnd << "\t" << qflx_prec_intr << std::endl;
  }

  // cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is the
  // supported equivalent.
  cudaDeviceSynchronize();
  cudaEventRecord(stop, 0);
  cudaEventSynchronize(stop);
  cudaEventElapsedTime(&time, start, stop);
  std::cout << "Time for the kernel: " << time << std::endl;

  // Surface any launch/execution error instead of exiting silently.
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    std::cerr << "CUDA error: " << cudaGetErrorString(err) << std::endl;
    return 1;
  }

  cudaEventDestroy(start);
  cudaEventDestroy(stop);
  return 0;
}
#include <array>
#include <sstream>
#include <iterator>
#include <exception>
#include <string>
#include <stdlib.h>
#include <cstring>
#include <vector>
#include <iostream>
#include <iomanip>
#include <numeric>
#include <algorithm>
#include <fstream>
#include <time.h>
#include <cuda.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include "utils.hh"
#include "readers.hh"
#include "CanopyHydrology_decl.hh"
#include "CanopyHydrology_SnowWater_impl.hh"
namespace ELM {
namespace Utils {
// Compile-time problem dimensions for the full-module test driver.
static const int n_months = 12;    // months of phenology data per site
static const int n_pfts = 17;      // plant functional types per grid cell
static const int n_max_times = 31 * 24 * 2; // max days per month times hours per
// day * half hour timestep
static const int n_grid_cells = 24;
static const int n_levels_snow = 5;  // snow layers per soil column
// PFT-level state: (grid cell, PFT); column-level state: (grid cell, snow level).
using MatrixStatePFT = MatrixStatic<n_grid_cells, n_pfts>;
using MatrixStateSoilColumn = MatrixStatic<n_grid_cells, n_levels_snow>;
using MatrixForc = MatrixStatic<n_max_times,n_grid_cells>;
// Per-column scalars (one entry per grid cell).
using VectorColumn = VectorStatic<n_grid_cells>;
using VectorColumnInt = VectorStatic<n_grid_cells,int>;
} // namespace
} // namespace
// Driver: full CanopyHydrology column model -- interception, wet/dry canopy
// fraction, snow-water balance, and surface-water fraction -- over multiple
// grid cells, writing per-timestep water/snow/surface statistics to a .soln
// file and reporting kernel wall time via CUDA events.
int main(int argc, char ** argv)
{
  using ELM::Utils::n_months;
  using ELM::Utils::n_pfts;
  using ELM::Utils::n_grid_cells;
  using ELM::Utils::n_max_times;

  // fixed magic parameters for now
  const int ctype = 1;
  const int ltype = 1;
  const bool urbpoi = false;
  const bool do_capsnow = false;
  const int frac_veg_nosno = 1;
  int n_irrig_steps_left = 0;
  const double dewmx = 0.1;
  const double dtime = 1800.0;  // half-hour timestep, seconds

  // fixed magic parameters for SnowWater
  const double qflx_snow_melt = 0.;

  // fixed magic parameters for fracH2Osfc
  const int oldfflag = 0;
  const double micro_sigma = 0.1;
  const double min_h2osfc = 1.0e-8;
  const double n_melt = 0.7;

  // phenology input, loaded for two sites back-to-back
  ELM::Utils::MatrixStatePFT elai;
  ELM::Utils::MatrixStatePFT esai;
  ELM::Utils::read_phenology("../links/surfacedataWBW.nc", n_months, n_pfts, 0, elai, esai);
  ELM::Utils::read_phenology("../links/surfacedataBRW.nc", n_months, n_pfts, n_months, elai, esai);

  // forcing input
  ELM::Utils::MatrixForc forc_rain;
  ELM::Utils::MatrixForc forc_snow;
  ELM::Utils::MatrixForc forc_air_temp;
  const int n_times = ELM::Utils::read_forcing("../links/forcing", n_max_times, 0,
          n_grid_cells, forc_rain, forc_snow, forc_air_temp);
  ELM::Utils::MatrixForc forc_irrig; forc_irrig = 0.;
  double qflx_floodg = 0.0;

  // mesh input (though can also change as snow layers evolve)
  //
  // NOTE: in a real case, these would be populated, but we don't actually
  // need them to be for these kernels. --etc
  auto z = ELM::Utils::MatrixStateSoilColumn(0.);
  auto zi = ELM::Utils::MatrixStateSoilColumn(0.);
  auto dz = ELM::Utils::MatrixStateSoilColumn(0.);

  // state variables that require ICs and evolve (in/out)
  auto h2ocan = ELM::Utils::MatrixStatePFT(); h2ocan = 0.;
  auto swe_old = ELM::Utils::MatrixStateSoilColumn(0.);
  auto h2osoi_liq = ELM::Utils::MatrixStateSoilColumn(0.);
  auto h2osoi_ice = ELM::Utils::MatrixStateSoilColumn(0.);
  auto t_soisno = ELM::Utils::MatrixStateSoilColumn(0.);
  auto frac_iceold = ELM::Utils::MatrixStateSoilColumn(0.);
  auto t_grnd = ELM::Utils::VectorColumn(0.);
  auto h2osno = ELM::Utils::VectorColumn(0.); h2osno = 0.;
  auto snow_depth = ELM::Utils::VectorColumn(0.);
  // integer vector: initialize with an int, not a double literal
  auto snow_level = ELM::Utils::VectorColumnInt(0); // note this tracks the snow_depth
  auto h2osfc = ELM::Utils::VectorColumn(0.);
  auto frac_h2osfc = ELM::Utils::VectorColumn(0.); frac_h2osfc = 0.;

  // output fluxes by pft
  auto qflx_prec_intr = ELM::Utils::MatrixStatePFT();
  auto qflx_irrig = ELM::Utils::MatrixStatePFT();
  auto qflx_prec_grnd = ELM::Utils::MatrixStatePFT();
  auto qflx_snwcp_liq = ELM::Utils::MatrixStatePFT();
  auto qflx_snwcp_ice = ELM::Utils::MatrixStatePFT();
  auto qflx_snow_grnd_patch = ELM::Utils::MatrixStatePFT();
  auto qflx_rain_grnd = ELM::Utils::MatrixStatePFT();

  // FIXME: I have no clue what this is... it is inout on WaterSnow. For now I
  // am guessing the data structure. Ask Scott. --etc
  auto integrated_snow = ELM::Utils::VectorColumn(0.);

  // output fluxes, state by the column
  auto qflx_snow_grnd_col = ELM::Utils::VectorColumn();
  auto qflx_snow_h2osfc = ELM::Utils::VectorColumn();
  auto qflx_h2osfc2topsoi = ELM::Utils::VectorColumn();
  auto qflx_floodc = ELM::Utils::VectorColumn();
  auto frac_sno_eff = ELM::Utils::VectorColumn();
  auto frac_sno = ELM::Utils::VectorColumn();

  cudaEvent_t start, stop;
  float time;
  cudaEventCreate(&start);
  cudaEventCreate(&stop);
  // Wait for any prior GPU work to finish before timing begins.
  cudaDeviceSynchronize();

  std::ofstream soln_file;
  soln_file.open("test_CanopyHydrology_module.soln");
  soln_file << "Time\t Total Canopy Water\t Min Water\t Max Water\t Total Snow\t Min Snow\t Max Snow\t Avg Frac Sfc\t Min Frac Sfc\t Max Frac Sfc" << std::endl;

  // Initial (t=0) statistics; end2/end3 are one-past-the-last-element helpers
  // reused inside the timestep loop.
  auto min_max_water = std::minmax_element(h2ocan.begin(), h2ocan.end());
  auto sum_water = std::accumulate(h2ocan.begin(), h2ocan.end(), 0.);
  double* end2 = &h2osno(n_grid_cells-1) ;
  double* end3 = &frac_h2osfc(n_grid_cells-1) ;
  auto min_max_snow = std::minmax_element(&h2osno(0), end2+1);
  auto sum_snow = std::accumulate(&h2osno(0), end2+1, 0.);
  auto min_max_frac_sfc = std::minmax_element(&frac_h2osfc(0), end3+1);
  auto avg_frac_sfc = std::accumulate(&frac_h2osfc(0), end3+1, 0.) / (end3+1 - &frac_h2osfc(0));
  soln_file << std::setprecision(16)
            << 0 << "\t" << sum_water << "\t" << *min_max_water.first << "\t" << *min_max_water.second
            << "\t" << sum_snow << "\t" << *min_max_snow.first << "\t" << *min_max_snow.second
            << "\t" << avg_frac_sfc << "\t" << *min_max_frac_sfc.first << "\t" << *min_max_frac_sfc.second << std::endl;

  cudaEventRecord(start, 0);

  // main loop
  // -- the timestep loop cannot/should not be parallelized
  for (size_t t = 0; t < static_cast<size_t>(n_times); ++t) {
    // grid cell and/or pft loop can be parallelized
    for (size_t g = 0; g != n_grid_cells; ++g) {
      // PFT level operations
      for (size_t p = 0; p != n_pfts; ++p) {
        //
        // Calculate interception
        //
        // NOTE: this currently punts on what to do with the qflx variables!
        // Surely they should be either accumulated or stored on PFTs as well.
        // --etc
        // NOTE(review): launches pass references to host-resident data; this
        // only works if the kernels are built as host code (cf. -DNATURE in
        // the build) -- confirm.
        ELM::CanopyHydrology_Interception<<<1, 256>>>(dtime,
                forc_rain(t,g), forc_snow(t,g), forc_irrig(t,g),
                ltype, ctype, urbpoi, do_capsnow,
                elai(g,p), esai(g,p), dewmx, frac_veg_nosno,
                h2ocan(g,p), n_irrig_steps_left,
                qflx_prec_intr(g,p), qflx_irrig(g,p), qflx_prec_grnd(g,p),
                qflx_snwcp_liq(g,p), qflx_snwcp_ice(g,p),
                qflx_snow_grnd_patch(g,p), qflx_rain_grnd(g,p));
        //
        // Calculate fraction of LAI that is wet vs dry.
        //
        // FIXME: this currently punts on what to do with the fwet/fdry variables.
        // Surely they should be something, as such this is dead code.
        // By the PFT?
        // --etc
        double fwet = 0., fdry = 0.;
        ELM::CanopyHydrology_FracWet<<<1, 256>>>(frac_veg_nosno, h2ocan(g,p),
                elai(g,p), esai(g,p), dewmx, fwet, fdry);
      } // end PFT loop

      // Launches are asynchronous: the PFT kernels must have finished before
      // the host accumulates their patch outputs below.
      cudaDeviceSynchronize();

      // Column level operations
      // NOTE: this is effectively an accumulation kernel/task! --etc
      qflx_snow_grnd_col[g] = std::accumulate(qflx_snow_grnd_patch[g].begin(),
              qflx_snow_grnd_patch[g].end(), 0.);

      // Calculate ?water balance? on the snow column, adding throughfall,
      // removing melt, etc.
      //
      // local outputs
      int newnode;
      ELM::CanopyHydrology_SnowWater<<<1, 256>>>(dtime, qflx_floodg,
              ltype, ctype, urbpoi, do_capsnow, oldfflag,
              forc_air_temp(t,g), t_grnd(g),
              qflx_snow_grnd_col[g], qflx_snow_melt, n_melt, frac_h2osfc[g],
              snow_depth[g], h2osno[g], integrated_snow[g], swe_old[g],
              h2osoi_liq[g], h2osoi_ice[g], t_soisno[g], frac_iceold[g],
              snow_level[g], dz[g], z[g], zi[g], newnode,
              qflx_floodc[g], qflx_snow_h2osfc[g], frac_sno_eff[g], frac_sno[g]);

      // Calculate Fraction of Water to the Surface?
      //
      // FIXME: Fortran black magic... h2osoi_liq is a vector, but the
      // interface specifies a single double. For now passing the 0th
      // entry. --etc
      ELM::CanopyHydrology_FracH2OSfc<<<1, 256>>>(dtime, min_h2osfc, ltype, micro_sigma,
              h2osno[g], h2osfc[g], h2osoi_liq[g][0], frac_sno[g], frac_sno_eff[g],
              qflx_h2osfc2topsoi[g], frac_h2osfc[g]);
    } // end grid cell loop

    // Finish the column kernels before the host reads state for statistics.
    cudaDeviceSynchronize();

    auto min_max_water = std::minmax_element(h2ocan.begin(), h2ocan.end());
    auto sum_water = std::accumulate(h2ocan.begin(), h2ocan.end(), 0.);
    auto min_max_snow = std::minmax_element(&h2osno(0), end2+1);
    auto sum_snow = std::accumulate(&h2osno(0), end2+1, 0.);
    auto min_max_frac_sfc = std::minmax_element(&frac_h2osfc(0), end3+1);
    auto avg_frac_sfc = std::accumulate(&frac_h2osfc(0), end3+1, 0.) / (end3+1 - &frac_h2osfc(0));
    soln_file << std::setprecision(16)
              << t+1 << "\t" << sum_water << "\t" << *min_max_water.first << "\t" << *min_max_water.second
              << "\t" << sum_snow << "\t" << *min_max_snow.first << "\t" << *min_max_snow.second
              << "\t" << avg_frac_sfc << "\t" << *min_max_frac_sfc.first << "\t" << *min_max_frac_sfc.second << std::endl;
  } // end timestep loop

  // cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is the
  // supported equivalent.
  cudaDeviceSynchronize();
  cudaEventRecord(stop, 0);
  cudaEventSynchronize(stop);
  cudaEventElapsedTime(&time, start, stop);
  std::cout << "Time for the kernel: " << time << std::endl;

  // Surface any launch/execution error instead of exiting silently.
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    std::cerr << "CUDA error: " << cudaGetErrorString(err) << std::endl;
    return 1;
  }

  cudaEventDestroy(start);
  cudaEventDestroy(stop);
  return 0;
}
# Build for the HIP ports of the CanopyHydrology kernel tests.
# OBJECT points at the repository source root; KERNEL_LANG selects the
# kernel implementation directory under it.
OBJECT = ../../src/
KERNEL_LANG = cc
SRCDIR = $(OBJECT)$(KERNEL_LANG)
# NATURE is the function qualifier used in the kernel sources; defining it
# to __global__ builds them as GPU entry points.
HIP_FLAGS += -DNATURE=__global__
# NOTE(review): hard-coded per-user HIP install paths -- consider making
# these overridable (?=) or discovering hipcc from PATH.
HIPCC =/home/7hp/Downloads/HIP/build/bin/hipcc
HIPFY=/home/7hp/Downloads/HIP/build/bin/hipify-perl
include $(OBJECT)config/Makefile.config
include $(OBJECT)config/Makefile.rules
INC_FLAGS ?= -I$(NETCDF_ROOT)/include -I$(SRCDIR)
TESTS = test_CanopyHydrology_kern1_single \
test_CanopyHydrology_kern1_multiple \
test_CanopyHydrology_module
EXEC_TESTS = CanopyHydrology_kern1_single \
CanopyHydrology_kern1_multiple \
CanopyHydrology_module
.PHONY: links library test
default: all
all: links library $(TESTS)
# Translate the CUDA sources to HIP-portable C++ with hipify-perl.
hipfy:
$(HIPFY) CanopyHydrology_kern1_single.cu > CanopyHydrology_kern1_single.cpp
$(HIPFY) CanopyHydrology_kern1_multiple.cu > CanopyHydrology_kern1_multiple.cpp
$(HIPFY) CanopyHydrology_module.cu > CanopyHydrology_module.cpp
# NOTE(review): most of this rule is commented out; only the kern1_multiple
# compile line is active, and its output name "CanopyHydrology_kern_multiple.out"
# drops the "1" present elsewhere -- confirm intended.
test: readers.hh utils.hh #$(EXEC_TESTS)
# python ../compare_to_gold.py $(TESTS)
# CanopyHydrology_kern1_single: test_CanopyHydrology_kern1_single
# ./test_CanopyHydrology_kern1_single > test_CanopyHydrology_kern1_single.stdout
# CanopyHydrology_kern1_multiple: test_CanopyHydrology_kern1_multiple
# ./test_CanopyHydrology_kern1_multiple > test_CanopyHydrology_kern1_multiple.stdout
# CanopyHydrology_module: test_CanopyHydrology_module
# ./test_CanopyHydrology_module > test_CanopyHydrology_module.stdout
# test_%: readers.hh utils.hh
#$(HIPCC) CanopyHydrology_kern1_single.cpp $(INC_FLAGS) $(HIP_LD_FLAGS) -o CanopyHydrology_kern1_single.out
$(HIPCC) $(HIP_FLAGS) CanopyHydrology_kern1_multiple.cpp $(INC_FLAGS) $(HIP_LD_FLAGS) -o CanopyHydrology_kern_multiple.out
#$(HIPCC) CanopyHydrology_module.cpp $(INC_FLAGS) $(HIP_LD_FLAGS) -o CanopyHydrology_module.out
# Remove build products and the hipified .cpp translations.
clean:
@$(ELM_CLEAN)
$(RM) test_* *.cpp
allclean:
@$(ELM_CLEAN)
$(RM) test_*
$(MAKE) -C $(SRCDIR) allclean
links:
@echo "making in links"
$(MAKE) -C ../links links
\ No newline at end of file
//! A set of utilities for testing ELM kernels in C++
#ifndef ELM_KERNEL_TEST_NETCDF_HH_
#define ELM_KERNEL_TEST_NETCDF_HH_
#include <string>
#include <array>
#include <vector>
#include <sstream>
#include <iostream>
#include "netcdf.h"