Commit 095218de authored by Pillai, Himanshu's avatar Pillai, Himanshu
Browse files

Vanilla Cuda fixed and pass tests

parent 38dc913c
......@@ -15,7 +15,7 @@ using std::max ;
namespace ELM {
template<typename Array_d>
NATURE void CanopyHydrology_SnowWater(const double& dtime,
NATURE1 void CanopyHydrology_SnowWater(const double& dtime,
const double& qflx_floodg,
const int& ltype,
const int& ctype,
......
......@@ -4,6 +4,9 @@
#ifndef NATURE
#define NATURE
#endif
#ifndef NATURE1
#define NATURE1
#endif
namespace ELM {
......@@ -37,7 +40,7 @@ NATURE void CanopyHydrology_FracWet(const int& frac_veg_nosno,
template<typename Array_d>
NATURE void CanopyHydrology_SnowWater(const double& dtime,
NATURE1 void CanopyHydrology_SnowWater(const double& dtime,
const double& qflx_floodg,
const int& ltype,
const int& ctype,
......
......@@ -10,6 +10,8 @@
#include <iomanip>
#include <numeric>
#include <algorithm>
#include <fstream>
#include <time.h>
#include "utils.hh"
#include "readers.hh"
......@@ -84,15 +86,26 @@ int main(int argc, char ** argv)
// output state by the pft
auto h2o_can = ELM::Utils::MatrixState(); h2o_can = 0.;
cudaEvent_t start, stop;
float time;
cudaEventCreate(&start);
cudaEventCreate(&stop);
// Wait for GPU to finish before accessing on host
cudaDeviceSynchronize();
std::cout << "Time\t Total Canopy Water\t Min Water\t Max Water" << std::endl;
std::ofstream soln_file;
soln_file.open("test_CanopyHydrology_kern1_multiple.soln");
soln_file << "Time\t Total Canopy Water\t Min Water\t Max Water" << std::endl;
auto min_max = std::minmax_element(h2o_can.begin(), h2o_can.end());
std::cout << std::setprecision(16)
soln_file << std::setprecision(16)
<< 0 << "\t" << std::accumulate(h2o_can.begin(), h2o_can.end(), 0.)
<< "\t" << *min_max.first
<< "\t" << *min_max.second << std::endl;
cudaEventRecord(start, 0);
// main loop
// -- the timestep loop cannot/should not be parallelized
for (size_t t = 0; t != n_times; ++t) {
......@@ -120,11 +133,17 @@ int main(int argc, char ** argv)
}
auto min_max = std::minmax_element(h2o_can.begin(), h2o_can.end());
std::cout << std::setprecision(16)
soln_file << std::setprecision(16)
<< t+1 << "\t" << std::accumulate(h2o_can.begin(), h2o_can.end(), 0.)
<< "\t" << *min_max.first
<< "\t" << *min_max.second << std::endl;
}
cudaThreadSynchronize();
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
std::cout <<"Time for the kernel: "<< time << std::endl;
return 0;
}
......@@ -9,7 +9,8 @@
#include <vector>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <time.h>
#include "utils.hh"
#include "readers.hh"
......@@ -69,11 +70,19 @@ int main(int argc, char ** argv)
double qflx_snwcp_ice = 0.;
double qflx_snow_grnd_patch = 0.;
double qflx_rain_grnd = 0.;
cudaEvent_t start, stop;
float time;
cudaEventCreate(&start);
cudaEventCreate(&stop);
// Wait for GPU to finish before accessing on host
cudaDeviceSynchronize();
std::cout << "Timestep, forc_rain, h2ocan, qflx_prec_grnd, qflx_prec_intr" << std::endl;
std::ofstream soln_file;
soln_file.open("test_CanopyHydrology_kern1_single.soln");
soln_file << "Timestep, forc_rain, h2ocan, qflx_prec_grnd, qflx_prec_intr" << std::endl;
cudaEventRecord(start, 0);
for(size_t itime = 0; itime < n_times; itime += 1) {
// note this call puts all precip as rain for testing
double total_precip = forc_rain[itime][0] + forc_snow[itime][0];
......@@ -85,8 +94,14 @@ int main(int argc, char ** argv)
qflx_snwcp_liq, qflx_snwcp_ice,
qflx_snow_grnd_patch, qflx_rain_grnd);
std::cout << std::setprecision(16) << itime+1 << "\t" << total_precip << "\t" << h2ocan<< "\t" << qflx_prec_grnd << "\t" << qflx_prec_intr << std::endl;
soln_file << std::setprecision(16) << itime+1 << "\t" << total_precip << "\t" << h2ocan<< "\t" << qflx_prec_grnd << "\t" << qflx_prec_intr << std::endl;
}
cudaThreadSynchronize();
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
std::cout <<"Time for the kernel: "<< time << std::endl;
// Free memory
// cudaFree(elai);
// cudaFree(esai);
......
......@@ -9,12 +9,15 @@
#include <iostream>
#include <iomanip>
#include <numeric>
#include <algorithm>
#include <fstream>
#include <time.h>
#include "utils.hh"
#include "readers.hh"
#include "CanopyHydrology.hh"
#include "CanopyHydrology_decl.hh"
#include "CanopyHydrology_SnowWater_impl.hh"
......@@ -44,7 +47,7 @@ int main(int argc, char ** argv)
using ELM::Utils::n_pfts;
using ELM::Utils::n_grid_cells;
using ELM::Utils::n_max_times;
// fixed magic parameters for now
const int ctype = 1;
const int ltype = 1;
......@@ -89,19 +92,19 @@ int main(int argc, char ** argv)
auto dz = ELM::Utils::MatrixStateSoilColumn(0.);
// state variables that require ICs and evolve (in/out)
auto h2ocan = ELM::Utils::MatrixStatePFT(0.);
auto h2ocan = ELM::Utils::MatrixStatePFT(); h2ocan = 0.;
auto swe_old = ELM::Utils::MatrixStateSoilColumn(0.);
auto h2osoi_liq = ELM::Utils::MatrixStateSoilColumn(0.);
auto h2osoi_ice = ELM::Utils::MatrixStateSoilColumn(0.);
auto t_soisno = ELM::Utils::MatrixStateSoilColumn(0.);
auto frac_iceold = ELM::Utils::MatrixStateSoilColumn(0.);
auto t_grnd = ELM::Utils::VectorColumn(0.);
auto h2osno = ELM::Utils::VectorColumn(0.);
auto h2osno = ELM::Utils::VectorColumn(0.); h2osno = 0.;
auto snow_depth = ELM::Utils::VectorColumn(0.);
auto snl = ELM::Utils::VectorColumnInt(0.); // note this tracks the snow_depth
auto snow_level = ELM::Utils::VectorColumnInt(0.); // note this tracks the snow_depth
auto h2osfc = ELM::Utils::VectorColumn(0.);
auto frac_h2osfc = ELM::Utils::VectorColumn(0.);
auto frac_h2osfc = ELM::Utils::VectorColumn(0.); frac_h2osfc = 0.;
// output fluxes by pft
......@@ -115,7 +118,7 @@ int main(int argc, char ** argv)
// FIXME: I have no clue what this is... it is inout on WaterSnow. For now I
// am guessing the data structure. Ask Scott. --etc
auto int_snow = ELM::Utils::VectorColumn(0.);
auto integrated_snow = ELM::Utils::VectorColumn(0.);
// output fluxes, state by the column
auto qflx_snow_grnd_col = ELM::Utils::VectorColumn();
......@@ -125,16 +128,33 @@ int main(int argc, char ** argv)
auto frac_sno_eff = ELM::Utils::VectorColumn();
auto frac_sno = ELM::Utils::VectorColumn();
cudaEvent_t start, stop;
float time;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaDeviceSynchronize();
// std::cout << "Time\t Total Canopy Water\t Min Water\t Max Water" << std::endl;
// auto min_max = std::minmax_element(h2ocan.begin(), h2ocan.end());
// std::cout << std::setprecision(16)
// << 0 << "\t" << std::accumulate(h2ocan.begin(), h2ocan.end(), 0.)
// << "\t" << *min_max.first
// << "\t" << *min_max.second << std::endl;
// main loop
std::ofstream soln_file;
soln_file.open("test_CanopyHydrology_module.soln");
soln_file << "Time\t Total Canopy Water\t Min Water\t Max Water\t Total Snow\t Min Snow\t Max Snow\t Avg Frac Sfc\t Min Frac Sfc\t Max Frac Sfc" << std::endl;
auto min_max_water = std::minmax_element(h2ocan.begin(), h2ocan.end());
auto sum_water = std::accumulate(h2ocan.begin(), h2ocan.end(), 0.);
double* end2 = &h2osno(n_grid_cells-1) ;
double* end3 = &frac_h2osfc(n_grid_cells-1) ;
auto min_max_snow = std::minmax_element(&h2osno(0), end2+1);
auto sum_snow = std::accumulate(&h2osno(0), end2+1, 0.);
auto min_max_frac_sfc = std::minmax_element(&frac_h2osfc(0), end3+1);
auto avg_frac_sfc = std::accumulate(&frac_h2osfc(0), end3+1, 0.) / (end3+1 - &frac_h2osfc(0));
soln_file << std::setprecision(16)
<< 0 << "\t" << sum_water << "\t" << *min_max_water.first << "\t" << *min_max_water.second
<< "\t" << sum_snow << "\t" << *min_max_snow.first << "\t" << *min_max_snow.second
<< "\t" << avg_frac_sfc << "\t" << *min_max_frac_sfc.first << "\t" << *min_max_frac_sfc.second << std::endl;
cudaEventRecord(start, 0);
// main loop
// -- the timestep loop cannot/should not be parallelized
for (size_t t = 0; t != n_times; ++t) {
......@@ -181,13 +201,13 @@ int main(int argc, char ** argv)
//
// local outputs
int newnode;
ELM::CanopyHydrology_SnowWater<<<1, 256>>>(dtime, qflx_floodg,
ELM::CanopyHydrology_SnowWater(dtime, qflx_floodg,
ltype, ctype, urbpoi, do_capsnow, oldfflag,
forc_air_temp(t,g), t_grnd(g),
qflx_snow_grnd_col[g], qflx_snow_melt, n_melt, frac_h2osfc[g],
snow_depth[g], h2osno[g], int_snow[g], swe_old[g],
snow_depth[g], h2osno[g], integrated_snow[g], swe_old[g],
h2osoi_liq[g], h2osoi_ice[g], t_soisno[g], frac_iceold[g],
snl[g], dz[g], z[g], zi[g], newnode,
snow_level[g], dz[g], z[g], zi[g], newnode,
qflx_floodc[g], qflx_snow_h2osfc[g], frac_sno_eff[g], frac_sno[g]);
// Calculate Fraction of Water to the Surface?
......@@ -201,13 +221,25 @@ int main(int argc, char ** argv)
} // end grid cell loop
// auto min_max = std::minmax_element(h2ocan.begin(), h2ocan.end());
// std::cout << std::setprecision(16)
// << t+1 << "\t" << std::accumulate(h2ocan.begin(), h2ocan.end(), 0.)
// << "\t" << *min_max.first
// << "\t" << *min_max.second << std::endl;
auto min_max_water = std::minmax_element(h2ocan.begin(), h2ocan.end());
auto sum_water = std::accumulate(h2ocan.begin(), h2ocan.end(), 0.);
auto min_max_snow = std::minmax_element(&h2osno(0), end2+1);
auto sum_snow = std::accumulate(&h2osno(0), end2+1, 0.);
auto min_max_frac_sfc = std::minmax_element(&frac_h2osfc(0), end3+1);
auto avg_frac_sfc = std::accumulate(&frac_h2osfc(0), end3+1, 0.) / (end3+1 - &frac_h2osfc(0));
soln_file << std::setprecision(16)
<< t+1 << "\t" << sum_water << "\t" << *min_max_water.first << "\t" << *min_max_water.second
<< "\t" << sum_snow << "\t" << *min_max_snow.first << "\t" << *min_max_snow.second
<< "\t" << avg_frac_sfc << "\t" << *min_max_frac_sfc.first << "\t" << *min_max_frac_sfc.second << std::endl;
} // end timestep loop
cudaThreadSynchronize();
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
std::cout <<"Time for the kernel: "<< time << std::endl;
return 0;
}
OBJECT = ../../src/
KERNEL_LANG = cc
SRCDIR = $(OBJECT)$(KERNEL_LANG)
CUDA_FLAGS += -DNATURE=__global__
CUDA_FLAGS += -DNATURE=__global__ -DNATURE1=__host__
NVCC = nvcc
include $(OBJECT)config/Makefile.config
INC_FLAGS ?= -I$(NETCDF_ROOT)/include -I$(SRCDIR) -I../tests_cuda
TESTS = test_CanopyHydrology_kern1_multiple \
TESTS = test_CanopyHydrology_kern1_single \
test_CanopyHydrology_kern1_multiple \
test_CanopyHydrology_module
EXEC_TESTS = CanopyHydrology_kern1_multiple \
EXEC_TESTS = CanopyHydrology_kern1_single \
CanopyHydrology_kern1_multiple \
CanopyHydrology_module
......@@ -25,10 +27,13 @@ test: $(EXEC_TESTS)
CanopyHydrology_kern1_single: test_CanopyHydrology_kern1_single
./test_CanopyHydrology_kern1_multiple&> test_CanopyHydrology_kern1_single.stdout
./test_CanopyHydrology_kern1_single > test_CanopyHydrology_kern1_single.stdout
CanopyHydrology_kern1_multiple: test_CanopyHydrology_kern1_multiple
./test_CanopyHydrology_kern1_multiple > test_CanopyHydrology_kern1_multiple.stdout
CanopyHydrology_module: test_CanopyHydrology_module
./test_CanopyHydrology_module&> test_CanopyHydrology_module.stdout
./test_CanopyHydrology_module > test_CanopyHydrology_module.stdout
test_%: %.cu.o readers.hh utils.hh
$(NVCC) -o $@ $< $(CUDA_LD_FLAGS)
......
/* ---------------------------------------------
Makefile constructed configuration:
Thu Jun 27 15:29:53 EDT 2019
Fri Jun 28 16:31:21 EDT 2019
----------------------------------------------*/
#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)
#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."
......
/* ---------------------------------------------
Makefile constructed configuration:
Thu Jun 27 15:45:49 EDT 2019
Fri Jun 28 16:31:21 EDT 2019
----------------------------------------------*/
#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)
#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."
......
......@@ -34,7 +34,7 @@ KOKKOS_ARCH = "BSW"
endif
# Set the Kernel programming model
CXXFLAGS += -DNATURE=KOKKOS_INLINE_FUNCTION
CXXFLAGS += -DNATURE=KOKKOS_INLINE_FUNCTION -DNATURE1=KOKKOS_INLINE_FUNCTION
default: Serial
......
# Builds and runs the Kokkos variants of the CanopyHydrology kernel tests.
#
# Targets visible in this file:
#   all / default  - make the links, build the library, then build $(TESTS)
#   test           - run every test in $(EXEC_TESTS), then compare to gold files
#   clean          - remove local build products
#   allclean       - clean, then also run `allclean` in $(OBJECT)

CSRCDIR = ../../src/cpp/
# `?=` keeps the original default but lets the environment point at another
# Kokkos checkout without editing this file.
KOKKOS_PATH ?= ${HOME}/Downloads/kokkos
KOKKOS_DEVICES = "Cuda,OpenMP"
OBJECT = ../../src/

include $(OBJECT)config/Makefile.config

# Test executables to build ...
TESTS = test_CanopyHydrology_kern1_single \
        test_CanopyHydrology_kern1_multiple

# ... and the matching targets that run them.
EXEC_TESTS = CanopyHydrology_kern1_single \
             CanopyHydrology_kern1_multiple

# Select compiler and architecture from the requested Kokkos devices.
# EXE is a per-test name list carrying the backend suffix. The original wrote
# `${TESTS }` (stray space inside the braces), which names a different,
# undefined variable and expanded to nothing. EXE is no longer used by the
# rules below; it is kept for anything that may still read it.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
EXE = $(addsuffix .cuda,$(TESTS))
KOKKOS_ARCH = "HSW,Pascal60"
KOKKOS_CUDA_OPTIONS = "enable_lambda,force_uvm"
else
CXX = g++
EXE = $(addsuffix .host,$(TESTS))
KOKKOS_ARCH = "HSW"
endif

CXXFLAGS = -g -O0
LINK = ${CXX}
LINKFLAGS = -lnetcdf -I../../src/cpp -I../tests_c
EXTRA_PATH = -I/usr/local/include

DEPFLAGS = -M

# NOTE(review): SRC is not set in this file; presumably it comes from an
# included makefile -- confirm, otherwise OBJ is empty.
OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

# None of these targets name a file that the rule creates.
.PHONY: default all test clean allclean links library $(EXEC_TESTS)

default: all

all: links library $(TESTS)

test: $(EXEC_TESTS)
	python ../compare_to_gold.py $(TESTS)

# Run targets. make executes recipes with /bin/sh, where the bash-only `&>`
# is parsed as "run in background" followed by a separate redirect. Use the
# portable `> file 2>&1` so the test finishes, and both stdout and stderr are
# captured, before compare_to_gold.py runs.
CanopyHydrology_kern1_single: test_CanopyHydrology_kern1_single
	./test_CanopyHydrology_kern1_single > test_CanopyHydrology_kern1_single.stdout 2>&1

CanopyHydrology_kern1_multiple: test_CanopyHydrology_kern1_multiple
	./test_CanopyHydrology_kern1_multiple > test_CanopyHydrology_kern1_multiple.stdout 2>&1

# Link step. The output must be the rule's own target ($@) so that the run
# targets above find ./test_<name>; the original wrote to $(EXE) instead.
test_%: $(OBJ) $(KOKKOS_LINK_DEPENDS) readers.hh utils.hh library
	$(LINK) $(KOKKOS_LDFLAGS) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@ $(LINKFLAGS) $(EXTRA_PATH)

%.o : %.cpp $(KOKKOS_CPP_DEPENDS) domains.hh readers.hh utils.hh
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(EXTRA_INC) -c $<

# NOTE(review): ELM_CLEAN is not defined in this file; presumably provided by
# Makefile.config -- confirm.
clean:
	@$(ELM_CLEAN)
	$(RM) test_*

allclean:
	@$(ELM_CLEAN)
	$(RM) test_*
	$(MAKE) -C $(OBJECT) allclean

links:
	@echo "making in links"
	$(MAKE) -C ../links links

library:
	$(MAKE) -C $(OBJECT) all
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment