Commit 38dc913c authored by Pillai, Himanshu
Browse files

Timer added

parent e981be8a
......@@ -14,6 +14,8 @@ ELM_DATA_LOCATION ?= /home/7hp/Downloads/acme_inputdata
# ATS as a part of an ATS installation.
NETCDF_ROOT ?= /usr/local
#NETCDF_ROOT ?= $(AMANZI_TPLS_DIR)
MPI_LIB_ROOT ?= /usr/lib/x86_64-linux-gnu/openmpi
ELM_UTILS_DIR = cime_utils
# assumes you have a working gfortran
FC ?= gfortran
......@@ -31,13 +33,13 @@ CXX ?= g++
MPICC ?= mpicc
MPICXX ?= mpic++
CXXFLAGS += -Wall -Wshadow
CXXFLAGS ?= -g -Wall -Wshadow -std=c++11
STD_LIB_ROOT = /usr
# assumes you have a working CUDA
NVCC=nvcc
CUDA_FLAGS= -std=c++11 -c -arch=sm_60
CUDA_FLAGS ?= -std=c++11 -c -arch=sm_60
CUDA_LIBS= -lopenblas -lpthread -lcudart -lcublas
CUDA_LIBDIRS=/usr/local/cuda-10.0/lib64
CUDA_INCDIRS=-I/usr/local/cuda-10.0/include
......@@ -54,7 +56,10 @@ ELM_INC_FLAGS = -I$(ELM_ROOT)
GFORTRAN_FLAGS = -L$(FC_LIB_ROOT) -lgfortran
# linking flags
CUDA_LD_FLAGS = -L$(CUDA_LIBDIRS) -lpthread -lcudart -lcublas
CC_LD_FLAGS += -L$(SRCDIR) -lelm -L$(NETCDF_ROOT) -lnetcdf -L$(STD_LIB_ROOT)/lib -lstdc++ -L$(MPI_LIB_ROOT)/lib -L$(FC_LIB_ROOT) -lgfortran
FC_LD_FLAGS += -L$(ELM_ROOT) -lelm -L$(NETCDF_ROOT) -lnetcdff
CXX_LD_FLAGS += -L$(ELM_ROOT) -lelm -L$(NETCDF_ROOT) -lnetcdf
CUDA_LD_FLAGS = -L$(SRCDIR) -lelm -L$(NETCDF_ROOT) -lnetcdf -L$(CUDA_LIBDIRS) -lstdc++ -lpthread -lcudart -lcublas
#
# rules
......@@ -64,11 +69,13 @@ CUDA_LD_FLAGS = -L$(CUDA_LIBDIRS) -lpthread -lcudart -lcublas
.SUFFIXES: .cc.o .F90.o .cpp.o .cu.o .cc .F90 .cpp .cu
%.F90.o: %.F90
$(FC) $(FC_FLAGS) $(NETCDF_INC_FLAGS) $(ELM_INC_FLAGS) -c $< -o $@
$(FC) $(FC_FLAGS) -c $< -o $@
%.cc.o: %.cc
$(CC) $(CXXFLAGS) $(INC_FLAGS) -c $< -o $@
%.cpp.o: %.cpp
$(CXX) $(CXXFLAGS) $(NETCDF_INC_FLAGS) $(ELM_INC_FLAGS) -c $< -o $@
$(CXX) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) $(NETCDF_INC_FLAGS) $(ELM_INC_FLAGS) -c $< -o $@
%.cu.o: %.cu
$(NVCC) $(CUDA_FLAGS) -c $< -o $@
$(NVCC) $(CUDA_FLAGS) $(INC_FLAGS) -c $< -o $@
SHELL = /bin/sh
......
......@@ -9,14 +9,16 @@
#include <iostream>
#include <iomanip>
#include <numeric>
#include <fstream>
#include <algorithm>
#include <assert.h>
#include <mpi.h>
#include <chrono>
#include "utils.hh"
#include "readers.hh"
#include "CanopyHydrology.hh"
using namespace std::chrono;
namespace ELM {
namespace Utils {
......@@ -87,16 +89,18 @@ int main(int argc, char ** argv)
// output state by the pft
auto h2o_can = ELM::Utils::MatrixState(); h2o_can = 0.;
std::ofstream soln_file;
soln_file.open("test_CanopyHydrology_kern1_multiple.soln");
{
std::cout << "Time\t Total Canopy Water\t Min Water\t Max Water" << std::endl;
soln_file << "Time\t Total Canopy Water\t Min Water\t Max Water" << std::endl;
auto min_max = std::minmax_element(h2o_can.begin(), h2o_can.end());
std::cout << std::setprecision(16)
soln_file << std::setprecision(16)
<< 0 << "\t" << std::accumulate(h2o_can.begin(), h2o_can.end(), 0.)
<< "\t" << *min_max.first
<< "\t" << *min_max.second << std::endl;
}
auto start = high_resolution_clock::now();
// main loop
// -- the timestep loop cannot/should not be parallelized
for (size_t t = 0; t != n_times; ++t) {
......@@ -125,12 +129,15 @@ int main(int argc, char ** argv)
{
auto min_max = std::minmax_element(h2o_can.begin(), h2o_can.end());
std::cout << std::setprecision(16)
soln_file << std::setprecision(16)
<< t+1 << "\t" << std::accumulate(h2o_can.begin(), h2o_can.end(), 0.)
<< "\t" << *min_max.first
<< "\t" << *min_max.second << std::endl;
}
}
auto stop = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stop - start);
std::cout << "Time taken by function: "<< duration.count() << " microseconds" << std::endl;
// Finalize MPI before leaving main: a statement placed after `return`
// is unreachable, so the original ordering never shut MPI down.
MPI_Finalize();
return 0;
}
......@@ -9,13 +9,15 @@
#include <vector>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <assert.h>
#include <mpi.h>
#include <chrono>
#include "utils.hh"
#include "readers.hh"
#include "CanopyHydrology.hh"
using namespace std::chrono;
namespace ELM {
namespace Utils {
......@@ -71,8 +73,10 @@ int main(int argc, char ** argv)
double qflx_snwcp_ice = 0.;
double qflx_snow_grnd_patch = 0.;
double qflx_rain_grnd = 0.;
std::cout << "Timestep, forc_rain, h2ocan, qflx_prec_grnd, qflx_prec_intr" << std::endl;
auto start = high_resolution_clock::now();
std::ofstream soln_file;
soln_file.open("test_CanopyHydrology_kern1_single.soln");
soln_file << "Timestep, forc_rain, h2ocan, qflx_prec_grnd, qflx_prec_intr" << std::endl;
for(size_t itime = 0; itime < n_times; itime += 1) {
// note this call puts all precip as rain for testing
double total_precip = forc_rain[itime][0] + forc_snow[itime][0];
......@@ -84,9 +88,11 @@ int main(int argc, char ** argv)
qflx_snwcp_liq, qflx_snwcp_ice,
qflx_snow_grnd_patch, qflx_rain_grnd);
std::cout << std::setprecision(16) << itime+1 << "\t" << total_precip << "\t" << h2ocan<< "\t" << qflx_prec_grnd << "\t" << qflx_prec_intr << std::endl;
soln_file << std::setprecision(16) << itime+1 << "\t" << total_precip << "\t" << h2ocan<< "\t" << qflx_prec_grnd << "\t" << qflx_prec_intr << std::endl;
}
auto stop = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stop - start);
std::cout << "Time taken by function: "<< duration.count() << " microseconds" << std::endl;
// Finalize MPI before leaving main: a statement placed after `return`
// is unreachable, so the original ordering never shut MPI down.
MPI_Finalize();
return 0;
}
......@@ -11,14 +11,16 @@
#include <numeric>
#include <algorithm>
#include <assert.h>
#include <fstream>
#include <mpi.h>
#include <chrono>
#include "utils.hh"
#include "readers.hh"
#include "CanopyHydrology.hh"
#include "CanopyHydrology_SnowWater_impl.hh"
using namespace std::chrono;
namespace ELM {
......@@ -129,8 +131,10 @@ int main(int argc, char ** argv)
auto frac_sno_eff = ELM::Utils::VectorColumn();
auto frac_sno = ELM::Utils::VectorColumn();
std::ofstream soln_file;
soln_file.open("test_CanopyHydrology_module.soln");
{
std::cout << "Time\t Total Canopy Water\t Min Water\t Max Water\t Total Snow\t Min Snow\t Max Snow\t Avg Frac Sfc\t Min Frac Sfc\t Max Frac Sfc" << std::endl;
soln_file << "Time\t Total Canopy Water\t Min Water\t Max Water\t Total Snow\t Min Snow\t Max Snow\t Avg Frac Sfc\t Min Frac Sfc\t Max Frac Sfc" << std::endl;
auto min_max_water = std::minmax_element(h2ocan.begin(), h2ocan.end());
auto sum_water = std::accumulate(h2ocan.begin(), h2ocan.end(), 0.);
......@@ -140,12 +144,12 @@ int main(int argc, char ** argv)
auto min_max_frac_sfc = std::minmax_element(frac_h2osfc.begin(), frac_h2osfc.end());
auto avg_frac_sfc = std::accumulate(frac_h2osfc.begin(), frac_h2osfc.end(), 0.) / (frac_h2osfc.end() - frac_h2osfc.begin());
std::cout << std::setprecision(16)
soln_file << std::setprecision(16)
<< 0 << "\t" << sum_water << "\t" << *min_max_water.first << "\t" << *min_max_water.second
<< "\t" << sum_snow << "\t" << *min_max_snow.first << "\t" << *min_max_snow.second
<< "\t" << avg_frac_sfc << "\t" << *min_max_frac_sfc.first << "\t" << *min_max_frac_sfc.second << std::endl;
}
auto start = high_resolution_clock::now();
// main loop
// -- the timestep loop cannot/should not be parallelized
for (size_t t = 0; t != n_times; ++t) {
......@@ -222,11 +226,14 @@ int main(int argc, char ** argv)
auto min_max_frac_sfc = std::minmax_element(frac_h2osfc.begin(), frac_h2osfc.end());
auto avg_frac_sfc = std::accumulate(frac_h2osfc.begin(), frac_h2osfc.end(), 0.) / (frac_h2osfc.end() - frac_h2osfc.begin());
std::cout << std::setprecision(16)
soln_file << std::setprecision(16)
<< t+1 << "\t" << sum_water << "\t" << *min_max_water.first << "\t" << *min_max_water.second
<< "\t" << sum_snow << "\t" << *min_max_snow.first << "\t" << *min_max_snow.second
<< "\t" << avg_frac_sfc << "\t" << *min_max_frac_sfc.first << "\t" << *min_max_frac_sfc.second << std::endl;
} // end timestep loop
auto stop = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stop - start);
std::cout << "Time taken by function: "<< duration.count() << " microseconds" << std::endl;
// Finalize MPI before leaving main: a statement placed after `return`
// is unreachable, so the original ordering never shut MPI down.
MPI_Finalize();
return 0;
}
......@@ -8,13 +8,15 @@
#include <vector>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <numeric>
#include <algorithm>
#include <chrono>
#include "utils.hh"
#include "readers.hh"
#include "CanopyHydrology.hh"
using namespace std::chrono;
namespace ELM {
......@@ -88,16 +90,18 @@ int main(int argc, char ** argv)
// output state by the pft
auto h2o_can = ELM::Utils::MatrixState(); h2o_can = 0.;
std::ofstream soln_file;
soln_file.open("test_CanopyHydrology_kern1_multiple.soln");
{
std::cout << "Time\t Total Canopy Water\t Min Water\t Max Water" << std::endl;
soln_file << "Time\t Total Canopy Water\t Min Water\t Max Water" << std::endl;
auto min_max = std::minmax_element(h2o_can.begin(), h2o_can.end());
std::cout << std::setprecision(16)
soln_file << std::setprecision(16)
<< 0 << "\t" << std::accumulate(h2o_can.begin(), h2o_can.end(), 0.)
<< "\t" << *min_max.first
<< "\t" << *min_max.second << std::endl;
}
auto start = high_resolution_clock::now();
// main loop
// -- the timestep loop cannot/should not be parallelized
for (size_t t = 0; t != n_times; ++t) {
......@@ -125,11 +129,14 @@ int main(int argc, char ** argv)
}
auto min_max = std::minmax_element(h2o_can.begin(), h2o_can.end());
std::cout << std::setprecision(16)
soln_file << std::setprecision(16)
<< t+1 << "\t" << std::accumulate(h2o_can.begin(), h2o_can.end(), 0.)
<< "\t" << *min_max.first
<< "\t" << *min_max.second << std::endl;
}
auto stop = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stop - start);
std::cout << "Time taken by function: "<< duration.count() << " microseconds" << std::endl;
return 0;
}
......@@ -7,15 +7,16 @@
#include <stdlib.h>
#include <cstring>
#include <vector>
#include <fstream>
#include <iostream>
#include <iomanip>
#include <chrono>
#include "utils.hh"
#include "readers.hh"
#include "CanopyHydrology.hh"
using namespace std::chrono;
namespace ELM {
namespace Utils {
......@@ -68,8 +69,10 @@ int main(int argc, char ** argv)
double qflx_snwcp_ice = 0.;
double qflx_snow_grnd_patch = 0.;
double qflx_rain_grnd = 0.;
std::cout << "Timestep, forc_rain, h2ocan, qflx_prec_grnd, qflx_prec_intr" << std::endl;
std::ofstream soln_file;
soln_file.open("test_CanopyHydrology_kern1_single.soln");
soln_file << "Timestep, forc_rain, h2ocan, qflx_prec_grnd, qflx_prec_intr" << std::endl;
auto start = high_resolution_clock::now();
for(size_t itime = 0; itime < n_times; itime += 1) {
// note this call puts all precip as rain for testing
double total_precip = forc_rain[itime][0] + forc_snow[itime][0];
......@@ -81,8 +84,10 @@ int main(int argc, char ** argv)
qflx_snwcp_liq, qflx_snwcp_ice,
qflx_snow_grnd_patch, qflx_rain_grnd);
std::cout << std::setprecision(16) << itime+1 << "\t" << total_precip << "\t" << h2ocan<< "\t" << qflx_prec_grnd << "\t" << qflx_prec_intr << std::endl;
soln_file << std::setprecision(16) << itime+1 << "\t" << total_precip << "\t" << h2ocan<< "\t" << qflx_prec_grnd << "\t" << qflx_prec_intr << std::endl;
}
auto stop = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stop - start);
std::cout << "Time taken by function: "<< duration.count() << " microseconds" << std::endl;
return 0;
}
......@@ -6,15 +6,17 @@
#include <stdlib.h>
#include <cstring>
#include <vector>
#include <fstream>
#include <iostream>
#include <iomanip>
#include <numeric>
#include <algorithm>
#include <chrono>
#include "utils.hh"
#include "readers.hh"
#include "CanopyHydrology.hh"
using namespace std::chrono;
namespace ELM {
namespace Utils {
......@@ -123,9 +125,10 @@ int main(int argc, char ** argv)
auto frac_sno_eff = ELM::Utils::VectorColumn();
auto frac_sno = ELM::Utils::VectorColumn();
std::ofstream soln_file;
soln_file.open("test_CanopyHydrology_module.soln");
{
std::cout << "Time\t Total Canopy Water\t Min Water\t Max Water\t Total Snow\t Min Snow\t Max Snow\t Avg Frac Sfc\t Min Frac Sfc\t Max Frac Sfc" << std::endl;
soln_file << "Time\t Total Canopy Water\t Min Water\t Max Water\t Total Snow\t Min Snow\t Max Snow\t Avg Frac Sfc\t Min Frac Sfc\t Max Frac Sfc" << std::endl;
auto min_max_water = std::minmax_element(h2ocan.begin(), h2ocan.end());
auto sum_water = std::accumulate(h2ocan.begin(), h2ocan.end(), 0.);
......@@ -135,12 +138,12 @@ int main(int argc, char ** argv)
auto min_max_frac_sfc = std::minmax_element(frac_h2osfc.begin(), frac_h2osfc.end());
auto avg_frac_sfc = std::accumulate(frac_h2osfc.begin(), frac_h2osfc.end(), 0.) / (frac_h2osfc.end() - frac_h2osfc.begin());
std::cout << std::setprecision(16)
soln_file << std::setprecision(16)
<< 0 << "\t" << sum_water << "\t" << *min_max_water.first << "\t" << *min_max_water.second
<< "\t" << sum_snow << "\t" << *min_max_snow.first << "\t" << *min_max_snow.second
<< "\t" << avg_frac_sfc << "\t" << *min_max_frac_sfc.first << "\t" << *min_max_frac_sfc.second << std::endl;
}
auto start = high_resolution_clock::now();
// main loop
// -- the timestep loop cannot/should not be parallelized
for (size_t t = 0; t != n_times; ++t) {
......@@ -217,10 +220,13 @@ int main(int argc, char ** argv)
auto min_max_frac_sfc = std::minmax_element(frac_h2osfc.begin(), frac_h2osfc.end());
auto avg_frac_sfc = std::accumulate(frac_h2osfc.begin(), frac_h2osfc.end(), 0.) / (frac_h2osfc.end() - frac_h2osfc.begin());
std::cout << std::setprecision(16)
soln_file << std::setprecision(16)
<< t+1 << "\t" << sum_water << "\t" << *min_max_water.first << "\t" << *min_max_water.second
<< "\t" << sum_snow << "\t" << *min_max_snow.first << "\t" << *min_max_snow.second
<< "\t" << avg_frac_sfc << "\t" << *min_max_frac_sfc.first << "\t" << *min_max_frac_sfc.second << std::endl;
} // end timestep loop
auto stop = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stop - start);
std::cout << "Time taken by function: "<< duration.count() << " microseconds" << std::endl;
return 0;
}
......@@ -13,7 +13,7 @@
#include "utils.hh"
#include "readers.hh"
#include "CanopyHydrology_cpp.hh"
#include "CanopyHydrology_decl.hh"
namespace ELM {
......
......@@ -13,7 +13,7 @@
#include "utils.hh"
#include "readers.hh"
#include "CanopyHydrology_cpp.hh"
#include "CanopyHydrology_decl.hh"
namespace ELM {
......
......@@ -14,8 +14,7 @@
#include "readers.hh"
#include "CanopyHydrology_cpp.hh"
#include "CanopyHydrology_SnowWater_impl.hh"
#include "CanopyHydrology.hh"
......
OBJECT = ../../src/
KERNEL_LANG = cc
SRCDIR = $(OBJECT)$(KERNEL_LANG)
NATURE = "__global__"
CUDA_FLAGS += -DNATURE=__global__
NVCC = nvcc
include $(OBJECT)config/Makefile.config
INC_FLAGS = -I$(AMANZI_TPLS_DIR)/include -I$(SRCDIR)
INC_FLAGS ?= -I$(NETCDF_ROOT)/include -I$(SRCDIR) -I../tests_cuda
TESTS = test_CanopyHydrology_kern1_multiple \
test_CanopyHydrology_module
......@@ -30,7 +30,7 @@ CanopyHydrology_kern1_single: test_CanopyHydrology_kern1_single
# Run the module test and capture both stdout and stderr in one log file.
# POSIX redirection is used on purpose: make runs recipes with /bin/sh, where
# the bash-only `&>` is parsed as "run in background" followed by "> file",
# which loses the test output and its exit status.
CanopyHydrology_module: test_CanopyHydrology_module
	./test_CanopyHydrology_module > test_CanopyHydrology_module.stdout 2>&1
test_%: %.cu.o readers.hh utils.hh library
test_%: %.cu.o readers.hh utils.hh
$(NVCC) -o $@ $< $(CUDA_LD_FLAGS)
......@@ -41,11 +41,8 @@ clean:
allclean:
@$(ELM_CLEAN)
$(RM) test_*
$(MAKE) -C $(OBJECT) allclean
$(MAKE) -C $(SRCDIR) allclean
links:
@echo "making in links"
$(MAKE) -C ../links links
library:
$(MAKE) -C $(OBJECT) all
\ No newline at end of file
$(MAKE) -C ../links links
\ No newline at end of file
SRCDIR = ../../src/fortran/
OBJECT = ../../src/
ELM_BASE = ../../src
KERNEL_FOLDER = fortran
ELM_ROOT = $(ELM_BASE)/$(KERNEL_FOLDER)
include $(OBJECT)config/Makefile.config
FC = gfortran
FC_FLAGS += -I$(SRCDIR) -I$(SRCDIR)$(ELM_UTILS_DIR) -I$(NETCDF_ROOT)/include
include $(ELM_BASE)/config/Makefile.config
FC_FLAGS += -I$(ELM_ROOT) -I$(ELM_ROOT)/$(ELM_UTILS_DIR) -I$(NETCDF_ROOT)/include
TESTS = test_CanopyHydrology_kern1_single \
test_CanopyHydrology_kern1_multiple \
......@@ -45,11 +47,11 @@ clean:
allclean:
@$(ELM_CLEAN)
$(RM) test_*
$(MAKE) -C $(OBJECT) allclean
$(MAKE) -C $(ELM_ROOT) allclean
links:
@echo "making in links"
$(MAKE) -C ../links links
library:
$(MAKE) -C $(OBJECT) fortran
$(MAKE) -C $(ELM_ROOT)
......@@ -11,12 +11,14 @@
#include <numeric>
#include <fstream>
#include <algorithm>
#include <chrono>
#include <Kokkos_Core.hpp>
#include "utils.hh"
#include "readers.hh"
#include "landunit_varcon.h"
#include "column_varcon.h"
#include "CanopyHydrology.hh"
using namespace std::chrono;
namespace ELM {
namespace Utils {
......@@ -163,7 +165,8 @@ int main(int argc, char ** argv)
Kokkos::Timer timer;
auto start = high_resolution_clock::now();
// main loop
// -- the timestep loop cannot/should not be parallelized
for (size_t t = 0; t != n_times; ++t) {
......@@ -209,6 +212,15 @@ int main(int argc, char ** argv)
<< "\t" << *min_max.second << std::endl;
} soln_file.close();
double time = timer.seconds();
double Gbytes = 1.0e-9 * double( sizeof(double) * ( n_grid_cells + n_grid_cells * n_pfts + n_pfts ) );
printf( " n_pfts( %d ) n_grid_cells( %d ) n_times ( %d ) problem( %g MB ) time( %g s ) bandwidth( %g GB/s )\n",
n_pfts, n_grid_cells, n_times, Gbytes * 1000, time, Gbytes * n_times / time );
auto stop = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stop - start);
std::cout << "Time taken by function: "<< duration.count() << " microseconds" << std::endl;
}
Kokkos::finalize();
return 0;
......
......@@ -11,6 +11,7 @@
#include <iomanip>
#include <numeric>
#include <fstream>
#include <chrono>
#include <algorithm>
#include <Kokkos_Core.hpp>
......@@ -18,6 +19,7 @@
#include "readers.hh"
#include "CanopyHydrology.hh"
using namespace std::chrono;
namespace ELM {
namespace Utils {
......@@ -100,6 +102,9 @@ int main(int argc, char ** argv)
soln_file.open("test_CanopyHydrology_kern1_single.soln");
std::cout << "Timestep, forc_rain, h2ocan, qflx_prec_grnd, qflx_prec_intr, total_precip_loop" << std::endl;
soln_file << "Timestep, forc_rain, h2ocan, qflx_prec_grnd, qflx_prec_intr, total_precip_loop" << std::endl;
auto start = high_resolution_clock::now();
for(size_t itime = 0; itime < n_times; itime += 1) { //Kokkos::parallel_for(n_times, KOKKOS_LAMBDA (const int itime) {
// note this call puts all precip as rain for testing
......@@ -115,6 +120,11 @@ int main(int argc, char ** argv)
soln_file << std::setprecision(16) << itime+1 << "\t" << total_precip << "\t" << h2ocan<< "\t" << qflx_prec_grnd << "\t" << qflx_prec_intr << std::endl;
std::cout << std::setprecision(16) << itime+1 << "\t" << total_precip << "\t" << h2ocan<< "\t" << qflx_prec_grnd << "\t" << qflx_prec_intr << std::endl;
}soln_file.close();
auto stop = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stop - start);
std::cout << "Time taken by function: "<< duration.count() << " microseconds" << std::endl;
}
Kokkos::finalize();
return 0;
......
......@@ -10,11 +10,13 @@
#include <iomanip>
#include <numeric>
#include <fstream>
#include <chrono>
#include <Kokkos_Core.hpp>
#include "utils.hh"
#include "readers.hh"
#include "CanopyHydrology.hh"
#include "CanopyHydrology_SnowWater_impl.hh"
using namespace std::chrono;
namespace ELM {
namespace Utils {
......@@ -269,7 +271,8 @@ int main(int argc, char ** argv)
<< "\t" << sum_snow << "\t" << *min_max_snow.first << "\t" << *min_max_snow.second
<< "\t" << avg_frac_sfc << "\t" << *min_max_frac_sfc.first << "\t" << *min_max_frac_sfc.second << std::endl;
Kokkos::Timer timer;
auto start = high_resolution_clock::now();
// main loop
// -- the timestep loop cannot/should not be parallelized
for (size_t t = 0; t != n_times; ++t) {
......@@ -374,6 +377,16 @@ int main(int argc, char ** argv)
} // end timestep loop
soln_file.close();
double time = timer.seconds();
double Gbytes = 1.0e-9 * double( sizeof(double) * ( n_grid_cells + n_grid_cells * n_pfts + n_pfts ) );
printf( " n_pfts( %d ) n_grid_cells( %d ) n_times ( %d ) problem( %g MB ) time( %g s ) bandwidth( %g GB/s )\n",
n_pfts, n_grid_cells, n_times, Gbytes * 1000, time, Gbytes * n_times / time );
auto stop = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stop - start);
std::cout << "Time taken by function: "<< duration.count() << " microseconds" << std::endl;
}
Kokkos::finalize();
return 0;
......