Commit 447d20b5 authored by Pillai, Himanshu
Browse files

Kokkos Fixed

parent 85fb3aa4
......@@ -12,6 +12,7 @@
#include <algorithm>
#include <fstream>
#include <time.h>
#include <cuda.h>
#include "utils.hh"
#include "readers.hh"
......
......@@ -11,6 +11,7 @@
#include <iomanip>
#include <fstream>
#include <time.h>
#include <cuda.h>
#include "utils.hh"
#include "readers.hh"
......
......@@ -12,6 +12,7 @@
#include <algorithm>
#include <fstream>
#include <time.h>
#include <cuda.h>
#include "utils.hh"
#include "readers.hh"
......
......@@ -43,6 +43,9 @@ int main(int argc, char ** argv)
using ELM::Utils::n_pfts;
using ELM::Utils::n_grid_cells;
using ELM::Utils::n_max_times;
using Kokkos::parallel_for;
using Kokkos::TeamPolicy;
using Kokkos::TeamThreadRange;
// fixed magic parameters for now
const int ctype = 1;
......@@ -171,35 +174,44 @@ int main(int argc, char ** argv)
// -- the timestep loop cannot/should not be parallelized
for (size_t t = 0; t != n_times; ++t) {
// // grid cell and/or pft loop can be parallelized
// Kokkos::parallel_for(range_policy(0,n_grid_cells), KOKKOS_LAMBDA ( size_t g ) {
// //for (size_t g = 0; g != n_grid_cells; ++g) {
// //Kokkos::parallel_for(range_policy(0,n_pfts), KOKKOS_LAMBDA ( size_t p ) {
// for (size_t p = 0; p != n_pfts; ++p) {
// // NOTE: this currently punts on what to do with the qflx variables!
// // Surely they should be either accumulated or stored on PFTs as well.
// // --etc
// Kokkos::parallel_for("CanopyHydrology_Interception", Kokkos::MDRangePolicy<Kokkos::Rank<2,Kokkos::Iterate::Left>>({0,0},{n_grid_cells,n_pfts}),
// KOKKOS_LAMBDA (size_t g, size_t p) {
Kokkos::parallel_for("n_grid_cells", n_grid_cells, KOKKOS_LAMBDA (const size_t& g) {
for (size_t p = 0; p != n_pfts; ++p) {
ELM::CanopyHydrology_Interception(dtime,
forc_rain(t,g), forc_snow(t,g), forc_irrig(t,g),
ltype, ctype, urbpoi, do_capsnow,
elai(g,p), esai(g,p), dewmx, frac_veg_nosno,
h2o_can(g,p), n_irrig_steps_left,
qflx_prec_intr(g,p), qflx_irrig(g,p), qflx_prec_grnd(g,p),
qflx_snwcp_liq(g,p), qflx_snwcp_ice(g,p),
qflx_snow_grnd_patch(g,p), qflx_rain_grnd(g,p));
// qflx_prec_intr[g], qflx_irrig[g], qflx_prec_grnd[g],
// qflx_snwcp_liq[g], qflx_snwcp_ice[g],
// qflx_snow_grnd_patch[g], qflx_rain_grnd[g]);
//printf("%i %i %16.8g %16.8g %16.8g %16.8g %16.8g %16.8g\n", g, p, forc_rain(t,g), forc_snow(t,g), elai(g,p), esai(g,p), h2o_can(g,p), qflx_prec_intr[g]);
//}//)
}
});
typedef typename Kokkos::Experimental::MDRangePolicy< Kokkos::Experimental::Rank<2> > MDPolicyType_2D;
// Construct 2D MDRangePolicy: lower and upper bounds provided, tile dims defaulted
MDPolicyType_2D mdpolicy_2d( {{0,0}}, {{n_grid_cells,n_pfts}} );
// // Execute parallel_for with rank 2 MDRangePolicy
// Kokkos::parallel_for( "md2d", mdpolicy_2d, ELM::CanopyHydrology_Interception(dtime,
// forc_rain(t,g), forc_snow(t,g), forc_irrig(t,g),
// ltype, ctype, urbpoi, do_capsnow,
// elai(g,p), esai(g,p), dewmx, frac_veg_nosno,
// h2o_can(g,p), n_irrig_steps_left,
// qflx_prec_intr(g,p), qflx_irrig(g,p), qflx_prec_grnd(g,p),
// qflx_snwcp_liq(g,p), qflx_snwcp_ice(g,p),
// qflx_snow_grnd_patch(g,p), qflx_rain_grnd(g,p)); );
Kokkos::parallel_for("md2d",mdpolicy_2d,KOKKOS_LAMBDA (const size_t& g, const size_t& p) {
ELM::CanopyHydrology_Interception(dtime,
forc_rain(t,g), forc_snow(t,g), forc_irrig(t,g),
ltype, ctype, urbpoi, do_capsnow,
elai(g,p), esai(g,p), dewmx, frac_veg_nosno,
h2o_can(g,p), n_irrig_steps_left,
qflx_prec_intr(g,p), qflx_irrig(g,p), qflx_prec_grnd(g,p),
qflx_snwcp_liq(g,p), qflx_snwcp_ice(g,p),
qflx_snow_grnd_patch(g,p), qflx_rain_grnd(g,p)); });
// parallel_for (TeamPolicy<> (0,n_grid_cells), KOKKOS_LAMBDA (const size_t& g)
// {
// parallel_for (TeamThreadRange (g, n_pfts),
// [=] (size_t p) {
// ELM::CanopyHydrology_Interception(dtime,
// forc_rain(t,g), forc_snow(t,g), forc_irrig(t,g),
// ltype, ctype, urbpoi, do_capsnow,
// elai(g,p), esai(g,p), dewmx, frac_veg_nosno,
// h2o_can(g,p), n_irrig_steps_left,
// qflx_prec_intr(g,p), qflx_irrig(g,p), qflx_prec_grnd(g,p),
// qflx_snwcp_liq(g,p), qflx_snwcp_ice(g,p),
// qflx_snow_grnd_patch(g,p), qflx_rain_grnd(g,p));
// });
// });
auto min_max = std::minmax_element(&h_h2o_can(0,0), end+1);//h2o_can1.begin(), h2o_can1.end());
std::cout << std::setprecision(16)
......
......@@ -45,7 +45,9 @@ int main(int argc, char ** argv)
using ELM::Utils::n_grid_cells;
using ELM::Utils::n_max_times;
using ELM::Utils::n_levels_snow;
using Kokkos::TeamPolicy;
using Kokkos::TeamThreadRange;
// fixed magic parameters for now
const int ctype = 1;
const int ltype = 1;
......@@ -277,60 +279,41 @@ int main(int argc, char ** argv)
// -- the timestep loop cannot/should not be parallelized
for (size_t t = 0; t != n_times; ++t) {
// grid cell and/or pft loop can be parallelized
//for (size_t g = 0; g != n_grid_cells; ++g) {
// PFT level operations
//for (size_t p = 0; p != n_pfts; ++p) {
Kokkos::parallel_for("n_grid_cells", n_grid_cells, KOKKOS_LAMBDA (const size_t& g) {
for (size_t p = 0; p != n_pfts; ++p) {
//
// Calculate interception
//
// NOTE: this currently punts on what to do with the qflx variables!
// Surely they should be either accumulated or stored on PFTs as well.
// --etc
ELM::CanopyHydrology_Interception(dtime,
forc_rain(t,g), forc_snow(t,g), forc_irrig(t,g),
ltype, ctype, urbpoi, do_capsnow,
elai(g,p), esai(g,p), dewmx, frac_veg_nosno,
h2ocan(g,p), n_irrig_steps_left,
qflx_prec_intr(g,p), qflx_irrig(g,p), qflx_prec_grnd(g,p),
qflx_snwcp_liq(g,p), qflx_snwcp_ice(g,p),
qflx_snow_grnd_patch(g,p), qflx_rain_grnd(g,p));
//printf("%i %i %16.8g %16.8g %16.8g %16.8g %16.8g %16.8g\n", g, p, forc_rain(t,g), forc_snow(t,g), elai(g,p), esai(g,p), h2ocan(g,p), qflx_prec_intr(g));
//
// Calculate fraction of LAI that is wet vs dry.
//
// FIXME: this currently punts on what to do with the fwet/fdry variables.
// Surely they should be something, as such this is dead code.
// By the PFT?
// --etc
double fwet = 0., fdry = 0.;
ELM::CanopyHydrology_FracWet(frac_veg_nosno, h2ocan(g,p), elai(g,p), esai(g,p), dewmx, fwet, fdry);
} // end PFT loop
// Column level operations
typedef typename Kokkos::Experimental::MDRangePolicy< Kokkos::Experimental::Rank<2> > MDPolicyType_2D;
typedef typename MDPolicyType_2D::member_type team_member;
// Construct 2D MDRangePolicy: lower and upper bounds provided, tile dims defaulted
MDPolicyType_2D mdpolicy_2d( {{0,0}}, {{n_grid_cells,n_pfts}} );
Kokkos::parallel_for("md2d",mdpolicy_2d,KOKKOS_LAMBDA (const team_member& thread, const size_t& g, const size_t& p) {
ELM::CanopyHydrology_Interception(dtime,
forc_rain(t,g), forc_snow(t,g), forc_irrig(t,g),
ltype, ctype, urbpoi, do_capsnow,
elai(g,p), esai(g,p), dewmx, frac_veg_nosno,
h2ocan(g,p), n_irrig_steps_left,
qflx_prec_intr(g,p), qflx_irrig(g,p), qflx_prec_grnd(g,p),
qflx_snwcp_liq(g,p), qflx_snwcp_ice(g,p),
qflx_snow_grnd_patch(g,p), qflx_rain_grnd(g,p));
double fwet = 0., fdry = 0.;
ELM::CanopyHydrology_FracWet(frac_veg_nosno, h2ocan(g,p), elai(g,p), esai(g,p), dewmx, fwet, fdry);
// Column level operations
// NOTE: this is effectively an accumulation kernel/task! --etc
double* qpatch = &qflx_snow_grnd_patch(n_grid_cells-1, n_pfts-1);
// NOTE: this is effectively an accumulation kernel/task! --etc
//qflx_snow_grnd_col(g) = std::accumulate(&qflx_snow_grnd_patch(0,0), qpatch+1, 0.);
// for (int x = 0; x <n_grid_cells; x++) {
double sum = 0 ;
for (size_t p = 0; p != n_pfts; ++p) {
sum += qflx_snow_grnd_patch(g,p);
}
Kokkos::parallel_reduce(TeamThreadRange (thread, 99),
[=] (size_t& g, double& lsum) {
lsum += qflx_snow_grnd_patch(g,p);
}, sum);
qflx_snow_grnd_col(g) = sum ;
// Calculate ?water balance? on the snow column, adding throughfall,
// removing melt, etc.
//
// local outputs
int newnode;
ELM::CanopyHydrology_SnowWater(dtime, qflx_floodg,
ELM::CanopyHydrology_SnowWater(dtime, qflx_floodg,
ltype, ctype, urbpoi, do_capsnow, oldfflag,
forc_air_temp(t,g), t_grnd(g),
qflx_snow_grnd_col(g), qflx_snow_melt, n_melt, frac_h2osfc(g),
......@@ -338,7 +321,7 @@ int main(int argc, char ** argv)
Kokkos::subview(h2osoi_liq, g , Kokkos::ALL), Kokkos::subview(h2osoi_ice, g , Kokkos::ALL), Kokkos::subview(t_soisno, g , Kokkos::ALL), Kokkos::subview(frac_iceold, g , Kokkos::ALL),
snow_level(g), Kokkos::subview(dz, g , Kokkos::ALL), Kokkos::subview(z, g , Kokkos::ALL), Kokkos::subview(zi, g , Kokkos::ALL), newnode,
qflx_floodc(g), qflx_snow_h2osfc(g), frac_sno_eff(g), frac_sno(g));
// Calculate Fraction of Water to the Surface?
//
// FIXME: Fortran black magic... h2osoi_liq is a vector, but the
......
/* ---------------------------------------------
Makefile constructed configuration:
Fri Jun 28 16:31:21 EDT 2019
Tue Jul 9 12:40:47 EDT 2019
----------------------------------------------*/
#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)
#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."
......
/* ---------------------------------------------
Makefile constructed configuration:
Tue Jul 2 13:30:45 EDT 2019
Tue Jul 9 12:41:12 EDT 2019
----------------------------------------------*/
#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)
#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."
......
......@@ -140,10 +140,34 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
// main loop
// -- the timestep loop cannot/should not be parallelized
for (size_t t = 0; t != n_times; ++t) {
Kokkos::parallel_for("n_grid_cells", n_grid_cells, KOKKOS_LAMBDA (const size_t& g) {
for (size_t p = 0; p != n_pfts; ++p) {
ELM::CanopyHydrology_Interception(dtime,
//
// Define the kernel execution policy
//
using POL = RAJA::KernelPolicy<
RAJA::statement::For<1, RAJA::loop_exec,
RAJA::statement::InitLocalMem<RAJA::cpu_tile_mem, RAJA::ParamList<0, 1>,
RAJA::statement::For<0, RAJA::loop_exec,
RAJA::statement::Lambda<0>
>,
RAJA::statement::For<0, RAJA::loop_exec,
RAJA::statement::Lambda<1>
>
>
>
>;
//
// Define the kernel
//
for (size_t t = 0; t != n_times; ++t) {
RAJA::kernel_param<POL> (RAJA::RangeSegment(0,n_pfts),RAJA::make_tuple(RAJA::RangeSegment(0,n_grid_cells)),
[=] (size_t g, size_t p) {
ELM::CanopyHydrology_Interception(dtime,
forc_rain(t,g), forc_snow(t,g), forc_irrig(t,g),
ltype, ctype, urbpoi, do_capsnow,
elai(g,p), esai(g,p), dewmx, frac_veg_nosno,
......@@ -151,10 +175,11 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
qflx_prec_intr(g,p), qflx_irrig(g,p), qflx_prec_grnd(g,p),
qflx_snwcp_liq(g,p), qflx_snwcp_ice(g,p),
qflx_snow_grnd_patch(g,p), qflx_rain_grnd(g,p));
}
});
// Kokkos::parallel_for("n_grid_cells", n_grid_cells, KOKKOS_LAMBDA (const size_t& g) {
// for (size_t p = 0; p != n_pfts; ++p) {
);
auto min_max = std::minmax_element(&h_h2o_can(0,0), end+1);//h2o_can1.begin(), h2o_can1.end());
std::cout << std::setprecision(16)
......
......@@ -234,7 +234,18 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
<< "\t" << sum_snow << "\t" << *min_max_snow.first << "\t" << *min_max_snow.second
<< "\t" << avg_frac_sfc << "\t" << *min_max_frac_sfc.first << "\t" << *min_max_frac_sfc.second << std::endl;
using POL = RAJA::KernelPolicy<
RAJA::statement::For<1, RAJA::loop_exec,
RAJA::statement::InitLocalMem<RAJA::cpu_tile_mem, RAJA::ParamList<0, 1>,
RAJA::statement::For<0, RAJA::loop_exec,
RAJA::statement::Lambda<0>
>,
RAJA::statement::For<0, RAJA::loop_exec,
RAJA::statement::Lambda<1>
>
>
>
>;
// main loop
// -- the timestep loop cannot/should not be parallelized
for (size_t t = 0; t != n_times; ++t) {
......@@ -244,8 +255,9 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
// PFT level operations
//for (size_t p = 0; p != n_pfts; ++p) {
Kokkos::parallel_for("n_grid_cells", n_grid_cells, KOKKOS_LAMBDA (const size_t& g) {
for (size_t p = 0; p != n_pfts; ++p) {
RAJA::kernel_param<POL> (RAJA::RangeSegment(0,n_pfts),RAJA::make_tuple(RAJA::RangeSegment(0,n_grid_cells)),
[=] (size_t g, size_t p) {
//
// Calculate interception
//
......@@ -260,7 +272,7 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
qflx_prec_intr(g,p), qflx_irrig(g,p), qflx_prec_grnd(g,p),
qflx_snwcp_liq(g,p), qflx_snwcp_ice(g,p),
qflx_snow_grnd_patch(g,p), qflx_rain_grnd(g,p));
//printf("%i %i %16.8g %16.8g %16.8g %16.8g %16.8g %16.8g\n", g, p, forc_rain(t,g), forc_snow(t,g), elai(g,p), esai(g,p), h2ocan(g,p), qflx_prec_intr(g));
//printf("%i %i %16.8g %16.8g %16.8g %16.8g %16.8g %16.8g\n"(g, p, forc_rain(t,g), forc_snow(t,g), elai(g,p), esai(g,p), h2ocan(g,p), qflx_prec_intr(g));
//
// Calculate fraction of LAI that is wet vs dry.
......@@ -296,9 +308,9 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
ltype, ctype, urbpoi, do_capsnow, oldfflag,
forc_air_temp(t,g), t_grnd(g),
qflx_snow_grnd_col(g), qflx_snow_melt, n_melt, frac_h2osfc(g),
snow_depth(g), h2osno(g), integrated_snow(g), Kokkos::subview(swe_old, g , Kokkos::ALL),
Kokkos::subview(h2osoi_liq, g , Kokkos::ALL), Kokkos::subview(h2osoi_ice, g , Kokkos::ALL), Kokkos::subview(t_soisno, g , Kokkos::ALL), Kokkos::subview(frac_iceold, g , Kokkos::ALL),
snow_level(g), Kokkos::subview(dz, g , Kokkos::ALL), Kokkos::subview(z, g , Kokkos::ALL), Kokkos::subview(zi, g , Kokkos::ALL), newnode,
snow_depth(g), h2osno(g), integrated_snow(g), swe_old(g),
h2osoi_liq(g), h2osoi_ice(g), t_soisno(g), frac_iceold(g),
snow_level(g), dz(g), z(g), zi(g), newnode,
qflx_floodc(g), qflx_snow_h2osfc(g), frac_sno_eff(g), frac_sno(g));
// Calculate Fraction of Water to the Surface?
......@@ -310,7 +322,7 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
h2osno(g), h2osfc(g), h2osoi_liq(g,0), frac_sno(g), frac_sno_eff(g),
qflx_h2osfc2topsoi(g), frac_h2osfc(g));
}); // end grid cell loop
); // end grid cell loop
// auto min_max = std::minmax_element(h2ocan.begin(), h2ocan.end());
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment