Updating miniapp (bb9830cf) · Commits · Berrill, Mark A / RayTrace-miniapp

build/configure

+2 −2

Original line number	Diff line number	Diff line
		@@ -5,12 +5,12 @@ export KOKKOS_DIR=/packages/TPLs/install/opt/kokkos
		rm -rf CMake*

		cmake \
		-D CMAKE_BUILD_TYPE=Release \
		-D CMAKE_BUILD_TYPE=Debug \
		-D CMAKE_CXX_COMPILER=g++ \
		-D CXX_STD=11 \
		-D USE_OPENACC=0 \
		-D USE_OPENMP=1 \
		-D USE_KOKKOS=1 \
		-D USE_KOKKOS=0 \
		-D KOKKOS_DIRECTORY=${KOKKOS_DIR} \
		-D KOKKOS_WRAPPER=${KOKKOS_DIR}/nvcc_wrapper \
		-D USE_CUDA=1 \

src/AtomicModel/interp.cpp

+87 −56

Original line number	Diff line number	Diff line
		@@ -98,7 +98,7 @@ double interp::trilinear( double x, double y, double z, size_t Nx, size_t Ny, si

		// Subroutine to perform cubic hermite interpolation
		template<class TYPE>
		static inline TYPE pchip( size_t N, const TYPE xi, const TYPE yi, TYPE x )
		static inline TYPE pchip( size_t N, size_t i, const TYPE xi, const TYPE yi, TYPE x )
		{
		if ( x <= xi[0] \|\| N <= 2 ) {
		double dx = ( x - xi[0] ) / ( xi[1] - xi[0] );
		@@ -107,7 +107,6 @@ static inline TYPE pchip( size_t N, const TYPE xi, const TYPE yi, TYPE x )
		double dx = ( x - xi[N - 2] ) / ( xi[N - 1] - xi[N - 2] );
		return ( 1.0 - dx ) * yi[N - 2] + dx * yi[N - 1];
		}
		size_t i = interp::findfirstsingle( xi, N, x );
		double f1 = yi[i - 1];
		double f2 = yi[i];
		double dx = ( x - xi[i - 1] ) / ( xi[i] - xi[i - 1] );
		@@ -149,11 +148,13 @@ static inline TYPE pchip( size_t N, const TYPE xi, const TYPE yi, TYPE x )
		}
		double interp::interp_pchip( size_t N, const double xi, const double yi, double x )
		{
		return pchip( N, xi, yi, x );
		size_t i = interp::findfirstsingle( xi, N, x );
		return pchip( N, i, xi, yi, x );
		}
		float interp::interp_pchip( size_t N, const float xi, const float yi, float x )
		{
		return pchip( N, xi, yi, x );
		size_t i = interp::findfirstsingle( xi, N, x );
		return pchip( N, i, xi, yi, x );
		}


		@@ -207,66 +208,96 @@ double interp::calc_width( size_t n, const double x, const double y )

		// Calculate coefficients for the bisection method
		double interp::bisection_coeff(
		size_t N, const double x_in, const double r_in, double *range_out )
		size_t N, const double x_in, const double r_in, std::array<double, 2> &range )
		{
		if ( N < 2 )
		throw std::logic_error( "Error: N<2" );
		// Copy x and r
		auto *x = new double[N];
		auto *r = new double[N];
		memcpy( x, x_in, N * sizeof( double ) );
		memcpy( r, r_in, N * sizeof( double ) );
		// Change the sign or r such that min(x) will be at the beginning
		auto tmp = std::make_pair( x[0], r[0] );
		if ( N == 2 ) {
		range[0] = x_in[0];
		range[1] = x_in[1];
		if ( range[0] > range[1] )
		std::swap( range[0], range[1] );
		return 0.5 * ( x_in[0] + x_in[1] );
		}
		// Get the sign of the function such that f(-inf) is negitive
		double sign = 1;
		double x_min = 1e200;
		for ( size_t i = 0; i < N; i++ ) {
		if ( x[i] < tmp.first )
		tmp = std::make_pair( x[i], r[i] );
		}
		if ( tmp.second > 0 ) {
		for ( size_t i = 0; i < N; i++ )
		r[i] = -r[i];
		}
		// Sort x and r
		quicksort( N, r, x );
		// Check that we have two signs for r
		if ( r[0] > 0.0 \|\| r[N - 1] < 0.0 ) {
		delete[] x;
		delete[] r;
		throw std::logic_error( "r does not have two different signs" );
		}
		// Find r >= 0
		size_t index = findfirstsingle( r, N, 0.0 );
		double range[2] = { x[index - 1], x[index] };
		for ( size_t i = 0; i < index; i++ )
		range[0] = std::max( range[0], x[i] );
		for ( size_t i = index; i < N; i++ )
		range[1] = std::min( range[1], x[i] );
		double y = 0;
		if ( N < 5 ) {
		// Use simple bisection for the first couple of iterations
		y = 0.5 * ( range[0] + range[1] );
		} else if ( index == 1 \|\| index == N - 1 ) {
		// We are at the boundary which reduces the gradient that we can use
		// Use an uneven bisection to try an ensure we have 2 points on each side
		if ( index == 1 ) {
		y = 0.8 * x[0] + 0.2 * x[1];
		} else {
		y = 0.2 * x[N - 2] + 0.8 * x[N - 1];
		if ( x_in[i] < x_min ) {
		x_min = x_in[i];
		sign = r_in[i] < 0 ? 1 : -1;
		}
		}
		// Copy the closest two points on each side
		double x[4] = { -1e200, -1e200, 1e200, 1e200 };
		double f[4] = { -1e200, -1e200, 1e200, 1e200 };
		for ( size_t i = 0; i < N; i++ ) {
		double x0 = x_in[i];
		double f0 = r_in[i] * sign;
		if ( f0 < 0.0 ) {
		if ( x0 > x[0] ) {
		x[0] = x0;
		f[0] = f0;
		}
		if ( x[0] > x[1] ) {
		std::swap( x[0], x[1] );
		std::swap( f[0], f[1] );
		}
		} else {
		// Use cubic interpolation
		y = interp_pchip( N, r, x, 0.0 );
		// Check that y is within the range
		y = std::max( std::min( y, range[1] ), range[0] );
		// Adjust y to move it toward the center
		y = 0.9 * y + 0.1 * ( 0.5 * ( range[0] + range[1] ) );
		if ( x0 < x[3] ) {
		x[3] = x0;
		f[3] = f0;
		}
		if ( x[2] > x[3] ) {
		std::swap( x[2], x[3] );
		std::swap( f[2], f[3] );
		}
		}
		}
		if ( x[1] > x[2] ) {
		// We have multiple zeros, try to keep one
		if ( x[0] < x[2] ) {
		range[0] = x[0];
		range[1] = x[2];
		return 0.5 * ( x[0] + x[2] );
		} else if ( x[1] < x[3] ) {
		range[0] = x[1];
		range[1] = x[3];
		return 0.5 * ( x[1] + x[3] );
		} else {
		throw std::logic_error( "unable to find valid range" );
		}
		}
		// Finished
		delete[] x;
		delete[] r;
		if ( range_out != nullptr ) {
		range_out[0] = range[0];
		range_out[1] = range[1];
		range[0] = x[1];
		range[1] = x[2];
		if ( N > 30 ) {
		// We are having difficulties, switch back to simple bisection
		return 0.5 * ( x[1] + x[2] );
		}
		if ( fabs( f[0] ) > 1e100 ) {
		// Use an uneven bisection to try an ensure we have 2 points on each side
		double r = exp( -( N - 1. ) );
		return ( 1.0 - r ) * x[1] + r * x[2];
		} else if ( fabs( f[3] ) > 1e100 ) {
		// Use an uneven bisection to try an ensure we have 2 points on each side
		double r = exp( -( N - 1. ) );
		return ( 1.0 - r ) * x[2] + r * x[1];
		}
		// Use the 4 closest points to extimate the zero-crossing
		if ( f[0] > f[1] ) {
		std::swap( x[0], x[1] );
		std::swap( f[0], f[1] );
		} else if ( f[2] > f[3] ) {
		std::swap( x[2], x[3] );
		std::swap( f[2], f[3] );
		}
		double y = pchip( 4, 2, f, x, 0.0 );
		// Check that y is within the range
		y = std::min( y, x[2] );
		y = std::max( y, x[1] );
		// Adjust y to move it toward the center
		y = 0.95 * y + 0.05 * ( 0.5 * ( x[1] + x[2] ) );
		if ( y != y )
		y = 0.5 * ( x[1] + x[2] );
		return y;
		}

src/AtomicModel/interp.h

+18 −13

Original line number	Diff line number	Diff line
		#ifndef included_interp
		#define included_interp

		#include <array>
		#include <cstdlib>
		#include <functional>
		#include <math.h>
		#include <stdio.h>
		#include <stdlib.h>
		#include <vector>

		#include "CompilerFeatures.h"
		#ifdef ENABLE_STD_FUNCTION
		#include <functional>
		#endif


		#if defined( __CUDA_ARCH__ )
		#include <cuda.h>
		@@ -193,6 +190,19 @@ double trilinear( double x, double y, double z, size_t N, size_t M, size_t K, co
		const double ygrid, const double zgrid, const double *fgrid );


		/*!
		* @brief Subroutine to perform cubic interpolation (1D)
		* @details This function returns f(x) interpolated from a uniform grid in [0,1]
		* with points stored at even intervals starting at the endpoints.
		* The interpolation is cubic.
		* @param yi The known values on a uniform grid in [0,1] starting with the endpoints
		* @param x The point to interpolate
		* @return Returns f(x)
		*/
		template<std::size_t N>
		inline double cubic( const std::array<double, N> &yi, double x );


		/*!
		* @brief Subroutine to check if a vector is in ascending order
		* @details This function checks if the values in an array are in ascending order
		@@ -358,7 +368,6 @@ inline void unique( size_t nx, const double X, size_t ny, double Y, size_t I
		double calc_width( size_t n, const double x, const double y );


		#ifdef ENABLE_STD_FUNCTION
		/*!
		* @brief Function to solve for the zero of f(x)
		* @details This function solves for the solution of f(x)=0 using a modified bisection method.
		@@ -371,9 +380,8 @@ double calc_width( size_t n, const double x, const double y );
		* @param options Any additional options that are needed by the function
		*/
		template<class... Args>
		double bisection( std::function<double( double, Args... )>, double lb, double ub, double tol1,
		double bisection( std::function<double( double, Args... )> fun, double lb, double ub, double tol1,
		double tol2, Args... options );
		#endif


		/*!
		@@ -387,14 +395,12 @@ double bisection( std::function<double( double, Args... )>, double lb, double ub
		* @param[in] N The number of previous guesses
		* @param[in] x The previous guesses
		* @param[in] r The function or residual evaluations at x
		* @param[out] range Optional output with the current bounds that contain the solution [lb ub]
		* @param[out] range The current bounded range
		* @return The next guess to use
		*/
		double bisection_coeff( size_t N, const double x, const double r, double *range = NULL );
		double bisection_coeff( size_t N, const double x, const double r, std::array<double, 2> &range );


		#ifdef ENABLE_STD_FUNCTION

		/*!
		* @brief Function to perform numerical integration
		* @details This function numerically integrates the function on the interval [lb,ub]
		@@ -472,7 +478,6 @@ HOST_DEVICE inline T1 integrate( const std::function<T1( T2, T2, T2 )> &fun,
		const std::function<T2( T1 )> &norm = []( T1 x ) { return x < 0 ? -x : x; } );


		#endif
		} // namespace interp

		#include "interp.hpp"

src/AtomicModel/interp.hpp

+31 −14

Original line number	Diff line number	Diff line
		@@ -44,6 +44,28 @@ inline double interp::trilinear( double dx, double dy, double dz, double f1, dou
		}


		// Subroutine to perform cubic interpolation
		template<std::size_t N>
		inline double interp::cubic( const std::array<double, N> &yi, double x )
		{
		// Get the point and index in the array
		double xi = static_cast<double>( N - 1 ) * x;
		int k = static_cast<int>( xi );
		k = std::max( std::min<int>( k, N - 3 ), 1 );
		// Construct the polynomial
		const double *p = &yi[k - 1];
		const double a = p[1];
		const double b = -0.333333333333333 * p[0] - 0.5 * p[1] + p[2] - 0.166666666666667 * p[3];
		const double c = 0.5 * ( p[0] + p[2] ) - p[1];
		const double d = 0.166666666666667 * ( p[3] - p[0] ) + 0.5 * ( p[1] - p[2] );
		// Interpolate
		const double x1 = xi - k;
		const double x2 = x1 * x1;
		const double x3 = x2 * x1;
		return a + b * x1 + c * x2 + d * x3;
		}


		// Subroutine to perform N-D-linear interpolation
		inline double interp::n_linear( size_t N, double dx, double fn )
		{
		@@ -437,15 +459,14 @@ inline void interp::unique(


		// Modified bisection root finding
		#ifdef ENABLE_STD_FUNCTION
		template<class... Args>
		double interp::bisection( std::function<double( double, Args... )> fun, double lb, double ub,
		double tol1, double tol2, Args... options )
		{
		if ( ub <= lb )
		throw std::logic_error( "Error: lb <= ub" );
		double range[2] = { lb, ub };
		double x[500] = { 0 }, f[500] = { 0 };
		std::array<double, 2> range = { lb, ub };
		double x[101] = { 0 }, f[101] = { 0 };
		x[0] = lb;
		x[1] = ub;
		f[0] = fun( x[0], options... );
		@@ -455,20 +476,21 @@ double interp::bisection( std::function<double( double, Args... )> fun, double l
		if ( ( f[0] < 0 && f[1] < 0 ) \|\| ( f[0] > 0 && f[1] > 0 ) )
		throw std::logic_error( "Error: sign(f(lb)) == sign(f(ub))" );
		size_t i = 2;
		while ( ( range[1] - range[0] ) > tol2 ) {
		while ( true ) {
		// Get the next guess
		x[i] = bisection_coeff( i, x, f, range );
		if ( ( range[1] - range[0] ) < tol2 )
		break;
		// Evaluate f
		f[i] = fun( x[i], options... );
		i++;
		if ( fabs( f[i - 1] ) < tol1 )
		if ( fabs( f[i] ) < tol1 )
		break;
		if ( i > 500 )
		if ( i >= 100 )
		throw std::logic_error( "Excessive number of iterations" );
		i++;
		}
		return x[i - 1];
		return x[i];
		}
		#endif


		// Fast approximate x^y
		@@ -572,9 +594,6 @@ HOST_DEVICE inline double interp::get_interp_ratio(
		}


		#ifdef ENABLE_STD_FUNCTION


		// Integrate the function using the midpoints
		template<class T1, class T2>
		HOST_DEVICE inline T1 interp::integrate_midpoint(
		@@ -688,6 +707,4 @@ HOST_DEVICE inline T1 interp::integrate( const std::function<T1( T2, T2, T2 )> &
		}


		#endif

		#endif

src/CMakeLists.txt

+3 −1

Original line number	Diff line number	Diff line
		CMAKE_MINIMUM_REQUIRED(VERSION 3.4)
		CMAKE_MINIMUM_REQUIRED(VERSION 3.6)
		SET( DISABLE_LTO TRUE )


		MESSAGE( "==============================" )
		MESSAGE( "Configuring Ray Trace MiniApps" )