Loading build/configure +2 −2 Original line number Diff line number Diff line Loading @@ -5,12 +5,12 @@ export KOKKOS_DIR=/packages/TPLs/install/opt/kokkos rm -rf CMake* cmake \ -D CMAKE_BUILD_TYPE=Release \ -D CMAKE_BUILD_TYPE=Debug \ -D CMAKE_CXX_COMPILER=g++ \ -D CXX_STD=11 \ -D USE_OPENACC=0 \ -D USE_OPENMP=1 \ -D USE_KOKKOS=1 \ -D USE_KOKKOS=0 \ -D KOKKOS_DIRECTORY=${KOKKOS_DIR} \ -D KOKKOS_WRAPPER=${KOKKOS_DIR}/nvcc_wrapper \ -D USE_CUDA=1 \ Loading src/AtomicModel/interp.cpp +87 −56 Original line number Diff line number Diff line Loading @@ -98,7 +98,7 @@ double interp::trilinear( double x, double y, double z, size_t Nx, size_t Ny, si // Subroutine to perform cubic hermite interpolation template<class TYPE> static inline TYPE pchip( size_t N, const TYPE *xi, const TYPE *yi, TYPE x ) static inline TYPE pchip( size_t N, size_t i, const TYPE *xi, const TYPE *yi, TYPE x ) { if ( x <= xi[0] || N <= 2 ) { double dx = ( x - xi[0] ) / ( xi[1] - xi[0] ); Loading @@ -107,7 +107,6 @@ static inline TYPE pchip( size_t N, const TYPE *xi, const TYPE *yi, TYPE x ) double dx = ( x - xi[N - 2] ) / ( xi[N - 1] - xi[N - 2] ); return ( 1.0 - dx ) * yi[N - 2] + dx * yi[N - 1]; } size_t i = interp::findfirstsingle( xi, N, x ); double f1 = yi[i - 1]; double f2 = yi[i]; double dx = ( x - xi[i - 1] ) / ( xi[i] - xi[i - 1] ); Loading Loading @@ -149,11 +148,13 @@ static inline TYPE pchip( size_t N, const TYPE *xi, const TYPE *yi, TYPE x ) } double interp::interp_pchip( size_t N, const double *xi, const double *yi, double x ) { return pchip( N, xi, yi, x ); size_t i = interp::findfirstsingle( xi, N, x ); return pchip( N, i, xi, yi, x ); } float interp::interp_pchip( size_t N, const float *xi, const float *yi, float x ) { return pchip( N, xi, yi, x ); size_t i = interp::findfirstsingle( xi, N, x ); return pchip( N, i, xi, yi, x ); } Loading Loading @@ -207,66 +208,96 @@ double interp::calc_width( size_t n, const double *x, const double *y ) // Calculate coefficients for the bisection method double interp::bisection_coeff( size_t N, const double *x_in, const double *r_in, double *range_out ) size_t N, const double *x_in, const double *r_in, std::array<double, 2> &range ) { if ( N < 2 ) throw std::logic_error( "Error: N<2" ); // Copy x and r auto *x = new double[N]; auto *r = new double[N]; memcpy( x, x_in, N * sizeof( double ) ); memcpy( r, r_in, N * sizeof( double ) ); // Change the sign or r such that min(x) will be at the beginning auto tmp = std::make_pair( x[0], r[0] ); if ( N == 2 ) { range[0] = x_in[0]; range[1] = x_in[1]; if ( range[0] > range[1] ) std::swap( range[0], range[1] ); return 0.5 * ( x_in[0] + x_in[1] ); } // Get the sign of the function such that f(-inf) is negitive double sign = 1; double x_min = 1e200; for ( size_t i = 0; i < N; i++ ) { if ( x[i] < tmp.first ) tmp = std::make_pair( x[i], r[i] ); } if ( tmp.second > 0 ) { for ( size_t i = 0; i < N; i++ ) r[i] = -r[i]; } // Sort x and r quicksort( N, r, x ); // Check that we have two signs for r if ( r[0] > 0.0 || r[N - 1] < 0.0 ) { delete[] x; delete[] r; throw std::logic_error( "r does not have two different signs" ); } // Find r >= 0 size_t index = findfirstsingle( r, N, 0.0 ); double range[2] = { x[index - 1], x[index] }; for ( size_t i = 0; i < index; i++ ) range[0] = std::max( range[0], x[i] ); for ( size_t i = index; i < N; i++ ) range[1] = std::min( range[1], x[i] ); double y = 0; if ( N < 5 ) { // Use simple bisection for the first couple of iterations y = 0.5 * ( range[0] + range[1] ); } else if ( index == 1 || index == N - 1 ) { // We are at the boundary which reduces the gradient that we can use // Use an uneven bisection to try an ensure we have 2 points on each side if ( index == 1 ) { y = 0.8 * x[0] + 0.2 * x[1]; } else { y = 0.2 * x[N - 2] + 0.8 * x[N - 1]; if ( x_in[i] < x_min ) { x_min = x_in[i]; sign = r_in[i] < 0 ? 1 : -1; } } // Copy the closest two points on each side double x[4] = { -1e200, -1e200, 1e200, 1e200 }; double f[4] = { -1e200, -1e200, 1e200, 1e200 }; for ( size_t i = 0; i < N; i++ ) { double x0 = x_in[i]; double f0 = r_in[i] * sign; if ( f0 < 0.0 ) { if ( x0 > x[0] ) { x[0] = x0; f[0] = f0; } if ( x[0] > x[1] ) { std::swap( x[0], x[1] ); std::swap( f[0], f[1] ); } } else { // Use cubic interpolation y = interp_pchip( N, r, x, 0.0 ); // Check that y is within the range y = std::max( std::min( y, range[1] ), range[0] ); // Adjust y to move it toward the center y = 0.9 * y + 0.1 * ( 0.5 * ( range[0] + range[1] ) ); if ( x0 < x[3] ) { x[3] = x0; f[3] = f0; } if ( x[2] > x[3] ) { std::swap( x[2], x[3] ); std::swap( f[2], f[3] ); } } } if ( x[1] > x[2] ) { // We have multiple zeros, try to keep one if ( x[0] < x[2] ) { range[0] = x[0]; range[1] = x[2]; return 0.5 * ( x[0] + x[2] ); } else if ( x[1] < x[3] ) { range[0] = x[1]; range[1] = x[3]; return 0.5 * ( x[1] + x[3] ); } else { throw std::logic_error( "unable to find valid range" ); } } // Finished delete[] x; delete[] r; if ( range_out != nullptr ) { range_out[0] = range[0]; range_out[1] = range[1]; range[0] = x[1]; range[1] = x[2]; if ( N > 30 ) { // We are having difficulties, switch back to simple bisection return 0.5 * ( x[1] + x[2] ); } if ( fabs( f[0] ) > 1e100 ) { // Use an uneven bisection to try an ensure we have 2 points on each side double r = exp( -( N - 1. ) ); return ( 1.0 - r ) * x[1] + r * x[2]; } else if ( fabs( f[3] ) > 1e100 ) { // Use an uneven bisection to try an ensure we have 2 points on each side double r = exp( -( N - 1. ) ); return ( 1.0 - r ) * x[2] + r * x[1]; } // Use the 4 closest points to extimate the zero-crossing if ( f[0] > f[1] ) { std::swap( x[0], x[1] ); std::swap( f[0], f[1] ); } else if ( f[2] > f[3] ) { std::swap( x[2], x[3] ); std::swap( f[2], f[3] ); } double y = pchip( 4, 2, f, x, 0.0 ); // Check that y is within the range y = std::min( y, x[2] ); y = std::max( y, x[1] ); // Adjust y to move it toward the center y = 0.95 * y + 0.05 * ( 0.5 * ( x[1] + x[2] ) ); if ( y != y ) y = 0.5 * ( x[1] + x[2] ); return y; } src/AtomicModel/interp.h +18 −13 Original line number Diff line number Diff line #ifndef included_interp #define included_interp #include <array> #include <cstdlib> #include <functional> #include <math.h> #include <stdio.h> #include <stdlib.h> #include <vector> #include "CompilerFeatures.h" #ifdef ENABLE_STD_FUNCTION #include <functional> #endif #if defined( __CUDA_ARCH__ ) #include <cuda.h> Loading Loading @@ -193,6 +190,19 @@ double trilinear( double x, double y, double z, size_t N, size_t M, size_t K, co const double *ygrid, const double *zgrid, const double *fgrid ); /*! * @brief Subroutine to perform cubic interpolation (1D) * @details This function returns f(x) interpolated from a uniform grid in [0,1] * with points stored at even intervals starting at the endpoints. * The interpolation is cubic. * @param yi The known values on a uniform grid in [0,1] starting with the endpoints * @param x The point to interpolate * @return Returns f(x) */ template<std::size_t N> inline double cubic( const std::array<double, N> &yi, double x ); /*! * @brief Subroutine to check if a vector is in ascending order * @details This function checks if the values in an array are in ascending order Loading Loading @@ -358,7 +368,6 @@ inline void unique( size_t nx, const double *X, size_t *ny, double *Y, size_t *I double calc_width( size_t n, const double *x, const double *y ); #ifdef ENABLE_STD_FUNCTION /*! * @brief Function to solve for the zero of f(x) * @details This function solves for the solution of f(x)=0 using a modified bisection method. Loading @@ -371,9 +380,8 @@ double calc_width( size_t n, const double *x, const double *y ); * @param options Any additional options that are needed by the function */ template<class... Args> double bisection( std::function<double( double, Args... )>, double lb, double ub, double tol1, double bisection( std::function<double( double, Args... )> fun, double lb, double ub, double tol1, double tol2, Args... options ); #endif /*! Loading @@ -387,14 +395,12 @@ double bisection( std::function<double( double, Args... )>, double lb, double ub * @param[in] N The number of previous guesses * @param[in] x The previous guesses * @param[in] r The function or residual evaluations at x * @param[out] range Optional output with the current bounds that contain the solution [lb ub] * @param[out] range The current bounded range * @return The next guess to use */ double bisection_coeff( size_t N, const double *x, const double *r, double *range = NULL ); double bisection_coeff( size_t N, const double *x, const double *r, std::array<double, 2> &range ); #ifdef ENABLE_STD_FUNCTION /*! * @brief Function to perform numerical integration * @details This function numerically integrates the function on the interval [lb,ub] Loading Loading @@ -472,7 +478,6 @@ HOST_DEVICE inline T1 integrate( const std::function<T1( T2, T2, T2 )> &fun, const std::function<T2( T1 )> &norm = []( T1 x ) { return x < 0 ? -x : x; } ); #endif } // namespace interp #include "interp.hpp" Loading src/AtomicModel/interp.hpp +31 −14 Original line number Diff line number Diff line Loading @@ -44,6 +44,28 @@ inline double interp::trilinear( double dx, double dy, double dz, double f1, dou } // Subroutine to perform cubic interpolation template<std::size_t N> inline double interp::cubic( const std::array<double, N> &yi, double x ) { // Get the point and index in the array double xi = static_cast<double>( N - 1 ) * x; int k = static_cast<int>( xi ); k = std::max( std::min<int>( k, N - 3 ), 1 ); // Construct the polynomial const double *p = &yi[k - 1]; const double a = p[1]; const double b = -0.333333333333333 * p[0] - 0.5 * p[1] + p[2] - 0.166666666666667 * p[3]; const double c = 0.5 * ( p[0] + p[2] ) - p[1]; const double d = 0.166666666666667 * ( p[3] - p[0] ) + 0.5 * ( p[1] - p[2] ); // Interpolate const double x1 = xi - k; const double x2 = x1 * x1; const double x3 = x2 * x1; return a + b * x1 + c * x2 + d * x3; } // Subroutine to perform N-D-linear interpolation inline double interp::n_linear( size_t N, double *dx, double *fn ) { Loading Loading @@ -437,15 +459,14 @@ inline void interp::unique( // Modified bisection root finding #ifdef ENABLE_STD_FUNCTION template<class... Args> double interp::bisection( std::function<double( double, Args... )> fun, double lb, double ub, double tol1, double tol2, Args... options ) { if ( ub <= lb ) throw std::logic_error( "Error: lb <= ub" ); double range[2] = { lb, ub }; double x[500] = { 0 }, f[500] = { 0 }; std::array<double, 2> range = { lb, ub }; double x[101] = { 0 }, f[101] = { 0 }; x[0] = lb; x[1] = ub; f[0] = fun( x[0], options... ); Loading @@ -455,20 +476,21 @@ double interp::bisection( std::function<double( double, Args... )> fun, double l if ( ( f[0] < 0 && f[1] < 0 ) || ( f[0] > 0 && f[1] > 0 ) ) throw std::logic_error( "Error: sign(f(lb)) == sign(f(ub))" ); size_t i = 2; while ( ( range[1] - range[0] ) > tol2 ) { while ( true ) { // Get the next guess x[i] = bisection_coeff( i, x, f, range ); if ( ( range[1] - range[0] ) < tol2 ) break; // Evaluate f f[i] = fun( x[i], options... ); i++; if ( fabs( f[i - 1] ) < tol1 ) if ( fabs( f[i] ) < tol1 ) break; if ( i > 500 ) if ( i >= 100 ) throw std::logic_error( "Excessive number of iterations" ); i++; } return x[i - 1]; return x[i]; } #endif // Fast approximate x^y Loading Loading @@ -572,9 +594,6 @@ HOST_DEVICE inline double interp::get_interp_ratio( } #ifdef ENABLE_STD_FUNCTION // Integrate the function using the midpoints template<class T1, class T2> HOST_DEVICE inline T1 interp::integrate_midpoint( Loading Loading @@ -688,6 +707,4 @@ HOST_DEVICE inline T1 interp::integrate( const std::function<T1( T2, T2, T2 )> & } #endif #endif src/CMakeLists.txt +3 −1 Original line number Diff line number Diff line CMAKE_MINIMUM_REQUIRED(VERSION 3.4) CMAKE_MINIMUM_REQUIRED(VERSION 3.6) SET( DISABLE_LTO TRUE ) MESSAGE( "==============================" ) MESSAGE( "Configuring Ray Trace MiniApps" ) Loading Loading
build/configure +2 −2 Original line number Diff line number Diff line Loading @@ -5,12 +5,12 @@ export KOKKOS_DIR=/packages/TPLs/install/opt/kokkos rm -rf CMake* cmake \ -D CMAKE_BUILD_TYPE=Release \ -D CMAKE_BUILD_TYPE=Debug \ -D CMAKE_CXX_COMPILER=g++ \ -D CXX_STD=11 \ -D USE_OPENACC=0 \ -D USE_OPENMP=1 \ -D USE_KOKKOS=1 \ -D USE_KOKKOS=0 \ -D KOKKOS_DIRECTORY=${KOKKOS_DIR} \ -D KOKKOS_WRAPPER=${KOKKOS_DIR}/nvcc_wrapper \ -D USE_CUDA=1 \ Loading
src/AtomicModel/interp.cpp +87 −56 Original line number Diff line number Diff line Loading @@ -98,7 +98,7 @@ double interp::trilinear( double x, double y, double z, size_t Nx, size_t Ny, si // Subroutine to perform cubic hermite interpolation template<class TYPE> static inline TYPE pchip( size_t N, const TYPE *xi, const TYPE *yi, TYPE x ) static inline TYPE pchip( size_t N, size_t i, const TYPE *xi, const TYPE *yi, TYPE x ) { if ( x <= xi[0] || N <= 2 ) { double dx = ( x - xi[0] ) / ( xi[1] - xi[0] ); Loading @@ -107,7 +107,6 @@ static inline TYPE pchip( size_t N, const TYPE *xi, const TYPE *yi, TYPE x ) double dx = ( x - xi[N - 2] ) / ( xi[N - 1] - xi[N - 2] ); return ( 1.0 - dx ) * yi[N - 2] + dx * yi[N - 1]; } size_t i = interp::findfirstsingle( xi, N, x ); double f1 = yi[i - 1]; double f2 = yi[i]; double dx = ( x - xi[i - 1] ) / ( xi[i] - xi[i - 1] ); Loading Loading @@ -149,11 +148,13 @@ static inline TYPE pchip( size_t N, const TYPE *xi, const TYPE *yi, TYPE x ) } double interp::interp_pchip( size_t N, const double *xi, const double *yi, double x ) { return pchip( N, xi, yi, x ); size_t i = interp::findfirstsingle( xi, N, x ); return pchip( N, i, xi, yi, x ); } float interp::interp_pchip( size_t N, const float *xi, const float *yi, float x ) { return pchip( N, xi, yi, x ); size_t i = interp::findfirstsingle( xi, N, x ); return pchip( N, i, xi, yi, x ); } Loading Loading @@ -207,66 +208,96 @@ double interp::calc_width( size_t n, const double *x, const double *y ) // Calculate coefficients for the bisection method double interp::bisection_coeff( size_t N, const double *x_in, const double *r_in, double *range_out ) size_t N, const double *x_in, const double *r_in, std::array<double, 2> &range ) { if ( N < 2 ) throw std::logic_error( "Error: N<2" ); // Copy x and r auto *x = new double[N]; auto *r = new double[N]; memcpy( x, x_in, N * sizeof( double ) ); memcpy( r, r_in, N * sizeof( double ) ); // Change the sign or r such that min(x) will be at the beginning auto tmp = std::make_pair( x[0], r[0] ); if ( N == 2 ) { range[0] = x_in[0]; range[1] = x_in[1]; if ( range[0] > range[1] ) std::swap( range[0], range[1] ); return 0.5 * ( x_in[0] + x_in[1] ); } // Get the sign of the function such that f(-inf) is negitive double sign = 1; double x_min = 1e200; for ( size_t i = 0; i < N; i++ ) { if ( x[i] < tmp.first ) tmp = std::make_pair( x[i], r[i] ); } if ( tmp.second > 0 ) { for ( size_t i = 0; i < N; i++ ) r[i] = -r[i]; } // Sort x and r quicksort( N, r, x ); // Check that we have two signs for r if ( r[0] > 0.0 || r[N - 1] < 0.0 ) { delete[] x; delete[] r; throw std::logic_error( "r does not have two different signs" ); } // Find r >= 0 size_t index = findfirstsingle( r, N, 0.0 ); double range[2] = { x[index - 1], x[index] }; for ( size_t i = 0; i < index; i++ ) range[0] = std::max( range[0], x[i] ); for ( size_t i = index; i < N; i++ ) range[1] = std::min( range[1], x[i] ); double y = 0; if ( N < 5 ) { // Use simple bisection for the first couple of iterations y = 0.5 * ( range[0] + range[1] ); } else if ( index == 1 || index == N - 1 ) { // We are at the boundary which reduces the gradient that we can use // Use an uneven bisection to try an ensure we have 2 points on each side if ( index == 1 ) { y = 0.8 * x[0] + 0.2 * x[1]; } else { y = 0.2 * x[N - 2] + 0.8 * x[N - 1]; if ( x_in[i] < x_min ) { x_min = x_in[i]; sign = r_in[i] < 0 ? 1 : -1; } } // Copy the closest two points on each side double x[4] = { -1e200, -1e200, 1e200, 1e200 }; double f[4] = { -1e200, -1e200, 1e200, 1e200 }; for ( size_t i = 0; i < N; i++ ) { double x0 = x_in[i]; double f0 = r_in[i] * sign; if ( f0 < 0.0 ) { if ( x0 > x[0] ) { x[0] = x0; f[0] = f0; } if ( x[0] > x[1] ) { std::swap( x[0], x[1] ); std::swap( f[0], f[1] ); } } else { // Use cubic interpolation y = interp_pchip( N, r, x, 0.0 ); // Check that y is within the range y = std::max( std::min( y, range[1] ), range[0] ); // Adjust y to move it toward the center y = 0.9 * y + 0.1 * ( 0.5 * ( range[0] + range[1] ) ); if ( x0 < x[3] ) { x[3] = x0; f[3] = f0; } if ( x[2] > x[3] ) { std::swap( x[2], x[3] ); std::swap( f[2], f[3] ); } } } if ( x[1] > x[2] ) { // We have multiple zeros, try to keep one if ( x[0] < x[2] ) { range[0] = x[0]; range[1] = x[2]; return 0.5 * ( x[0] + x[2] ); } else if ( x[1] < x[3] ) { range[0] = x[1]; range[1] = x[3]; return 0.5 * ( x[1] + x[3] ); } else { throw std::logic_error( "unable to find valid range" ); } } // Finished delete[] x; delete[] r; if ( range_out != nullptr ) { range_out[0] = range[0]; range_out[1] = range[1]; range[0] = x[1]; range[1] = x[2]; if ( N > 30 ) { // We are having difficulties, switch back to simple bisection return 0.5 * ( x[1] + x[2] ); } if ( fabs( f[0] ) > 1e100 ) { // Use an uneven bisection to try an ensure we have 2 points on each side double r = exp( -( N - 1. ) ); return ( 1.0 - r ) * x[1] + r * x[2]; } else if ( fabs( f[3] ) > 1e100 ) { // Use an uneven bisection to try an ensure we have 2 points on each side double r = exp( -( N - 1. ) ); return ( 1.0 - r ) * x[2] + r * x[1]; } // Use the 4 closest points to extimate the zero-crossing if ( f[0] > f[1] ) { std::swap( x[0], x[1] ); std::swap( f[0], f[1] ); } else if ( f[2] > f[3] ) { std::swap( x[2], x[3] ); std::swap( f[2], f[3] ); } double y = pchip( 4, 2, f, x, 0.0 ); // Check that y is within the range y = std::min( y, x[2] ); y = std::max( y, x[1] ); // Adjust y to move it toward the center y = 0.95 * y + 0.05 * ( 0.5 * ( x[1] + x[2] ) ); if ( y != y ) y = 0.5 * ( x[1] + x[2] ); return y; }
src/AtomicModel/interp.h +18 −13 Original line number Diff line number Diff line #ifndef included_interp #define included_interp #include <array> #include <cstdlib> #include <functional> #include <math.h> #include <stdio.h> #include <stdlib.h> #include <vector> #include "CompilerFeatures.h" #ifdef ENABLE_STD_FUNCTION #include <functional> #endif #if defined( __CUDA_ARCH__ ) #include <cuda.h> Loading Loading @@ -193,6 +190,19 @@ double trilinear( double x, double y, double z, size_t N, size_t M, size_t K, co const double *ygrid, const double *zgrid, const double *fgrid ); /*! * @brief Subroutine to perform cubic interpolation (1D) * @details This function returns f(x) interpolated from a uniform grid in [0,1] * with points stored at even intervals starting at the endpoints. * The interpolation is cubic. * @param yi The known values on a uniform grid in [0,1] starting with the endpoints * @param x The point to interpolate * @return Returns f(x) */ template<std::size_t N> inline double cubic( const std::array<double, N> &yi, double x ); /*! * @brief Subroutine to check if a vector is in ascending order * @details This function checks if the values in an array are in ascending order Loading Loading @@ -358,7 +368,6 @@ inline void unique( size_t nx, const double *X, size_t *ny, double *Y, size_t *I double calc_width( size_t n, const double *x, const double *y ); #ifdef ENABLE_STD_FUNCTION /*! * @brief Function to solve for the zero of f(x) * @details This function solves for the solution of f(x)=0 using a modified bisection method. Loading @@ -371,9 +380,8 @@ double calc_width( size_t n, const double *x, const double *y ); * @param options Any additional options that are needed by the function */ template<class... Args> double bisection( std::function<double( double, Args... )>, double lb, double ub, double tol1, double bisection( std::function<double( double, Args... )> fun, double lb, double ub, double tol1, double tol2, Args... options ); #endif /*! Loading @@ -387,14 +395,12 @@ double bisection( std::function<double( double, Args... )>, double lb, double ub * @param[in] N The number of previous guesses * @param[in] x The previous guesses * @param[in] r The function or residual evaluations at x * @param[out] range Optional output with the current bounds that contain the solution [lb ub] * @param[out] range The current bounded range * @return The next guess to use */ double bisection_coeff( size_t N, const double *x, const double *r, double *range = NULL ); double bisection_coeff( size_t N, const double *x, const double *r, std::array<double, 2> &range ); #ifdef ENABLE_STD_FUNCTION /*! * @brief Function to perform numerical integration * @details This function numerically integrates the function on the interval [lb,ub] Loading Loading @@ -472,7 +478,6 @@ HOST_DEVICE inline T1 integrate( const std::function<T1( T2, T2, T2 )> &fun, const std::function<T2( T1 )> &norm = []( T1 x ) { return x < 0 ? -x : x; } ); #endif } // namespace interp #include "interp.hpp" Loading
src/AtomicModel/interp.hpp +31 −14 Original line number Diff line number Diff line Loading @@ -44,6 +44,28 @@ inline double interp::trilinear( double dx, double dy, double dz, double f1, dou } // Subroutine to perform cubic interpolation template<std::size_t N> inline double interp::cubic( const std::array<double, N> &yi, double x ) { // Get the point and index in the array double xi = static_cast<double>( N - 1 ) * x; int k = static_cast<int>( xi ); k = std::max( std::min<int>( k, N - 3 ), 1 ); // Construct the polynomial const double *p = &yi[k - 1]; const double a = p[1]; const double b = -0.333333333333333 * p[0] - 0.5 * p[1] + p[2] - 0.166666666666667 * p[3]; const double c = 0.5 * ( p[0] + p[2] ) - p[1]; const double d = 0.166666666666667 * ( p[3] - p[0] ) + 0.5 * ( p[1] - p[2] ); // Interpolate const double x1 = xi - k; const double x2 = x1 * x1; const double x3 = x2 * x1; return a + b * x1 + c * x2 + d * x3; } // Subroutine to perform N-D-linear interpolation inline double interp::n_linear( size_t N, double *dx, double *fn ) { Loading Loading @@ -437,15 +459,14 @@ inline void interp::unique( // Modified bisection root finding #ifdef ENABLE_STD_FUNCTION template<class... Args> double interp::bisection( std::function<double( double, Args... )> fun, double lb, double ub, double tol1, double tol2, Args... options ) { if ( ub <= lb ) throw std::logic_error( "Error: lb <= ub" ); double range[2] = { lb, ub }; double x[500] = { 0 }, f[500] = { 0 }; std::array<double, 2> range = { lb, ub }; double x[101] = { 0 }, f[101] = { 0 }; x[0] = lb; x[1] = ub; f[0] = fun( x[0], options... ); Loading @@ -455,20 +476,21 @@ double interp::bisection( std::function<double( double, Args... )> fun, double l if ( ( f[0] < 0 && f[1] < 0 ) || ( f[0] > 0 && f[1] > 0 ) ) throw std::logic_error( "Error: sign(f(lb)) == sign(f(ub))" ); size_t i = 2; while ( ( range[1] - range[0] ) > tol2 ) { while ( true ) { // Get the next guess x[i] = bisection_coeff( i, x, f, range ); if ( ( range[1] - range[0] ) < tol2 ) break; // Evaluate f f[i] = fun( x[i], options... ); i++; if ( fabs( f[i - 1] ) < tol1 ) if ( fabs( f[i] ) < tol1 ) break; if ( i > 500 ) if ( i >= 100 ) throw std::logic_error( "Excessive number of iterations" ); i++; } return x[i - 1]; return x[i]; } #endif // Fast approximate x^y Loading Loading @@ -572,9 +594,6 @@ HOST_DEVICE inline double interp::get_interp_ratio( } #ifdef ENABLE_STD_FUNCTION // Integrate the function using the midpoints template<class T1, class T2> HOST_DEVICE inline T1 interp::integrate_midpoint( Loading Loading @@ -688,6 +707,4 @@ HOST_DEVICE inline T1 interp::integrate( const std::function<T1( T2, T2, T2 )> & } #endif #endif
src/CMakeLists.txt +3 −1 Original line number Diff line number Diff line CMAKE_MINIMUM_REQUIRED(VERSION 3.4) CMAKE_MINIMUM_REQUIRED(VERSION 3.6) SET( DISABLE_LTO TRUE ) MESSAGE( "==============================" ) MESSAGE( "Configuring Ray Trace MiniApps" ) Loading