Commit bb9830cf authored by Berrill, Mark's avatar Berrill, Mark
Browse files

Updating miniapp

parent 87d98088
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -5,12 +5,12 @@ export KOKKOS_DIR=/packages/TPLs/install/opt/kokkos
rm -rf CMake*

cmake                                   \
   -D CMAKE_BUILD_TYPE=Release          \
   -D CMAKE_BUILD_TYPE=Debug          \
   -D CMAKE_CXX_COMPILER=g++            \
   -D CXX_STD=11                        \
   -D USE_OPENACC=0                     \
   -D USE_OPENMP=1                      \
   -D USE_KOKKOS=1                      \
   -D USE_KOKKOS=0                      \
      -D KOKKOS_DIRECTORY=${KOKKOS_DIR} \
      -D KOKKOS_WRAPPER=${KOKKOS_DIR}/nvcc_wrapper \
   -D USE_CUDA=1                        \
+87 −56
Original line number Diff line number Diff line
@@ -98,7 +98,7 @@ double interp::trilinear( double x, double y, double z, size_t Nx, size_t Ny, si

// Subroutine to perform cubic hermite interpolation
template<class TYPE>
static inline TYPE pchip( size_t N, const TYPE *xi, const TYPE *yi, TYPE x )
static inline TYPE pchip( size_t N, size_t i, const TYPE *xi, const TYPE *yi, TYPE x )
{
    if ( x <= xi[0] || N <= 2 ) {
        double dx = ( x - xi[0] ) / ( xi[1] - xi[0] );
@@ -107,7 +107,6 @@ static inline TYPE pchip( size_t N, const TYPE *xi, const TYPE *yi, TYPE x )
        double dx = ( x - xi[N - 2] ) / ( xi[N - 1] - xi[N - 2] );
        return ( 1.0 - dx ) * yi[N - 2] + dx * yi[N - 1];
    }
    size_t i  = interp::findfirstsingle( xi, N, x );
    double f1 = yi[i - 1];
    double f2 = yi[i];
    double dx = ( x - xi[i - 1] ) / ( xi[i] - xi[i - 1] );
@@ -149,11 +148,13 @@ static inline TYPE pchip( size_t N, const TYPE *xi, const TYPE *yi, TYPE x )
}
double interp::interp_pchip( size_t N, const double *xi, const double *yi, double x )
{
    return pchip( N, xi, yi, x );
    size_t i = interp::findfirstsingle( xi, N, x );
    return pchip( N, i, xi, yi, x );
}
float interp::interp_pchip( size_t N, const float *xi, const float *yi, float x )
{
    return pchip( N, xi, yi, x );
    size_t i = interp::findfirstsingle( xi, N, x );
    return pchip( N, i, xi, yi, x );
}


@@ -207,66 +208,96 @@ double interp::calc_width( size_t n, const double *x, const double *y )

// Calculate coefficients for the bisection method
double interp::bisection_coeff(
    size_t N, const double *x_in, const double *r_in, double *range_out )
    size_t N, const double *x_in, const double *r_in, std::array<double, 2> &range )
{
    if ( N < 2 )
        throw std::logic_error( "Error: N<2" );
    // Copy x and r
    auto *x = new double[N];
    auto *r = new double[N];
    memcpy( x, x_in, N * sizeof( double ) );
    memcpy( r, r_in, N * sizeof( double ) );
    // Change the sign or r such that min(x) will be at the beginning
    auto tmp = std::make_pair( x[0], r[0] );
    if ( N == 2 ) {
        range[0] = x_in[0];
        range[1] = x_in[1];
        if ( range[0] > range[1] )
            std::swap( range[0], range[1] );
        return 0.5 * ( x_in[0] + x_in[1] );
    }
    // Get the sign of the function such that f(-inf) is negitive
    double sign  = 1;
    double x_min = 1e200;
    for ( size_t i = 0; i < N; i++ ) {
        if ( x[i] < tmp.first )
            tmp = std::make_pair( x[i], r[i] );
    }
    if ( tmp.second > 0 ) {
        for ( size_t i = 0; i < N; i++ )
            r[i] = -r[i];
    }
    // Sort x and r
    quicksort( N, r, x );
    // Check that we have two signs for r
    if ( r[0] > 0.0 || r[N - 1] < 0.0 ) {
        delete[] x;
        delete[] r;
        throw std::logic_error( "r does not have two different signs" );
    }
    // Find r >= 0
    size_t index    = findfirstsingle( r, N, 0.0 );
    double range[2] = { x[index - 1], x[index] };
    for ( size_t i = 0; i < index; i++ )
        range[0] = std::max( range[0], x[i] );
    for ( size_t i = index; i < N; i++ )
        range[1] = std::min( range[1], x[i] );
    double y = 0;
    if ( N < 5 ) {
        // Use simple bisection for the first couple of iterations
        y = 0.5 * ( range[0] + range[1] );
    } else if ( index == 1 || index == N - 1 ) {
        // We are at the boundary which reduces the gradient that we can use
        // Use an uneven bisection to try an ensure we have 2 points on each side
        if ( index == 1 ) {
            y = 0.8 * x[0] + 0.2 * x[1];
        } else {
            y = 0.2 * x[N - 2] + 0.8 * x[N - 1];
        if ( x_in[i] < x_min ) {
            x_min = x_in[i];
            sign  = r_in[i] < 0 ? 1 : -1;
        }
    }
    // Copy the closest two points on each side
    double x[4] = { -1e200, -1e200, 1e200, 1e200 };
    double f[4] = { -1e200, -1e200, 1e200, 1e200 };
    for ( size_t i = 0; i < N; i++ ) {
        double x0 = x_in[i];
        double f0 = r_in[i] * sign;
        if ( f0 < 0.0 ) {
            if ( x0 > x[0] ) {
                x[0] = x0;
                f[0] = f0;
            }
            if ( x[0] > x[1] ) {
                std::swap( x[0], x[1] );
                std::swap( f[0], f[1] );
            }
        } else {
        // Use cubic interpolation
        y = interp_pchip( N, r, x, 0.0 );
        // Check that y is within the range
        y = std::max( std::min( y, range[1] ), range[0] );
        // Adjust y to move it toward the center
        y = 0.9 * y + 0.1 * ( 0.5 * ( range[0] + range[1] ) );
            if ( x0 < x[3] ) {
                x[3] = x0;
                f[3] = f0;
            }
            if ( x[2] > x[3] ) {
                std::swap( x[2], x[3] );
                std::swap( f[2], f[3] );
            }
        }
    }
    if ( x[1] > x[2] ) {
        // We have multiple zeros, try to keep one
        if ( x[0] < x[2] ) {
            range[0] = x[0];
            range[1] = x[2];
            return 0.5 * ( x[0] + x[2] );
        } else if ( x[1] < x[3] ) {
            range[0] = x[1];
            range[1] = x[3];
            return 0.5 * ( x[1] + x[3] );
        } else {
            throw std::logic_error( "unable to find valid range" );
        }
    }
    // Finished
    delete[] x;
    delete[] r;
    if ( range_out != nullptr ) {
        range_out[0] = range[0];
        range_out[1] = range[1];
    range[0] = x[1];
    range[1] = x[2];
    if ( N > 30 ) {
        // We are having difficulties, switch back to simple bisection
        return 0.5 * ( x[1] + x[2] );
    }
    if ( fabs( f[0] ) > 1e100 ) {
        // Use an uneven bisection to try an ensure we have 2 points on each side
        double r = exp( -( N - 1. ) );
        return ( 1.0 - r ) * x[1] + r * x[2];
    } else if ( fabs( f[3] ) > 1e100 ) {
        // Use an uneven bisection to try an ensure we have 2 points on each side
        double r = exp( -( N - 1. ) );
        return ( 1.0 - r ) * x[2] + r * x[1];
    }
    // Use the 4 closest points to extimate the zero-crossing
    if ( f[0] > f[1] ) {
        std::swap( x[0], x[1] );
        std::swap( f[0], f[1] );
    } else if ( f[2] > f[3] ) {
        std::swap( x[2], x[3] );
        std::swap( f[2], f[3] );
    }
    double y = pchip( 4, 2, f, x, 0.0 );
    // Check that y is within the range
    y = std::min( y, x[2] );
    y = std::max( y, x[1] );
    // Adjust y to move it toward the center
    y = 0.95 * y + 0.05 * ( 0.5 * ( x[1] + x[2] ) );
    if ( y != y )
        y = 0.5 * ( x[1] + x[2] );
    return y;
}
+18 −13
Original line number Diff line number Diff line
#ifndef included_interp
#define included_interp

#include <array>
#include <cstdlib>
#include <functional>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <vector>

#include "CompilerFeatures.h"
#ifdef ENABLE_STD_FUNCTION
#include <functional>
#endif


#if defined( __CUDA_ARCH__ )
#include <cuda.h>
@@ -193,6 +190,19 @@ double trilinear( double x, double y, double z, size_t N, size_t M, size_t K, co
    const double *ygrid, const double *zgrid, const double *fgrid );


/*!
 * @brief    Subroutine to perform cubic interpolation (1D)
 * @details  This function returns f(x) interpolated from a uniform grid in [0,1]
 *           with points stored at even intervals starting at the endpoints.
 *           The interpolation is cubic.
 * @param yi        The known values on a uniform grid in [0,1] starting with the endpoints
 * @param x         The point to interpolate
 * @return          Returns f(x)
 */
template<std::size_t N>
inline double cubic( const std::array<double, N> &yi, double x );


/*!
 * @brief    Subroutine to check if a vector is in ascending order
 * @details  This function checks if the values in an array are in ascending order
@@ -358,7 +368,6 @@ inline void unique( size_t nx, const double *X, size_t *ny, double *Y, size_t *I
double calc_width( size_t n, const double *x, const double *y );


#ifdef ENABLE_STD_FUNCTION
/*!
 * @brief    Function to solve for the zero of f(x)
 * @details  This function solves for the solution of f(x)=0 using a modified bisection method.
@@ -371,9 +380,8 @@ double calc_width( size_t n, const double *x, const double *y );
 * @param options   Any additional options that are needed by the function
 */
template<class... Args>
double bisection( std::function<double( double, Args... )>, double lb, double ub, double tol1,
double bisection( std::function<double( double, Args... )> fun, double lb, double ub, double tol1,
    double tol2, Args... options );
#endif


/*!
@@ -387,14 +395,12 @@ double bisection( std::function<double( double, Args... )>, double lb, double ub
 * @param[in] N         The number of previous guesses
 * @param[in] x         The previous guesses
 * @param[in] r         The function or residual evaluations at x
 * @param[out] range    Optional output with the current bounds that contain the solution [lb ub]
 * @param[out] range    The current bounded range
 * @return              The next guess to use
 */
double bisection_coeff( size_t N, const double *x, const double *r, double *range = NULL );
double bisection_coeff( size_t N, const double *x, const double *r, std::array<double, 2> &range );


#ifdef ENABLE_STD_FUNCTION

/*!
 * @brief    Function to perform numerical integration
 * @details  This function numerically integrates the function on the interval [lb,ub]
@@ -472,7 +478,6 @@ HOST_DEVICE inline T1 integrate( const std::function<T1( T2, T2, T2 )> &fun,
    const std::function<T2( T1 )> &norm = []( T1 x ) { return x < 0 ? -x : x; } );


#endif
} // namespace interp

#include "interp.hpp"
+31 −14
Original line number Diff line number Diff line
@@ -44,6 +44,28 @@ inline double interp::trilinear( double dx, double dy, double dz, double f1, dou
}


// Subroutine to perform cubic interpolation
template<std::size_t N>
inline double interp::cubic( const std::array<double, N> &yi, double x )
{
    // Get the point and index in the array
    double xi = static_cast<double>( N - 1 ) * x;
    int k     = static_cast<int>( xi );
    k         = std::max( std::min<int>( k, N - 3 ), 1 );
    // Construct the polynomial
    const double *p = &yi[k - 1];
    const double a  = p[1];
    const double b  = -0.333333333333333 * p[0] - 0.5 * p[1] + p[2] - 0.166666666666667 * p[3];
    const double c  = 0.5 * ( p[0] + p[2] ) - p[1];
    const double d  = 0.166666666666667 * ( p[3] - p[0] ) + 0.5 * ( p[1] - p[2] );
    // Interpolate
    const double x1 = xi - k;
    const double x2 = x1 * x1;
    const double x3 = x2 * x1;
    return a + b * x1 + c * x2 + d * x3;
}


// Subroutine to perform N-D-linear interpolation
inline double interp::n_linear( size_t N, double *dx, double *fn )
{
@@ -437,15 +459,14 @@ inline void interp::unique(


// Modified bisection root finding
#ifdef ENABLE_STD_FUNCTION
template<class... Args>
double interp::bisection( std::function<double( double, Args... )> fun, double lb, double ub,
    double tol1, double tol2, Args... options )
{
    if ( ub <= lb )
        throw std::logic_error( "Error: lb <= ub" );
    double range[2] = { lb, ub };
    double x[500] = { 0 }, f[500] = { 0 };
    std::array<double, 2> range = { lb, ub };
    double x[101] = { 0 }, f[101] = { 0 };
    x[0] = lb;
    x[1] = ub;
    f[0] = fun( x[0], options... );
@@ -455,20 +476,21 @@ double interp::bisection( std::function<double( double, Args... )> fun, double l
    if ( ( f[0] < 0 && f[1] < 0 ) || ( f[0] > 0 && f[1] > 0 ) )
        throw std::logic_error( "Error: sign(f(lb)) == sign(f(ub))" );
    size_t i = 2;
    while ( ( range[1] - range[0] ) > tol2 ) {
    while ( true ) {
        // Get the next guess
        x[i] = bisection_coeff( i, x, f, range );
        if ( ( range[1] - range[0] ) < tol2 )
            break;
        // Evaluate f
        f[i] = fun( x[i], options... );
        i++;
        if ( fabs( f[i - 1] ) < tol1 )
        if ( fabs( f[i] ) < tol1 )
            break;
        if ( i > 500 )
        if ( i >= 100 )
            throw std::logic_error( "Excessive number of iterations" );
        i++;
    }
    return x[i - 1];
    return x[i];
}
#endif


// Fast approximate x^y
@@ -572,9 +594,6 @@ HOST_DEVICE inline double interp::get_interp_ratio(
}


#ifdef ENABLE_STD_FUNCTION


// Integrate the function using the midpoints
template<class T1, class T2>
HOST_DEVICE inline T1 interp::integrate_midpoint(
@@ -688,6 +707,4 @@ HOST_DEVICE inline T1 interp::integrate( const std::function<T1( T2, T2, T2 )> &
}


#endif

#endif
+3 −1
Original line number Diff line number Diff line
CMAKE_MINIMUM_REQUIRED(VERSION 3.4)
CMAKE_MINIMUM_REQUIRED(VERSION 3.6)
SET( DISABLE_LTO TRUE )


MESSAGE( "==============================" )
MESSAGE( "Configuring Ray Trace MiniApps" )
Loading