Updating RayTrace version (de5e8229) · Commits · Berrill, Mark A / RayTrace-miniapp

build/configure

+5 −3

Original line number	Diff line number	Diff line
		# Sample configure file

		export KOKKOS_DIR=/packages/TPLs/install/opt/kokkos

		cmake \
		-D CMAKE_BUILD_TYPE=Release \
		-D CMAKE_C_COMPILER=gcc \
		@@ -7,9 +9,9 @@ cmake \
		-D CXX_STD=11 \
		-D USE_OPENACC=0 \
		-D USE_OPENMP=1 \
		-D USE_KOKKOS=0 \
		-D KOKKOS_DIRECTORY=/usr/local/kokkos \
		-D KOKKOS_WRAPPER=/usr/local/kokkos/config/nvcc_wrapper \
		-D USE_KOKKOS=1 \
		-D KOKKOS_DIRECTORY=${KOKKOS_DIR} \
		-D KOKKOS_WRAPPER=${KOKKOS_DIR}/nvcc_wrapper \
		-D USE_CUDA=1 \
		-D CUDA_FLAGS="-arch sm_30" \
		../src

src/interp.cpp→src/AtomicModel/interp.cpp

+13 −5

Original line number	Diff line number	Diff line
		@@ -2,7 +2,7 @@
		#include <stdexcept>
		#include <string.h>

		#include "interp.h"
		#include "AtomicModel/interp.h"


		// Subroutine to perform linear interpolation
		@@ -94,7 +94,8 @@ double interp::trilinear( double x, double y, double z, size_t Nx, size_t Ny, si


		// Subroutine to perform cubic hermite interpolation
		double interp::interp_pchip( size_t N, const double xi, const double yi, double x )
		template<class TYPE>
		static inline TYPE pchip( size_t N, const TYPE xi, const TYPE yi, TYPE x )
		{
		if ( x <= xi[0] \|\| N <= 2 ) {
		double dx = ( x - xi[0] ) / ( xi[1] - xi[0] );
		@@ -103,7 +104,7 @@ double interp::interp_pchip( size_t N, const double xi, const double yi, doubl
		double dx = ( x - xi[N - 2] ) / ( xi[N - 1] - xi[N - 2] );
		return ( 1.0 - dx ) * yi[N - 2] + dx * yi[N - 1];
		}
		size_t i = findfirstsingle( xi, N, x );
		size_t i = interp::findfirstsingle( xi, N, x );
		double f1 = yi[i - 1];
		double f2 = yi[i];
		double dx = ( x - xi[i - 1] ) / ( xi[i] - xi[i - 1] );
		@@ -139,10 +140,17 @@ double interp::interp_pchip( size_t N, const double xi, const double yi, doubl
		}
		// Perform the interpolation
		double dx2 = dx * dx;
		double f =
		f1 + dx2 * ( 2 * dx - 3 ) * ( f1 - f2 ) + dx * g1 - dx2 * ( g1 + ( 1 - dx ) * ( g1 + g2 ) );
		double f = f1 + dx2 * ( 2 * dx - 3 ) * ( f1 - f2 ) + dx * g1 - dx2 * ( g1 + ( 1 - dx ) * ( g1 + g2 ) );
		return f;
		}
		double interp::interp_pchip( size_t N, const double xi, const double yi, double x )
		{
		return pchip( N, xi, yi, x );
		}
		float interp::interp_pchip( size_t N, const float xi, const float yi, float x )
		{
		return pchip( N, xi, yi, x );
		}


		// This function calculates the FWHM by finding the narrowest region that contains 76% of the energy

src/interp.h→src/AtomicModel/interp.h

+96 −0

Original line number	Diff line number	Diff line
		#ifndef included_interp
		#define included_interp

		#include <cstdlib>
		#include <math.h>
		#include <stdio.h>
		#include <stdlib.h>
		@@ -145,6 +146,19 @@ double interp_linear( size_t N, const double xi, const double yi, double x );
		double interp_pchip( size_t N, const double xi, const double yi, double x );


		/*!
		* @brief Subroutine to perform cubic hermite
		* @details This function returns f(x) interpolated between x1 and x2.
		* It does so using a monotonic preserving piecewise cubic hermite polynomial.
		* Ouside of the domain it will perform linear interpolation.
		* @param N Number of points in xi
		* @param xi Sorted grid to use for interpolation
		* @param yi Function values
		* @param x Point to be interpolated
		*/
		float interp_pchip( size_t N, const float xi, const float yi, float x );


		/*!
		* @brief Subroutine to perform bi-linear interpolation (2D)
		* @details This function returns f(x,y) interpolated from a rectangular grid
		@@ -357,6 +371,88 @@ double bisection( std::function<double( double, Args... )>, double lb, double ub
		* @return The next guess to use
		*/
		double bisection_coeff( size_t N, const double x, const double r, double *range = NULL );


		#ifdef ENABLE_STD_FUNCTION

		/*!
		* @brief Function to perform numerical integration
		* @details This function numerically integrates the function on the interval [lb,ub]
		* using the midpoint. This requires N function evaluations.
		* @param fun The function to integrate of the form y = f(x)
		* @param range The range to integrate
		* @param N Number of sub-intervals to use
		*/
		template <class T1, class T2>
		HOST_DEVICE inline T1 integrate_midpoint(
		const std::function<T1( T2 )> &fun, const std::array<T2, 2> &range, int N );


		/*!
		* @brief Function to perform numerical integration
		* @details This function numerically integrates the function on the interval [lb,ub]
		* using Simpson's rule. This requires N+1 function evaluations.
		* @param fun The function to integrate of the form y = f(x)
		* @param range The range to integrate
		* @param N Number of sub-intervals to use
		*/
		template <class T1, class T2>
		HOST_DEVICE inline T1 integrate_simpson(
		const std::function<T1( T2 )> &fun, const std::array<T2, 2> &range, int N );


		/*!
		* @brief Function to perform numerical integration in 1D
		* @details This function numerically integrates the function on the interval [lb,ub]
		* using an adaptive Simpson's rule. This requires N+1 function evaluations.
		* @param[in] fun The function to integrate of the form y = f(x)
		* @param[in] range The range to integrate
		* @param[in] tol Number of sub-intervals to use
		* @param[in] N_eval Total number of evalutations
		* @param[in] norm Function to compute the norm to use for the error
		*/
		template <class T1, class T2>
		HOST_DEVICE inline T1 integrate( const std::function<T1( T2 )> &fun, const std::array<T2, 2> &range,
		const T2 tol = 1e-6, int *N_eval = NULL,
		const std::function<T2( T1 )> &norm = []( T1 x ) { return x<0 ? -x:x; } );


		/*!
		* @brief Function to perform numerical integration in 2D
		* @details This function numerically integrates the function on the interval
		* [x_min,x_max,y_min,y_max]
		* using an adaptive Simpson's rule. This requires N+1 function evaluations (3 iterations for
		* N=1).
		* @param[in] fun The function to integrate of the form y = f(x)
		* @param[in] range The range to integrate
		* @param[in] tol Number of sub-intervals to use
		* @param[in] N_eval Total number of evalutations
		* @param[in] norm Function to compute the norm to use for the error
		*/
		template <class T1, class T2>
		HOST_DEVICE inline T1 integrate( const std::function<T1( T2, T2 )> &fun,
		const std::array<T2, 4> &range, const T2 tol = 1e-6, int *N_eval = NULL,
		const std::function<T2( T1 )> &norm = []( T1 x ) { return x<0 ? -x:x; } );


		/*!
		* @brief Function to perform numerical integration in 3D
		* @details This function numerically integrates the function on the interval
		* [x_min,x_max,y_min,y_max,z_min,z_max]
		* using an adaptive Simpson's rule.
		* @param[in] fun The function to integrate of the form y = f(x)
		* @param[in] range The range to integrate
		* @param[in] tol Number of sub-intervals to use
		* @param[in] N_eval Total number of evalutations
		* @param[in] norm Function to compute the norm to use for the error
		*/
		template <class T1, class T2>
		HOST_DEVICE inline T1 integrate( const std::function<T1( T2, T2, T2 )> &fun,
		const std::array<T2, 6> &range, const T2 tol = 1e-6, int *N_eval = NULL,
		const std::function<T2( T1 )> &norm = []( T1 x ) { return x<0 ? -x:x; } );


		#endif
		}

		#include "interp.hpp"

src/interp.hpp→src/AtomicModel/interp.hpp

+119 −1

Original line number	Diff line number	Diff line
		#ifndef included_interp_hpp
		#define included_interp_hpp

		#include "interp.h"
		#include "AtomicModel/interp.h"
		#include <iostream>
		#include <limits>
		#include <stdexcept>
		@@ -561,4 +561,122 @@ HOST_DEVICE inline double interp::get_interp_ratio(
		}


		#ifdef ENABLE_STD_FUNCTION


		// Integrate the function using the midpoints
		template <class T1, class T2>
		HOST_DEVICE inline T1 interp::integrate_midpoint(
		const std::function<T1( T2 )> &f, const std::array<T2, 2> &range, int N )
		{
		T2 dx = ( range[1] - range[0] ) / N;
		T1 y = 0;
		for ( int i = 0; i < N; i++ )
		y += f( range[0] + ( i + 0.5 ) * dx );
		y *= dx;
		return y;
		}


		// Integrate the function using Simpson's rule
		template <class T1, class T2>
		HOST_DEVICE inline T1 interp::integrate_simpson(
		const std::function<T1( T2 )> &f, const std::array<T2, 2> &range, int N )
		{
		if ( N <= 2 )
		return ( range[1] - range[0] ) / 6 *
		( f( range[0] ) + 4 * f( ( range[0] + range[1] ) / 2 ) + f( range[1] ) );
		if ( N % 2 != 0 )
		throw std::logic_error( "Error: N must be even" );
		T2 dx = ( range[1] - range[0] ) / N;
		T1 y = f( range[0] ) + f( range[1] ) + 4 * f( range[0] + dx );
		for ( int i = 1; i < N / 2; i++ ) {
		y += 2 * f( range[0] + 2 * i * dx );
		y += 4 * f( range[0] + 2 * i * dx + dx );
		}
		y *= ( dx / 3 );
		return y;
		}


		// Integrate the function using adaptive Simpson's rule
		template <class T1, class T2>
		HOST_DEVICE inline T1 adaptiveSimpsonsAux( const std::function<T1( T2 )> &fun, T2 a, T2 b, T2 tol,
		T1 S, T1 fa, T1 fb, T1 fc, int bottom, int &N_eval, const std::function<T2( T1 )> &norm )
		{
		T2 c = ( a + b ) / 2, h = b - a;
		T2 d = ( a + c ) / 2, e = ( c + b ) / 2;
		T1 fd = fun( d ), fe = fun( e );
		T1 Sleft = ( h / 12 ) * ( fa + 4 * fd + fc );
		T1 Sright = ( h / 12 ) * ( fc + 4 * fe + fb );
		T1 S2 = Sleft + Sright;
		N_eval += 2;
		T2 err = norm( S2 - S );
		if ( bottom <= 0 \|\| err <= 15 * tol ) // magic 15 comes from error analysis
		return S2 + 0.066666666666667 * ( S2 - S );
		return adaptiveSimpsonsAux<T1, T2>(
		fun, a, c, tol / 2, Sleft, fa, fc, fd, bottom - 1, N_eval, norm ) +
		adaptiveSimpsonsAux<T1, T2>(
		fun, c, b, tol / 2, Sright, fc, fb, fe, bottom - 1, N_eval, norm );
		}
		template <class T1, class T2>
		HOST_DEVICE inline T1 interp::integrate( const std::function<T1( T2 )> &fun,
		const std::array<T2, 2> &range, T2 tol, int *N_eval_out, const std::function<T2( T1 )> &norm )
		{
		T2 h = range[1] - range[0];
		T1 fa = fun( range[0] );
		T1 fb = fun( range[1] );
		T1 fc = fun( range[0] + 0.5 * h );
		T1 S = ( h / 6 ) * ( fa + 4 * fc + fb );
		int N_eval = 2;
		S = adaptiveSimpsonsAux<T1, T2>(
		fun, range[0], range[1], tol, S, fa, fb, fc, 100, N_eval, norm );
		if ( N_eval_out != NULL )
		*N_eval_out = N_eval;
		return S;
		}
		template <class T1, class T2>
		HOST_DEVICE inline T1 interp::integrate( const std::function<T1( T2, T2 )> &fun,
		const std::array<T2, 4> &range, T2 tol, int *N_eval_out, const std::function<T2( T1 )> &norm )
		{
		std::array<T2, 2> range1 = { range[0], range[1] };
		std::array<T2, 2> range2 = { range[2], range[3] };
		int N_eval = 0;
		int *N_eval_ptr = &N_eval;
		auto fun2 = [fun, range1, tol, N_eval_ptr, norm]( T2 y ) {
		auto fun3 = [fun, y, N_eval_ptr]( T2 x ) {
		( *N_eval_ptr )++;
		return fun( x, y );
		};
		return integrate<T1, T2>( fun3, range1, tol, NULL, norm );
		};
		auto S = integrate<T1, T2>( fun2, range2, tol, NULL, norm );
		if ( N_eval_out != NULL )
		*N_eval_out = N_eval;
		return S;
		}
		template <class T1, class T2>
		HOST_DEVICE inline T1 interp::integrate( const std::function<T1( T2, T2, T2 )> &fun,
		const std::array<T2, 6> &range, T2 tol, int *N_eval_out, const std::function<T2( T1 )> &norm )
		{
		std::array<T2, 4> range1 = { range[0], range[1], range[2], range[3] };
		std::array<T2, 2> range2 = { range[4], range[5] };
		int N_eval = 0;
		int *N_eval_ptr = &N_eval;
		auto fun2 = [fun, range1, tol, N_eval_ptr, norm]( T2 z ) {
		auto fun3 = [fun, z, N_eval_ptr]( T2 x, T2 y ) {
		( *N_eval_ptr )++;
		return fun( x, y, z );
		};
		return integrate<T1, T2>( fun3, range1, tol, NULL, norm );
		};
		auto S = integrate<T1, T2>( fun2, range2, tol, NULL, norm );
		if ( N_eval_out != NULL )
		*N_eval_out = N_eval;
		return S;
		}


		#endif

		#endif

src/CMakeLists.txt

+1 −6

Original line number	Diff line number	Diff line
		@@ -36,11 +36,6 @@ INCLUDE( "${RAYTRACE_SOURCE_DIR}/macros.cmake" )
		SET_COMPILER_FLAGS()


		# Link external projects
		INCLUDE( Find_TIMER.cmake )
		CONFIGURE_TIMER( 0 "" )


		# Enable OpenMP
		IF ( USE_OPENMP )
		ADD_DEFINITIONS( -DUSE_OPENMP )
		@@ -161,7 +156,7 @@ ADD_DISTCLEAN( libRayTrace.* null_timer CreateImage* )
		# Create the library
		INCLUDE_DIRECTORIES( ${RAYTRACE_SOURCE_DIR} )
		ADD_DEFINITIONS( -DDISABLE_WRITE_FAILED_RAYS )
		SET( SOURCES RayTrace RayTraceImage.cpp RayTraceStructures.cpp utilities/RayUtilities.cpp interp.cpp RayTraceImageCPU.cpp )
		SET( SOURCES RayTrace RayTraceImage.cpp RayTraceStructures.cpp utilities/RayUtilities.cpp AtomicModel/interp.cpp RayTraceImageCPU.cpp )
		IF ( USE_OPENACC )
		SET( SOURCES ${SOURCES} RayTraceImageOpenACC.cpp )
		ENDIF()