Loading build/configure +5 −3 Original line number Diff line number Diff line # Sample configure file export KOKKOS_DIR=/packages/TPLs/install/opt/kokkos cmake \ -D CMAKE_BUILD_TYPE=Release \ -D CMAKE_C_COMPILER=gcc \ Loading @@ -7,9 +9,9 @@ cmake \ -D CXX_STD=11 \ -D USE_OPENACC=0 \ -D USE_OPENMP=1 \ -D USE_KOKKOS=0 \ -D KOKKOS_DIRECTORY=/usr/local/kokkos \ -D KOKKOS_WRAPPER=/usr/local/kokkos/config/nvcc_wrapper \ -D USE_KOKKOS=1 \ -D KOKKOS_DIRECTORY=${KOKKOS_DIR} \ -D KOKKOS_WRAPPER=${KOKKOS_DIR}/nvcc_wrapper \ -D USE_CUDA=1 \ -D CUDA_FLAGS="-arch sm_30" \ ../src Loading src/interp.cpp→src/AtomicModel/interp.cpp +13 −5 Original line number Diff line number Diff line Loading @@ -2,7 +2,7 @@ #include <stdexcept> #include <string.h> #include "interp.h" #include "AtomicModel/interp.h" // Subroutine to perform linear interpolation Loading Loading @@ -94,7 +94,8 @@ double interp::trilinear( double x, double y, double z, size_t Nx, size_t Ny, si // Subroutine to perform cubic hermite interpolation double interp::interp_pchip( size_t N, const double *xi, const double *yi, double x ) template<class TYPE> static inline TYPE pchip( size_t N, const TYPE *xi, const TYPE *yi, TYPE x ) { if ( x <= xi[0] || N <= 2 ) { double dx = ( x - xi[0] ) / ( xi[1] - xi[0] ); Loading @@ -103,7 +104,7 @@ double interp::interp_pchip( size_t N, const double *xi, const double *yi, doubl double dx = ( x - xi[N - 2] ) / ( xi[N - 1] - xi[N - 2] ); return ( 1.0 - dx ) * yi[N - 2] + dx * yi[N - 1]; } size_t i = findfirstsingle( xi, N, x ); size_t i = interp::findfirstsingle( xi, N, x ); double f1 = yi[i - 1]; double f2 = yi[i]; double dx = ( x - xi[i - 1] ) / ( xi[i] - xi[i - 1] ); Loading Loading @@ -139,10 +140,17 @@ double interp::interp_pchip( size_t N, const double *xi, const double *yi, doubl } // Perform the interpolation double dx2 = dx * dx; double f = f1 + dx2 * ( 2 * dx - 3 ) * ( f1 - f2 ) + dx * g1 - dx2 * ( g1 + ( 1 - dx ) * ( g1 + g2 ) ); double f = f1 + dx2 * ( 2 * dx - 3 ) * ( f1 - f2 ) + dx * g1 - dx2 * ( g1 + ( 1 - dx ) * ( g1 + g2 ) ); return f; } double interp::interp_pchip( size_t N, const double *xi, const double *yi, double x ) { return pchip( N, xi, yi, x ); } float interp::interp_pchip( size_t N, const float *xi, const float *yi, float x ) { return pchip( N, xi, yi, x ); } // This function calculates the FWHM by finding the narrowest region that contains 76% of the energy Loading src/interp.h→src/AtomicModel/interp.h +96 −0 Original line number Diff line number Diff line #ifndef included_interp #define included_interp #include <cstdlib> #include <math.h> #include <stdio.h> #include <stdlib.h> Loading Loading @@ -145,6 +146,19 @@ double interp_linear( size_t N, const double *xi, const double *yi, double x ); double interp_pchip( size_t N, const double *xi, const double *yi, double x ); /*! * @brief Subroutine to perform cubic hermite * @details This function returns f(x) interpolated between x1 and x2. * It does so using a monotonic preserving piecewise cubic hermite polynomial. * Ouside of the domain it will perform linear interpolation. * @param N Number of points in xi * @param xi Sorted grid to use for interpolation * @param yi Function values * @param x Point to be interpolated */ float interp_pchip( size_t N, const float *xi, const float *yi, float x ); /*! * @brief Subroutine to perform bi-linear interpolation (2D) * @details This function returns f(x,y) interpolated from a rectangular grid Loading Loading @@ -357,6 +371,88 @@ double bisection( std::function<double( double, Args... )>, double lb, double ub * @return The next guess to use */ double bisection_coeff( size_t N, const double *x, const double *r, double *range = NULL ); #ifdef ENABLE_STD_FUNCTION /*! * @brief Function to perform numerical integration * @details This function numerically integrates the function on the interval [lb,ub] * using the midpoint. This requires N function evaluations. * @param fun The function to integrate of the form y = f(x) * @param range The range to integrate * @param N Number of sub-intervals to use */ template <class T1, class T2> HOST_DEVICE inline T1 integrate_midpoint( const std::function<T1( T2 )> &fun, const std::array<T2, 2> &range, int N ); /*! * @brief Function to perform numerical integration * @details This function numerically integrates the function on the interval [lb,ub] * using Simpson's rule. This requires N+1 function evaluations. * @param fun The function to integrate of the form y = f(x) * @param range The range to integrate * @param N Number of sub-intervals to use */ template <class T1, class T2> HOST_DEVICE inline T1 integrate_simpson( const std::function<T1( T2 )> &fun, const std::array<T2, 2> &range, int N ); /*! * @brief Function to perform numerical integration in 1D * @details This function numerically integrates the function on the interval [lb,ub] * using an adaptive Simpson's rule. This requires N+1 function evaluations. * @param[in] fun The function to integrate of the form y = f(x) * @param[in] range The range to integrate * @param[in] tol Number of sub-intervals to use * @param[in] N_eval Total number of evalutations * @param[in] norm Function to compute the norm to use for the error */ template <class T1, class T2> HOST_DEVICE inline T1 integrate( const std::function<T1( T2 )> &fun, const std::array<T2, 2> &range, const T2 tol = 1e-6, int *N_eval = NULL, const std::function<T2( T1 )> &norm = []( T1 x ) { return x<0 ? -x:x; } ); /*! * @brief Function to perform numerical integration in 2D * @details This function numerically integrates the function on the interval * [x_min,x_max,y_min,y_max] * using an adaptive Simpson's rule. This requires N+1 function evaluations (3 iterations for * N=1). * @param[in] fun The function to integrate of the form y = f(x) * @param[in] range The range to integrate * @param[in] tol Number of sub-intervals to use * @param[in] N_eval Total number of evalutations * @param[in] norm Function to compute the norm to use for the error */ template <class T1, class T2> HOST_DEVICE inline T1 integrate( const std::function<T1( T2, T2 )> &fun, const std::array<T2, 4> &range, const T2 tol = 1e-6, int *N_eval = NULL, const std::function<T2( T1 )> &norm = []( T1 x ) { return x<0 ? -x:x; } ); /*! * @brief Function to perform numerical integration in 3D * @details This function numerically integrates the function on the interval * [x_min,x_max,y_min,y_max,z_min,z_max] * using an adaptive Simpson's rule. * @param[in] fun The function to integrate of the form y = f(x) * @param[in] range The range to integrate * @param[in] tol Number of sub-intervals to use * @param[in] N_eval Total number of evalutations * @param[in] norm Function to compute the norm to use for the error */ template <class T1, class T2> HOST_DEVICE inline T1 integrate( const std::function<T1( T2, T2, T2 )> &fun, const std::array<T2, 6> &range, const T2 tol = 1e-6, int *N_eval = NULL, const std::function<T2( T1 )> &norm = []( T1 x ) { return x<0 ? -x:x; } ); #endif } #include "interp.hpp" Loading src/interp.hpp→src/AtomicModel/interp.hpp +119 −1 Original line number Diff line number Diff line #ifndef included_interp_hpp #define included_interp_hpp #include "interp.h" #include "AtomicModel/interp.h" #include <iostream> #include <limits> #include <stdexcept> Loading Loading @@ -561,4 +561,122 @@ HOST_DEVICE inline double interp::get_interp_ratio( } #ifdef ENABLE_STD_FUNCTION // Integrate the function using the midpoints template <class T1, class T2> HOST_DEVICE inline T1 interp::integrate_midpoint( const std::function<T1( T2 )> &f, const std::array<T2, 2> &range, int N ) { T2 dx = ( range[1] - range[0] ) / N; T1 y = 0; for ( int i = 0; i < N; i++ ) y += f( range[0] + ( i + 0.5 ) * dx ); y *= dx; return y; } // Integrate the function using Simpson's rule template <class T1, class T2> HOST_DEVICE inline T1 interp::integrate_simpson( const std::function<T1( T2 )> &f, const std::array<T2, 2> &range, int N ) { if ( N <= 2 ) return ( range[1] - range[0] ) / 6 * ( f( range[0] ) + 4 * f( ( range[0] + range[1] ) / 2 ) + f( range[1] ) ); if ( N % 2 != 0 ) throw std::logic_error( "Error: N must be even" ); T2 dx = ( range[1] - range[0] ) / N; T1 y = f( range[0] ) + f( range[1] ) + 4 * f( range[0] + dx ); for ( int i = 1; i < N / 2; i++ ) { y += 2 * f( range[0] + 2 * i * dx ); y += 4 * f( range[0] + 2 * i * dx + dx ); } y *= ( dx / 3 ); return y; } // Integrate the function using adaptive Simpson's rule template <class T1, class T2> HOST_DEVICE inline T1 adaptiveSimpsonsAux( const std::function<T1( T2 )> &fun, T2 a, T2 b, T2 tol, T1 S, T1 fa, T1 fb, T1 fc, int bottom, int &N_eval, const std::function<T2( T1 )> &norm ) { T2 c = ( a + b ) / 2, h = b - a; T2 d = ( a + c ) / 2, e = ( c + b ) / 2; T1 fd = fun( d ), fe = fun( e ); T1 Sleft = ( h / 12 ) * ( fa + 4 * fd + fc ); T1 Sright = ( h / 12 ) * ( fc + 4 * fe + fb ); T1 S2 = Sleft + Sright; N_eval += 2; T2 err = norm( S2 - S ); if ( bottom <= 0 || err <= 15 * tol ) // magic 15 comes from error analysis return S2 + 0.066666666666667 * ( S2 - S ); return adaptiveSimpsonsAux<T1, T2>( fun, a, c, tol / 2, Sleft, fa, fc, fd, bottom - 1, N_eval, norm ) + adaptiveSimpsonsAux<T1, T2>( fun, c, b, tol / 2, Sright, fc, fb, fe, bottom - 1, N_eval, norm ); } template <class T1, class T2> HOST_DEVICE inline T1 interp::integrate( const std::function<T1( T2 )> &fun, const std::array<T2, 2> &range, T2 tol, int *N_eval_out, const std::function<T2( T1 )> &norm ) { T2 h = range[1] - range[0]; T1 fa = fun( range[0] ); T1 fb = fun( range[1] ); T1 fc = fun( range[0] + 0.5 * h ); T1 S = ( h / 6 ) * ( fa + 4 * fc + fb ); int N_eval = 2; S = adaptiveSimpsonsAux<T1, T2>( fun, range[0], range[1], tol, S, fa, fb, fc, 100, N_eval, norm ); if ( N_eval_out != NULL ) *N_eval_out = N_eval; return S; } template <class T1, class T2> HOST_DEVICE inline T1 interp::integrate( const std::function<T1( T2, T2 )> &fun, const std::array<T2, 4> &range, T2 tol, int *N_eval_out, const std::function<T2( T1 )> &norm ) { std::array<T2, 2> range1 = { range[0], range[1] }; std::array<T2, 2> range2 = { range[2], range[3] }; int N_eval = 0; int *N_eval_ptr = &N_eval; auto fun2 = [fun, range1, tol, N_eval_ptr, norm]( T2 y ) { auto fun3 = [fun, y, N_eval_ptr]( T2 x ) { ( *N_eval_ptr )++; return fun( x, y ); }; return integrate<T1, T2>( fun3, range1, tol, NULL, norm ); }; auto S = integrate<T1, T2>( fun2, range2, tol, NULL, norm ); if ( N_eval_out != NULL ) *N_eval_out = N_eval; return S; } template <class T1, class T2> HOST_DEVICE inline T1 interp::integrate( const std::function<T1( T2, T2, T2 )> &fun, const std::array<T2, 6> &range, T2 tol, int *N_eval_out, const std::function<T2( T1 )> &norm ) { std::array<T2, 4> range1 = { range[0], range[1], range[2], range[3] }; std::array<T2, 2> range2 = { range[4], range[5] }; int N_eval = 0; int *N_eval_ptr = &N_eval; auto fun2 = [fun, range1, tol, N_eval_ptr, norm]( T2 z ) { auto fun3 = [fun, z, N_eval_ptr]( T2 x, T2 y ) { ( *N_eval_ptr )++; return fun( x, y, z ); }; return integrate<T1, T2>( fun3, range1, tol, NULL, norm ); }; auto S = integrate<T1, T2>( fun2, range2, tol, NULL, norm ); if ( N_eval_out != NULL ) *N_eval_out = N_eval; return S; } #endif #endif src/CMakeLists.txt +1 −6 Original line number Diff line number Diff line Loading @@ -36,11 +36,6 @@ INCLUDE( "${RAYTRACE_SOURCE_DIR}/macros.cmake" ) SET_COMPILER_FLAGS() # Link external projects INCLUDE( Find_TIMER.cmake ) CONFIGURE_TIMER( 0 "" ) # Enable OpenMP IF ( USE_OPENMP ) ADD_DEFINITIONS( -DUSE_OPENMP ) Loading Loading @@ -161,7 +156,7 @@ ADD_DISTCLEAN( libRayTrace.* null_timer CreateImage* ) # Create the library INCLUDE_DIRECTORIES( ${RAYTRACE_SOURCE_DIR} ) ADD_DEFINITIONS( -DDISABLE_WRITE_FAILED_RAYS ) SET( SOURCES RayTrace RayTraceImage.cpp RayTraceStructures.cpp utilities/RayUtilities.cpp interp.cpp RayTraceImageCPU.cpp ) SET( SOURCES RayTrace RayTraceImage.cpp RayTraceStructures.cpp utilities/RayUtilities.cpp AtomicModel/interp.cpp RayTraceImageCPU.cpp ) IF ( USE_OPENACC ) SET( SOURCES ${SOURCES} RayTraceImageOpenACC.cpp ) ENDIF() Loading Loading
build/configure +5 −3 Original line number Diff line number Diff line # Sample configure file export KOKKOS_DIR=/packages/TPLs/install/opt/kokkos cmake \ -D CMAKE_BUILD_TYPE=Release \ -D CMAKE_C_COMPILER=gcc \ Loading @@ -7,9 +9,9 @@ cmake \ -D CXX_STD=11 \ -D USE_OPENACC=0 \ -D USE_OPENMP=1 \ -D USE_KOKKOS=0 \ -D KOKKOS_DIRECTORY=/usr/local/kokkos \ -D KOKKOS_WRAPPER=/usr/local/kokkos/config/nvcc_wrapper \ -D USE_KOKKOS=1 \ -D KOKKOS_DIRECTORY=${KOKKOS_DIR} \ -D KOKKOS_WRAPPER=${KOKKOS_DIR}/nvcc_wrapper \ -D USE_CUDA=1 \ -D CUDA_FLAGS="-arch sm_30" \ ../src Loading
src/interp.cpp→src/AtomicModel/interp.cpp +13 −5 Original line number Diff line number Diff line Loading @@ -2,7 +2,7 @@ #include <stdexcept> #include <string.h> #include "interp.h" #include "AtomicModel/interp.h" // Subroutine to perform linear interpolation Loading Loading @@ -94,7 +94,8 @@ double interp::trilinear( double x, double y, double z, size_t Nx, size_t Ny, si // Subroutine to perform cubic hermite interpolation double interp::interp_pchip( size_t N, const double *xi, const double *yi, double x ) template<class TYPE> static inline TYPE pchip( size_t N, const TYPE *xi, const TYPE *yi, TYPE x ) { if ( x <= xi[0] || N <= 2 ) { double dx = ( x - xi[0] ) / ( xi[1] - xi[0] ); Loading @@ -103,7 +104,7 @@ double interp::interp_pchip( size_t N, const double *xi, const double *yi, doubl double dx = ( x - xi[N - 2] ) / ( xi[N - 1] - xi[N - 2] ); return ( 1.0 - dx ) * yi[N - 2] + dx * yi[N - 1]; } size_t i = findfirstsingle( xi, N, x ); size_t i = interp::findfirstsingle( xi, N, x ); double f1 = yi[i - 1]; double f2 = yi[i]; double dx = ( x - xi[i - 1] ) / ( xi[i] - xi[i - 1] ); Loading Loading @@ -139,10 +140,17 @@ double interp::interp_pchip( size_t N, const double *xi, const double *yi, doubl } // Perform the interpolation double dx2 = dx * dx; double f = f1 + dx2 * ( 2 * dx - 3 ) * ( f1 - f2 ) + dx * g1 - dx2 * ( g1 + ( 1 - dx ) * ( g1 + g2 ) ); double f = f1 + dx2 * ( 2 * dx - 3 ) * ( f1 - f2 ) + dx * g1 - dx2 * ( g1 + ( 1 - dx ) * ( g1 + g2 ) ); return f; } double interp::interp_pchip( size_t N, const double *xi, const double *yi, double x ) { return pchip( N, xi, yi, x ); } float interp::interp_pchip( size_t N, const float *xi, const float *yi, float x ) { return pchip( N, xi, yi, x ); } // This function calculates the FWHM by finding the narrowest region that contains 76% of the energy Loading
src/interp.h→src/AtomicModel/interp.h +96 −0 Original line number Diff line number Diff line #ifndef included_interp #define included_interp #include <cstdlib> #include <math.h> #include <stdio.h> #include <stdlib.h> Loading Loading @@ -145,6 +146,19 @@ double interp_linear( size_t N, const double *xi, const double *yi, double x ); double interp_pchip( size_t N, const double *xi, const double *yi, double x ); /*! * @brief Subroutine to perform cubic hermite * @details This function returns f(x) interpolated between x1 and x2. * It does so using a monotonic preserving piecewise cubic hermite polynomial. * Ouside of the domain it will perform linear interpolation. * @param N Number of points in xi * @param xi Sorted grid to use for interpolation * @param yi Function values * @param x Point to be interpolated */ float interp_pchip( size_t N, const float *xi, const float *yi, float x ); /*! * @brief Subroutine to perform bi-linear interpolation (2D) * @details This function returns f(x,y) interpolated from a rectangular grid Loading Loading @@ -357,6 +371,88 @@ double bisection( std::function<double( double, Args... )>, double lb, double ub * @return The next guess to use */ double bisection_coeff( size_t N, const double *x, const double *r, double *range = NULL ); #ifdef ENABLE_STD_FUNCTION /*! * @brief Function to perform numerical integration * @details This function numerically integrates the function on the interval [lb,ub] * using the midpoint. This requires N function evaluations. * @param fun The function to integrate of the form y = f(x) * @param range The range to integrate * @param N Number of sub-intervals to use */ template <class T1, class T2> HOST_DEVICE inline T1 integrate_midpoint( const std::function<T1( T2 )> &fun, const std::array<T2, 2> &range, int N ); /*! * @brief Function to perform numerical integration * @details This function numerically integrates the function on the interval [lb,ub] * using Simpson's rule. This requires N+1 function evaluations. * @param fun The function to integrate of the form y = f(x) * @param range The range to integrate * @param N Number of sub-intervals to use */ template <class T1, class T2> HOST_DEVICE inline T1 integrate_simpson( const std::function<T1( T2 )> &fun, const std::array<T2, 2> &range, int N ); /*! * @brief Function to perform numerical integration in 1D * @details This function numerically integrates the function on the interval [lb,ub] * using an adaptive Simpson's rule. This requires N+1 function evaluations. * @param[in] fun The function to integrate of the form y = f(x) * @param[in] range The range to integrate * @param[in] tol Number of sub-intervals to use * @param[in] N_eval Total number of evalutations * @param[in] norm Function to compute the norm to use for the error */ template <class T1, class T2> HOST_DEVICE inline T1 integrate( const std::function<T1( T2 )> &fun, const std::array<T2, 2> &range, const T2 tol = 1e-6, int *N_eval = NULL, const std::function<T2( T1 )> &norm = []( T1 x ) { return x<0 ? -x:x; } ); /*! * @brief Function to perform numerical integration in 2D * @details This function numerically integrates the function on the interval * [x_min,x_max,y_min,y_max] * using an adaptive Simpson's rule. This requires N+1 function evaluations (3 iterations for * N=1). * @param[in] fun The function to integrate of the form y = f(x) * @param[in] range The range to integrate * @param[in] tol Number of sub-intervals to use * @param[in] N_eval Total number of evalutations * @param[in] norm Function to compute the norm to use for the error */ template <class T1, class T2> HOST_DEVICE inline T1 integrate( const std::function<T1( T2, T2 )> &fun, const std::array<T2, 4> &range, const T2 tol = 1e-6, int *N_eval = NULL, const std::function<T2( T1 )> &norm = []( T1 x ) { return x<0 ? -x:x; } ); /*! * @brief Function to perform numerical integration in 3D * @details This function numerically integrates the function on the interval * [x_min,x_max,y_min,y_max,z_min,z_max] * using an adaptive Simpson's rule. * @param[in] fun The function to integrate of the form y = f(x) * @param[in] range The range to integrate * @param[in] tol Number of sub-intervals to use * @param[in] N_eval Total number of evalutations * @param[in] norm Function to compute the norm to use for the error */ template <class T1, class T2> HOST_DEVICE inline T1 integrate( const std::function<T1( T2, T2, T2 )> &fun, const std::array<T2, 6> &range, const T2 tol = 1e-6, int *N_eval = NULL, const std::function<T2( T1 )> &norm = []( T1 x ) { return x<0 ? -x:x; } ); #endif } #include "interp.hpp" Loading
src/interp.hpp→src/AtomicModel/interp.hpp +119 −1 Original line number Diff line number Diff line #ifndef included_interp_hpp #define included_interp_hpp #include "interp.h" #include "AtomicModel/interp.h" #include <iostream> #include <limits> #include <stdexcept> Loading Loading @@ -561,4 +561,122 @@ HOST_DEVICE inline double interp::get_interp_ratio( } #ifdef ENABLE_STD_FUNCTION // Integrate the function using the midpoints template <class T1, class T2> HOST_DEVICE inline T1 interp::integrate_midpoint( const std::function<T1( T2 )> &f, const std::array<T2, 2> &range, int N ) { T2 dx = ( range[1] - range[0] ) / N; T1 y = 0; for ( int i = 0; i < N; i++ ) y += f( range[0] + ( i + 0.5 ) * dx ); y *= dx; return y; } // Integrate the function using Simpson's rule template <class T1, class T2> HOST_DEVICE inline T1 interp::integrate_simpson( const std::function<T1( T2 )> &f, const std::array<T2, 2> &range, int N ) { if ( N <= 2 ) return ( range[1] - range[0] ) / 6 * ( f( range[0] ) + 4 * f( ( range[0] + range[1] ) / 2 ) + f( range[1] ) ); if ( N % 2 != 0 ) throw std::logic_error( "Error: N must be even" ); T2 dx = ( range[1] - range[0] ) / N; T1 y = f( range[0] ) + f( range[1] ) + 4 * f( range[0] + dx ); for ( int i = 1; i < N / 2; i++ ) { y += 2 * f( range[0] + 2 * i * dx ); y += 4 * f( range[0] + 2 * i * dx + dx ); } y *= ( dx / 3 ); return y; } // Integrate the function using adaptive Simpson's rule template <class T1, class T2> HOST_DEVICE inline T1 adaptiveSimpsonsAux( const std::function<T1( T2 )> &fun, T2 a, T2 b, T2 tol, T1 S, T1 fa, T1 fb, T1 fc, int bottom, int &N_eval, const std::function<T2( T1 )> &norm ) { T2 c = ( a + b ) / 2, h = b - a; T2 d = ( a + c ) / 2, e = ( c + b ) / 2; T1 fd = fun( d ), fe = fun( e ); T1 Sleft = ( h / 12 ) * ( fa + 4 * fd + fc ); T1 Sright = ( h / 12 ) * ( fc + 4 * fe + fb ); T1 S2 = Sleft + Sright; N_eval += 2; T2 err = norm( S2 - S ); if ( bottom <= 0 || err <= 15 * tol ) // magic 15 comes from error analysis return S2 + 0.066666666666667 * ( S2 - S ); return adaptiveSimpsonsAux<T1, T2>( fun, a, c, tol / 2, Sleft, fa, fc, fd, bottom - 1, N_eval, norm ) + adaptiveSimpsonsAux<T1, T2>( fun, c, b, tol / 2, Sright, fc, fb, fe, bottom - 1, N_eval, norm ); } template <class T1, class T2> HOST_DEVICE inline T1 interp::integrate( const std::function<T1( T2 )> &fun, const std::array<T2, 2> &range, T2 tol, int *N_eval_out, const std::function<T2( T1 )> &norm ) { T2 h = range[1] - range[0]; T1 fa = fun( range[0] ); T1 fb = fun( range[1] ); T1 fc = fun( range[0] + 0.5 * h ); T1 S = ( h / 6 ) * ( fa + 4 * fc + fb ); int N_eval = 2; S = adaptiveSimpsonsAux<T1, T2>( fun, range[0], range[1], tol, S, fa, fb, fc, 100, N_eval, norm ); if ( N_eval_out != NULL ) *N_eval_out = N_eval; return S; } template <class T1, class T2> HOST_DEVICE inline T1 interp::integrate( const std::function<T1( T2, T2 )> &fun, const std::array<T2, 4> &range, T2 tol, int *N_eval_out, const std::function<T2( T1 )> &norm ) { std::array<T2, 2> range1 = { range[0], range[1] }; std::array<T2, 2> range2 = { range[2], range[3] }; int N_eval = 0; int *N_eval_ptr = &N_eval; auto fun2 = [fun, range1, tol, N_eval_ptr, norm]( T2 y ) { auto fun3 = [fun, y, N_eval_ptr]( T2 x ) { ( *N_eval_ptr )++; return fun( x, y ); }; return integrate<T1, T2>( fun3, range1, tol, NULL, norm ); }; auto S = integrate<T1, T2>( fun2, range2, tol, NULL, norm ); if ( N_eval_out != NULL ) *N_eval_out = N_eval; return S; } template <class T1, class T2> HOST_DEVICE inline T1 interp::integrate( const std::function<T1( T2, T2, T2 )> &fun, const std::array<T2, 6> &range, T2 tol, int *N_eval_out, const std::function<T2( T1 )> &norm ) { std::array<T2, 4> range1 = { range[0], range[1], range[2], range[3] }; std::array<T2, 2> range2 = { range[4], range[5] }; int N_eval = 0; int *N_eval_ptr = &N_eval; auto fun2 = [fun, range1, tol, N_eval_ptr, norm]( T2 z ) { auto fun3 = [fun, z, N_eval_ptr]( T2 x, T2 y ) { ( *N_eval_ptr )++; return fun( x, y, z ); }; return integrate<T1, T2>( fun3, range1, tol, NULL, norm ); }; auto S = integrate<T1, T2>( fun2, range2, tol, NULL, norm ); if ( N_eval_out != NULL ) *N_eval_out = N_eval; return S; } #endif #endif
src/CMakeLists.txt +1 −6 Original line number Diff line number Diff line Loading @@ -36,11 +36,6 @@ INCLUDE( "${RAYTRACE_SOURCE_DIR}/macros.cmake" ) SET_COMPILER_FLAGS() # Link external projects INCLUDE( Find_TIMER.cmake ) CONFIGURE_TIMER( 0 "" ) # Enable OpenMP IF ( USE_OPENMP ) ADD_DEFINITIONS( -DUSE_OPENMP ) Loading Loading @@ -161,7 +156,7 @@ ADD_DISTCLEAN( libRayTrace.* null_timer CreateImage* ) # Create the library INCLUDE_DIRECTORIES( ${RAYTRACE_SOURCE_DIR} ) ADD_DEFINITIONS( -DDISABLE_WRITE_FAILED_RAYS ) SET( SOURCES RayTrace RayTraceImage.cpp RayTraceStructures.cpp utilities/RayUtilities.cpp interp.cpp RayTraceImageCPU.cpp ) SET( SOURCES RayTrace RayTraceImage.cpp RayTraceStructures.cpp utilities/RayUtilities.cpp AtomicModel/interp.cpp RayTraceImageCPU.cpp ) IF ( USE_OPENACC ) SET( SOURCES ${SOURCES} RayTraceImageOpenACC.cpp ) ENDIF() Loading