Loading kitsune-tests/benchmarks/build.sh 0 → 100755 +102 −0 Original line number Diff line number Diff line #!/bin/bash if [[ -d ./test1 ]] ; then rm -rf ./test1 fi if [[ -d ./test2 ]] ; then rm -rf ./test2 fi if [[ -d ./test3 ]] ; then rm -rf ./test3 fi if [[ -d ./test4 ]] ; then rm -rf ./test4 fi mkdir test1 mkdir test2 mkdir test3 mkdir test4 mkdir test{1,2,3,4}/exe mkdir test{1,2,3,4}/ll O_LEVEL="-O2" CFLAGS1="$O_LEVEL -I./ -I$LANL_INSTALL/kokkos2/include" KOKKOS_FLAGS1="-lkokkoscore -L$LANL_INSTALL/kokkos2/lib64 -ldl" CFLAGS2="-fopenmp $O_LEVEL -I./ -I$LANL_INSTALL/include" KOKKOS_FLAGS2="-L$LANL_INSTALL/lib64 -lkokkoscore -ldl" CFLAGS3="-I./ -I$LANL_INSTALL/include -fkokkos -fkokkos-no-init -ftapir=serial -fopenmp $O_LEVEL" CFLAGS4="-I./ -I$LANL_INSTALL/include -fkokkos -fkokkos-no-init -ftapir=opencilk -fopenmp $O_LEVEL" function compile() { for j in 1 2 3 4 do EXE_FOLDER="test$j/exe" LL_FOLDER="test$j/ll" if [[ $j == 1 ]] then set -x $OCC $CFLAGS1 $1 -o "$EXE_FOLDER/`basename $1 .cpp`" $KOKKOS_FLAGS1 $OCC $CFLAGS1 $1 -o "$LL_FOLDER/`basename $1 .cpp`.ll" -S -emit-llvm set +x elif [[ $j == 2 ]] then set -x $OCC $CFLAGS2 $1 -o "$EXE_FOLDER/`basename $1 .cpp`" $KOKKOS_FLAGS2 $OCC $CFLAGS2 $1 -o "$LL_FOLDER/`basename $1 .cpp`.ll" -S -emit-llvm set +x elif [[ $j == 3 ]] then set -x $OCC $CFLAGS3 $KOKKOS_FLAGS3 $1 -o "$EXE_FOLDER/`basename $1 .cpp`" $OCC $CFLAGS3 $1 -o "$LL_FOLDER/`basename $1 .cpp`.ll" -S -emit-llvm set +x elif [[ $j == 4 ]] then set -x $OCC $CFLAGS4 $KOKKOS_FLAGS4 $1 -o "$EXE_FOLDER/`basename $1 .cpp`" $OCC $CFLAGS4 $1 -o "$LL_FOLDER/`basename $1 .cpp`.ll" -S -emit-llvm set +x fi done } ########################################################################################### ## Serial tests echo "Building Serial..." for i in serial/*.cpp do compile $i done ########################################################################################### ## Forall tests echo "Building Forall..." 
for i in forall/*.cpp do compile $i done ########################################################################################### ## Kitsunes tests echo "Building Parallel..." for i in kokkos/*.cpp do compile $i done echo "Done" kitsune-tests/benchmarks/forall/complex_forall.cpp 0 → 100644 +72 −0 Original line number Diff line number Diff line // // Example of operations over an array of complex numbers. // // To enable kitsune+tapir compilation add the flags to a standard // clang compilation: // // * -fkokkos : enable specialized Kokkos recognition and // compilation (lower to Tapir). // * -fkokkos-no-init : disable Kokkos initialization and // finalization calls to avoid conflicts with // target runtime operation. // * -ftapir=rt-target : the runtime ABI to target. // #include <cstdio> #include <cstdlib> #include <kitsune.h> #include "timer.h" using namespace std; using namespace kitsune; const size_t VEC_SIZE = 1024 * 1024 * 256; struct my_complex { float real; float img; }; void random_fill(my_complex *data, size_t N) { for(size_t i = 0; i < N; ++i) { data[i].real = rand() / (float)RAND_MAX; data[i].img = rand() / (float)RAND_MAX; } } int main (int argc, char* argv[]) { fprintf(stderr, "**** kitsune+tapir kokkos example: complex\n"); my_complex *A = new my_complex[VEC_SIZE]; my_complex *B = new my_complex[VEC_SIZE]; my_complex *C = new my_complex[VEC_SIZE]; random_fill(A, VEC_SIZE); random_fill(B, VEC_SIZE); double loop_secs = 0; for (int ii = 0; ii<4; ii++) { timer t; { forall (int i = 0; i<VEC_SIZE; i++) { C[i].real = (A[i].real * B[i].real) - (A[i].img * B[i].img); C[i].img = (A[i].real * B[i].img) - (A[i].img * B[i].real); } } loop_secs += t.seconds(); } loop_secs /= 4; fprintf(stderr, "(%s) %lf, %lf, %lf, %lf\n", argv[0], C[0].real, C[0].img, C[VEC_SIZE/4].real, C[VEC_SIZE/4].img); fprintf(stdout, "Time: %lf\n", loop_secs); delete []A; delete []B; delete []C; return 0; } kitsune-tests/benchmarks/forall/matmul_forall.cpp 0 → 100644 +73 −0 Original line 
number Diff line number Diff line // // Non-square matrix multiplication example. To enable // kitsune+tapir compilation add the flags to a standard // clang compilation: // // * -fkokkos : enable specialized Kokkos recognition and // compilation (lower to Tapir). // * -fkokkos-no-init : disable Kokkos initialization and // finalization calls to avoid conflicts with // target runtime operation. // * -ftapir=rt-target : the runtime ABI to target. // #include <cstdio> #include <cstdlib> #include <kitsune.h> #include "timer.h" using namespace std; using namespace kitsune; const size_t N = 8192; const size_t M = 4096; const size_t K = 512; void random_fill(float *data, size_t N) { for(size_t i = 0; i < N; ++i) data[i] = rand() / (float)RAND_MAX; } void zero_fill(float *data, size_t N) { for(size_t i = 0; i < N; ++i) data[i] = 0.0f; } int main (int argc, char* argv[]) { fprintf(stderr, "**** kitsune+tapir kokkos example: matrix multiply\n"); float *A = new float[N*K]; float *B = new float[K*M]; float *C = new float[N*M]; random_fill(A, N*K); random_fill(B, M*K); zero_fill(C, N*M); double loop_secs = 0; for (int ii = 0; ii<4; ii++) { timer t; { forall (int i = 0; i<N; i++) { forall (int k = 0; k<K; k++) { forall (int j = 0; j<M; j++) { C[i*M + j] += A[i*K + k] * B[k*M +j]; } } } } loop_secs += t.seconds(); } loop_secs /= 4; fprintf(stderr, "(%s) %lf, %lf, %lf, %lf\n", argv[0], C[0], C[(N*M)/4], C[(N*M)/2], C[(N*M)-1]); fprintf(stdout, "Time: %lf\n", loop_secs); delete []A; delete []B; delete []C; return 0; } kitsune-tests/benchmarks/forall/matvec_forall.cpp 0 → 100644 +69 −0 Original line number Diff line number Diff line // // Non-square matrix multiplication example. To enable // kitsune+tapir compilation add the flags to a standard // clang compilation: // // * -fkokkos : enable specialized Kokkos recognition and // compilation (lower to Tapir). 
// * -fkokkos-no-init : disable Kokkos initialization and // finalization calls to avoid conflicts with // target runtime operation. // * -ftapir=rt-target : the runtime ABI to target. // #include <cstdio> #include <cstdlib> #include <kitsune.h> #include "timer.h" using namespace std; using namespace kitsune; const size_t N = 8192; void random_fill(float *data, size_t N) { for(size_t i = 0; i < N; ++i) data[i] = rand() / (float)RAND_MAX; } void zero_fill(float *data, size_t N) { for(size_t i = 0; i < N; ++i) data[i] = 0.0f; } int main (int argc, char* argv[]) { fprintf(stderr, "**** kitsune+tapir kokkos example: matrix-vector multiply\n"); float *matrix = new float[N*N]; float *vector = new float[N]; float *result = new float[N*N]; random_fill(matrix, N*N); random_fill(vector, N); zero_fill(result, N*N); double loop_secs = 0; for (int ii = 0; ii<4; ii++) { timer t; { forall (int i = 0; i<N; i++) { forall (int j = 0; j<N; j++) { result[i*N + j] += matrix[i*N + j] * vector[i]; } } } loop_secs += t.seconds(); } loop_secs /= 4; fprintf(stderr, "(%s) %lf, %lf, %lf, %lf\n", argv[0], result[0], result[(N*N)/4], result[(N*N)/2], result[(N*N)-1]); fprintf(stdout, "Time: %lf\n", loop_secs); delete []matrix; delete []vector; delete []result; return 0; } kitsune-tests/benchmarks/forall/normalize_forall.cpp 0 → 100644 +66 −0 Original line number Diff line number Diff line // // The normalize example from the Tapir PPOP paper converted // to Kokkos. To enable kitsune+tapir compilation add the // flags to a standard clang compilation: // // * -fkokkos : enable specialized Kokkos recognition and // compilation (lower to Tapir). // * -fkokkos-no-init : disable Kokkos initialization and // finalization calls to avoid conflicts with // target runtime operation. // * -ftapir=rt-target : the runtime ABI to target. 
// #include <cstdio> #include <cmath> #include <cstdlib> #include <kitsune.h> #include "timer.h" using namespace std; using namespace kitsune; const size_t VEC_SIZE = 4096 * 20; void random_fill(double *data, size_t N) { for(size_t i = 0; i < N; ++i) data[i] = (rand() / (double)RAND_MAX) * 10.0; } __attribute__((const, noinline)) double norm(const double *in, size_t N) { double sum = 0.0; for(size_t i = 0; i < N; ++i) sum += in[i] * in[i]; return sqrt(sum); } int main (int argc, char* argv[]) { fprintf(stderr, "**** kitsune+tapir kokkos example: normalize (tapir paper)\n"); double *in = new double[VEC_SIZE]; double *out = new double[VEC_SIZE]; random_fill(in, VEC_SIZE); double loop_secs = 0; for (int ii = 0; ii<4; ii++) { timer t; { forall (int i = 0; i<VEC_SIZE; i++) { out[i] = in[i] / norm(in, VEC_SIZE); } } loop_secs += t.seconds(); } loop_secs /= 4; fprintf(stderr, "(%s) %lf, %lf, %lf, %lf\n", argv[0], out[0], out[VEC_SIZE/4], out[VEC_SIZE/2], out[VEC_SIZE-1]); fprintf(stdout, "Time: %lf\n", loop_secs); delete []in; delete []out; return 0; } Loading
#!/bin/bash
# kitsune-tests/benchmarks/build.sh (new file, mode 100755)
#
# Builds every benchmark source found under serial/, forall/ and kokkos/ in
# four configurations. For configuration N the executable is written to
# testN/exe/<name> and the LLVM IR (-S -emit-llvm) to testN/ll/<name>.ll.
#
#   test1: CFLAGS1, linked with the Kokkos under $LANL_INSTALL/kokkos2
#   test2: CFLAGS2 (-fopenmp), linked with the Kokkos under $LANL_INSTALL
#   test3: CFLAGS3 (-fkokkos -fkokkos-no-init -ftapir=serial)
#   test4: CFLAGS4 (-fkokkos -fkokkos-no-init -ftapir=opencilk)
#
# Environment:
#   OCC           compiler driver to invoke (required)
#   LANL_INSTALL  install prefix used for the include and library paths

# Fail before wiping previous results if there is no compiler to run.
: "${OCC:?OCC must be set to the compiler driver to use}"
if [[ -z "${LANL_INSTALL:-}" ]]; then
  echo "warning: LANL_INSTALL is not set; include/library paths will start at /" >&2
fi

# Start from clean output trees.
rm -rf ./test{1,2,3,4}
mkdir -p test{1,2,3,4}/{exe,ll}

O_LEVEL="-O2"

# Flag strings are expanded unquoted on purpose so each flag becomes its own word.
CFLAGS1="$O_LEVEL -I./ -I$LANL_INSTALL/kokkos2/include"
KOKKOS_FLAGS1="-lkokkoscore -L$LANL_INSTALL/kokkos2/lib64 -ldl"

CFLAGS2="-fopenmp $O_LEVEL -I./ -I$LANL_INSTALL/include"
KOKKOS_FLAGS2="-L$LANL_INSTALL/lib64 -lkokkoscore -ldl"

CFLAGS3="-I./ -I$LANL_INSTALL/include -fkokkos -fkokkos-no-init -ftapir=serial -fopenmp $O_LEVEL"
# Configurations 3 and 4 pass no extra link flags; define them explicitly
# instead of relying on an unset variable expanding to nothing.
KOKKOS_FLAGS3=""

CFLAGS4="-I./ -I$LANL_INSTALL/include -fkokkos -fkokkos-no-init -ftapir=opencilk -fopenmp $O_LEVEL"
KOKKOS_FLAGS4=""

# compile <source.cpp>
# Builds one source file in all four configurations. A failing compile is
# reported by the compiler but does not stop the remaining builds.
function compile() {
  local src="$1"
  local name
  name="$(basename "$src" .cpp)"

  local j cflags_var ldflags_var
  for j in 1 2 3 4
  do
    # Select CFLAGS<j> / KOKKOS_FLAGS<j> through indirect expansion.
    cflags_var="CFLAGS$j"
    ldflags_var="KOKKOS_FLAGS$j"

    set -x
    $OCC ${!cflags_var} "$src" -o "test$j/exe/$name" ${!ldflags_var}
    $OCC ${!cflags_var} "$src" -o "test$j/ll/$name.ll" -S -emit-llvm
    set +x
  done
}

# build_group <label> <directory>
# Compiles every .cpp file in <directory>.
function build_group() {
  local label="$1"
  local dir="$2"
  local src

  echo "Building $label..."
  for src in "$dir"/*.cpp
  do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -e "$src" ]] || continue
    compile "$src"
  done
}

###########################################################################################
## Serial tests
build_group "Serial" serial

###########################################################################################
## Forall tests
build_group "Forall" forall

###########################################################################################
## Kokkos tests
build_group "Parallel" kokkos

echo "Done"
kitsune-tests/benchmarks/forall/complex_forall.cpp 0 → 100644 +72 −0 Original line number Diff line number Diff line // // Example of operations over an array of complex numbers. // // To enable kitsune+tapir compilation add the flags to a standard // clang compilation: // // * -fkokkos : enable specialized Kokkos recognition and // compilation (lower to Tapir). // * -fkokkos-no-init : disable Kokkos initialization and // finalization calls to avoid conflicts with // target runtime operation. // * -ftapir=rt-target : the runtime ABI to target. // #include <cstdio> #include <cstdlib> #include <kitsune.h> #include "timer.h" using namespace std; using namespace kitsune; const size_t VEC_SIZE = 1024 * 1024 * 256; struct my_complex { float real; float img; }; void random_fill(my_complex *data, size_t N) { for(size_t i = 0; i < N; ++i) { data[i].real = rand() / (float)RAND_MAX; data[i].img = rand() / (float)RAND_MAX; } } int main (int argc, char* argv[]) { fprintf(stderr, "**** kitsune+tapir kokkos example: complex\n"); my_complex *A = new my_complex[VEC_SIZE]; my_complex *B = new my_complex[VEC_SIZE]; my_complex *C = new my_complex[VEC_SIZE]; random_fill(A, VEC_SIZE); random_fill(B, VEC_SIZE); double loop_secs = 0; for (int ii = 0; ii<4; ii++) { timer t; { forall (int i = 0; i<VEC_SIZE; i++) { C[i].real = (A[i].real * B[i].real) - (A[i].img * B[i].img); C[i].img = (A[i].real * B[i].img) - (A[i].img * B[i].real); } } loop_secs += t.seconds(); } loop_secs /= 4; fprintf(stderr, "(%s) %lf, %lf, %lf, %lf\n", argv[0], C[0].real, C[0].img, C[VEC_SIZE/4].real, C[VEC_SIZE/4].img); fprintf(stdout, "Time: %lf\n", loop_secs); delete []A; delete []B; delete []C; return 0; }
kitsune-tests/benchmarks/forall/matmul_forall.cpp 0 → 100644 +73 −0 Original line number Diff line number Diff line // // Non-square matrix multiplication example. To enable // kitsune+tapir compilation add the flags to a standard // clang compilation: // // * -fkokkos : enable specialized Kokkos recognition and // compilation (lower to Tapir). // * -fkokkos-no-init : disable Kokkos initialization and // finalization calls to avoid conflicts with // target runtime operation. // * -ftapir=rt-target : the runtime ABI to target. // #include <cstdio> #include <cstdlib> #include <kitsune.h> #include "timer.h" using namespace std; using namespace kitsune; const size_t N = 8192; const size_t M = 4096; const size_t K = 512; void random_fill(float *data, size_t N) { for(size_t i = 0; i < N; ++i) data[i] = rand() / (float)RAND_MAX; } void zero_fill(float *data, size_t N) { for(size_t i = 0; i < N; ++i) data[i] = 0.0f; } int main (int argc, char* argv[]) { fprintf(stderr, "**** kitsune+tapir kokkos example: matrix multiply\n"); float *A = new float[N*K]; float *B = new float[K*M]; float *C = new float[N*M]; random_fill(A, N*K); random_fill(B, M*K); zero_fill(C, N*M); double loop_secs = 0; for (int ii = 0; ii<4; ii++) { timer t; { forall (int i = 0; i<N; i++) { forall (int k = 0; k<K; k++) { forall (int j = 0; j<M; j++) { C[i*M + j] += A[i*K + k] * B[k*M +j]; } } } } loop_secs += t.seconds(); } loop_secs /= 4; fprintf(stderr, "(%s) %lf, %lf, %lf, %lf\n", argv[0], C[0], C[(N*M)/4], C[(N*M)/2], C[(N*M)-1]); fprintf(stdout, "Time: %lf\n", loop_secs); delete []A; delete []B; delete []C; return 0; }
kitsune-tests/benchmarks/forall/matvec_forall.cpp 0 → 100644 +69 −0 Original line number Diff line number Diff line // // Non-square matrix multiplication example. To enable // kitsune+tapir compilation add the flags to a standard // clang compilation: // // * -fkokkos : enable specialized Kokkos recognition and // compilation (lower to Tapir). // * -fkokkos-no-init : disable Kokkos initialization and // finalization calls to avoid conflicts with // target runtime operation. // * -ftapir=rt-target : the runtime ABI to target. // #include <cstdio> #include <cstdlib> #include <kitsune.h> #include "timer.h" using namespace std; using namespace kitsune; const size_t N = 8192; void random_fill(float *data, size_t N) { for(size_t i = 0; i < N; ++i) data[i] = rand() / (float)RAND_MAX; } void zero_fill(float *data, size_t N) { for(size_t i = 0; i < N; ++i) data[i] = 0.0f; } int main (int argc, char* argv[]) { fprintf(stderr, "**** kitsune+tapir kokkos example: matrix-vector multiply\n"); float *matrix = new float[N*N]; float *vector = new float[N]; float *result = new float[N*N]; random_fill(matrix, N*N); random_fill(vector, N); zero_fill(result, N*N); double loop_secs = 0; for (int ii = 0; ii<4; ii++) { timer t; { forall (int i = 0; i<N; i++) { forall (int j = 0; j<N; j++) { result[i*N + j] += matrix[i*N + j] * vector[i]; } } } loop_secs += t.seconds(); } loop_secs /= 4; fprintf(stderr, "(%s) %lf, %lf, %lf, %lf\n", argv[0], result[0], result[(N*N)/4], result[(N*N)/2], result[(N*N)-1]); fprintf(stdout, "Time: %lf\n", loop_secs); delete []matrix; delete []vector; delete []result; return 0; }
kitsune-tests/benchmarks/forall/normalize_forall.cpp 0 → 100644 +66 −0 Original line number Diff line number Diff line // // The normalize example from the Tapir PPOP paper converted // to Kokkos. To enable kitsune+tapir compilation add the // flags to a standard clang compilation: // // * -fkokkos : enable specialized Kokkos recognition and // compilation (lower to Tapir). // * -fkokkos-no-init : disable Kokkos initialization and // finalization calls to avoid conflicts with // target runtime operation. // * -ftapir=rt-target : the runtime ABI to target. // #include <cstdio> #include <cmath> #include <cstdlib> #include <kitsune.h> #include "timer.h" using namespace std; using namespace kitsune; const size_t VEC_SIZE = 4096 * 20; void random_fill(double *data, size_t N) { for(size_t i = 0; i < N; ++i) data[i] = (rand() / (double)RAND_MAX) * 10.0; } __attribute__((const, noinline)) double norm(const double *in, size_t N) { double sum = 0.0; for(size_t i = 0; i < N; ++i) sum += in[i] * in[i]; return sqrt(sum); } int main (int argc, char* argv[]) { fprintf(stderr, "**** kitsune+tapir kokkos example: normalize (tapir paper)\n"); double *in = new double[VEC_SIZE]; double *out = new double[VEC_SIZE]; random_fill(in, VEC_SIZE); double loop_secs = 0; for (int ii = 0; ii<4; ii++) { timer t; { forall (int i = 0; i<VEC_SIZE; i++) { out[i] = in[i] / norm(in, VEC_SIZE); } } loop_secs += t.seconds(); } loop_secs /= 4; fprintf(stderr, "(%s) %lf, %lf, %lf, %lf\n", argv[0], out[0], out[VEC_SIZE/4], out[VEC_SIZE/2], out[VEC_SIZE-1]); fprintf(stdout, "Time: %lf\n", loop_secs); delete []in; delete []out; return 0; }