Commit e6d0e5c1 authored by Kim, Jungwon's avatar Kim, Jungwon

Initial commit for the SC17 paper version

parents
# Top-level build description for Papyrus.
cmake_minimum_required(VERSION 2.8)
# NOTE(review): find_package() runs before project(), i.e. before any language
# is enabled — confirm this ordering is intentional before moving it.
find_package(MPI REQUIRED)
project(PAPYRUS)

set(PAPYRUS_VERSION_MAJOR 1)
set(PAPYRUS_VERSION_MINOR 0)
set(PAPYRUS_VERSION_PATCH 0)

# Select a per-machine configuration file from the host name.
# MATCHES takes a regular expression (not a shell glob), so the patterns are
# anchored at the start of the name; an unanchored "cori*" would match any
# host name that merely contains "cor", and "nid*" any name containing "ni".
cmake_host_system_information(RESULT MACHINE QUERY HOSTNAME)
if("${MACHINE}" MATCHES "^summitdev")
  message(STATUS "Configuration for SUMMITDEV")
  include(conf/summitdev.cmake)
elseif("${MACHINE}" MATCHES "^(cori|nid)")
  message(STATUS "Configuration for CORI")
  include(conf/cori.cmake)
else()
  message(STATUS "Configuration for DEFAULT")
  include(conf/default.cmake)
endif()

include(CTest)

add_subdirectory(include)
add_subdirectory(kv)
Copyright (c) 2017, UT-Battelle, LLC
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Oak Ridge National Laboratory, nor UT-Battelle, LLC, nor
the names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**Papyrus is a programming system written at Oak Ridge National Laboratory that provides features for scalable, aggregate, persistent memory in an extreme-scale system for typical HPC usage scenarios.**
Authors: Jungwon Kim (kimj@ornl.gov), Kittisak Sajjapongse (kittisaks@computer.org), Seyong Lee (slee2@ornl.gov), and Jeffrey S. Vetter (vetter@ornl.gov)
# How to build
You can build Papyrus with CMake and Make:
$ cmake <papyrus_source_directory> -DCMAKE_INSTALL_PREFIX=<papyrus_install_directory>
$ make install
# Repository contents
The public interface is in include/papyrus/*.h.
The Key-Value Store is in kv/.
# Rebuild from scratch: wipe the previous build/ and install/ trees, configure
# out-of-source in build/, then build and install into ../install.
# The steps are chained with && so that a failure (for example mkdir or cd)
# stops the sequence instead of running cmake/make in the wrong directory.
rm -rf build install &&
mkdir build &&
cd build &&
cmake -DCMAKE_INSTALL_PREFIX=../install .. &&
make -j install
# Toolchain and MPI launcher settings that use the "CC" compiler driver and srun.
# NOTE(review): presumably the contents of conf/cori.cmake — confirm.
# NOTE(review): the C compiler is set to "CC" as well; confirm that using the
# same driver for C and C++ is intentional.
set(CMAKE_C_COMPILER "CC")
set(CMAKE_CXX_COMPILER "CC")
# These assignments replace (rather than append to) any flags already set.
set(CMAKE_C_FLAGS "-craympich-mt")
set(CMAKE_CXX_FLAGS "-craympich-mt -std=c++11")
# Launcher command and the option that precedes the process count.
set(MPIEXEC "srun")
set(MPIEXEC_NUMPROC_FLAG "-n")
# Toolchain and MPI launcher settings that use the mpicc/mpic++ wrappers and mpirun.
# NOTE(review): presumably the contents of conf/default.cmake — confirm.
set(CMAKE_C_COMPILER "mpicc")
set(CMAKE_CXX_COMPILER "mpic++")
# These assignments replace (rather than append to) any flags already set;
# C gets no extra flags, C++ is built with -O2 in C++11 mode.
set(CMAKE_C_FLAGS "")
set(CMAKE_CXX_FLAGS "-O2 -std=c++11")
# Launcher command and the option that precedes the process count.
set(MPIEXEC "mpirun")
set(MPIEXEC_NUMPROC_FLAG "-n")
# Toolchain and MPI launcher settings that use the mpicc/mpic++ wrappers and mpirun.
# NOTE(review): presumably the contents of conf/summitdev.cmake — confirm.
set(CMAKE_C_COMPILER "mpicc")
set(CMAKE_CXX_COMPILER "mpic++")
# These assignments replace (rather than append to) any flags already set.
# NOTE(review): -qlanglvl=extended0x looks like an IBM XL option enabling
# C++0x features — confirm against the compiler in use.
set(CMAKE_C_FLAGS "")
set(CMAKE_CXX_FLAGS "-O2 -qlanglvl=extended0x")
# Launcher command and the option that precedes the process count.
set(MPIEXEC "mpirun")
set(MPIEXEC_NUMPROC_FLAG "-n")
# Descend into the papyrus/ subdirectory.
add_subdirectory(papyrus)
# Install the three public headers; the relative DESTINATION is resolved
# against CMAKE_INSTALL_PREFIX, i.e. <prefix>/include/papyrus.
# NOTE(review): these two commands presumably come from two different
# CMakeLists.txt files (include/ and include/papyrus/) — confirm.
install(FILES bupc.h mpi.h kv.h DESTINATION include/papyrus)
#ifndef PAPYRUS_INCLUDE_PAPYRUS_BUPC_H
#define PAPYRUS_INCLUDE_PAPYRUS_BUPC_H
#include <bupc_extern.h>
#include <stdio.h>
extern int MPI_Init_thread(int*, char***, int, int*);
int __real_MPI_Init(int* argc, char*** argv);
/*
 * Stand-in for MPI_Init that initializes MPI through MPI_Init_thread,
 * requesting thread-support level 3.
 *
 * NOTE(review): 3 is presumably the numeric value of MPI_THREAD_MULTIPLE,
 * hard-coded because <mpi.h> is not included in this header — confirm for the
 * MPI implementation in use. The __wrap_/__real_ names presumably pair with
 * the linker's symbol-wrapping option for MPI_Init — confirm in the build.
 *
 * argc, argv: forwarded unchanged to MPI_Init_thread.
 *
 * Writes a diagnostic to stderr when the level reported back is not 3.
 * Returns the status code of MPI_Init_thread so that an initialization
 * failure is visible to the caller instead of being reported as success.
 */
int __wrap_MPI_Init(int* argc, char*** argv) {
  /* Pre-set so the diagnostic prints a defined value if the call fails early. */
  int provided = -1;
  int rc = MPI_Init_thread(argc, argv, 3, &provided);
  if (provided != 3) fprintf(stderr, "[%s:%d] provided[%d]\n", __FILE__, __LINE__, provided);
  return rc;
}
#endif /* PAPYRUS_INCLUDE_PAPYRUS_BUPC_H */
#ifndef PAPYRUS_KV_INC_PAPYRUS_KV_H
#define PAPYRUS_KV_INC_PAPYRUS_KV_H
/* Status codes. NOTE(review): presumably the values returned by the
 * papyruskv_* functions declared in this header — confirm. */
#define PAPYRUSKV_OK 0
#define PAPYRUSKV_ERR -1
/* Bit flags. Their values, (1 << 0) and (1 << 1), are the same as those of
 * PAPYRUSKV_SEQUENTIAL and PAPYRUSKV_RELAXED below, so this group cannot share
 * a bitmask with the next one.
 * NOTE(review): presumably the `level` argument of papyruskv_fence/
 * papyruskv_barrier — confirm. */
#define PAPYRUSKV_MEMTABLE (1 << 0)
#define PAPYRUSKV_SSTABLE (1 << 1)
/* Bit flags occupying bits 0 through 6.
 * NOTE(review): SEQUENTIAL/RELAXED presumably feed papyruskv_consistency,
 * CREATE the `flags` argument of papyruskv_open, and RDWR/WRONLY/RDONLY/UDONLY
 * papyruskv_protect — confirm against the implementation. */
#define PAPYRUSKV_SEQUENTIAL (1 << 0)
#define PAPYRUSKV_RELAXED (1 << 1)
#define PAPYRUSKV_CREATE (1 << 2)
#define PAPYRUSKV_RDWR (1 << 3)
#define PAPYRUSKV_WRONLY (1 << 4)
#define PAPYRUSKV_RDONLY (1 << 5)
#define PAPYRUSKV_UDONLY (1 << 6)
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Iterator record: one key/value pair (pointer plus length for each) and an
 * opaque handle. NOTE(review): presumably filled in by papyruskv_iter_local /
 * papyruskv_iter_next; ownership of key/val is not visible here — confirm. */
struct _papyruskv_iter_t {
char* key;
size_t keylen;
char* val;
size_t vallen;
void* handle;
};
/* The public iterator type is a POINTER to the record above. */
typedef struct _papyruskv_iter_t* papyruskv_iter_t;
/* Position record wrapping a single opaque handle; passed by pointer to
 * papyruskv_get_pos and papyruskv_update. */
struct _papyruskv_pos_t {
void* handle;
};
typedef struct _papyruskv_pos_t papyruskv_pos_t;
/* User-supplied hash callback: receives a key, its length and a rank count,
 * and returns an int. NOTE(review): presumably the rank that owns the key —
 * confirm. */
typedef int (*papyruskv_hash_fn_t)(const char* key, size_t keylen, size_t nranks);
/* User-supplied update callback: receives a key, the value and its length by
 * pointer (so both can be replaced), plus caller-provided input and output
 * buffers with their lengths. NOTE(review): meaning of the int result is not
 * visible here — confirm. */
typedef int (*papyruskv_update_fn_t)(const char* key, size_t keylen, char** val, size_t* vallen, void* userin, size_t userinlen, void* userout, size_t useroutlen);
/* Options passed by pointer to papyruskv_open / papyruskv_restart.
 * NOTE(review): keylen/vallen look like size hints or fixed sizes — confirm. */
typedef struct {
size_t keylen;
size_t vallen;
papyruskv_hash_fn_t hash;
} papyruskv_option_t;
/*
 * Public C entry points of the key-value store. Databases are referred to by
 * the int handle written through the `db` out-parameter of papyruskv_open /
 * papyruskv_restart. Every function returns int.
 * NOTE(review): presumably PAPYRUSKV_OK on success and PAPYRUSKV_ERR on
 * failure — confirm against the implementation.
 */

/* Library start-up and shut-down. papyruskv_finalize is declared with (void)
 * so that C callers get a real prototype rather than an unchecked argument
 * list. */
extern int papyruskv_init(int* argc, char*** argv, const char* repository);
extern int papyruskv_finalize(void);

/* Opening and closing a database. */
extern int papyruskv_open(const char* name, int flags, papyruskv_option_t* opt, int* db);
extern int papyruskv_close(int db);

/* Key/value access. get and get_pos return the value through val/vallen;
 * NOTE(review): papyruskv_free presumably releases such a value — confirm. */
extern int papyruskv_put(int db, const char* key, size_t keylen, const char* val, size_t vallen);
extern int papyruskv_get(int db, const char* key, size_t keylen, char** val, size_t* vallen);
extern int papyruskv_get_pos(int db, const char* key, size_t keylen, char** val, size_t* vallen, papyruskv_pos_t* pos);
extern int papyruskv_delete(int db, const char* key, size_t keylen);
extern int papyruskv_free(void* val);

/* Synchronization between ranks. */
extern int papyruskv_fence(int db, int level);
extern int papyruskv_barrier(int db, int level);
extern int papyruskv_signal_notify(int signum, int* ranks, int count);
extern int papyruskv_signal_wait(int signum, int* ranks, int count);

/* Per-database modes. */
extern int papyruskv_consistency(int db, int consistency);
extern int papyruskv_protect(int db, int prot);

/* Operations that hand back an event through an int* out-parameter.
 * NOTE(review): presumably asynchronous and completed with papyruskv_wait —
 * confirm. */
extern int papyruskv_destroy(int db, int* event);
extern int papyruskv_checkpoint(int db, const char* path, int* event);
extern int papyruskv_restart(const char* path, const char* name, int flags, papyruskv_option_t* opt, int* db, int* event);
extern int papyruskv_wait(int db, int event);

/* Installs a hash callback for an existing database. */
extern int papyruskv_hash(int db, papyruskv_hash_fn_t hfn);

/* Iteration; `iter` points at a papyruskv_iter_t, which is itself a pointer. */
extern int papyruskv_iter_local(int db, papyruskv_iter_t* iter);
extern int papyruskv_iter_next(int db, papyruskv_iter_t* iter);

/* User-defined update functions, registered under an integer id and later
 * invoked by that id. */
extern int papyruskv_register_update(int db, int fnid, papyruskv_update_fn_t ufn);
extern int papyruskv_update(int db, const char* key, size_t keylen, papyruskv_pos_t* pos, int fnid, void* userin, size_t userinlen, void* userout, size_t useroutlen);
#ifdef __cplusplus
} /* end extern "C" */
#endif
#endif /* PAPYRUS_KV_INC_PAPYRUS_KV_H */
#ifndef PAPYRUS_INCLUDE_PAPYRUS_MPI_H
#define PAPYRUS_INCLUDE_PAPYRUS_MPI_H
#include <mpi.h>
#include <stdio.h>
#define MPI_Init(ARGC, ARGV) { \
int provided; \
MPI_Init_thread(ARGC, ARGV, MPI_THREAD_MULTIPLE, &provided); \
if (provided != MPI_THREAD_MULTIPLE) \
fprintf(stderr, "MPI_THREAD_MULTIPLE not supported[%d]\n", provided); \
}
#endif /* PAPYRUS_INCLUDE_PAPYRUS_MPI_H */
# Process the three subdirectories, in this order: src, tests, bin.
# NOTE(review): tests/ is added unconditionally, not guarded by BUILD_TESTING —
# confirm whether that is intended.
add_subdirectory(src)
add_subdirectory(tests)
add_subdirectory(bin)
#ifndef PAPYRUS_KV_DEBUG_H
#define PAPYRUS_KV_DEBUG_H
#include <stdio.h>
/* Compile-time switches for the logging macros defined later in this header.
 * Each _<LEVEL>_ENABLE symbol, when defined, turns the matching _<level>() /
 * __<level>() macros into printf calls; when it is not defined they expand to
 * nothing. Tracing is the only level switched off here. */
//#define _TRACE_ENABLE
#define _CHECK_ENABLE
#define _DEBUG_ENABLE
#define _INFO_ENABLE
#define _ERROR_ENABLE
#define _TODO_ENABLE
/* Colour palette for the logging macros. With _COLOR_DEBUG defined, each name
 * expands to an escape-sequence string literal that is concatenated into the
 * printf format; otherwise every name expands to nothing and the output is
 * plain text. The plain names use codes 31-37, the underscore-prefixed names
 * use codes 41-47. */
#define _COLOR_DEBUG
#ifdef _COLOR_DEBUG
#define RED "\033[22;31m"
#define GREEN "\033[22;32m"
#define YELLOW "\033[22;33m"
#define BLUE "\033[22;34m"
#define PURPLE "\033[22;35m"
#define CYAN "\033[22;36m"
#define GRAY "\033[22;37m"
#define _RED "\033[22;41m"
#define _GREEN "\033[22;42m"
#define _YELLOW "\033[22;43m"
#define _BLUE "\033[22;44m"
#define _PURPLE "\033[22;45m"
#define _CYAN "\033[22;46m"
#define _GRAY "\033[22;47m"
/* Written as \033 like the entries above: "\e" is a compiler extension, not a
 * standard C/C++ escape, while \033 is the same ESC byte spelled portably. */
#define RESET "\033[m"
#else
#define RED
#define GREEN
#define YELLOW
#define BLUE
#define PURPLE
#define CYAN
#define GRAY
#define _RED
#define _GREEN
#define _YELLOW
#define _BLUE
#define _PURPLE
#define _CYAN
#define _GRAY
#define RESET
#endif
/* Label printed at the start of every log line; defined in another
 * translation unit. */
extern char nick_[];
/*
 * Logging macros. Every enabled macro prints one line to stdout with printf,
 * in the form "[<tag>] <nick_> [<file>:<line>:<function>] <message>", and then
 * flushes stdout. The single-underscore form uses the plain colour name, the
 * double-underscore form the underscore-prefixed colour name.
 *
 * Usage notes that follow from the definitions:
 *  - `fmt` is pasted next to other string literals, so it must itself be a
 *    string literal.
 *  - __VA_ARGS__ follows a comma, so at least one argument after `fmt` is
 *    required in standard C/C++.
 *  - Each macro expands to a bare { ... } block; wrap the call in braces when
 *    it is the body of an if that has an else.
 *  - Even _error/__error write to stdout, not stderr.
 */
#ifdef _TRACE_ENABLE
#define _trace(fmt, ...) { printf( BLUE "[T] %s [%s:%d:%s] " fmt RESET "\n", nick_, __FILE__, __LINE__, __func__, __VA_ARGS__); fflush(stdout); }
#define __trace(fmt, ...) { printf(_BLUE "[T] %s [%s:%d:%s] " fmt RESET "\n", nick_, __FILE__, __LINE__, __func__, __VA_ARGS__); fflush(stdout); }
#else
#define _trace(fmt, ...)
#define __trace(fmt, ...)
#endif
/* Checkpoint marker: takes no message, prints only the location. */
#ifdef _CHECK_ENABLE
#define _check() { printf( PURPLE "[C] %s [%s:%d:%s]" RESET "\n", nick_, __FILE__, __LINE__, __func__); fflush(stdout); }
#define __check() { printf(_PURPLE "[C] %s [%s:%d:%s]" RESET "\n", nick_, __FILE__, __LINE__, __func__); fflush(stdout); }
#else
#define _check()
#define __check()
#endif
#ifdef _DEBUG_ENABLE
#define _debug(fmt, ...) { printf( CYAN "[D] %s [%s:%d:%s] " fmt RESET "\n", nick_, __FILE__, __LINE__, __func__, __VA_ARGS__); fflush(stdout); }
#define __debug(fmt, ...) { printf(_CYAN "[D] %s [%s:%d:%s] " fmt RESET "\n", nick_, __FILE__, __LINE__, __func__, __VA_ARGS__); fflush(stdout); }
#else
#define _debug(fmt, ...)
#define __debug(fmt, ...)
#endif
#ifdef _INFO_ENABLE
#define _info(fmt, ...) { printf( YELLOW "[I] %s [%s:%d:%s] " fmt RESET "\n", nick_, __FILE__, __LINE__, __func__, __VA_ARGS__); fflush(stdout); }
#define __info(fmt, ...) { printf(_YELLOW "[I] %s [%s:%d:%s] " fmt RESET "\n", nick_, __FILE__, __LINE__, __func__, __VA_ARGS__); fflush(stdout); }
#else
#define _info(fmt, ...)
#define __info(fmt, ...)
#endif
#ifdef _ERROR_ENABLE
#define _error(fmt, ...) { printf( RED "[E] %s [%s:%d:%s] " fmt RESET "\n", nick_, __FILE__, __LINE__, __func__, __VA_ARGS__); fflush(stdout); }
#define __error(fmt, ...) { printf(_RED "[E] %s [%s:%d:%s] " fmt RESET "\n", nick_, __FILE__, __LINE__, __func__, __VA_ARGS__); fflush(stdout); }
#else
#define _error(fmt, ...)
#define __error(fmt, ...)
#endif
#ifdef _TODO_ENABLE
#define _todo(fmt, ...) { printf( GREEN "[TODO] %s [%s:%d:%s] " fmt RESET "\n", nick_, __FILE__, __LINE__, __func__, __VA_ARGS__); fflush(stdout); }
#define __todo(fmt, ...) { printf(_GREEN "[TODO] %s [%s:%d:%s] " fmt RESET "\n", nick_, __FILE__, __LINE__, __func__, __VA_ARGS__); fflush(stdout); }
#else
#define _todo(fmt, ...)
#define __todo(fmt, ...)
#endif
#endif /* PAPYRUS_KV_DEBUG_H */
/****************************************************************/
/* Parallel Combinatorial BLAS Library (for Graph Computations) */
/* version 1.2 -------------------------------------------------*/
/* date: 10/06/2011 --------------------------------------------*/
/* authors: Aydin Buluc (abuluc@lbl.gov), Adam Lugowski --------*/
/****************************************************************/
/*
Copyright (c) 2011, Aydin Buluc
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef _DELETER_H_
#define _DELETER_H_
#include <iostream>
using namespace std;
// Function object that frees a single heap object, but only when a
// caller-supplied binary predicate approves.
struct DeletePtrIf
{
    // Evaluates cond(first, second); when the result is true, `ptr` is released
    // with scalar delete. When it is false nothing happens and the caller
    // still owns `ptr`.
    template<typename T, typename BinaryPred, typename Arg>
    void operator()(const T *ptr, BinaryPred cond, Arg first, Arg second) const
    {
        if (cond(first, second)) {
            delete ptr;
        }
    }
};
// DeleteAll(a1, ..., aN), for N = 1..6: releases every argument with
// array delete (delete []). Arrays are always freed from the last argument
// back to the first. Each argument must therefore come from new[] (or be a
// null pointer).

// One array.
template<typename A>
void DeleteAll(A arr1)
{
    delete [] arr1;
}

// Two arrays: arr2 first, then arr1.
template<typename A, typename B>
void DeleteAll(A arr1, B arr2)
{
    delete [] arr2;
    delete [] arr1;
}

// Three arrays: arr3, arr2, arr1.
template<typename A, typename B, typename C>
void DeleteAll(A arr1, B arr2, C arr3)
{
    delete [] arr3;
    delete [] arr2;
    delete [] arr1;
}

// Four arrays: arr4 down to arr1.
template<typename A, typename B, typename C, typename D>
void DeleteAll(A arr1, B arr2, C arr3, D arr4)
{
    delete [] arr4;
    delete [] arr3;
    delete [] arr2;
    delete [] arr1;
}

// Five arrays: arr5 down to arr1.
template<typename A, typename B, typename C, typename D, typename E>
void DeleteAll(A arr1, B arr2, C arr3, D arr4, E arr5)
{
    delete [] arr5;
    delete [] arr4;
    delete [] arr3;
    delete [] arr2;
    delete [] arr1;
}

// Six arrays: arr6 down to arr1.
template<typename A, typename B, typename C, typename D, typename E, typename F>
void DeleteAll(A arr1, B arr2, C arr3, D arr4, E arr5, F arr6)
{
    delete [] arr6;
    delete [] arr5;
    delete [] arr4;
    delete [] arr3;
    delete [] arr2;
    delete [] arr1;
}
#endif
#ifndef _FRIENDS_H_
#define _FRIENDS_H_
#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <fstream>
#include <sstream>
#include "Kmer.hpp"
#include "KmerIterator.hpp"
#include "Deleter.h"
#include <sys/stat.h>
using namespace std;
// Compile-time k-mer capacity; only defined here when nothing earlier in the
// translation unit has already set it.
#ifndef MAX_KMER_SIZE
#define MAX_KMER_SIZE 64
#endif
// Number of 64-bit words needed for MAX_KMER_SIZE bases.
// 32 = numbits(uint64_t)/2 - with 2 being the number of bits needed per nucleotide.
// The expansion is fully parenthesized so that KMERLONGS keeps its value when
// used inside a larger expression (e.g. `n / KMERLONGS`, `x % KMERLONGS`).
#define KMERLONGS ((MAX_KMER_SIZE)/32)
// Raw storage for one packed k-mer: KMERLONGS 64-bit words.
typedef std::array<uint64_t, KMERLONGS> MERARR;
// Name/size record for a single file.
struct filedata
{
    char   filename[256];  // fixed-size character buffer for the file name
    size_t filesize;       // NOTE(review): presumably the size in bytes - confirm
};
// Streams a uint8_t as its decimal numeric value instead of as a character.
// Marked inline because the definition lives in a header: without it every
// translation unit that includes this file would emit its own copy and the
// link would fail with duplicate symbols.
inline std::ostream & operator<<(std::ostream & os, uint8_t val)
{
    return os << static_cast<int>(val);
}
// Flat k-mer/count record. Used in place of pair<MERARR,int> (the value_type of
// a map), which is not guaranteed to be contiguous in memory.
// All comparisons look at `arr` only; `count` never takes part in them.
struct kmerpack
{
    MERARR arr;   // packed k-mer words
    int count;    // count stored alongside the k-mer

    bool operator == (const kmerpack & rhs) const
    {
        return arr == rhs.arr;
    }

    bool operator < (const kmerpack & rhs) const
    {
        return arr < rhs.arr;
    }

    bool operator > (const kmerpack & rhs) const
    {
        return arr > rhs.arr;
    }
};
// Flat record holding a packed k-mer, its count, one left and one right
// extension character, and min/max counts for each side (see PackIntoUFX for
// how those fields are filled).
// Size note carried over from the original: 38 bytes for k=51, i.e. the sum of
// the member sizes. NOTE(review): sizeof(ufxpack) may be larger because of
// alignment padding - confirm before relying on it.
// All comparisons look at `arr` only.
struct ufxpack
{
    MERARR arr;     // ~128 bits = 16 bytes for k=51
    int count;
    char left;
    char right;
    int leftmin;
    int leftmax;
    int rightmin;
    int rightmax;

    bool operator == (const ufxpack & rhs) const
    {
        return arr == rhs.arr;
    }

    bool operator < (const ufxpack & rhs) const
    {
        return arr < rhs.arr;
    }

    bool operator > (const ufxpack & rhs) const
    {
        return arr > rhs.arr;
    }
};
void PackIntoUFX(array<int,4> & leftcnt, array<int,4> & righcnt, int count, ufxpack & pack)
{
pair<int, char> lsort[4] = {make_pair(leftcnt[0], 'A'), make_pair(leftcnt[1], 'C'), make_pair(leftcnt[2], 'G'), make_pair(leftcnt[3], 'T')};
pair<int, char> rsort[4] = {make_pair(righcnt[0], 'A'), make_pair(righcnt[1], 'C'), make_pair(righcnt[2], 'G'), make_pair(righcnt[3], 'T')};
sort(lsort, lsort+4);
sort(rsort, rsort+4);
pack.left = lsort[3].second; // max entry guarenteed to exist
pack.leftmax = lsort[3].first;
pack.leftmin = lsort[2].first;
pack.right = rsort[3].second;
pack.rightmax = rsort[3].first;
pack.rightmin = rsort[2].first;
pack.count = count;
}
// Record pairing a packed k-mer with two extension characters.
// All comparisons look at `karr` only; extA/extB never take part in them.
struct SNPdata
{
    MERARR karr;  // packed k-mer words
    char extA;
    char extB;

    bool operator == (const SNPdata & rhs) const
    {
        return karr == rhs.karr;
    }

    bool operator < (const SNPdata & rhs) const
    {
        return karr < rhs.karr;
    }

    bool operator > (const SNPdata & rhs) const
    {
        return karr > rhs.karr;
    }
};
#endif
This diff is collapsed.
#ifndef BFG_KMER_HPP
#define BFG_KMER_HPP
#ifndef MAX_KMER_SIZE
#define MAX_KMER_SIZE 64 // ABAB: This code will probably crush if this is not a multiple of 32
#endif
#include <stdio.h>
#include <stdint.h>
#include <cassert>
#include <cstring>
#include <string>
#include <array>
#include "hash.hpp"
/* Short description:
* - Store kmer strings by using 2 bits per base instead of 8
* - Easily return reverse complements of kmers, e.g. TTGG -> CCAA
* - Easily compare kmers
* - Provide hash of kmers
* - Get last and next kmer, e.g. ACGT -> CGTT or ACGT -> AACGT
* */
class Kmer {
public:
Kmer();
Kmer(const Kmer& o);
explicit Kmer(const char *s);
void copyDataFrom(uint8_t * mybytes) // this is like a shadow constructor (to avoid accidental signature match with the existing constructor)
{
memcpy(longs, mybytes, sizeof(uint64_t) * (MAX_K/32));
}
explicit Kmer(const std::array<uint64_t, MAX_KMER_SIZE/32> & arr)
{
std::memcpy (longs, arr.data(), sizeof(uint64_t) * (MAX_K/32));
}
Kmer& operator=(const Kmer& o);
void set_deleted();
bool operator<(const Kmer& o) const;
bool operator==(const Kmer& o) const;
bool operator!=(const Kmer& o) const {
return !(*this == o);
}
void set_kmer(const char *s);
uint64_t hash() const;
Kmer twin() const;
Kmer rep() const; // ABAB: return the smaller of itself (lexicographically) or its reversed-complement (i.e. twin)
Kmer getLink(const size_t index) const;
Kmer forwardBase(const char b) const;
Kmer backwardBase(const char b) const;
std::string getBinary() const;
void toString(char * s) const;
std::string toString() const;
void copyDataInto(void * pointer) const
{
// void * memcpy ( void * destination, const void * source, size_t num );
memcpy(pointer, longs, sizeof(uint64_t) * (MAX_K/32));
}
// ABAB: return the raw data packed in an std::array
// this preserves the lexicographical order on k-mers
// i.e. A.toString() < B.toString <=> A.getArray() < B.getArray()
std::array<uint64_t, MAX_KMER_SIZE/32> getArray()
{
std::array<uint64_t,MAX_K/32> i64array;
std::memcpy (i64array.data(),longs,sizeof(uint64_t) * (MAX_K/32));
<