Loading docs/user_guide/source/engines/sst.rst +29 −12 Original line number Diff line number Diff line Loading @@ -136,18 +136,34 @@ satisfying, but has a similar long-term effect upon the set of steps delivered to the readers.) This value is interpreted by SST Writer engines only. 5. **DataTransport**: Default **"RDMA"**. This string value specifies the underlying network communication mechanism to use for exchanging data in SST. Current allowed values are **"RDMA"** and **"WAN"**. (**ib** and **fabric** are accepted as equivalent to **RDMA** and **evpath** is equivalent to **WAN**.) Generally both the reader and writer should be using the same network transport, and the network transport chosen may be dictated by the situation. For example, the RDMA transport generally operates only between applications running on the same high-performance interconnect (e.g. on the same HPC machine). If communication is desired between applications running on different interconnects, the Wide Area Network (WAN) option should be chosen. This value is interpreted by both SST Writer and Reader engines. 5. **DataTransport**: Default **varies**. This string value specifies the underlying network communication mechanism to use for exchanging data in SST. Generally this is chosen by SST based upon what is available on the current platform. However, specifying this engine parameter allows overriding SST's choice. Current allowed values are **"RDMA"** and **"WAN"**. (**ib** and **fabric** are accepted as equivalent to **RDMA** and **evpath** is equivalent to **WAN**.) Generally both the reader and writer should be using the same network transport, and the network transport chosen may be dictated by the situation. For example, the RDMA transport generally operates only between applications running on the same high-performance interconnect (e.g. on the same HPC machine). If communication is desired between applications running on different interconnects, the Wide Area Network (WAN) option should be chosen. This value is interpreted by both SST Writer and Reader engines. 5. **NetworkInterface**: Default **NULL**. In situations in which there are multiple possible network interfaces available to SST, this string value specifies which should be used to generate SST's contact information for writers. Generally this should *NOT* be specified except for narrow sets of circumstances. It has no effect if specified on Reader engines. If specified, the string value should correspond to a name of a network interface, such as are listed by commands like "netstat -i". For example, on most Unix systems, setting the NetworkInterface parameter to "lo" (or possibly "lo0") will result in SST generating contact information that uses the network address associated with the loopback interface (127.0.0.1). This value is interpreted by only by the SST Writer engine. ==================== ===================== ========================================================= **Key** **Value Format** **Default** and Examples Loading @@ -157,4 +173,5 @@ SST Writer and Reader engines. QueueLimit integer **0** (no queue limits) QueueFullPolicy string **Block**, Discard DataTransport string **default varies by platform**, RDMA, WAN NetworkInterface string **NULL** ==================== ===================== ========================================================= source/adios2/engine/sst/SstReader.cpp +18 −2 Original line number Diff line number Diff line Loading @@ -116,13 +116,29 @@ SstReader::SstReader(IO &io, const std::string &name, const Mode mode, SstReader::~SstReader() { SstStreamDestroy(m_Input); } StepStatus SstReader::BeginStep(StepMode mode, const float timeout_sec) StepStatus SstReader::BeginStep(StepMode Mode, const float timeout_sec) { SstStatusValue result; SstStepMode StepMode; switch (Mode) { case adios2::StepMode::Append: case adios2::StepMode::Update: throw std::invalid_argument( "ERROR: SstReader::BeginStep inappropriate StepMode specified" + m_EndMessage); break; case adios2::StepMode::NextAvailable: StepMode = SstNextAvailable; break; case adios2::StepMode::LatestAvailable: StepMode = SstLatestAvailable; break; } m_IO.RemoveAllVariables(); m_IO.RemoveAllAttributes(); result = SstAdvanceStep(m_Input, (int)mode, timeout_sec); result = SstAdvanceStep(m_Input, StepMode, timeout_sec); if (result == SstEndOfStream) { return StepStatus::EndOfStream; Loading source/adios2/toolkit/sst/cp/cp_internal.h +3 −0 Original line number Diff line number Diff line Loading @@ -50,6 +50,9 @@ enum StreamStatus Closed }; static char *SSTStreamStatusStr[] = {"NotOpen", "Established", "PeerClosed", "PeerFailed", "Closed"}; typedef struct _WS_ReaderInfo { SstStream ParentStream; Loading source/adios2/toolkit/sst/cp/cp_reader.c +8 −7 Original line number Diff line number Diff line Loading @@ -153,8 +153,8 @@ static void ReaderConnCloseHandler(CManager cm, CMConnection ClosedConn, fprintf(stderr, "Got an unexpected connection close event\n"); CP_verbose(Stream, "Reader-side Rank received a " "connection-close event in unexpected " "state %d\n", Stream->Status); "status %s\n", SSTStreamStatusStr[Stream->Status]); } } Loading Loading @@ -762,7 +762,8 @@ static TSMetadataList waitForNextMetadata(SstStream Stream, long LastTimestep) CP_verbose(Stream, "Waiting for metadata for a Timestep later than TS %d\n", LastTimestep); CP_verbose(Stream, "Stream status is %d\n", Stream->Status); CP_verbose(Stream, "(PID %x) Stream status is %s\n", getpid(), SSTStreamStatusStr[Stream->Status]); /* wait until we get the timestep metadata or something else changes */ pthread_cond_wait(&Stream->DataCondition, &Stream->DataLock); } Loading Loading @@ -880,7 +881,7 @@ extern void SstReleaseStep(SstStream Stream) * wait for metadata for Timestep indicated to arrive, or fail with EndOfStream * or Error */ extern SstStatusValue SstAdvanceStep(SstStream Stream, int mode, extern SstStatusValue SstAdvanceStep(SstStream Stream, SstStepMode mode, const float timeout_sec) { Loading @@ -892,7 +893,7 @@ extern SstStatusValue SstAdvanceStep(SstStream Stream, int mode, Stream->CurrentMetadata = NULL; } if ((timeout_sec >= 0.0) || (mode == 3 /* LatestAvailable*/)) if ((timeout_sec >= 0.0) || (mode == SstLatestAvailable)) { struct _GlobalOpInfo { Loading Loading @@ -973,7 +974,7 @@ extern SstStatusValue SstAdvanceStep(SstStream Stream, int mode, * others will see shortly. I'm going to go with Biggest * until I have a reason to prefer one or the other. */ if (mode == 3) if (mode == SstLatestAvailable) { // latest available CP_verbose(Stream, "Returning Biggest timestep available " Loading Loading @@ -1009,7 +1010,7 @@ extern SstStatusValue SstAdvanceStep(SstStream Stream, int mode, CP_verbose(Stream, "Advancestep timing out on no data\n"); return SstTimeout; } if (mode == 3) if (mode == SstLatestAvailable) { // latest available /* release all timesteps from before NextTimestep, then fall Loading source/adios2/toolkit/sst/cp/cp_writer.c +6 −4 Original line number Diff line number Diff line Loading @@ -230,8 +230,8 @@ static void WriterConnCloseHandler(CManager cm, CMConnection closed_conn, fprintf(stderr, "Got an unexpected connection close event\n"); CP_verbose(ParentWriterStream, "Writer-side Rank received a " "connection-close event in unexpected " "state %d\n", WSreader->ReaderStatus); "state %s\n", SSTStreamStatusStr[WSreader->ReaderStatus]); PTHREAD_MUTEX_UNLOCK(&ParentWriterStream->DataLock); CP_PeerFailCloseWSReader(WSreader, PeerFailed); PTHREAD_MUTEX_LOCK(&ParentWriterStream->DataLock); Loading Loading @@ -833,6 +833,8 @@ static void CP_PeerFailCloseWSReader(WS_ReaderInfo CP_WSR_Stream, CMadd_delayed_task(ParentStream->CPInfo->cm, 2, 0, CloseWSRStream, CP_WSR_Stream); } CP_verbose(ParentStream, "Moving stream %p to status %s\n", CP_WSR_Stream, SSTStreamStatusStr[NewState]); CP_WSR_Stream->ReaderStatus = NewState; /* main thread might be waiting on timesteps going away */ pthread_cond_signal(&ParentStream->DataCondition); Loading Loading @@ -876,8 +878,8 @@ void SstWriterClose(SstStream Stream) CP_verbose(Stream, "Reader Count is %d\n", Stream->ReaderCount); for (int i = 0; i < Stream->ReaderCount; i++) { CP_verbose(Stream, "Reader [%d] status is %d\n", i, Stream->Readers[i]->ReaderStatus); CP_verbose(Stream, "Reader [%d] status is %s\n", i, SSTStreamStatusStr[Stream->Readers[i]->ReaderStatus]); } /* NEED TO HANDLE FAILURE HERE */ pthread_cond_wait(&Stream->DataCondition, &Stream->DataLock); Loading Loading
docs/user_guide/source/engines/sst.rst +29 −12 Original line number Diff line number Diff line Loading @@ -136,18 +136,34 @@ satisfying, but has a similar long-term effect upon the set of steps delivered to the readers.) This value is interpreted by SST Writer engines only. 5. **DataTransport**: Default **"RDMA"**. This string value specifies the underlying network communication mechanism to use for exchanging data in SST. Current allowed values are **"RDMA"** and **"WAN"**. (**ib** and **fabric** are accepted as equivalent to **RDMA** and **evpath** is equivalent to **WAN**.) Generally both the reader and writer should be using the same network transport, and the network transport chosen may be dictated by the situation. For example, the RDMA transport generally operates only between applications running on the same high-performance interconnect (e.g. on the same HPC machine). If communication is desired between applications running on different interconnects, the Wide Area Network (WAN) option should be chosen. This value is interpreted by both SST Writer and Reader engines. 5. **DataTransport**: Default **varies**. This string value specifies the underlying network communication mechanism to use for exchanging data in SST. Generally this is chosen by SST based upon what is available on the current platform. However, specifying this engine parameter allows overriding SST's choice. Current allowed values are **"RDMA"** and **"WAN"**. (**ib** and **fabric** are accepted as equivalent to **RDMA** and **evpath** is equivalent to **WAN**.) Generally both the reader and writer should be using the same network transport, and the network transport chosen may be dictated by the situation. For example, the RDMA transport generally operates only between applications running on the same high-performance interconnect (e.g. on the same HPC machine). If communication is desired between applications running on different interconnects, the Wide Area Network (WAN) option should be chosen. This value is interpreted by both SST Writer and Reader engines. 5. **NetworkInterface**: Default **NULL**. In situations in which there are multiple possible network interfaces available to SST, this string value specifies which should be used to generate SST's contact information for writers. Generally this should *NOT* be specified except for narrow sets of circumstances. It has no effect if specified on Reader engines. If specified, the string value should correspond to a name of a network interface, such as are listed by commands like "netstat -i". For example, on most Unix systems, setting the NetworkInterface parameter to "lo" (or possibly "lo0") will result in SST generating contact information that uses the network address associated with the loopback interface (127.0.0.1). This value is interpreted by only by the SST Writer engine. ==================== ===================== ========================================================= **Key** **Value Format** **Default** and Examples Loading @@ -157,4 +173,5 @@ SST Writer and Reader engines. QueueLimit integer **0** (no queue limits) QueueFullPolicy string **Block**, Discard DataTransport string **default varies by platform**, RDMA, WAN NetworkInterface string **NULL** ==================== ===================== =========================================================
source/adios2/engine/sst/SstReader.cpp +18 −2 Original line number Diff line number Diff line Loading @@ -116,13 +116,29 @@ SstReader::SstReader(IO &io, const std::string &name, const Mode mode, SstReader::~SstReader() { SstStreamDestroy(m_Input); } StepStatus SstReader::BeginStep(StepMode mode, const float timeout_sec) StepStatus SstReader::BeginStep(StepMode Mode, const float timeout_sec) { SstStatusValue result; SstStepMode StepMode; switch (Mode) { case adios2::StepMode::Append: case adios2::StepMode::Update: throw std::invalid_argument( "ERROR: SstReader::BeginStep inappropriate StepMode specified" + m_EndMessage); break; case adios2::StepMode::NextAvailable: StepMode = SstNextAvailable; break; case adios2::StepMode::LatestAvailable: StepMode = SstLatestAvailable; break; } m_IO.RemoveAllVariables(); m_IO.RemoveAllAttributes(); result = SstAdvanceStep(m_Input, (int)mode, timeout_sec); result = SstAdvanceStep(m_Input, StepMode, timeout_sec); if (result == SstEndOfStream) { return StepStatus::EndOfStream; Loading
source/adios2/toolkit/sst/cp/cp_internal.h +3 −0 Original line number Diff line number Diff line Loading @@ -50,6 +50,9 @@ enum StreamStatus Closed }; static char *SSTStreamStatusStr[] = {"NotOpen", "Established", "PeerClosed", "PeerFailed", "Closed"}; typedef struct _WS_ReaderInfo { SstStream ParentStream; Loading
source/adios2/toolkit/sst/cp/cp_reader.c +8 −7 Original line number Diff line number Diff line Loading @@ -153,8 +153,8 @@ static void ReaderConnCloseHandler(CManager cm, CMConnection ClosedConn, fprintf(stderr, "Got an unexpected connection close event\n"); CP_verbose(Stream, "Reader-side Rank received a " "connection-close event in unexpected " "state %d\n", Stream->Status); "status %s\n", SSTStreamStatusStr[Stream->Status]); } } Loading Loading @@ -762,7 +762,8 @@ static TSMetadataList waitForNextMetadata(SstStream Stream, long LastTimestep) CP_verbose(Stream, "Waiting for metadata for a Timestep later than TS %d\n", LastTimestep); CP_verbose(Stream, "Stream status is %d\n", Stream->Status); CP_verbose(Stream, "(PID %x) Stream status is %s\n", getpid(), SSTStreamStatusStr[Stream->Status]); /* wait until we get the timestep metadata or something else changes */ pthread_cond_wait(&Stream->DataCondition, &Stream->DataLock); } Loading Loading @@ -880,7 +881,7 @@ extern void SstReleaseStep(SstStream Stream) * wait for metadata for Timestep indicated to arrive, or fail with EndOfStream * or Error */ extern SstStatusValue SstAdvanceStep(SstStream Stream, int mode, extern SstStatusValue SstAdvanceStep(SstStream Stream, SstStepMode mode, const float timeout_sec) { Loading @@ -892,7 +893,7 @@ extern SstStatusValue SstAdvanceStep(SstStream Stream, int mode, Stream->CurrentMetadata = NULL; } if ((timeout_sec >= 0.0) || (mode == 3 /* LatestAvailable*/)) if ((timeout_sec >= 0.0) || (mode == SstLatestAvailable)) { struct _GlobalOpInfo { Loading Loading @@ -973,7 +974,7 @@ extern SstStatusValue SstAdvanceStep(SstStream Stream, int mode, * others will see shortly. I'm going to go with Biggest * until I have a reason to prefer one or the other. */ if (mode == 3) if (mode == SstLatestAvailable) { // latest available CP_verbose(Stream, "Returning Biggest timestep available " Loading Loading @@ -1009,7 +1010,7 @@ extern SstStatusValue SstAdvanceStep(SstStream Stream, int mode, CP_verbose(Stream, "Advancestep timing out on no data\n"); return SstTimeout; } if (mode == 3) if (mode == SstLatestAvailable) { // latest available /* release all timesteps from before NextTimestep, then fall Loading
source/adios2/toolkit/sst/cp/cp_writer.c +6 −4 Original line number Diff line number Diff line Loading @@ -230,8 +230,8 @@ static void WriterConnCloseHandler(CManager cm, CMConnection closed_conn, fprintf(stderr, "Got an unexpected connection close event\n"); CP_verbose(ParentWriterStream, "Writer-side Rank received a " "connection-close event in unexpected " "state %d\n", WSreader->ReaderStatus); "state %s\n", SSTStreamStatusStr[WSreader->ReaderStatus]); PTHREAD_MUTEX_UNLOCK(&ParentWriterStream->DataLock); CP_PeerFailCloseWSReader(WSreader, PeerFailed); PTHREAD_MUTEX_LOCK(&ParentWriterStream->DataLock); Loading Loading @@ -833,6 +833,8 @@ static void CP_PeerFailCloseWSReader(WS_ReaderInfo CP_WSR_Stream, CMadd_delayed_task(ParentStream->CPInfo->cm, 2, 0, CloseWSRStream, CP_WSR_Stream); } CP_verbose(ParentStream, "Moving stream %p to status %s\n", CP_WSR_Stream, SSTStreamStatusStr[NewState]); CP_WSR_Stream->ReaderStatus = NewState; /* main thread might be waiting on timesteps going away */ pthread_cond_signal(&ParentStream->DataCondition); Loading Loading @@ -876,8 +878,8 @@ void SstWriterClose(SstStream Stream) CP_verbose(Stream, "Reader Count is %d\n", Stream->ReaderCount); for (int i = 0; i < Stream->ReaderCount; i++) { CP_verbose(Stream, "Reader [%d] status is %d\n", i, Stream->Readers[i]->ReaderStatus); CP_verbose(Stream, "Reader [%d] status is %s\n", i, SSTStreamStatusStr[Stream->Readers[i]->ReaderStatus]); } /* NEED TO HANDLE FAILURE HERE */ pthread_cond_wait(&Stream->DataCondition, &Stream->DataLock); Loading