Unverified Commit fea264e8 authored by Junmin Gu's avatar Junmin Gu Committed by GitHub
Browse files

Merge pull request #3660 from guj/profiler_keywords

An effort to make profiler more user friendly
parents 643e3c75 8c0e722b
Loading
Loading
Loading
Loading
+25 −21
Original line number Diff line number Diff line
@@ -91,7 +91,7 @@ StepStatus BP5Writer::BeginStep(StepMode mode, const float timeoutSeconds)
        TimePoint wait_start = Now();
        if (m_WriteFuture.valid())
        {
            m_Profiler.Start("WaitOnAsync");
            m_Profiler.Start("BS_WaitOnAsync");
            m_WriteFuture.get();
            m_Comm.Barrier();
            AsyncWriteDataCleanup();
@@ -110,7 +110,7 @@ StepStatus BP5Writer::BeginStep(StepMode mode, const float timeoutSeconds)
                              << std::endl;
                }
            }
            m_Profiler.Stop("WaitOnAsync");
            m_Profiler.Stop("BS_WaitOnAsync");
        }
    }

@@ -529,9 +529,9 @@ void BP5Writer::EndStep()
      std::cout << "END STEP starts at: " << ts.count() << std::endl; */
    m_BetweenStepPairs = false;
    PERFSTUBS_SCOPED_TIMER("BP5Writer::EndStep");
    m_Profiler.Start("endstep");
    m_Profiler.Start("ES");

    m_Profiler.Start("close_ts");
    m_Profiler.Start("ES_close");
    MarshalAttributes();

    // true: advances step
@@ -542,9 +542,9 @@ void BP5Writer::EndStep()
     * AttributeEncodeBuffer and the data encode Vector */

    m_ThisTimestepDataSize += TSInfo.DataBuffer->Size();
    m_Profiler.Stop("close_ts");
    m_Profiler.Stop("ES_close");

    m_Profiler.Start("AWD");
    m_Profiler.Start("ES_AWD");
    // TSInfo destructor would delete the DataBuffer so we need to save it
    // for async IO and let the writer free it up when not needed anymore
    m_AsyncWriteLock.lock();
@@ -555,12 +555,12 @@ void BP5Writer::EndStep()
    WriteData(TSInfo.DataBuffer);
    TSInfo.DataBuffer = NULL;

    m_Profiler.Stop("AWD");
    m_Profiler.Stop("ES_AWD");

    /*
     * Two-step metadata aggregation
     */
    m_Profiler.Start("meta_lvl1");
    m_Profiler.Start("ES_meta1");
    std::vector<char> MetaBuffer;
    core::iovec m{TSInfo.MetaEncodeBuffer->Data(),
                  TSInfo.MetaEncodeBuffer->m_FixedSize};
@@ -576,7 +576,7 @@ void BP5Writer::EndStep()

    if (m_Aggregator->m_Comm.Size() > 1)
    { // level 1
        m_Profiler.Start("meta_gather1");
        m_Profiler.Start("ES_meta1_gather");
        size_t LocalSize = MetaBuffer.size();
        std::vector<size_t> RecvCounts =
            m_Aggregator->m_Comm.GatherValues(LocalSize, 0);
@@ -594,7 +594,7 @@ void BP5Writer::EndStep()
        m_Aggregator->m_Comm.GathervArrays(MetaBuffer.data(), LocalSize,
                                           RecvCounts.data(), RecvCounts.size(),
                                           RecvBuffer.data(), 0);
        m_Profiler.Stop("meta_gather1");
        m_Profiler.Stop("ES_meta1_gather");
        if (m_Aggregator->m_Comm.Rank() == 0)
        {
            std::vector<format::BP5Base::MetaMetaInfoBlock>
@@ -612,8 +612,8 @@ void BP5Writer::EndStep()
                WriterDataPositions);
        }
    } // level 1
    m_Profiler.Stop("meta_lvl1");
    m_Profiler.Start("meta_lvl2");
    m_Profiler.Stop("ES_meta1");
    m_Profiler.Start("ES_meta2");
    // level 2
    if (m_Aggregator->m_Comm.Rank() == 0)
    {
@@ -623,7 +623,7 @@ void BP5Writer::EndStep()
        size_t LocalSize = MetaBuffer.size();
        if (m_CommAggregators.Size() > 1)
        {
            m_Profiler.Start("meta_gather2");
            m_Profiler.Start("ES_meta2_gather");
            RecvCounts = m_CommAggregators.GatherValues(LocalSize, 0);
            if (m_CommAggregators.Rank() == 0)
            {
@@ -640,7 +640,7 @@ void BP5Writer::EndStep()
                MetaBuffer.data(), LocalSize, RecvCounts.data(),
                RecvCounts.size(), RecvBuffer.data(), 0);
            buf = &RecvBuffer;
            m_Profiler.Stop("meta_gather2");
            m_Profiler.Stop("ES_meta2_gather");
        }
        else
        {
@@ -670,7 +670,7 @@ void BP5Writer::EndStep()
            }
        }
    } // level 2
    m_Profiler.Stop("meta_lvl2");
    m_Profiler.Stop("ES_meta2");

    if (m_Parameters.AsyncWrite)
    {
@@ -685,7 +685,7 @@ void BP5Writer::EndStep()
        }
    }

    m_Profiler.Stop("endstep");
    m_Profiler.Stop("ES");
    m_WriterStep++;
    m_EndStepEnd = Now();
    /* Seconds ts2 = Now() - m_EngineStart;
@@ -1555,7 +1555,11 @@ void BP5Writer::FlushData(const bool isFinal)

void BP5Writer::Flush(const int transportIndex) {}

void BP5Writer::PerformDataWrite() { FlushData(false); }
void BP5Writer::PerformDataWrite() {
  m_Profiler.Start("PDW");
  FlushData(false);
  m_Profiler.Stop("PDW");
}

void BP5Writer::DestructorClose(bool Verbose) noexcept
{
@@ -1597,13 +1601,13 @@ void BP5Writer::DoClose(const int transportIndex)
    Seconds wait(0.0);
    if (m_WriteFuture.valid())
    {
        m_Profiler.Start("WaitOnAsync");
        m_Profiler.Start("DC_WaitOnAsync1");
        m_AsyncWriteLock.lock();
        m_flagRush = true;
        m_AsyncWriteLock.unlock();
        m_WriteFuture.get();
        wait += Now() - wait_start;
        m_Profiler.Stop("WaitOnAsync");
        m_Profiler.Stop("DC_WaitOnAsync1");
    }

    m_FileDataManager.CloseFiles(transportIndex);
@@ -1621,7 +1625,7 @@ void BP5Writer::DoClose(const int transportIndex)
    if (m_Parameters.AsyncWrite)
    {
        // wait until all process' writing thread completes
        m_Profiler.Start("WaitOnAsync");
        m_Profiler.Start("DC_WaitOnAsync2");
        wait_start = Now();
        m_Comm.Barrier();
        AsyncWriteDataCleanup();
@@ -1631,7 +1635,7 @@ void BP5Writer::DoClose(const int transportIndex)
            std::cout << "Close waited " << wait.count()
                      << " seconds on async threads" << std::endl;
        }
        m_Profiler.Stop("WaitOnAsync");
        m_Profiler.Stop("DC_WaitOnAsync2");
    }

    if (m_Comm.Rank() == 0)
+16 −10
Original line number Diff line number Diff line
@@ -40,15 +40,21 @@ JSONProfiler::JSONProfiler(helper::Comm const &comm) : m_Comm(comm)
    m_Profiler.m_IsActive = true; // default is true

    AddTimerWatch("buffering");
    AddTimerWatch("endstep");
    AddTimerWatch("ES");
    AddTimerWatch("PP");
    AddTimerWatch("meta_gather1", false);
    AddTimerWatch("meta_gather2", false);
    AddTimerWatch("meta_lvl1");
    AddTimerWatch("meta_lvl2");
    AddTimerWatch("close_ts");
    AddTimerWatch("AWD");
    AddTimerWatch("ES_meta1_gather", false);
    AddTimerWatch("ES_meta2_gather", false);

    AddTimerWatch("ES_meta1");
    AddTimerWatch("ES_meta2");

    AddTimerWatch("ES_close");
    AddTimerWatch("ES_AWD");
    AddTimerWatch("WaitOnAsync");
    AddTimerWatch("BS_WaitOnAsync");
    AddTimerWatch("DC_WaitOnAsync1");
    AddTimerWatch("DC_WaitOnAsync2");
    AddTimerWatch("PDW");

    m_Profiler.m_Bytes.emplace("buffering", 0);

@@ -83,9 +89,6 @@ std::string JSONProfiler::GetRankProfilingJSON(
    std::replace(timeDate.begin(), timeDate.end(), ' ', '_');

    rankLog += ", \"start\":\"" + timeDate + "\"";
    // rankLog += ", \"threads\":" + std::to_string(m_Parameters.Threads);
    rankLog +=
        ", \"bytes\":" + std::to_string(profiler.m_Bytes.at("buffering"));

    for (const auto &timerPair : profiler.m_Timers)
    {
@@ -102,6 +105,9 @@ std::string JSONProfiler::GetRankProfilingJSON(
        rankLog += ", \"transport_" + std::to_string(t) + "\":{";
        rankLog += "\"type\":\"" + transportsTypes[t] + "\"";

        size_t wBytes = transportsProfilers[t]->m_Bytes.at("write");
        rankLog += ", \"wbytes\":" + std::to_string(wBytes);

        for (const auto &transportTimerPair : transportsProfilers[t]->m_Timers)
        {
            lf_WriterTimer(rankLog, transportTimerPair.second);
+8 −0
Original line number Diff line number Diff line
@@ -107,6 +107,14 @@ void Transport::Flush()

size_t Transport::GetSize() { return 0; }

void Transport::ProfilerWriteBytes(size_t bytes) noexcept
{
    if (m_Profiler.m_IsActive)
    {
        m_Profiler.m_Bytes.at("write") += bytes;
    }
}

void Transport::ProfilerStart(const std::string process) noexcept
{
    if (m_Profiler.m_IsActive)
+1 −0
Original line number Diff line number Diff line
@@ -155,6 +155,7 @@ public:
    virtual void MkDir(const std::string &fileName) = 0;

protected:
    void ProfilerWriteBytes(size_t bytes) noexcept;
    void ProfilerStart(const std::string process) noexcept;

    void ProfilerStop(const std::string process) noexcept;
+3 −0
Original line number Diff line number Diff line
@@ -256,6 +256,7 @@ void FilePOSIX::Write(const char *buffer, size_t size, size_t start)
        while (size > 0)
        {
            ProfilerStart("write");
            ProfilerWriteBytes(size);
            errno = 0;
            const auto writtenSize = write(m_FileDescriptor, buffer, size);
            m_Errno = errno;
@@ -350,6 +351,8 @@ void FilePOSIX::WriteV(const core::iovec *iov, const int iovcnt, size_t start)
            written = static_cast<size_t>(ret);
        }

        ProfilerWriteBytes(written);

        if (written < nBytesExpected)
        {
            /* Fall back to write calls with individual buffers */