diff --git a/tapeserver/castor/tape/tapeserver/daemon/DiskWriteThreadPool.cpp b/tapeserver/castor/tape/tapeserver/daemon/DiskWriteThreadPool.cpp
index 479f2c140e4499f2c1682001ab70f3a583d82c74..15c5836ec7b30ee57a6acc275e954508e6895d97 100644
--- a/tapeserver/castor/tape/tapeserver/daemon/DiskWriteThreadPool.cpp
+++ b/tapeserver/castor/tape/tapeserver/daemon/DiskWriteThreadPool.cpp
@@ -195,6 +195,8 @@ void DiskWriteThreadPool::DiskWriteWorkerThread::run() {
       logParams.add("errorCount", m_parentThreadPool.m_failedWriteCount);
       m_parentThreadPool.logWithStat(cta::log::INFO,
         "As last exiting DiskWriteWorkerThread, reported an end of session with errors");
     }
+    const double deliveryTime = m_parentThreadPool.m_totalTime.secs();
+    m_parentThreadPool.m_watchdog.updateStatsDeliveryTime(deliveryTime);
   }
 }
diff --git a/tapeserver/castor/tape/tapeserver/daemon/TapeReadSingleThread.cpp b/tapeserver/castor/tape/tapeserver/daemon/TapeReadSingleThread.cpp
index 5f0f0c065743419374f29499703de75160cf5c3f..09dce1db5643ed3c35405d5f72d6dbd4f9769376 100644
--- a/tapeserver/castor/tape/tapeserver/daemon/TapeReadSingleThread.cpp
+++ b/tapeserver/castor/tape/tapeserver/daemon/TapeReadSingleThread.cpp
@@ -276,7 +276,7 @@ void castor::tape::tapeserver::daemon::TapeReadSingleThread::run() {
         // it in the while loop
         task->execute(*rs, m_logContext, m_watchdog,m_stats,timer);
         // Transmit the statistics to the watchdog thread
-        m_watchdog.updateStats(m_stats);
+        m_watchdog.updateStatsWithoutDeliveryTime(m_stats);
         // The session could have been corrupted (failed positioning)
         if(rs->isCorrupted()) {
           throw cta::exception::Exception ("Session corrupted: exiting task execution loop in TapeReadSingleThread. Cleanup will follow.");
@@ -292,7 +292,7 @@
       logWithStat(cta::log::INFO, "Tape thread complete",
         params);
       // Report one last time the stats, after unloading/unmounting.
-      m_watchdog.updateStats(m_stats);
+      m_watchdog.updateStatsWithoutDeliveryTime(m_stats);
     } catch(const cta::exception::Exception& e){
       // We can still update the session stats one last time (unmount timings
       // should have been updated by the RAII cleaner/unmounter).
diff --git a/tapeserver/castor/tape/tapeserver/daemon/TapeSessionStats.hpp b/tapeserver/castor/tape/tapeserver/daemon/TapeSessionStats.hpp
index 6575e217cbb725e3481a7f23a6eefacc76c27d10..9b34ee88c24e0cbd510ac875b0e0fbf4e9ae295b 100644
--- a/tapeserver/castor/tape/tapeserver/daemon/TapeSessionStats.hpp
+++ b/tapeserver/castor/tape/tapeserver/daemon/TapeSessionStats.hpp
@@ -74,6 +74,11 @@ namespace daemon {
 
     /** Total time of the session, computed in parallel */
     double totalTime;
+
+    /** Time to deliver the data to the client: equal to the disk threads'
+     * totalTime for recall and to the tape thread's totalTime for migration
+     */
+    double deliveryTime;
 
     /** Cumulated data volume (actual payload), in bytes. */
     uint64_t dataVolume;
@@ -91,8 +96,8 @@
     TapeSessionStats(): mountTime(0.0), positionTime(0.0), checksumingTime(0.0),
       readWriteTime(0.0), flushTime(0.0), unloadTime(0.0), unmountTime(0.0),
       waitDataTime(0.0), waitFreeMemoryTime(0.0), waitInstructionsTime(0.0),
-      waitReportingTime(0.0), totalTime(0.0), dataVolume(0), headerVolume(0),
-      filesCount(0) {}
+      waitReportingTime(0.0), totalTime(0.0), deliveryTime(0.0),
+      dataVolume(0), headerVolume(0), filesCount(0) {}
 
     /** Accumulate contents of another stats block */
     void add(const TapeSessionStats& other) {
@@ -108,6 +113,7 @@
       waitInstructionsTime += other.waitInstructionsTime;
       waitReportingTime += other.waitReportingTime;
       // totalTime is not cumulative between threads (it's real time)
+      // deliveryTime is not cumulative between threads either
      dataVolume += other.dataVolume;
       headerVolume += other.headerVolume;
       filesCount += other.filesCount;
diff --git a/tapeserver/castor/tape/tapeserver/daemon/TapeWriteSingleThread.cpp b/tapeserver/castor/tape/tapeserver/daemon/TapeWriteSingleThread.cpp
index 79661cb528e41545d8583953f9c63f9e06f207f3..2c5d4b8608eb560632eec8f9c93fc5a81d4361a5 100644
--- a/tapeserver/castor/tape/tapeserver/daemon/TapeWriteSingleThread.cpp
+++ b/tapeserver/castor/tape/tapeserver/daemon/TapeWriteSingleThread.cpp
@@ -283,7 +283,7 @@ void castor::tape::tapeserver::daemon::TapeWriteSingleThread::run() {
         // Add the tasks counts to the session's
         m_stats.add(task->getTaskStats());
         // Transmit the statistics to the watchdog thread
-        m_watchdog.updateStats(m_stats);
+        m_watchdog.updateStatsWithoutDeliveryTime(m_stats);
         // Increase local flush counters (session counters are incremented by
         // the task)
         files++;
@@ -304,6 +304,7 @@
       cta::log::ScopedParamContainer params(m_logContext);
       params.add("status", "success");
       m_stats.totalTime = totalTimer.secs();
+      m_stats.deliveryTime = m_stats.totalTime;
       logWithStats(cta::log::INFO, "Tape thread complete",params);
       // Report one last time the stats, after unloading/unmounting.
       m_watchdog.updateStats(m_stats);
@@ -321,7 +322,7 @@
       m_injector->setErrorFlag();
       // We can still update the session stats one last time (unmount timings
       // should have been updated by the RAII cleaner/unmounter).
-      m_watchdog.updateStats(m_stats);
+      m_watchdog.updateStatsWithoutDeliveryTime(m_stats);
 
       // If we reached the end of tape, this is not an error (ENOSPC)
       try {
diff --git a/tapeserver/castor/tape/tapeserver/daemon/TaskWatchDog.hpp b/tapeserver/castor/tape/tapeserver/daemon/TaskWatchDog.hpp
index 2487fca198ad46a0c81dfe7ee585565ceef5c379..2828d7e7070938022e2c3d409bc77f571bdff3cf 100644
--- a/tapeserver/castor/tape/tapeserver/daemon/TaskWatchDog.hpp
+++ b/tapeserver/castor/tape/tapeserver/daemon/TaskWatchDog.hpp
@@ -150,11 +150,20 @@ protected:
     if (m_statsSet) {
       // Build the statistics to be logged
       std::list<Param> paramList;
+      // For the delivery time, we estimate it ourselves before getting the
+      // final stats (at the end of the last disk thread). This allows proper
+      // estimation of statistics for data transfer sessions that get killed
+      // before completion.
+      const double deliveryTime =
+        m_stats.deliveryTime?m_stats.deliveryTime:m_tapeThreadTimer.secs();
       // total time is a special case. We will estimate it ourselves before
       // getting the final stats (at the end of the thread). This will allow
       // proper estimation of statistics for tape sessions that get killed
       // before completion.
-      double totalTime = m_stats.totalTime?m_stats.totalTime:m_tapeThreadTimer.secs();
+      const double totalTime = m_stats.totalTime?m_stats.totalTime:m_tapeThreadTimer.secs();
+      /** Time between the end of the tape thread and the end of the disk threads */
+      const double drainingTime =
+        deliveryTime > totalTime?deliveryTime-totalTime:0.0;
       paramList.push_back(Param("mountTime", m_stats.mountTime));
       paramList.push_back(Param("positionTime", m_stats.positionTime));
       paramList.push_back(Param("waitInstructionsTime", m_stats.waitInstructionsTime));
@@ -168,6 +177,8 @@
       paramList.push_back(Param("unloadTime", m_stats.unloadTime));
       paramList.push_back(Param("unmountTime", m_stats.unmountTime));
       paramList.push_back(Param("transferTime", m_stats.transferTime()));
       paramList.push_back(Param("totalTime", totalTime));
+      paramList.push_back(Param("deliveryTime", deliveryTime));
+      paramList.push_back(Param("drainingTime", drainingTime));
       paramList.push_back(Param("dataVolume", m_stats.dataVolume));
       paramList.push_back(Param("filesCount", m_stats.filesCount));
       paramList.push_back(Param("headerVolume", m_stats.headerVolume));
@@ -283,7 +294,28 @@
     m_blockMovementTimer.reset();
     m_nbOfMemblocksMoved++;
   }
+
+  /**
+   * update the watchdog's statistics for the session delivery time
+   * @param deliveryTime the new delivery time for the session
+   */
+  void updateStatsDeliveryTime (const double deliveryTime) {
+    cta::threading::MutexLocker locker(m_mutex);
+    m_stats.deliveryTime = deliveryTime;
+  }
+  /**
+   * update the watchdog's statistics for the session, preserving the delivery time
+   * @param stats the stats counters collection to push
+   */
+  void updateStatsWithoutDeliveryTime (const TapeSessionStats & stats) {
+    cta::threading::MutexLocker locker(m_mutex);
+    const double savedDeliveryTime = m_stats.deliveryTime;
+    m_stats = stats;
+    m_stats.deliveryTime = savedDeliveryTime;
+    m_statsSet = true;
+  }
+
   /**
    * update the watchdog's statistics for the session
    * @param stats the stats counters collection to push