Commit 425af9d0 authored by Victor Kotlyar's avatar Victor Kotlyar
Browse files

Ported commit fab791bc805f95d59c55cbcb554e6f218710b0dd from castor/master

CASTOR-4836: tapeserverd should have a new time counter: delivery time

Fixed.

Add two counters: deliveryTime and drainingTime. For the recall
session deliveryTime is the total time of the disk threads and
drainigTime is the time difference between deliveryTime and
the total time of the tape thread or in other words how much time
disk threads spent after the tape thread finished. For the
migration session deliveryTime is the total time of the tape thread
and drainingTime always equal 0.
parent a4776842
......@@ -195,6 +195,8 @@ void DiskWriteThreadPool::DiskWriteWorkerThread::run() {
logParams.add("errorCount", m_parentThreadPool.m_failedWriteCount);
m_parentThreadPool.logWithStat(cta::log::INFO, "As last exiting DiskWriteWorkerThread, reported an end of session with errors");
}
const double deliveryTime = m_parentThreadPool.m_totalTime.secs();
m_parentThreadPool.m_watchdog.updateStatsDeliveryTime(deliveryTime);
}
}
......
......@@ -276,7 +276,7 @@ void castor::tape::tapeserver::daemon::TapeReadSingleThread::run() {
// it in the while loop
task->execute(*rs, m_logContext, m_watchdog,m_stats,timer);
// Transmit the statistics to the watchdog thread
m_watchdog.updateStats(m_stats);
m_watchdog.updateStatsWithoutDeliveryTime(m_stats);
// The session could have been corrupted (failed positioning)
if(rs->isCorrupted()) {
throw cta::exception::Exception ("Session corrupted: exiting task execution loop in TapeReadSingleThread. Cleanup will follow.");
......@@ -292,7 +292,7 @@ void castor::tape::tapeserver::daemon::TapeReadSingleThread::run() {
logWithStat(cta::log::INFO, "Tape thread complete",
params);
// Report one last time the stats, after unloading/unmounting.
m_watchdog.updateStats(m_stats);
m_watchdog.updateStatsWithoutDeliveryTime(m_stats);
} catch(const cta::exception::Exception& e){
// We can still update the session stats one last time (unmount timings
// should have been updated by the RAII cleaner/unmounter).
......
......@@ -74,6 +74,11 @@ namespace daemon {
/** Total time of the session, computed in parallel */
double totalTime;
/** Time to delivery data to the client equal disk threads totalTime
* for recall and the tape thread totalTime for migration
*/
double deliveryTime;
/** Cumulated data volume (actual payload), in bytes. */
uint64_t dataVolume;
......@@ -91,8 +96,8 @@ namespace daemon {
TapeSessionStats(): mountTime(0.0), positionTime(0.0), checksumingTime(0.0),
readWriteTime(0.0), flushTime(0.0), unloadTime(0.0), unmountTime(0.0),
waitDataTime(0.0), waitFreeMemoryTime(0.0), waitInstructionsTime(0.0),
waitReportingTime(0.0), totalTime(0.0), dataVolume(0), headerVolume(0),
filesCount(0) {}
waitReportingTime(0.0), totalTime(0.0), deliveryTime(0.0),
dataVolume(0), headerVolume(0), filesCount(0) {}
/** Accumulate contents of another stats block */
void add(const TapeSessionStats& other) {
......@@ -108,6 +113,7 @@ namespace daemon {
waitInstructionsTime += other.waitInstructionsTime;
waitReportingTime += other.waitReportingTime;
// totalTime is not cumulative between threads (it's real time)
// deliveryTime is not cumulative between thread
dataVolume += other.dataVolume;
headerVolume += other.headerVolume;
filesCount += other.filesCount;
......
......@@ -283,7 +283,7 @@ void castor::tape::tapeserver::daemon::TapeWriteSingleThread::run() {
// Add the tasks counts to the session's
m_stats.add(task->getTaskStats());
// Transmit the statistics to the watchdog thread
m_watchdog.updateStats(m_stats);
m_watchdog.updateStatsWithoutDeliveryTime(m_stats);
// Increase local flush counters (session counters are incremented by
// the task)
files++;
......@@ -304,6 +304,7 @@ void castor::tape::tapeserver::daemon::TapeWriteSingleThread::run() {
cta::log::ScopedParamContainer params(m_logContext);
params.add("status", "success");
m_stats.totalTime = totalTimer.secs();
m_stats.deliveryTime = m_stats.totalTime;
logWithStats(cta::log::INFO, "Tape thread complete",params);
// Report one last time the stats, after unloading/unmounting.
m_watchdog.updateStats(m_stats);
......@@ -321,7 +322,7 @@ void castor::tape::tapeserver::daemon::TapeWriteSingleThread::run() {
m_injector->setErrorFlag();
// We can still update the session stats one last time (unmount timings
// should have been updated by the RAII cleaner/unmounter).
m_watchdog.updateStats(m_stats);
m_watchdog.updateStatsWithoutDeliveryTime(m_stats);
// If we reached the end of tape, this is not an error (ENOSPC)
try {
......
......@@ -150,11 +150,20 @@ protected:
if (m_statsSet) {
// Build the statistics to be logged
std::list<Param> paramList;
// for delivery time we estimate it ourselves before getting the
// final stats at the end of the last disk thread. This allows proper
// estimation of statistics for data transfer sessions that get killed
// before completion.
const double deliveryTime =
m_stats.deliveryTime?m_stats.deliveryTime:m_tapeThreadTimer.secs();
// total time is a special case. We will estimate it ourselves before
// getting the final stats (at the end of the thread). This will allow
// proper estimation of statistics for tape sessions that get killed
// before completion.
double totalTime = m_stats.totalTime?m_stats.totalTime:m_tapeThreadTimer.secs();
const double totalTime = m_stats.totalTime?m_stats.totalTime:m_tapeThreadTimer.secs();
/** Time beetwen fineshed tape thread and finished disk threads */
const double drainingTime =
deliveryTime > totalTime?deliveryTime-totalTime: 0.0;
paramList.push_back(Param("mountTime", m_stats.mountTime));
paramList.push_back(Param("positionTime", m_stats.positionTime));
paramList.push_back(Param("waitInstructionsTime", m_stats.waitInstructionsTime));
......@@ -168,6 +177,8 @@ protected:
paramList.push_back(Param("unmountTime", m_stats.unmountTime));
paramList.push_back(Param("transferTime", m_stats.transferTime()));
paramList.push_back(Param("totalTime", totalTime));
paramList.push_back(Param("deliveryTime", deliveryTime));
paramList.push_back(Param("drainingTime", drainingTime));
paramList.push_back(Param("dataVolume", m_stats.dataVolume));
paramList.push_back(Param("filesCount", m_stats.filesCount));
paramList.push_back(Param("headerVolume", m_stats.headerVolume));
......@@ -283,7 +294,28 @@ protected:
m_blockMovementTimer.reset();
m_nbOfMemblocksMoved++;
}
/**
* update the watchdog's statistics for the session delivery time
* @param a new deliveryTime
*/
void updateStatsDeliveryTime (const double deliveryTime) {
cta::threading::MutexLocker locker(m_mutex);
m_stats.deliveryTime = deliveryTime;
}
/**
* update the watchdog's statistics for the session
* @param stats the stats counters collection to push
*/
void updateStatsWithoutDeliveryTime (const TapeSessionStats & stats) {
cta::threading::MutexLocker locker(m_mutex);
const double savedDeliveryTime = m_stats.deliveryTime;
m_stats = stats;
m_stats.deliveryTime = savedDeliveryTime;
m_statsSet = true;
}
/**
* update the watchdog's statistics for the session
* @param stats the stats counters collection to push
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment