Skip to content
Snippets Groups Projects
Commit 2577468d authored by Eric Cano's avatar Eric Cano
Browse files

Added logging of Rados locking performance statistics.

parent 462aa14f
Branches
Tags
No related merge requests found
...@@ -546,6 +546,118 @@ void BackendRados::lockNotify(std::string name, uint64_t timeout_us, LockType lo ...@@ -546,6 +546,118 @@ void BackendRados::lockNotify(std::string name, uint64_t timeout_us, LockType lo
} }
} }
BackendRados::RadosLockTimingLogger BackendRados::g_RadosLockTimingLogger;
void BackendRados::RadosLockTimingLogger::Measurements::addAttempt(double latency, double waitTime, double latencyMultiplier) {
totalLatency += latency;
totalWaitTime += waitTime;
totalLatencyMultiplier += latencyMultiplier;
if (attempts) {
minLatency = std::min(minLatency, latency);
maxLatency = std::max(maxLatency, latency);
minWaitTime = std::min(minWaitTime, latency);
maxWaitTime = std::max(maxWaitTime, latency);
minLatencyMultiplier = std::min(minLatencyMultiplier, latency);
maxLatencyMultiplier = std::max(maxLatencyMultiplier, latency);
} else {
minLatency = maxLatency = latency;
minWaitTime = maxWaitTime = waitTime;
minLatencyMultiplier = maxLatencyMultiplier = latencyMultiplier;
}
attempts++;
waitCount++;
}
void BackendRados::RadosLockTimingLogger::Measurements::addSuccess(double latency, double time) {
if (attempts) {
minLatency = std::min(minLatency, latency);
maxLatency = std::max(maxLatency, latency);
} else {
minLatency = maxLatency = latency;
}
totalTime = time;
attempts++;
BackendRados::g_RadosLockTimingLogger.addMeasurements(*this);
}
void BackendRados::RadosLockTimingLogger::addMeasurements(const Measurements& measurements) {
threading::MutexLocker ml(m_mutex);
if (m_measurements.totalCalls) {
m_measurements.minAttempts = std::min(measurements.attempts, m_measurements.minAttempts);
m_measurements.maxAttempts = std::max(measurements.attempts, m_measurements.maxAttempts);
m_measurements.minTotalTime = std::min(measurements.totalTime, m_measurements.minTotalTime);
m_measurements.maxTotalTime = std::max(measurements.totalTime, m_measurements.maxTotalTime);
} else {
m_measurements.minAttempts = measurements.attempts;
m_measurements.maxAttempts = measurements.attempts;
m_measurements.minTotalTime = measurements.totalTime;
m_measurements.maxTotalTime = measurements.totalTime;
}
if (measurements.waitCount) {
if (m_measurements.waitCount) {
m_measurements.minLatency = std::min(measurements.minLatency, m_measurements.minLatency);
m_measurements.maxLatency = std::max(measurements.maxLatency, m_measurements.maxLatency);
m_measurements.minWaitTime = std::min(measurements.minWaitTime, m_measurements.minWaitTime);
m_measurements.maxWaitTime = std::max(measurements.maxWaitTime, m_measurements.maxWaitTime);
m_measurements.minLatencyMultiplier = std::min(measurements.minLatencyMultiplier, m_measurements.minLatencyMultiplier);
m_measurements.maxLatencyMultiplier = std::max(measurements.maxLatencyMultiplier, m_measurements.maxLatencyMultiplier);
} else {
m_measurements.minLatency = measurements.minLatency;
m_measurements.maxLatency = measurements.maxLatency;
m_measurements.minWaitTime = measurements.minWaitTime;
m_measurements.maxWaitTime = measurements.maxWaitTime;
m_measurements.minLatencyMultiplier = measurements.minLatencyMultiplier;
m_measurements.maxLatencyMultiplier = measurements.maxLatencyMultiplier;
}
}
m_measurements.totalCalls++;
m_measurements.attempts += measurements.attempts;
m_measurements.waitCount += measurements.waitCount;
m_measurements.totalTime += measurements.totalTime;
m_measurements.totalLatency += measurements.totalLatency;
m_measurements.totalWaitTime += measurements.totalWaitTime;
m_measurements.totalLatencyMultiplier += measurements.totalLatencyMultiplier;
m_measurements.totalWaitTime += measurements.totalWaitTime;
if (m_timer.secs() > 60) {
logIfNeeded();
}
}
void BackendRados::RadosLockTimingLogger::logIfNeeded() {
if (m_measurements.totalCalls) {
{
std::ofstream logFile(RADOS_LOCK_PERFORMANCE_LOGGING_FILE, std::ofstream::app);
std::time_t end_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
std::string date=std::ctime(&end_time);
// Chomp newline in the end
date.erase(std::remove(date.begin(), date.end(), '\n'), date.end());
logFile << date
<< " totalCalls=" << m_measurements.totalCalls
<< " averageAttempts=" << 1.0 * m_measurements.attempts / m_measurements.totalCalls
<< " waitCount=" << m_measurements.waitCount
<< " minAttempts=" << m_measurements.minAttempts
<< " maxAttempts=" << m_measurements.maxAttempts
<< " averageTotalTime=" << m_measurements.totalTime / m_measurements.totalCalls
<< " minTotalTime=" << m_measurements.minTotalTime
<< " maxTotalTime=" << m_measurements.maxTotalTime
<< " averageLatency=" << (m_measurements.waitCount? m_measurements.totalLatency / m_measurements.waitCount : 0)
<< " minLatency=" << m_measurements.minLatency
<< " maxLatency=" << m_measurements.maxLatency
<< " averageLatencyMultiplier=" << (m_measurements.waitCount? m_measurements.totalLatencyMultiplier / m_measurements.waitCount : 0)
<< " minLatencyMultiplier=" << m_measurements.minLatencyMultiplier
<< " maxLatencyMultiplier=" << m_measurements.maxLatencyMultiplier
<< " samplingTime=" << m_timer.secs(utils::Timer::resetCounter) << std::endl;
}
m_measurements = CumulatedMesurements();
}
}
BackendRados::RadosLockTimingLogger::~RadosLockTimingLogger() {
threading::MutexLocker ml(m_mutex);
logIfNeeded();
}
void BackendRados::lockBackoff(std::string name, uint64_t timeout_us, LockType lockType, void BackendRados::lockBackoff(std::string name, uint64_t timeout_us, LockType lockType,
const std::string& clientId, librados::IoCtx & radosCtx) { const std::string& clientId, librados::IoCtx & radosCtx) {
// In Rados, locking a non-existing object will create it. This is not our intended // In Rados, locking a non-existing object will create it. This is not our intended
...@@ -560,12 +672,14 @@ void BackendRados::lockBackoff(std::string name, uint64_t timeout_us, LockType l ...@@ -560,12 +672,14 @@ void BackendRados::lockBackoff(std::string name, uint64_t timeout_us, LockType l
// (betweend and 50-150%) // (betweend and 50-150%)
size_t backoff=1; size_t backoff=1;
utils::Timer t, timeoutTimer; utils::Timer t, timeoutTimer;
RadosLockTimingLogger::Measurements timingMeasurements;
while (true) { while (true) {
TIMESTAMPEDPRINT(lockType==LockType::Shared?"Pre-lock (shared)":"Pre-lock (exclusive)"); TIMESTAMPEDPRINT(lockType==LockType::Shared?"Pre-lock (shared)":"Pre-lock (exclusive)");
RadosTimeoutLogger rtl; RadosTimeoutLogger rtl;
if (lockType==LockType::Shared) { if (lockType==LockType::Shared) {
cta::exception::Errnum::throwOnReturnedErrnoOrThrownStdException([&]() { cta::exception::Errnum::throwOnReturnedErrnoOrThrownStdException([&]() {
rc = radosCtx.lock_shared(name, "lock", clientId, "", "", &tv, 0); rc = radosCtx.lock_shared(name, "lock", clientId, "", "", &tv, 0);
timingMeasurements.addSuccess(t.secs(), timeoutTimer.secs());
return 0; return 0;
}, "In BackendRados::lockBackoff(): failed radosCtx.lock_shared()"); }, "In BackendRados::lockBackoff(): failed radosCtx.lock_shared()");
rtl.logIfNeeded("In BackendRados::lockBackoff(): radosCtx.lock_shared()", name); rtl.logIfNeeded("In BackendRados::lockBackoff(): radosCtx.lock_shared()", name);
...@@ -588,6 +702,7 @@ void BackendRados::lockBackoff(std::string name, uint64_t timeout_us, LockType l ...@@ -588,6 +702,7 @@ void BackendRados::lockBackoff(std::string name, uint64_t timeout_us, LockType l
} }
timespec ts; timespec ts;
auto wait=t.usecs(utils::Timer::resetCounter)*backoff++/c_backoffFraction; auto wait=t.usecs(utils::Timer::resetCounter)*backoff++/c_backoffFraction;
auto latencyUsecs = wait;
wait = std::min(wait, c_maxWait); wait = std::min(wait, c_maxWait);
if (backoff>c_maxBackoff) backoff=1; if (backoff>c_maxBackoff) backoff=1;
// We need to get a random number [50, 150] // We need to get a random number [50, 150]
...@@ -598,6 +713,7 @@ void BackendRados::lockBackoff(std::string name, uint64_t timeout_us, LockType l ...@@ -598,6 +713,7 @@ void BackendRados::lockBackoff(std::string name, uint64_t timeout_us, LockType l
ts.tv_sec = wait/(1000*1000); ts.tv_sec = wait/(1000*1000);
ts.tv_nsec = (wait % (1000*1000)) * 1000; ts.tv_nsec = (wait % (1000*1000)) * 1000;
nanosleep(&ts, nullptr); nanosleep(&ts, nullptr);
timingMeasurements.addAttempt(1.0 * latencyUsecs / 1000 / 1000, 1.0 * wait / 1000 /1000, 1.0 * randFactor / 100);
} }
cta::exception::Errnum::throwOnReturnedErrno(-rc, cta::exception::Errnum::throwOnReturnedErrno(-rc,
std::string("In ObjectStoreRados::lock(), failed to librados::IoCtx::") + std::string("In ObjectStoreRados::lock(), failed to librados::IoCtx::") +
......
...@@ -46,6 +46,8 @@ ...@@ -46,6 +46,8 @@
#include <syscall.h> #include <syscall.h>
#endif //RADOS_SLOW_CALLS_LOGGING #endif //RADOS_SLOW_CALLS_LOGGING
#define RADOS_LOCK_PERFORMANCE_LOGGING_FILE "/var/tmp/cta-rados-locking.log"
namespace cta { namespace objectstore { namespace cta { namespace objectstore {
/** /**
* An implementation of the object store primitives, using Rados. * An implementation of the object store primitives, using Rados.
...@@ -157,6 +159,44 @@ private: ...@@ -157,6 +159,44 @@ private:
static cta::threading::Mutex g_mutex; static cta::threading::Mutex g_mutex;
#endif //RADOS_SLOW_CALLS_LOGGING #endif //RADOS_SLOW_CALLS_LOGGING
}; };
/**
* A class for logging lock timings and performance
*/
class RadosLockTimingLogger {
public:
struct Measurements {
size_t attempts = 0;
size_t waitCount = 0;
double totalTime = 0;
double totalLatency = 0;
double minLatency = 0;
double maxLatency = 0;
double totalWaitTime = 0;
double minWaitTime = 0;
double maxWaitTime = 0;
double totalLatencyMultiplier = 0;
double minLatencyMultiplier = 0;
double maxLatencyMultiplier = 0;
void addSuccess (double latency, double time);
void addAttempt(double latency, double waitTime, double latencyMultiplier);
};
void addMeasurements(const Measurements & measurements);
void logIfNeeded();
~RadosLockTimingLogger();
private:
struct CumulatedMesurements: public Measurements {
size_t totalCalls = 0;
size_t minAttempts = 0;
size_t maxAttempts = 0;
double minTotalTime = 0;
double maxTotalTime = 0;
};
CumulatedMesurements m_measurements;
threading::Mutex m_mutex;
utils::Timer m_timer;
};
static RadosLockTimingLogger g_RadosLockTimingLogger;
/** /**
* A class handling the watch part when waiting for a lock. * A class handling the watch part when waiting for a lock.
......
...@@ -225,7 +225,7 @@ TEST_P(BackendAbstractTest, ParametersInterface) { ...@@ -225,7 +225,7 @@ TEST_P(BackendAbstractTest, ParametersInterface) {
static cta::objectstore::BackendVFS osVFS(__LINE__, __FILE__); static cta::objectstore::BackendVFS osVFS(__LINE__, __FILE__);
#ifdef TEST_RADOS #ifdef TEST_RADOS
static cta::log::DummyLogger dl(""); static cta::log::DummyLogger dl("", "");
static cta::objectstore::BackendRados osRados(dl, "tapetest", "tapetest"); static cta::objectstore::BackendRados osRados(dl, "tapetest", "tapetest");
INSTANTIATE_TEST_CASE_P(BackendTestRados, BackendAbstractTest, ::testing::Values((cta::objectstore::Backend*)&osRados)); INSTANTIATE_TEST_CASE_P(BackendTestRados, BackendAbstractTest, ::testing::Values((cta::objectstore::Backend*)&osRados));
#endif #endif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment