Skip to content
Snippets Groups Projects
Commit c903c7bb authored by Eric Cano's avatar Eric Cano
Browse files

Rados performance: fixed missing unwatch call and added instrumentation.

The missing unwatch fix should improve performance of watch/notify based locking significantly.
Instrumentation will log any call to rados longer that 1s to /var/tmp/cta-rados-slow-calls.log.
Also prepared a structure to allow switching between watch/notify and backoff based locking.
Backoff code is not yet brought back (will test with the unwatch fix first).
parent 56843fba
No related branches found
No related tags found
No related merge requests found
......@@ -78,6 +78,8 @@ void notifyLocked() {
namespace cta { namespace objectstore {
cta::threading::Mutex BackendRados::RadosTimeoutLogger::g_mutex;
BackendRados::BackendRados(const std::string & userId, const std::string & pool, const std::string &radosNameSpace) :
m_user(userId), m_pool(pool), m_namespace(radosNameSpace), m_cluster(), m_radosCtx() {
cta::exception::Errnum::throwOnReturnedErrno(-m_cluster.init(userId.c_str()),
......@@ -100,8 +102,12 @@ m_user(userId), m_pool(pool), m_namespace(radosNameSpace), m_cluster(), m_radosC
}
BackendRados::~BackendRados() {
RadosTimeoutLogger rtl1;
m_radosCtx.close();
rtl1.logIfNeeded("In BackendRados::~BackendRados(): m_radosCtx.close()", "no object");
RadosTimeoutLogger rtl2;
m_cluster.shutdown();
rtl2.logIfNeeded("In BackendRados::~BackendRados(): m_cluster.shutdown()", "no object");
}
void BackendRados::create(std::string name, std::string content) {
......@@ -111,9 +117,11 @@ void BackendRados::create(std::string name, std::string content) {
ceph::bufferlist bl;
bl.append(content.c_str(), content.size());
wop.write_full(bl);
RadosTimeoutLogger rtl;
cta::exception::Errnum::throwOnReturnedErrno(-m_radosCtx.operate(name, &wop),
std::string("In ObjectStoreRados::create, failed to create exclusively or write: ")
+ name);
rtl.logIfNeeded("In BackendRados::create(): m_radosCtx.operate(create+write_full)", name);
}
void BackendRados::atomicOverwrite(std::string name, std::string content) {
......@@ -122,29 +130,38 @@ void BackendRados::atomicOverwrite(std::string name, std::string content) {
ceph::bufferlist bl;
bl.append(content.c_str(), content.size());
wop.write_full(bl);
RadosTimeoutLogger rtl;
cta::exception::Errnum::throwOnReturnedErrno(-m_radosCtx.operate(name, &wop),
std::string("In ObjectStoreRados::atomicOverwrite, failed to assert existence or write: ")
+ name);
rtl.logIfNeeded("In BackendRados::atomicOverwrite(): m_radosCtx.operate(assert_exists+write_full)", name);
}
std::string BackendRados::read(std::string name) {
std::string ret;
librados::bufferlist bl;
RadosTimeoutLogger rtl;
cta::exception::Errnum::throwOnNegativeErrnoIfNegative(m_radosCtx.read(name, bl, std::numeric_limits<int32_t>::max(), 0),
std::string("In ObjectStoreRados::read, failed to read: ")
+ name);
rtl.logIfNeeded("In BackendRados::read(): m_radosCtx.read()", name);
bl.copy(0, bl.length(), ret);
return ret;
}
void BackendRados::remove(std::string name) {
RadosTimeoutLogger rtl;
cta::exception::Errnum::throwOnReturnedErrno(-m_radosCtx.remove(name));
rtl.logIfNeeded("In BackendRados::remove(): m_radosCtx.remove()", name);
}
bool BackendRados::exists(std::string name) {
uint64_t size;
time_t date;
if (m_radosCtx.stat(name, &size, &date)) {
RadosTimeoutLogger rtl;
auto statRet=m_radosCtx.stat(name, &size, &date);
rtl.logIfNeeded("In BackendRados::exists(): m_radosCtx.stat()", name);
if (statRet) {
return false;
} else {
return true;
......@@ -159,14 +176,25 @@ std::list<std::string> BackendRados::list() {
return ret;
}
void BackendRados::ScopedLock::release() {
#if RADOS_LOCKING_STRATEGY == NOTIFY
releaseNotify();
#elif RADOS_LOCKING_STRATEGY == BACKOFF
releaseBackoff();
#else
#error Wrong value for "RADOS_LOCKING_STRATEGY"
#endif
}
void BackendRados::ScopedLock::releaseNotify() {
if (!m_lockSet) return;
// We should be tolerant with unlocking a deleted object, which is part
// of the lock-remove-(implicit unlock) cycle when we delete an object
// we hence overlook the ENOENT errors.
TIMESTAMPEDPRINT("Pre-release");
RadosTimeoutLogger rtl1;
int rc=m_context.unlock(m_oid, "lock", m_clientId);
rtl1.logIfNeeded("In BackendRados::ScopedLock::releaseNotify(): m_context.unlock()", m_oid);
switch (-rc) {
case ENOENT:
break;
......@@ -181,10 +209,12 @@ void BackendRados::ScopedLock::release() {
// Notify potential waiters to take their chances now on the lock.
utils::Timer t;
librados::bufferlist bl;
// We use a fire aand forget aio call.
// We use a fire and forget aio call.
librados::AioCompletion * completion = librados::Rados::aio_create_completion(nullptr, nullptr, nullptr);
RadosTimeoutLogger rtl2;
cta::exception::Errnum::throwOnReturnedErrno(-m_context.aio_notify(m_oid, completion, bl, 10000, nullptr),
"In BackendRados::ScopedLock::release(): failed to aio_notify()");
rtl2.logIfNeeded("In BackendRados::ScopedLock::releaseNotify(): m_context.aio_notify()", m_oid);
completion->release();
m_lockSet = false;
TIMESTAMPEDPRINT("Post-notify");
......@@ -203,11 +233,14 @@ BackendRados::ScopedLock::~ScopedLock() {
}
BackendRados::LockWatcher::LockWatcher(librados::IoCtx& context, const std::string& name):
m_context(context), m_name(name) {
m_context(context) {
m_internal.reset(new Internal);
m_internal->m_name = name;
m_internal->m_future = m_internal->m_promise.get_future();
TIMESTAMPEDPRINT("Pre-watch2");
RadosTimeoutLogger rtl;
cta::exception::Errnum::throwOnReturnedErrno(-m_context.watch2(name, &m_watchHandle, m_internal.get()));
rtl.logIfNeeded("In BackendRados::LockWatcher::LockWatcher(): m_context.watch2()", name);
TIMESTAMPEDPRINT("Post-watch2");
}
......@@ -234,13 +267,21 @@ void BackendRados::LockWatcher::wait(const durationUs& timeout) {
}
BackendRados::LockWatcher::~LockWatcher() {
TIMESTAMPEDPRINT("Pre-unwatch2");
librados::Rados::aio_create_completion(m_internal.release(), nullptr, Internal::deleter);
TIMESTAMPEDPRINT("Post-unwatch2");
TIMESTAMPEDPRINT("Pre-aio_unwatch2");
m_internal->m_radosTimeoutLogger.reset();
auto name=m_internal->m_name;
librados::AioCompletion *completion = librados::Rados::aio_create_completion(m_internal.release(), nullptr, Internal::deleter);
RadosTimeoutLogger rtl;
m_context.aio_unwatch(m_watchHandle, completion);
completion->release();
rtl.logIfNeeded("In BackendRados::LockWatcher::~LockWatcher(): m_context.aio_unwatch() call", name);
TIMESTAMPEDPRINT("Post-aio_unwatch2");
}
void BackendRados::LockWatcher::Internal::deleter(librados::completion_t cb, void* i) {
delete (Internal *) i;
TIMESTAMPEDPRINT("Unwatch2 completion");
std::unique_ptr<Internal> internal((Internal *) i);
internal->m_radosTimeoutLogger.logIfNeeded("BackendRados::LockWatcher::Internal::deleter(): aio_unwatch callback", internal->m_name);
}
std::string BackendRados::createUniqueClientId() {
......@@ -254,7 +295,17 @@ std::string BackendRados::createUniqueClientId() {
return client.str();
}
void BackendRados::lock(std::string name, uint64_t timeout_us, LockType lockType,
void BackendRados::lock(std::string name, uint64_t timeout_us, LockType lockType, const std::string& clientId) {
#if RADOS_LOCKING_STRATEGY == NOTIFY
lockNotify(name, timeout_us, lockType, clientId);
#elif RADOS_LOCKING_STRATEGY == BACKOFF
lockBackoff(name, timeout_us, lockType, clientId);
#else
#error Wrong value for "RADOS_LOCKING_STRATEGY"
#endif
}
void BackendRados::lockNotify(std::string name, uint64_t timeout_us, LockType lockType,
const std::string & clientId) {
// In Rados, locking a non-existing object will create it. This is not our intended
// behavior. We will lock anyway, test the object and re-delete it if it has a size of 0
......@@ -267,10 +318,14 @@ void BackendRados::lock(std::string name, uint64_t timeout_us, LockType lockType
utils::Timer timeoutTimer;
while (true) {
TIMESTAMPEDPRINT(lockType==LockType::Shared?"Pre-lock (shared)":"Pre-lock (exclusive)");
if (lockType==LockType::Shared)
RadosTimeoutLogger rtl;
if (lockType==LockType::Shared) {
rc = m_radosCtx.lock_shared(name, "lock", client, "", "", &tv, 0);
else
rtl.logIfNeeded("In BackendRados::lockNotify(): m_radosCtx.lock_shared()", name);
} else {
rc = m_radosCtx.lock_exclusive(name, "lock", client, "", &tv, 0);
rtl.logIfNeeded("In BackendRados::lockNotify(): m_radosCtx.lock_exclusive()", name);
}
if (!rc) {
TIMESTAMPEDPRINT(lockType==LockType::Shared?"Post-lock (shared) (got it)":"Post-lock (exclusive) (got it)");
NOTIFYLOCKED();
......@@ -284,10 +339,14 @@ void BackendRados::lock(std::string name, uint64_t timeout_us, LockType lockType
LockWatcher watcher(m_radosCtx, name);
TIMESTAMPEDPRINT(lockType==LockType::Shared?"Post-watch-setup/Pre-relock (shared)":"Post-watch-setup/Pre-relock (exclusive)");
// We need to retry the lock after establishing the watch: it could have been released during that time.
if (lockType==LockType::Shared)
rtl.reset();
if (lockType==LockType::Shared) {
rc = m_radosCtx.lock_shared(name, "lock", client, "", "", &tv, 0);
else
rtl.logIfNeeded("In BackendRados::lockNotify(): m_radosCtx.lock_shared()", name);
} else {
rc = m_radosCtx.lock_exclusive(name, "lock", client, "", &tv, 0);
rtl.logIfNeeded("In BackendRados::lockNotify(): m_radosCtx.lock_exclusive()", name);
}
if (!rc) {
TIMESTAMPEDPRINT(lockType==LockType::Shared?"Post-relock (shared) (got it)":"Post-relock (exclusive) (got it)");
NOTIFYLOCKED();
......@@ -366,10 +425,13 @@ BackendRados::AsyncUpdater::AsyncUpdater(BackendRados& be, const std::string& na
m_backend.lock(m_name, 60*1000, BackendRados::LockType::Exclusive, m_lockClient);
// Locking is done, we can launch the read operation (async).
librados::AioCompletion * aioc = librados::Rados::aio_create_completion(this, fetchCallback, nullptr);
RadosTimeoutLogger rtl;
m_radosTimeoutLogger.reset();
auto rc=m_backend.m_radosCtx.aio_read(m_name, aioc, &m_radosBufferList, std::numeric_limits<int32_t>::max(), 0);
rtl.logIfNeeded("BackendRados::AsyncUpdater::AsyncUpdater::lock_lambda(): m_radosCtx.aio_read()", m_name);
aioc->release();
if (rc) {
cta::exception::Errnum errnum (-rc, std::string("In BackendRados::AsyncUpdater::statCallback(): failed to launch aio_read(): ")+m_name);
cta::exception::Errnum errnum (-rc, std::string("In BackendRados::AsyncUpdater::AsyncUpdater::lock_lambda(): failed to launch aio_read(): ")+m_name);
throw Backend::NoSuchObject(errnum.getMessageValue());
}
} catch (...) {
......@@ -403,6 +465,7 @@ void BackendRados::AsyncUpdater::deleteEmptyCallback(librados::completion_t comp
void BackendRados::AsyncUpdater::fetchCallback(librados::completion_t completion, void* pThis) {
AsyncUpdater & au = *((AsyncUpdater *) pThis);
au.m_radosTimeoutLogger.logIfNeeded("In BackendRados::AsyncUpdater::fetchCallback(): aio_read callback", au.m_name);
try {
// Check that the object could be read.
if (rados_aio_get_return_value(completion)<0) {
......@@ -462,7 +525,10 @@ void BackendRados::AsyncUpdater::fetchCallback(librados::completion_t completion
}
// Launch the write
librados::AioCompletion * aioc = librados::Rados::aio_create_completion(pThis, commitCallback, nullptr);
RadosTimeoutLogger rtl;
au.m_radosTimeoutLogger.reset();
auto rc=au.m_backend.m_radosCtx.aio_write_full(au.m_name, aioc, au.m_radosBufferList);
rtl.logIfNeeded("In BackendRados::AsyncUpdater::fetchCallback::update_lambda(): m_radosCtx.aio_write_full() call", au.m_name);
aioc->release();
if (rc) {
cta::exception::Errnum errnum (-rc,
......@@ -484,6 +550,7 @@ void BackendRados::AsyncUpdater::fetchCallback(librados::completion_t completion
void BackendRados::AsyncUpdater::commitCallback(librados::completion_t completion, void* pThis) {
AsyncUpdater & au = *((AsyncUpdater *) pThis);
au.m_radosTimeoutLogger.logIfNeeded("In BackendRados::AsyncUpdater::commitCallback(): aio_write_full() callback", au.m_name);
try {
// Check that the object could be written.
if (rados_aio_get_return_value(completion)) {
......@@ -493,7 +560,10 @@ void BackendRados::AsyncUpdater::commitCallback(librados::completion_t completio
}
// Launch the async unlock.
librados::AioCompletion * aioc = librados::Rados::aio_create_completion(pThis, unlockCallback, nullptr);
au.m_radosTimeoutLogger.reset();
RadosTimeoutLogger rtl;
auto rc=au.m_backend.m_radosCtx.aio_unlock(au.m_name, "lock", au.m_lockClient, aioc);
rtl.logIfNeeded("In BackendRados::AsyncUpdater::commitCallback(): m_radosCtx.aio_unlock", au.m_name);
aioc->release();
if (rc) {
cta::exception::Errnum errnum (-rc, std::string("In BackendRados::AsyncUpdater::commitCallback(): failed to launch aio_unlock()")+au.m_name);
......@@ -507,6 +577,7 @@ void BackendRados::AsyncUpdater::commitCallback(librados::completion_t completio
void BackendRados::AsyncUpdater::unlockCallback(librados::completion_t completion, void* pThis) {
AsyncUpdater & au = *((AsyncUpdater *) pThis);
au.m_radosTimeoutLogger.logIfNeeded("In BackendRados::AsyncUpdater::unlockCallback(): aio_unlock() callback", au.m_name);
try {
// Check that the object could be unlocked.
if (rados_aio_get_return_value(completion)) {
......@@ -549,7 +620,10 @@ BackendRados::AsyncDeleter::AsyncDeleter(BackendRados& be, const std::string& na
m_backend.lock(m_name, 60*1000, BackendRados::LockType::Exclusive, m_lockClient);
// Locking is done, we can launch the remove operation (async).
librados::AioCompletion * aioc = librados::Rados::aio_create_completion(this, deleteCallback, nullptr);
m_radosTimeoutLogger.reset();
RadosTimeoutLogger rtl;
auto rc=m_backend.m_radosCtx.aio_remove(m_name, aioc);
rtl.logIfNeeded("In BackendRados::AsyncDeleter::AsyncDeleter(): m_radosCtx.aio_remove() call", m_name);
aioc->release();
if (rc) {
cta::exception::Errnum errnum (-rc, std::string("In BackendRados::AsyncUpdater::statCallback(): failed to launch aio_remove(): ")+m_name);
......@@ -569,6 +643,7 @@ BackendRados::AsyncDeleter::AsyncDeleter(BackendRados& be, const std::string& na
void BackendRados::AsyncDeleter::deleteCallback(librados::completion_t completion, void* pThis) {
AsyncDeleter & au = *((AsyncDeleter *) pThis);
au.m_radosTimeoutLogger.logIfNeeded("In BackendRados::AsyncDeleter::deleteCallback(): aio_remove() callback", au.m_name);
try {
// Check that the object could be deleted.
if (rados_aio_get_return_value(completion)) {
......
......@@ -23,6 +23,22 @@
#include "common/threading/Mutex.hpp"
#include <future>
// RADOS_LOCKING can be NOTIFY or BACKOFF
#define RADOS_LOCKING_STRATEGY NOTIFY
// Define this to get long response times logging.
#define RADOS_SLOW_CALLS_LOGGING
#define RADOS_SLOW_CALLS_LOGGING_FILE "/var/tmp/cta-rados-slow-calls.log"
#define RADOS_SLOW_CALL_TIMEOUT 1
#ifdef RADOS_SLOW_CALLS_LOGGING
#include "common/Timer.hpp"
#include "common/threading/Mutex.hpp"
#include "common/threading/MutexLocker.hpp"
#include <fstream>
#include <iomanip>
#include <algorithm>
#endif //RADOS_SLOW_CALLS_LOGGING
namespace cta { namespace objectstore {
/**
......@@ -69,6 +85,10 @@ public:
friend class BackendRados;
public:
void release() override;
private:
inline void releaseBackoff();
inline void releaseNotify();
public:
~ScopedLock() override;
private:
ScopedLock(librados::IoCtx & ioCtx): m_lockSet(false), m_context(ioCtx) {}
......@@ -88,12 +108,45 @@ private:
static std::string createUniqueClientId();
/** This function will lock or die (actually throw, that is) */
void lock(std::string name, uint64_t timeout_us, LockType lockType, const std::string & clientId);
inline void lockBackoff(std::string name, uint64_t timeout_us, LockType lockType, const std::string & clientId);
inline void lockNotify(std::string name, uint64_t timeout_us, LockType lockType, const std::string & clientId);
public:
ScopedLock * lockExclusive(std::string name, uint64_t timeout_us=0) override;
ScopedLock * lockShared(std::string name, uint64_t timeout_us=0) override;
private:
/**
* A class for logging the calls to rados taking too long.
* If RADOS_SLOW_CALLS_LOGGING is not defined, this is just an empty shell.
*/
class RadosTimeoutLogger {
public:
void logIfNeeded(const std::string & radosCall, const std::string & objectName) {
#ifdef RADOS_SLOW_CALLS_LOGGING
if (m_timer.secs() >= RADOS_SLOW_CALL_TIMEOUT) {
cta::threading::MutexLocker ml(g_mutex);
std::ofstream logFile(RADOS_SLOW_CALLS_LOGGING_FILE, std::ofstream::app);
std::time_t end_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
std::string date=std::ctime(&end_time);
// Chomp newline in the end
date.erase(std::remove(date.begin(), date.end(), '\n'), date.end());
logFile << date << " op=" << radosCall << " obj=" << objectName << " duration=" << m_timer.secs() << std::endl;
}
#endif //RADOS_SLOW_CALLS_LOGGING
}
void reset() {
#ifdef RADOS_SLOW_CALLS_LOGGING
m_timer.reset();
#endif //RADOS_SLOW_CALLS_LOGGING
}
private:
#ifdef RADOS_SLOW_CALLS_LOGGING
cta::utils::Timer m_timer;
static cta::threading::Mutex g_mutex;
#endif //RADOS_SLOW_CALLS_LOGGING
};
/**
* A class handling the watch part when waiting for a lock.
*/
......@@ -118,10 +171,11 @@ private:
bool m_promiseSet = false;
std::promise<void> m_promise;
std::future<void> m_future;
RadosTimeoutLogger m_radosTimeoutLogger;
std::string m_name;
};
std::unique_ptr<Internal> m_internal;
librados::IoCtx & m_context;
std::string m_name;
uint64_t m_watchHandle;
};
......@@ -165,6 +219,8 @@ public:
static void commitCallback(librados::completion_t completion, void *pThis);
/** The fourth callback operation (after unlocking) */
static void unlockCallback(librados::completion_t completion, void *pThis);
/** Instrumentation for rados calls timing */
RadosTimeoutLogger m_radosTimeoutLogger;
};
Backend::AsyncUpdater* asyncUpdate(const std::string & name, std::function <std::string(const std::string &)> & update) override;
......@@ -200,6 +256,8 @@ public:
static void deleteCallback(librados::completion_t completion, void *pThis);
/** Async delete in case of zero sized object */
static void deleteEmptyCallback(librados::completion_t completion, void *pThis);
/** Instrumentation for rados calls timing */
RadosTimeoutLogger m_radosTimeoutLogger;
};
Backend::AsyncDeleter* asyncDelete(const std::string & name) override;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment