diff --git a/objectstore/BackendRados.cpp b/objectstore/BackendRados.cpp index 58b3e5e3d31b4d5a148d7d19c4fc13f71c1ea17e..2702c18c4d57e9444608454f5112fb4bcb0b3a93 100644 --- a/objectstore/BackendRados.cpp +++ b/objectstore/BackendRados.cpp @@ -78,6 +78,8 @@ void notifyLocked() { namespace cta { namespace objectstore { +cta::threading::Mutex BackendRados::RadosTimeoutLogger::g_mutex; + BackendRados::BackendRados(const std::string & userId, const std::string & pool, const std::string &radosNameSpace) : m_user(userId), m_pool(pool), m_namespace(radosNameSpace), m_cluster(), m_radosCtx() { cta::exception::Errnum::throwOnReturnedErrno(-m_cluster.init(userId.c_str()), @@ -100,8 +102,12 @@ m_user(userId), m_pool(pool), m_namespace(radosNameSpace), m_cluster(), m_radosC } BackendRados::~BackendRados() { + RadosTimeoutLogger rtl1; m_radosCtx.close(); + rtl1.logIfNeeded("In BackendRados::~BackendRados(): m_radosCtx.close()", "no object"); + RadosTimeoutLogger rtl2; m_cluster.shutdown(); + rtl2.logIfNeeded("In BackendRados::~BackendRados(): m_cluster.shutdown()", "no object"); } void BackendRados::create(std::string name, std::string content) { @@ -111,9 +117,11 @@ void BackendRados::create(std::string name, std::string content) { ceph::bufferlist bl; bl.append(content.c_str(), content.size()); wop.write_full(bl); + RadosTimeoutLogger rtl; cta::exception::Errnum::throwOnReturnedErrno(-m_radosCtx.operate(name, &wop), std::string("In ObjectStoreRados::create, failed to create exclusively or write: ") + name); + rtl.logIfNeeded("In BackendRados::create(): m_radosCtx.operate(create+write_full)", name); } void BackendRados::atomicOverwrite(std::string name, std::string content) { @@ -122,29 +130,38 @@ void BackendRados::atomicOverwrite(std::string name, std::string content) { ceph::bufferlist bl; bl.append(content.c_str(), content.size()); wop.write_full(bl); + RadosTimeoutLogger rtl; cta::exception::Errnum::throwOnReturnedErrno(-m_radosCtx.operate(name, &wop), std::string("In ObjectStoreRados::atomicOverwrite, failed to assert existence or write: ") + name); + rtl.logIfNeeded("In BackendRados::atomicOverwrite(): m_radosCtx.operate(assert_exists+write_full)", name); } std::string BackendRados::read(std::string name) { std::string ret; librados::bufferlist bl; + RadosTimeoutLogger rtl; cta::exception::Errnum::throwOnNegativeErrnoIfNegative(m_radosCtx.read(name, bl, std::numeric_limits<int32_t>::max(), 0), std::string("In ObjectStoreRados::read, failed to read: ") + name); + rtl.logIfNeeded("In BackendRados::read(): m_radosCtx.read()", name); bl.copy(0, bl.length(), ret); return ret; } void BackendRados::remove(std::string name) { + RadosTimeoutLogger rtl; cta::exception::Errnum::throwOnReturnedErrno(-m_radosCtx.remove(name)); + rtl.logIfNeeded("In BackendRados::remove(): m_radosCtx.remove()", name); } bool BackendRados::exists(std::string name) { uint64_t size; time_t date; - if (m_radosCtx.stat(name, &size, &date)) { + RadosTimeoutLogger rtl; + auto statRet=m_radosCtx.stat(name, &size, &date); + rtl.logIfNeeded("In BackendRados::exists(): m_radosCtx.stat()", name); + if (statRet) { return false; } else { return true; @@ -159,14 +176,25 @@ std::list<std::string> BackendRados::list() { return ret; } - void BackendRados::ScopedLock::release() { +#if RADOS_LOCKING_STRATEGY == NOTIFY + releaseNotify(); +#elif RADOS_LOCKING_STRATEGY == BACKOFF + releaseBackoff(); +#else +#error Wrong value for "RADOS_LOCKING_STRATEGY" +#endif +} + +void BackendRados::ScopedLock::releaseNotify() { if (!m_lockSet) return; // We should be tolerant with unlocking a deleted object, which is part // of the lock-remove-(implicit unlock) cycle when we delete an object // we hence overlook the ENOENT errors. TIMESTAMPEDPRINT("Pre-release"); + RadosTimeoutLogger rtl1; int rc=m_context.unlock(m_oid, "lock", m_clientId); + rtl1.logIfNeeded("In BackendRados::ScopedLock::releaseNotify(): m_context.unlock()", m_oid); switch (-rc) { case ENOENT: break; @@ -181,10 +209,12 @@ void BackendRados::ScopedLock::release() { // Notify potential waiters to take their chances now on the lock. utils::Timer t; librados::bufferlist bl; - // We use a fire aand forget aio call. + // We use a fire and forget aio call. librados::AioCompletion * completion = librados::Rados::aio_create_completion(nullptr, nullptr, nullptr); + RadosTimeoutLogger rtl2; cta::exception::Errnum::throwOnReturnedErrno(-m_context.aio_notify(m_oid, completion, bl, 10000, nullptr), "In BackendRados::ScopedLock::release(): failed to aio_notify()"); + rtl2.logIfNeeded("In BackendRados::ScopedLock::releaseNotify(): m_context.aio_notify()", m_oid); completion->release(); m_lockSet = false; TIMESTAMPEDPRINT("Post-notify"); @@ -203,11 +233,14 @@ BackendRados::ScopedLock::~ScopedLock() { } BackendRados::LockWatcher::LockWatcher(librados::IoCtx& context, const std::string& name): - m_context(context), m_name(name) { + m_context(context) { m_internal.reset(new Internal); + m_internal->m_name = name; m_internal->m_future = m_internal->m_promise.get_future(); TIMESTAMPEDPRINT("Pre-watch2"); + RadosTimeoutLogger rtl; cta::exception::Errnum::throwOnReturnedErrno(-m_context.watch2(name, &m_watchHandle, m_internal.get())); + rtl.logIfNeeded("In BackendRados::LockWatcher::LockWatcher(): m_context.watch2()", name); TIMESTAMPEDPRINT("Post-watch2"); } @@ -234,13 +267,21 @@ void BackendRados::LockWatcher::wait(const durationUs& timeout) { } BackendRados::LockWatcher::~LockWatcher() { - TIMESTAMPEDPRINT("Pre-unwatch2"); - librados::Rados::aio_create_completion(m_internal.release(), nullptr, Internal::deleter); - TIMESTAMPEDPRINT("Post-unwatch2"); + TIMESTAMPEDPRINT("Pre-aio_unwatch2"); + m_internal->m_radosTimeoutLogger.reset(); + auto name=m_internal->m_name; + librados::AioCompletion *completion = librados::Rados::aio_create_completion(m_internal.release(), nullptr, Internal::deleter); + RadosTimeoutLogger rtl; + m_context.aio_unwatch(m_watchHandle, completion); + completion->release(); + rtl.logIfNeeded("In BackendRados::LockWatcher::~LockWatcher(): m_context.aio_unwatch() call", name); + TIMESTAMPEDPRINT("Post-aio_unwatch2"); } void BackendRados::LockWatcher::Internal::deleter(librados::completion_t cb, void* i) { - delete (Internal *) i; + TIMESTAMPEDPRINT("Unwatch2 completion"); + std::unique_ptr<Internal> internal((Internal *) i); + internal->m_radosTimeoutLogger.logIfNeeded("BackendRados::LockWatcher::Internal::deleter(): aio_unwatch callback", internal->m_name); } std::string BackendRados::createUniqueClientId() { @@ -254,7 +295,17 @@ std::string BackendRados::createUniqueClientId() { return client.str(); } -void BackendRados::lock(std::string name, uint64_t timeout_us, LockType lockType, +void BackendRados::lock(std::string name, uint64_t timeout_us, LockType lockType, const std::string& clientId) { +#if RADOS_LOCKING_STRATEGY == NOTIFY + lockNotify(name, timeout_us, lockType, clientId); +#elif RADOS_LOCKING_STRATEGY == BACKOFF + lockBackoff(name, timeout_us, lockType, clientId); +#else +#error Wrong value for "RADOS_LOCKING_STRATEGY" +#endif +} + +void BackendRados::lockNotify(std::string name, uint64_t timeout_us, LockType lockType, const std::string & clientId) { // In Rados, locking a non-existing object will create it. This is not our intended // behavior. We will lock anyway, test the object and re-delete it if it has a size of 0 @@ -267,10 +318,14 @@ void BackendRados::lock(std::string name, uint64_t timeout_us, LockType lockType utils::Timer timeoutTimer; while (true) { TIMESTAMPEDPRINT(lockType==LockType::Shared?"Pre-lock (shared)":"Pre-lock (exclusive)"); - if (lockType==LockType::Shared) + RadosTimeoutLogger rtl; + if (lockType==LockType::Shared) { rc = m_radosCtx.lock_shared(name, "lock", client, "", "", &tv, 0); - else + rtl.logIfNeeded("In BackendRados::lockNotify(): m_radosCtx.lock_shared()", name); + } else { rc = m_radosCtx.lock_exclusive(name, "lock", client, "", &tv, 0); + rtl.logIfNeeded("In BackendRados::lockNotify(): m_radosCtx.lock_exclusive()", name); + } if (!rc) { TIMESTAMPEDPRINT(lockType==LockType::Shared?"Post-lock (shared) (got it)":"Post-lock (exclusive) (got it)"); NOTIFYLOCKED(); @@ -284,10 +339,14 @@ void BackendRados::lock(std::string name, uint64_t timeout_us, LockType lockType LockWatcher watcher(m_radosCtx, name); TIMESTAMPEDPRINT(lockType==LockType::Shared?"Post-watch-setup/Pre-relock (shared)":"Post-watch-setup/Pre-relock (exclusive)"); // We need to retry the lock after establishing the watch: it could have been released during that time. - if (lockType==LockType::Shared) + rtl.reset(); + if (lockType==LockType::Shared) { rc = m_radosCtx.lock_shared(name, "lock", client, "", "", &tv, 0); - else + rtl.logIfNeeded("In BackendRados::lockNotify(): m_radosCtx.lock_shared()", name); + } else { rc = m_radosCtx.lock_exclusive(name, "lock", client, "", &tv, 0); + rtl.logIfNeeded("In BackendRados::lockNotify(): m_radosCtx.lock_exclusive()", name); + } if (!rc) { TIMESTAMPEDPRINT(lockType==LockType::Shared?"Post-relock (shared) (got it)":"Post-relock (exclusive) (got it)"); NOTIFYLOCKED(); @@ -366,10 +425,13 @@ BackendRados::AsyncUpdater::AsyncUpdater(BackendRados& be, const std::string& na m_backend.lock(m_name, 60*1000, BackendRados::LockType::Exclusive, m_lockClient); // Locking is done, we can launch the read operation (async). librados::AioCompletion * aioc = librados::Rados::aio_create_completion(this, fetchCallback, nullptr); + RadosTimeoutLogger rtl; + m_radosTimeoutLogger.reset(); auto rc=m_backend.m_radosCtx.aio_read(m_name, aioc, &m_radosBufferList, std::numeric_limits<int32_t>::max(), 0); + rtl.logIfNeeded("BackendRados::AsyncUpdater::AsyncUpdater::lock_lambda(): m_radosCtx.aio_read()", m_name); aioc->release(); if (rc) { - cta::exception::Errnum errnum (-rc, std::string("In BackendRados::AsyncUpdater::statCallback(): failed to launch aio_read(): ")+m_name); + cta::exception::Errnum errnum (-rc, std::string("In BackendRados::AsyncUpdater::AsyncUpdater::lock_lambda(): failed to launch aio_read(): ")+m_name); throw Backend::NoSuchObject(errnum.getMessageValue()); } } catch (...) { @@ -403,6 +465,7 @@ void BackendRados::AsyncUpdater::deleteEmptyCallback(librados::completion_t comp void BackendRados::AsyncUpdater::fetchCallback(librados::completion_t completion, void* pThis) { AsyncUpdater & au = *((AsyncUpdater *) pThis); + au.m_radosTimeoutLogger.logIfNeeded("In BackendRados::AsyncUpdater::fetchCallback(): aio_read callback", au.m_name); try { // Check that the object could be read. if (rados_aio_get_return_value(completion)<0) { @@ -462,7 +525,10 @@ void BackendRados::AsyncUpdater::fetchCallback(librados::completion_t completion } // Launch the write librados::AioCompletion * aioc = librados::Rados::aio_create_completion(pThis, commitCallback, nullptr); + RadosTimeoutLogger rtl; + au.m_radosTimeoutLogger.reset(); auto rc=au.m_backend.m_radosCtx.aio_write_full(au.m_name, aioc, au.m_radosBufferList); + rtl.logIfNeeded("In BackendRados::AsyncUpdater::fetchCallback::update_lambda(): m_radosCtx.aio_write_full() call", au.m_name); aioc->release(); if (rc) { cta::exception::Errnum errnum (-rc, @@ -484,6 +550,7 @@ void BackendRados::AsyncUpdater::fetchCallback(librados::completion_t completion void BackendRados::AsyncUpdater::commitCallback(librados::completion_t completion, void* pThis) { AsyncUpdater & au = *((AsyncUpdater *) pThis); + au.m_radosTimeoutLogger.logIfNeeded("In BackendRados::AsyncUpdater::commitCallback(): aio_write_full() callback", au.m_name); try { // Check that the object could be written. if (rados_aio_get_return_value(completion)) { @@ -493,7 +560,10 @@ void BackendRados::AsyncUpdater::commitCallback(librados::completion_t completio } // Launch the async unlock. librados::AioCompletion * aioc = librados::Rados::aio_create_completion(pThis, unlockCallback, nullptr); + au.m_radosTimeoutLogger.reset(); + RadosTimeoutLogger rtl; auto rc=au.m_backend.m_radosCtx.aio_unlock(au.m_name, "lock", au.m_lockClient, aioc); + rtl.logIfNeeded("In BackendRados::AsyncUpdater::commitCallback(): m_radosCtx.aio_unlock", au.m_name); aioc->release(); if (rc) { cta::exception::Errnum errnum (-rc, std::string("In BackendRados::AsyncUpdater::commitCallback(): failed to launch aio_unlock()")+au.m_name); @@ -507,6 +577,7 @@ void BackendRados::AsyncUpdater::commitCallback(librados::completion_t completio void BackendRados::AsyncUpdater::unlockCallback(librados::completion_t completion, void* pThis) { AsyncUpdater & au = *((AsyncUpdater *) pThis); + au.m_radosTimeoutLogger.logIfNeeded("In BackendRados::AsyncUpdater::unlockCallback(): aio_unlock() callback", au.m_name); try { // Check that the object could be unlocked. if (rados_aio_get_return_value(completion)) { @@ -549,7 +620,10 @@ BackendRados::AsyncDeleter::AsyncDeleter(BackendRados& be, const std::string& na m_backend.lock(m_name, 60*1000, BackendRados::LockType::Exclusive, m_lockClient); // Locking is done, we can launch the remove operation (async). librados::AioCompletion * aioc = librados::Rados::aio_create_completion(this, deleteCallback, nullptr); + m_radosTimeoutLogger.reset(); + RadosTimeoutLogger rtl; auto rc=m_backend.m_radosCtx.aio_remove(m_name, aioc); + rtl.logIfNeeded("In BackendRados::AsyncDeleter::AsyncDeleter(): m_radosCtx.aio_remove() call", m_name); aioc->release(); if (rc) { cta::exception::Errnum errnum (-rc, std::string("In BackendRados::AsyncUpdater::statCallback(): failed to launch aio_remove(): ")+m_name); @@ -569,6 +643,7 @@ BackendRados::AsyncDeleter::AsyncDeleter(BackendRados& be, const std::string& na void BackendRados::AsyncDeleter::deleteCallback(librados::completion_t completion, void* pThis) { AsyncDeleter & au = *((AsyncDeleter *) pThis); + au.m_radosTimeoutLogger.logIfNeeded("In BackendRados::AsyncDeleter::deleteCallback(): aio_remove() callback", au.m_name); try { // Check that the object could be deleted. if (rados_aio_get_return_value(completion)) { diff --git a/objectstore/BackendRados.hpp b/objectstore/BackendRados.hpp index 927599643011ad99298fd078bcabbbefde9f8900..d1e67aeb47aeb456ad54607c98b645b67445681b 100644 --- a/objectstore/BackendRados.hpp +++ b/objectstore/BackendRados.hpp @@ -23,6 +23,22 @@ #include "common/threading/Mutex.hpp" #include <future> +// RADOS_LOCKING can be NOTIFY or BACKOFF +#define RADOS_LOCKING_STRATEGY NOTIFY + +// Define this to get long response times logging. +#define RADOS_SLOW_CALLS_LOGGING +#define RADOS_SLOW_CALLS_LOGGING_FILE "/var/tmp/cta-rados-slow-calls.log" +#define RADOS_SLOW_CALL_TIMEOUT 1 + +#ifdef RADOS_SLOW_CALLS_LOGGING +#include "common/Timer.hpp" +#include "common/threading/Mutex.hpp" +#include "common/threading/MutexLocker.hpp" +#include <fstream> +#include <iomanip> +#include <algorithm> +#endif //RADOS_SLOW_CALLS_LOGGING namespace cta { namespace objectstore { /** @@ -69,6 +85,10 @@ public: friend class BackendRados; public: void release() override; + private: + inline void releaseBackoff(); + inline void releaseNotify(); + public: ~ScopedLock() override; private: ScopedLock(librados::IoCtx & ioCtx): m_lockSet(false), m_context(ioCtx) {} @@ -88,12 +108,45 @@ private: static std::string createUniqueClientId(); /** This function will lock or die (actually throw, that is) */ void lock(std::string name, uint64_t timeout_us, LockType lockType, const std::string & clientId); + inline void lockBackoff(std::string name, uint64_t timeout_us, LockType lockType, const std::string & clientId); + inline void lockNotify(std::string name, uint64_t timeout_us, LockType lockType, const std::string & clientId); public: ScopedLock * lockExclusive(std::string name, uint64_t timeout_us=0) override; ScopedLock * lockShared(std::string name, uint64_t timeout_us=0) override; private: + /** + * A class for logging the calls to rados taking too long. + * If RADOS_SLOW_CALLS_LOGGING is not defined, this is just an empty shell. + */ + class RadosTimeoutLogger { + public: + void logIfNeeded(const std::string & radosCall, const std::string & objectName) { + #ifdef RADOS_SLOW_CALLS_LOGGING + if (m_timer.secs() >= RADOS_SLOW_CALL_TIMEOUT) { + cta::threading::MutexLocker ml(g_mutex); + std::ofstream logFile(RADOS_SLOW_CALLS_LOGGING_FILE, std::ofstream::app); + std::time_t end_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + std::string date=std::ctime(&end_time); + // Chomp newline in the end + date.erase(std::remove(date.begin(), date.end(), '\n'), date.end()); + logFile << date << " op=" << radosCall << " obj=" << objectName << " duration=" << m_timer.secs() << std::endl; + } + #endif //RADOS_SLOW_CALLS_LOGGING + } + void reset() { + #ifdef RADOS_SLOW_CALLS_LOGGING + m_timer.reset(); + #endif //RADOS_SLOW_CALLS_LOGGING + } + private: + #ifdef RADOS_SLOW_CALLS_LOGGING + cta::utils::Timer m_timer; + static cta::threading::Mutex g_mutex; + #endif //RADOS_SLOW_CALLS_LOGGING + }; + /** * A class handling the watch part when waiting for a lock. */ @@ -118,10 +171,11 @@ private: bool m_promiseSet = false; std::promise<void> m_promise; std::future<void> m_future; + RadosTimeoutLogger m_radosTimeoutLogger; + std::string m_name; }; std::unique_ptr<Internal> m_internal; librados::IoCtx & m_context; - std::string m_name; uint64_t m_watchHandle; }; @@ -165,6 +219,8 @@ public: static void commitCallback(librados::completion_t completion, void *pThis); /** The fourth callback operation (after unlocking) */ static void unlockCallback(librados::completion_t completion, void *pThis); + /** Instrumentation for rados calls timing */ + RadosTimeoutLogger m_radosTimeoutLogger; }; Backend::AsyncUpdater* asyncUpdate(const std::string & name, std::function <std::string(const std::string &)> & update) override; @@ -200,6 +256,8 @@ public: static void deleteCallback(librados::completion_t completion, void *pThis); /** Async delete in case of zero sized object */ static void deleteEmptyCallback(librados::completion_t completion, void *pThis); + /** Instrumentation for rados calls timing */ + RadosTimeoutLogger m_radosTimeoutLogger; }; Backend::AsyncDeleter* asyncDelete(const std::string & name) override;