From 214f79ab8109e265a9454f720e2f9f3bc9e09b96 Mon Sep 17 00:00:00 2001 From: Joao Afonso <joao.afonso@cern.ch> Date: Mon, 12 Dec 2022 17:40:28 +0100 Subject: [PATCH] Fix misplaced WARNING messages triggered by the queue cleanup runner --- objectstore/QueueCleanupRunner.cpp | 40 +++++++++++++++--------------- scheduler/OStoreDB/OStoreDB.cpp | 21 ++-------------- 2 files changed, 22 insertions(+), 39 deletions(-) diff --git a/objectstore/QueueCleanupRunner.cpp b/objectstore/QueueCleanupRunner.cpp index 9a11c9f422..21a6c4228a 100644 --- a/objectstore/QueueCleanupRunner.cpp +++ b/objectstore/QueueCleanupRunner.cpp @@ -39,20 +39,6 @@ void QueueCleanupRunner::runOnePass(log::LogContext &logContext) { for (auto queue: queuesForCleanup) { - cta::common::dataStructures::Tape tapeToCheck; - - try { - auto vidToTapesMap = m_catalogue.getTapesByVid(queue.vid); //throws an exception if the vid is not found on the database - tapeToCheck = vidToTapesMap.at(queue.vid); - } catch (const exception::UserError &ex) { - log::ScopedParamContainer params(logContext); - params.add("tapeVid", queue.vid) - .add("cleanupFlag", queue.doCleanup) - .add("exceptionMessage", ex.getMessageValue()); - logContext.log(log::WARNING, "WARNING: In QueueCleanupRunner::runOnePass(): failed to find a tape in the database. Skipping it."); - continue; // Ignore queue - } - if (!queue.doCleanup) { // Do not clean a queue that does not have the cleanup flag set true continue; // Ignore queue @@ -87,16 +73,30 @@ void QueueCleanupRunner::runOnePass(log::LogContext &logContext) { } } + cta::common::dataStructures::Tape tapeToCheck; + + try { + auto vidToTapesMap = m_catalogue.getTapesByVid(queue.vid); //throws an exception if the vid is not found on the database + tapeToCheck = vidToTapesMap.at(queue.vid); + } catch (const exception::UserError &ex) { + log::ScopedParamContainer params(logContext); + params.add("tapeVid", queue.vid) + .add("cleanupFlag", queue.doCleanup) + .add("exceptionMessage", ex.getMessageValue()); + logContext.log(log::WARNING, "WARNING: In QueueCleanupRunner::runOnePass(): failed to find a tape in the database. Skipping it."); + continue; // Ignore queue + } + if (tapeToCheck.state != common::dataStructures::Tape::REPACKING_PENDING && tapeToCheck.state != common::dataStructures::Tape::BROKEN_PENDING && tapeToCheck.state != common::dataStructures::Tape::EXPORTED_PENDING) { // Do not cleanup a tape that is not in a X_PENDING state log::ScopedParamContainer params(logContext); params.add("tapeVid", queue.vid) - .add("cleanupFlag", queue.doCleanup) - .add("tapeState", common::dataStructures::Tape::stateToString(tapeToCheck.state)); + .add("cleanupFlag", queue.doCleanup) + .add("tapeState", common::dataStructures::Tape::stateToString(tapeToCheck.state)); logContext.log( - log::WARNING, + log::INFO, "In QueueCleanupRunner::runOnePass(): Queue is has cleanup flag enabled but is not in the expected PENDING state. Skipping it."); continue; } @@ -124,13 +124,13 @@ void QueueCleanupRunner::runOnePass(log::LogContext &logContext) { } catch (OStoreDB::RetrieveQueueNotFound & ex) { log::ScopedParamContainer paramsExcMsg(logContext); paramsExcMsg.add("exceptionMessage", ex.getMessageValue()); - logContext.log(log::WARNING, + logContext.log(log::DEBUG, "In QueueCleanupRunner::runOnePass(): Unable to find the retrieve queue for cleanup. Queue may have already been deleted. Skipping it."); continue; } catch (OStoreDB::RetrieveQueueNotReservedForCleanup & ex) { log::ScopedParamContainer paramsExcMsg(logContext); paramsExcMsg.add("exceptionMessage", ex.getMessageValue()); - logContext.log(log::WARNING, + logContext.log(log::DEBUG, "In QueueCleanupRunner::runOnePass(): Unable to reserve the retrieve queue due to it not being available for cleanup. Skipping it."); continue; } catch (cta::exception::Exception & ex) { @@ -220,7 +220,7 @@ void QueueCleanupRunner::runOnePass(log::LogContext &logContext) { paramsWarnMsg.add("tapeVid", qForCleanup.vid) .add("expectedPrevState", common::dataStructures::Tape::stateToString(qForCleanup.tapeState)) .add("actualPrevState", common::dataStructures::Tape::stateToString(tapeToModify.state)); - logContext.log(log::WARNING, "WARNING: In QueueCleanupRunner::runOnePass(): Cleaned up tape is not in a PENDING state. Unable to change it to its corresponding final state."); + logContext.log(log::WARNING, "In QueueCleanupRunner::runOnePass(): Cleaned up tape is not in a PENDING state. Unable to change it to its corresponding final state."); break; } } diff --git a/scheduler/OStoreDB/OStoreDB.cpp b/scheduler/OStoreDB/OStoreDB.cpp index e61dc5a792..66c347d65a 100644 --- a/scheduler/OStoreDB/OStoreDB.cpp +++ b/scheduler/OStoreDB/OStoreDB.cpp @@ -197,17 +197,13 @@ std::list<SchedulerDatabase::RetrieveQueueCleanupInfo> OStoreDB::getRetrieveQueu // Get all tapes with retrieve queues objectstore::RootEntry re(m_objectStore); re.fetchNoLock(); - auto rootFetchNoLockTime = t.secs(utils::Timer::resetCounter); // Walk the retrieve queues for cleanup flag for (auto &rqp: re.dumpRetrieveQueues(common::dataStructures::JobQueueType::JobsToTransferForUser)) { RetrieveQueue rqueue(rqp.address, m_objectStore); - double queueLockTime = 0; - double queueFetchTime = 0; try { rqueue.fetchNoLock(); - queueFetchTime = t.secs(utils::Timer::resetCounter); } catch (cta::exception::Exception &ex) { log::ScopedParamContainer params(logContext); params.add("queueObject", rqp.address) @@ -223,19 +219,6 @@ std::list<SchedulerDatabase::RetrieveQueueCleanupInfo> OStoreDB::getRetrieveQueu ret.back().doCleanup = rqueue.getQueueCleanupDoCleanup(); ret.back().assignedAgent = rqueue.getQueueCleanupAssignedAgent(); ret.back().heartbeat = rqueue.getQueueCleanupHeartbeat(); - - auto processingTime = t.secs(utils::Timer::resetCounter); - log::ScopedParamContainer params(logContext); - params.add("queueObject", rqp.address) - .add("tapeVid", rqp.vid) - .add("rootFetchNoLockTime", rootFetchNoLockTime) - .add("queueLockTime", queueLockTime) - .add("queueFetchTime", queueFetchTime) - .add("processingTime", processingTime); - if (queueLockTime > 1 || queueFetchTime > 1) { - logContext.log(log::WARNING, - "In OStoreDB::getRetrieveQueuesCleanupInfo(): fetched a retrieve queue and that lasted more than 1 second."); - } } return ret; } @@ -2008,8 +1991,8 @@ void OStoreDB::reserveRetrieveQueueForCleanup(std::string & vid, std::optional<u } // Check if heartbeat has been updated, which means that another agent is still tracking it - if (rq.getQueueCleanupAssignedAgent().has_value() && cleanupHeartBeatValue.has_value()) { - if (cleanupHeartBeatValue.value() != rq.getQueueCleanupHeartbeat()) { + if (rq.getQueueCleanupAssignedAgent().has_value()) { + if (rq.getQueueCleanupHeartbeat() != cleanupHeartBeatValue.has_value() ? cleanupHeartBeatValue.value() : 0) { throw RetrieveQueueNotReservedForCleanup("Another agent is alive and cleaning up the queue. Skipping it."); } } -- GitLab