diff --git a/ReleaseNotes.md b/ReleaseNotes.md index 36ab865fbc60db1d1bedecf6ed691a80a0351253..84b09bed4a02a5b95afebaf9e9a6974bfa5531cf 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -2,6 +2,7 @@ ## Summary ### Features +- cta/CTA#222 - Review what gets logged in CTA - cta/CTA#155 - Tape server querying DB at very high rate - cta/CTA#224 - Improve error message for cta-verify-file whn VID does not exist - cta/CTA#230 - Modify CTA code to enforce VID uppercase diff --git a/objectstore/AgentHeartbeatThread.cpp b/objectstore/AgentHeartbeatThread.cpp index f83846ba02d55b8ec5b86f5a7dec1a7bf924d009..315006cfec3126623f6de54323737510a65fffad 100644 --- a/objectstore/AgentHeartbeatThread.cpp +++ b/objectstore/AgentHeartbeatThread.cpp @@ -58,7 +58,7 @@ void AgentHeartbeatThread::run() { log::ScopedParamContainer params(lc); params.add("Message", ex.getMessageValue()); lc.log(log::CRIT, "In AgentHeartbeatThread::run(): exception while bumping heartbeat. Backtrace follows. Exiting (segfault)."); - lc.logBacktrace(log::ERR, ex.backtrace()); + lc.logBacktrace(log::INFO, ex.backtrace()); cta::utils::segfault(); ::exit(EXIT_FAILURE); } diff --git a/objectstore/BackendPopulator.cpp b/objectstore/BackendPopulator.cpp index dc5b1769f28bc86b84df0a8238535af1fc6837b1..a7999a606dd15d203e6ae3246063acba75fed08e 100644 --- a/objectstore/BackendPopulator.cpp +++ b/objectstore/BackendPopulator.cpp @@ -77,7 +77,7 @@ BackendPopulator::~BackendPopulator() throw() { cta::log::ScopedParamContainer params(m_lc); params.add("errorMessage", ex.getMessageValue()); m_lc.log(log::CRIT, "In BackendPopulator::~BackendPopulator(): error deleting agent (cta::exception::Exception). Backtrace follows."); - m_lc.logBacktrace(log::ERR, ex.backtrace()); + m_lc.logBacktrace(log::INFO, ex.backtrace()); // We have an exception (we should not), let's core dump. cta::utils::segfault(); } catch (std::exception & ex) { diff --git a/objectstore/GarbageCollector.cpp b/objectstore/GarbageCollector.cpp index 57cb2b0a44b8b056486dc5d5120c8e4d84890407..70367bf24e908b549475de9005c4b0adb2f33790 100644 --- a/objectstore/GarbageCollector.cpp +++ b/objectstore/GarbageCollector.cpp @@ -68,7 +68,7 @@ void GarbageCollector::trimGoneTargets(log::LogContext & lc) { log::ScopedParamContainer params(lc); params.add("agentAddress", wa->first); m_watchedAgents.erase(wa++); - lc.log(log::INFO, "In GarbageCollector::trimGoneTargets(): removed now gone agent."); + lc.log(log::DEBUG, "In GarbageCollector::trimGoneTargets(): removed now gone agent."); } else { ++wa; } @@ -107,7 +107,7 @@ void GarbageCollector::acquireTargets(log::LogContext & lc) { log::ScopedParamContainer params(lc); params.add("agentAddress", ag.getAddressIfSet()) .add("gcAgentAddress", m_ourAgentReference.getAgentAddress()); - lc.log(log::INFO, "In GarbageCollector::acquireTargets(): started tracking an untracked agent"); + lc.log(log::DEBUG, "In GarbageCollector::acquireTargets(): started tracking an untracked agent"); // Agent is to be tracked, let's track it. double timeout=ag.getTimeout(); // The creation of the watchdog could fail as well (if agent gets deleted in the meantime). diff --git a/scheduler/OStoreDB/OStoreDB.cpp b/scheduler/OStoreDB/OStoreDB.cpp index 83f7a79f6b02411b9c7db9198ec4eab703ac0b3b..e61dc5a79258cb16226d6487052d0b8b343b256d 100644 --- a/scheduler/OStoreDB/OStoreDB.cpp +++ b/scheduler/OStoreDB/OStoreDB.cpp @@ -819,7 +819,7 @@ void OStoreDB::trimEmptyQueues(log::LogContext& lc) { log::ScopedParamContainer params(lc); params.add("exceptionMessage", ex.getMessageValue()); lc.log(log::ERR, "In OStoreDB::trimEmptyQueues(): got an exception. Stack trace follows."); - lc.logBacktrace(log::ERR, ex.backtrace()); + lc.logBacktrace(log::INFO, ex.backtrace()); } } } @@ -4195,7 +4195,7 @@ void OStoreDB::ArchiveMount::setJobBatchTransferred(std::list<std::unique_ptr<ct params.add("tapeVid", list.first) .add("exceptionMSG", ex.getMessageValue()); lc.log(log::ERR, "In OStoreDB::ArchiveMount::setJobBatchTransferred(): failed to queue a batch of requests for reporting to user."); - lc.logBacktrace(log::ERR, ex.backtrace()); + lc.logBacktrace(log::INFO, ex.backtrace()); } } timingList.insertAndReset("queueingToReportToUserTime", t); @@ -4259,7 +4259,7 @@ void OStoreDB::ArchiveMount::setJobBatchTransferred(std::list<std::unique_ptr<ct params.add("tapeVid", list.first) .add("exceptionMSG", ex.getMessageValue()); lc.log(log::ERR, "In OStoreDB::ArchiveMount::setJobBatchTransferred(): failed to queue a batch of requests for reporting to repack."); - lc.logBacktrace(log::ERR, ex.backtrace()); + lc.logBacktrace(log::INFO, ex.backtrace()); } } timingList.insertAndReset("queueingToReportToRepackTime", t); diff --git a/scheduler/RetrieveMount.cpp b/scheduler/RetrieveMount.cpp index a41736595f7c807c588425e0df24517eea78bc58..992f5bc49b7e3d89cbfa59549ffd1c07cb82ffc9 100644 --- a/scheduler/RetrieveMount.cpp +++ b/scheduler/RetrieveMount.cpp @@ -279,7 +279,7 @@ void cta::RetrieveMount::flushAsyncSuccessReports(std::queue<std::unique_ptr<cta } const std::string msg_error="In cta::RetrieveMount::flushAsyncSuccessReports(): got an exception"; logContext.log(cta::log::ERR, msg_error); - logContext.logBacktrace(cta::log::ERR, e.backtrace()); + logContext.logBacktrace(cta::log::INFO, e.backtrace()); // Failing here does not really affect the session so we can carry on. Reported jobs are reported, non-reported ones // will be retried. } catch(const std::exception& e){ diff --git a/tapeserver/castor/tape/tapeserver/daemon/DataTransferSession.cpp b/tapeserver/castor/tape/tapeserver/daemon/DataTransferSession.cpp index a0407c12dd29adb6c7246111d9bb51317caf8117..db76894548ca998a01a4e18a3cbb8adc69d70b05 100644 --- a/tapeserver/castor/tape/tapeserver/daemon/DataTransferSession.cpp +++ b/tapeserver/castor/tape/tapeserver/daemon/DataTransferSession.cpp @@ -173,7 +173,7 @@ castor::tape::tapeserver::daemon::DataTransferSession::execute() { } } catch (cta::exception::Exception &e) { lc.log(cta::log::ERR, "Error while scheduling new mount. Putting the drive down. Stack trace follows."); - lc.logBacktrace(cta::log::ERR, e.backtrace()); + lc.logBacktrace(cta::log::INFO, e.backtrace()); putDriveDown(e.getMessageValue(), nullptr, lc); return MARK_DRIVE_AS_DOWN; } diff --git a/tapeserver/castor/tape/tapeserver/daemon/DataTransferSessionTest.cpp b/tapeserver/castor/tape/tapeserver/daemon/DataTransferSessionTest.cpp index 165586a1b1027f2a6b477fe4f1b6cdb595a23ba0..c7ba5d3e4d81f28a6e1c420d765e10b8e10cdd1a 100644 --- a/tapeserver/castor/tape/tapeserver/daemon/DataTransferSessionTest.cpp +++ b/tapeserver/castor/tape/tapeserver/daemon/DataTransferSessionTest.cpp @@ -412,19 +412,17 @@ public: */ std::map<size_t, std::vector<std::string>> getRAOFseqs(const std::string& log) { std::map<size_t, std::vector<std::string>> ret; - std::string logCopy = log; - std::string recallRAOOrderMsg = "Recall order of FSEQs using RAO: "; - size_t recallRAOOrderMsgSize = recallRAOOrderMsg.size(); - size_t posRAOMsg = logCopy.find(recallRAOOrderMsg); size_t i = 0; - while (posRAOMsg != std::string::npos) { - size_t posFirstFseq = posRAOMsg + recallRAOOrderMsgSize; - size_t posLastFseq = logCopy.find('\"', posFirstFseq); - std::string stringFseq = logCopy.substr(posFirstFseq, posLastFseq - posFirstFseq); - cta::utils::splitString(stringFseq, ' ', ret[i]); - logCopy = logCopy.substr(posLastFseq); - posRAOMsg = logCopy.find(recallRAOOrderMsg); - i++; + for(size_t endPos, logPos = 0; logPos != std::string::npos; logPos = endPos) { + logPos = log.find("Recall order of FSEQs", logPos); + logPos = log.find("useRAO=\"true\"", logPos); + logPos = log.find("recallOrder=", logPos); + logPos = log.find('\"', logPos); + if(logPos == std::string::npos) break; + endPos = log.find('\"', ++logPos); + if(endPos == logPos) continue; + auto strFseq = log.substr(logPos, endPos-logPos); + cta::utils::splitString(strFseq, ' ', ret[i++]); } return ret; } diff --git a/tapeserver/castor/tape/tapeserver/daemon/DiskWriteTask.cpp b/tapeserver/castor/tape/tapeserver/daemon/DiskWriteTask.cpp index 5ea738019b8919f21573256835c841f5adeda265..722f9ccae47b5351c7529b496d52b5715e5b950d 100644 --- a/tapeserver/castor/tape/tapeserver/daemon/DiskWriteTask.cpp +++ b/tapeserver/castor/tape/tapeserver/daemon/DiskWriteTask.cpp @@ -165,7 +165,7 @@ bool DiskWriteTask::execute(RecallReportPacker& reporter,cta::log::LogContext& cta::log::ScopedParamContainer params(lc); params.add("errorMessage", e.getMessageValue()); logWithStat(cta::log::ERR, isVerifyOnly ? "File verification failed" : "File writing to disk failed", lc); - lc.logBacktrace(cta::log::ERR, e.backtrace()); + lc.logBacktrace(cta::log::INFO, e.backtrace()); reporter.reportFailedJob(std::move(m_retrieveJob), e, lc); watchdog.deleteParameter("stillOpenFileForThread"+ diff --git a/tapeserver/castor/tape/tapeserver/daemon/MigrationReportPacker.cpp b/tapeserver/castor/tape/tapeserver/daemon/MigrationReportPacker.cpp index 836bdd220a092e1239e28428926ad288e75c2e9b..8b69265bb1c7c77561b71ef6bae7bfba8072aa56 100644 --- a/tapeserver/castor/tape/tapeserver/daemon/MigrationReportPacker.cpp +++ b/tapeserver/castor/tape/tapeserver/daemon/MigrationReportPacker.cpp @@ -187,7 +187,7 @@ void MigrationReportPacker::ReportSkipped::execute(MigrationReportPacker& report .add("fileId", m_skippedArchiveJob->archiveFile.archiveFileID); reportPacker.m_lc.log(cta::log::ERR, "In MigrationReportPacker::ReportSkipped::execute(): call to m_failedArchiveJob->failed() threw an exception."); - reportPacker.m_lc.logBacktrace(cta::log::ERR, ex.backtrace()); + reportPacker.m_lc.logBacktrace(cta::log::INFO, ex.backtrace()); } reportPacker.m_skippedFiles.push(cta::catalogue::TapeItemWritten()); auto& tapeItem = reportPacker.m_skippedFiles.back(); @@ -384,7 +384,7 @@ void MigrationReportPacker::ReportError::execute(MigrationReportPacker& reportPa .add("fileId", m_failedArchiveJob->archiveFile.archiveFileID); reportPacker.m_lc.log(cta::log::ERR, "In MigrationReportPacker::ReportError::execute(): call to m_failedArchiveJob->failed() threw an exception."); - reportPacker.m_lc.logBacktrace(cta::log::ERR, ex.backtrace()); + reportPacker.m_lc.logBacktrace(cta::log::INFO, ex.backtrace()); } } diff --git a/tapeserver/castor/tape/tapeserver/daemon/MigrationTaskInjector.cpp b/tapeserver/castor/tape/tapeserver/daemon/MigrationTaskInjector.cpp index 05c3b1b3c63764c08ebba39ad536dc8412843232..4e42c9c476a3bacc5fc9d18d513b8d497c9c901c 100644 --- a/tapeserver/castor/tape/tapeserver/daemon/MigrationTaskInjector.cpp +++ b/tapeserver/castor/tape/tapeserver/daemon/MigrationTaskInjector.cpp @@ -205,7 +205,7 @@ namespace daemon { cta::log::ScopedParamContainer container( m_parent.m_lc); container.add("exception message",ex.getMessageValue()); - m_parent.m_lc.logBacktrace(cta::log::ERR,ex.backtrace()); + m_parent.m_lc.logBacktrace(cta::log::INFO, ex.backtrace()); m_parent.m_lc.log(cta::log::ERR,"In MigrationTaskInjector::WorkerThread::run(): " "could not retrieve a list of file to migrate, indicating finish of run"); diff --git a/tapeserver/castor/tape/tapeserver/daemon/RecallReportPacker.cpp b/tapeserver/castor/tape/tapeserver/daemon/RecallReportPacker.cpp index 467c686429e29a83807bef0d2acb7524a7011805..b4709e58f3d19de46f14413c805d147f17eaa0b1 100644 --- a/tapeserver/castor/tape/tapeserver/daemon/RecallReportPacker.cpp +++ b/tapeserver/castor/tape/tapeserver/daemon/RecallReportPacker.cpp @@ -232,7 +232,7 @@ void RecallReportPacker::ReportError::execute(RecallReportPacker& reportPacker) params.add("ExceptionMSG", ex.getMessageValue()) .add("fileId", m_failedRetrieveJob->archiveFile.archiveFileID); reportPacker.m_lc.log(cta::log::ERR, "In RecallReportPacker::ReportError::execute(): call to m_failedRetrieveJob->failed() threw an exception."); - reportPacker.m_lc.logBacktrace(cta::log::ERR, ex.backtrace()); + reportPacker.m_lc.logBacktrace(cta::log::INFO, ex.backtrace()); } } diff --git a/tapeserver/castor/tape/tapeserver/daemon/RecallTaskInjector.cpp b/tapeserver/castor/tape/tapeserver/daemon/RecallTaskInjector.cpp index 6f256409b9b400c6da013304ba21d67d66103d08..82e1dca2d17adbc0db705d6926bd9fc7be95470b 100644 --- a/tapeserver/castor/tape/tapeserver/daemon/RecallTaskInjector.cpp +++ b/tapeserver/castor/tape/tapeserver/daemon/RecallTaskInjector.cpp @@ -245,11 +245,6 @@ void RecallTaskInjector::injectBulkRecalls() { } std::ostringstream recallOrderLog; - if(useRAO) { - recallOrderLog << "Recall order of FSEQs using RAO:"; - } else { - recallOrderLog << "Recall order of FSEQs:"; - } uint64_t nFiles = 0; uint64_t nBytes = 0; std::list<std::unique_ptr<cta::RetrieveJob>> retrieveJobsBatch; @@ -286,14 +281,17 @@ void RecallTaskInjector::injectBulkRecalls() { //it can start its infinite loop setFirstTasksInjectedPromise(); } - m_lc.log(cta::log::INFO, recallOrderLog.str()); + // log the recall order + cta::log::ScopedParamContainer params(m_lc); + params.add("useRAO", useRAO ? "true" : "false"); + params.add("recallOrder", recallOrderLog.str()); + m_lc.log(cta::log::INFO, "Recall order of FSEQs"); // keep the rest for later injection m_jobs.erase(std::remove_if(m_jobs.begin(), m_jobs.end(), [](const std::unique_ptr<cta::RetrieveJob> &jobptr) { return jobptr.get() == nullptr; }), m_jobs.end()); LogContext::ScopedParam sp03(m_lc, Param("nbFile", njobs)); m_lc.log(cta::log::INFO, "Finished processing batch of recall tasks from client"); - } //------------------------------------------------------------------------------ @@ -446,7 +444,7 @@ catch(const cta::exception::Exception& ex){ //we end up there because we could not talk to the client cta::log::ScopedParamContainer container( m_parent.m_lc); container.add("exception message",ex.getMessageValue()); - m_parent.m_lc.logBacktrace(cta::log::ERR,ex.backtrace()); + m_parent.m_lc.logBacktrace(cta::log::INFO, ex.backtrace()); m_parent.m_lc.log(cta::log::ERR,"In RecallJobInjector::WorkerThread::run(): " "could not retrieve a list of file to recall. End of session"); m_parent.signalEndDataMovement(); diff --git a/tapeserver/castor/tape/tapeserver/daemon/TapeReadTask.hpp b/tapeserver/castor/tape/tapeserver/daemon/TapeReadTask.hpp index e90397dbee66f3ec5bc57564aa79ce506cc7fcef..de5022ee851e65d5793f4e8ba29d293317ab2a16 100644 --- a/tapeserver/castor/tape/tapeserver/daemon/TapeReadTask.hpp +++ b/tapeserver/castor/tape/tapeserver/daemon/TapeReadTask.hpp @@ -211,7 +211,7 @@ public: } { cta::log::LogContext lc2(lc.logger()); - lc2.logBacktrace(cta::log::ERR, ex.backtrace()); + lc2.logBacktrace(cta::log::INFO, ex.backtrace()); } // mb might or might not be allocated at this point, but diff --git a/tapeserver/castor/tape/tapeserver/daemon/TapeWriteTask.cpp b/tapeserver/castor/tape/tapeserver/daemon/TapeWriteTask.cpp index a4645e7d8c160d48ef4cedcfb7b5ded05796430c..432dc4d79508bfe3f779d3ac60b7c34a38029b6e 100644 --- a/tapeserver/castor/tape/tapeserver/daemon/TapeWriteTask.cpp +++ b/tapeserver/castor/tape/tapeserver/daemon/TapeWriteTask.cpp @@ -294,7 +294,8 @@ namespace daemon { //so we use a different exception to distinguish this case errorMsg=mb->errorMsg(); m_errorFlag.set(); - lc.log(cta::log::ERR,errorMsg); + LogContext::ScopedParam sp1(lc, Param("errorMessage", errorMsg)); + lc.log(cta::log::ERR, "Error while reading a file"); throw RecoverableMigrationErrorException(errorMsg); } else if (mb->isCanceled()) { errorMsg="Received a block marked as cancelled"; diff --git a/tapeserver/daemon/MaintenanceHandler.cpp b/tapeserver/daemon/MaintenanceHandler.cpp index 57057885e8153ab8512fafde38cc305932ce8d81..4c2d59033aafb4d57ffa906cfd5da4aa9a5b9039 100644 --- a/tapeserver/daemon/MaintenanceHandler.cpp +++ b/tapeserver/daemon/MaintenanceHandler.cpp @@ -342,7 +342,7 @@ void MaintenanceHandler::exceptionThrowingRunChild(){ m_processManager.logContext().log(log::ERR, "In MaintenanceHandler::exceptionThrowingRunChild(): received an exception. Backtrace follows."); } - m_processManager.logContext().logBacktrace(log::ERR, ex.backtrace()); + m_processManager.logContext().logBacktrace(log::INFO, ex.backtrace()); throw ex; } catch(std::exception &ex) { log::ScopedParamContainer params(m_processManager.logContext()); diff --git a/tapeserver/daemon/TapeDaemon.cpp b/tapeserver/daemon/TapeDaemon.cpp index f715606f05ce944c4487a967ad428997b8e4331f..fb9e7e5baf4e7c214d16a9aca054d08fd335e196 100644 --- a/tapeserver/daemon/TapeDaemon.cpp +++ b/tapeserver/daemon/TapeDaemon.cpp @@ -58,7 +58,7 @@ int TapeDaemon::main() { // Log the error m_log(log::ERR, "Aborting cta-taped on uncaught exception. Stack trace follows.", {{"Message", ex.getMessage().str()}}); log::LogContext lc(m_log); - lc.logBacktrace(log::ERR, ex.backtrace()); + lc.logBacktrace(log::INFO, ex.backtrace()); return 1; } return 0; diff --git a/xroot_plugins/XrdSsiCtaRequestMessage.cpp b/xroot_plugins/XrdSsiCtaRequestMessage.cpp index 16051ad9a0d880de2e7d0010fa06b953a1c51879..929f3ecefd8e944869da262a68cde01cfd57e887 100644 --- a/xroot_plugins/XrdSsiCtaRequestMessage.cpp +++ b/xroot_plugins/XrdSsiCtaRequestMessage.cpp @@ -789,7 +789,7 @@ void RequestMessage::processDELETE(const cta::eos::Notification ¬ification, c } catch (cta::exception::Exception &ex){ log::ScopedParamContainer spc(m_lc); spc.add("fileId", request.archiveFileID); - m_lc.log(log::WARNING, "Ignoring request to delete archive file because it does not exist in the catalogue"); + m_lc.log(log::DEBUG, "Ignoring request to delete archive file from the catalogue, because it does not exist"); } m_scheduler.deleteArchive(m_cliIdentity.username, request, m_lc); tl.insertAndReset("schedulerTime",t); @@ -861,7 +861,7 @@ const cta::admin::AdminCmd &admincmd, cta::utils::Timer &t) cta::log::ScopedParamContainer params(m_lc); - std::string log_msg = "In RequestMessage::" + function + "(): Admin command succeeded: "; + std::string log_msg = "In RequestMessage::" + function + "(): Admin command succeeded."; // Reverse lookup of strings corresponding to <command,subcommand> pair for(auto cmd_it = cmdLookup.begin(); cmd_it != cmdLookup.end(); ++cmd_it) {