From 9cee3f2bfe22242fbea26cf2f674dd0d9895ac48 Mon Sep 17 00:00:00 2001 From: Cedric CAFFY <cedric.caffy@cern.ch> Date: Thu, 28 Nov 2019 19:08:15 +0100 Subject: [PATCH] Corrected the segmentation fault problem if exception happens while the reporting of ArchiveSuccesses fails --- .../docker/ctafrontend/cc7/opt/run/bin/mkSymlinks.sh | 1 + scheduler/ArchiveMount.cpp | 12 +++++++----- scheduler/ArchiveMount.hpp | 2 +- scheduler/SchedulerTest.cpp | 8 ++++---- scheduler/testingMocks/MockArchiveMount.hpp | 2 +- .../tape/tapeserver/daemon/MigrationReportPacker.cpp | 4 ++-- 6 files changed, 16 insertions(+), 13 deletions(-) diff --git a/continuousintegration/docker/ctafrontend/cc7/opt/run/bin/mkSymlinks.sh b/continuousintegration/docker/ctafrontend/cc7/opt/run/bin/mkSymlinks.sh index 7df4512d96..03f11edf06 100755 --- a/continuousintegration/docker/ctafrontend/cc7/opt/run/bin/mkSymlinks.sh +++ b/continuousintegration/docker/ctafrontend/cc7/opt/run/bin/mkSymlinks.sh @@ -18,6 +18,7 @@ if [[ -n "${EOS_BUILDTREE_SUBDIR}" ]]; then ln -s -v -t /usr/bin `find ${BUILDTREE_BASE}/${EOS_BUILDTREE_SUBDIR} -type f -executable | egrep -v '\.so(\.|$)' | egrep -v '\.sh$' | grep -v RPM/BUILD | grep -v CMake | grep -v CPack` echo Creating symlinks for EOS libraries. find ${BUILDTREE_BASE}/${EOS_BUILDTREE_SUBDIR} | egrep 'lib.*\.so($|\.)' | xargs -itoto ln -s -v -t /usr/lib64 toto + find ${BUILDTREE_BASE}/${EOS_BUILDTREE_SUBDIR} | egrep 'lib.*\.so($|\.)' | xargs -itoto ln -s -v -t /usr/lib64 toto.0 ln -s -v -t /usr/sbin `grep eos_SOURCE ${BUILDTREE_BASE}/${EOS_BUILDTREE_SUBDIR}/CMakeCache.txt | cut -d = -f 2-`/fst/tools/eos* mkdir /var/log/eos{,/tx} mkdir /var/eos{,/wfe{,/bash},ns-queue{,/default}} diff --git a/scheduler/ArchiveMount.cpp b/scheduler/ArchiveMount.cpp index 78fa410c52..fe67f3041b 100644 --- a/scheduler/ArchiveMount.cpp +++ b/scheduler/ArchiveMount.cpp @@ -16,6 +16,8 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <iostream> + #include "scheduler/ArchiveMount.hpp" #include "common/make_unique.hpp" #include "objectstore/Backend.hpp" @@ -155,9 +157,10 @@ std::list<std::unique_ptr<cta::ArchiveJob> > cta::ArchiveMount::getNextJobBatch( // reportJobsBatchWritten //------------------------------------------------------------------------------ void cta::ArchiveMount::reportJobsBatchTransferred(std::queue<std::unique_ptr<cta::ArchiveJob> > & successfulArchiveJobs, - std::queue<cta::catalogue::TapeItemWritten> & skippedFiles, std::queue<std::unique_ptr<cta::ArchiveJob>>& failedToReportArchiveJobs,cta::log::LogContext& logContext) { + std::queue<cta::catalogue::TapeItemWritten> & skippedFiles, std::queue<std::unique_ptr<cta::SchedulerDatabase::ArchiveJob>>& failedToReportArchiveJobs,cta::log::LogContext& logContext) { std::set<cta::catalogue::TapeItemWrittenPointer> tapeItemsWritten; std::list<std::unique_ptr<cta::ArchiveJob> > validatedSuccessfulArchiveJobs; + std::list<std::unique_ptr<cta::SchedulerDatabase::ArchiveJob>> validatedSuccessfulDBArchiveJobs; std::unique_ptr<cta::ArchiveJob> job; try{ uint64_t files=0; @@ -181,7 +184,7 @@ void cta::ArchiveMount::reportJobsBatchTransferred(std::queue<std::unique_ptr<ct } catch (const cta::exception::Exception &ex){ //We put the not validated job into this list in order to insert the job //into the failedToReportArchiveJobs list in the exception catching block - validatedSuccessfulArchiveJobs.emplace_back(std::move(job)); + validatedSuccessfulDBArchiveJobs.emplace_back(std::move(job->m_dbJob)); throw ex; } files++; @@ -210,7 +213,6 @@ void cta::ArchiveMount::reportJobsBatchTransferred(std::queue<std::unique_ptr<ct // Now get the db mount to mark the jobs as successful. // Extract the db jobs from the scheduler jobs. - std::list<std::unique_ptr<cta::SchedulerDatabase::ArchiveJob>> validatedSuccessfulDBArchiveJobs; for (auto &schJob: validatedSuccessfulArchiveJobs) { validatedSuccessfulDBArchiveJobs.emplace_back(std::move(schJob->m_dbJob)); } @@ -250,7 +252,7 @@ void cta::ArchiveMount::reportJobsBatchTransferred(std::queue<std::unique_ptr<ct } const std::string msg_error="In ArchiveMount::reportJobsBatchWritten(): got an exception"; logContext.log(cta::log::ERR, msg_error); - for(auto &aj: validatedSuccessfulArchiveJobs){ + for(auto &aj: validatedSuccessfulDBArchiveJobs){ if(aj.get()) failedToReportArchiveJobs.push(std::move(aj)); } @@ -266,7 +268,7 @@ void cta::ArchiveMount::reportJobsBatchTransferred(std::queue<std::unique_ptr<ct } const std::string msg_error="In ArchiveMount::reportJobsBatchWritten(): got an standard exception"; logContext.log(cta::log::ERR, msg_error); - for(auto &aj: validatedSuccessfulArchiveJobs){ + for(auto &aj: validatedSuccessfulDBArchiveJobs){ if(aj.get()) failedToReportArchiveJobs.push(std::move(aj)); } diff --git a/scheduler/ArchiveMount.hpp b/scheduler/ArchiveMount.hpp index df62d10ae4..8f669b0874 100644 --- a/scheduler/ArchiveMount.hpp +++ b/scheduler/ArchiveMount.hpp @@ -149,7 +149,7 @@ namespace cta { * @param logContext */ virtual void reportJobsBatchTransferred (std::queue<std::unique_ptr<cta::ArchiveJob> > & successfulArchiveJobs, - std::queue<cta::catalogue::TapeItemWritten> & skippedFiles, std::queue<std::unique_ptr<cta::ArchiveJob>>& failedToReportArchiveJobs, cta::log::LogContext &logContext); + std::queue<cta::catalogue::TapeItemWritten> & skippedFiles, std::queue<std::unique_ptr<cta::SchedulerDatabase::ArchiveJob>>& failedToReportArchiveJobs, cta::log::LogContext &logContext); /** * Returns the tape pool of the tape to be mounted. diff --git a/scheduler/SchedulerTest.cpp b/scheduler/SchedulerTest.cpp index b0f7deb343..fbc92487fd 100644 --- a/scheduler/SchedulerTest.cpp +++ b/scheduler/SchedulerTest.cpp @@ -495,7 +495,7 @@ TEST_P(SchedulerTest, archive_report_and_retrieve_new_file) { archiveJob->validate(); std::queue<std::unique_ptr <cta::ArchiveJob >> sDBarchiveJobBatch; std::queue<cta::catalogue::TapeItemWritten> sTapeItems; - std::queue<std::unique_ptr <cta::ArchiveJob >> failedToReportArchiveJobs; + std::queue<std::unique_ptr <cta::SchedulerDatabase::ArchiveJob >> failedToReportArchiveJobs; sDBarchiveJobBatch.emplace(std::move(archiveJob)); archiveMount->reportJobsBatchTransferred(sDBarchiveJobBatch, sTapeItems,failedToReportArchiveJobs, lc); archiveJobBatch = archiveMount->getNextJobBatch(1,1,lc); @@ -695,7 +695,7 @@ TEST_P(SchedulerTest, archive_and_retrieve_failure) { archiveJob->validate(); std::queue<std::unique_ptr <cta::ArchiveJob >> sDBarchiveJobBatch; std::queue<cta::catalogue::TapeItemWritten> sTapeItems; - std::queue<std::unique_ptr <cta::ArchiveJob >> failedToReportArchiveJobs; + std::queue<std::unique_ptr <cta::SchedulerDatabase::ArchiveJob >> failedToReportArchiveJobs; sDBarchiveJobBatch.emplace(std::move(archiveJob)); archiveMount->reportJobsBatchTransferred(sDBarchiveJobBatch, sTapeItems,failedToReportArchiveJobs, lc); archiveJobBatch = archiveMount->getNextJobBatch(1,1,lc); @@ -947,7 +947,7 @@ TEST_P(SchedulerTest, archive_and_retrieve_report_failure) { std::queue<std::unique_ptr <cta::ArchiveJob >> sDBarchiveJobBatch; std::queue<cta::catalogue::TapeItemWritten> sTapeItems; sDBarchiveJobBatch.emplace(std::move(archiveJob)); - std::queue<std::unique_ptr<cta::ArchiveJob>> failedToReportArchiveJobs; + std::queue<std::unique_ptr<cta::SchedulerDatabase::ArchiveJob>> failedToReportArchiveJobs; archiveMount->reportJobsBatchTransferred(sDBarchiveJobBatch, sTapeItems,failedToReportArchiveJobs, lc); archiveJobBatch = archiveMount->getNextJobBatch(1,1,lc); ASSERT_EQ(0, archiveJobBatch.size()); @@ -2858,7 +2858,7 @@ TEST_P(SchedulerTest, archiveReportMultipleAndQueueRetrievesWithActivities) { archiveJob->validate(); std::queue<std::unique_ptr <cta::ArchiveJob >> sDBarchiveJobBatch; std::queue<cta::catalogue::TapeItemWritten> sTapeItems; - std::queue<std::unique_ptr <cta::ArchiveJob >> failedToReportArchiveJobs; + std::queue<std::unique_ptr <cta::SchedulerDatabase::ArchiveJob >> failedToReportArchiveJobs; sDBarchiveJobBatch.emplace(std::move(archiveJob)); archiveMount->reportJobsBatchTransferred(sDBarchiveJobBatch, sTapeItems, failedToReportArchiveJobs, lc); // Mark the tape full so we get one file per tape. diff --git a/scheduler/testingMocks/MockArchiveMount.hpp b/scheduler/testingMocks/MockArchiveMount.hpp index fac5e6a11c..c6d7d160c6 100644 --- a/scheduler/testingMocks/MockArchiveMount.hpp +++ b/scheduler/testingMocks/MockArchiveMount.hpp @@ -50,7 +50,7 @@ namespace cta { } void reportJobsBatchTransferred(std::queue<std::unique_ptr<cta::ArchiveJob> >& successfulArchiveJobs, - std::queue<cta::catalogue::TapeItemWritten> & skippedFiles, std::queue<std::unique_ptr<cta::ArchiveJob>>& failedToReportArchiveJobs, cta::log::LogContext& logContext) override { + std::queue<cta::catalogue::TapeItemWritten> & skippedFiles, std::queue<std::unique_ptr<cta::SchedulerDatabase::ArchiveJob>>& failedToReportArchiveJobs, cta::log::LogContext& logContext) override { try { std::set<cta::catalogue::TapeItemWrittenPointer> tapeItemsWritten; std::list<std::unique_ptr<cta::ArchiveJob> > validatedSuccessfulArchiveJobs; diff --git a/tapeserver/castor/tape/tapeserver/daemon/MigrationReportPacker.cpp b/tapeserver/castor/tape/tapeserver/daemon/MigrationReportPacker.cpp index 47b885f3c2..154c2d56a1 100644 --- a/tapeserver/castor/tape/tapeserver/daemon/MigrationReportPacker.cpp +++ b/tapeserver/castor/tape/tapeserver/daemon/MigrationReportPacker.cpp @@ -251,14 +251,14 @@ void MigrationReportPacker::ReportFlush::execute(MigrationReportPacker& reportPa reportPacker.m_lc.log(cta::log::INFO,"Received a flush report from tape, but had no file to report to client. Doing nothing."); return; } - std::queue<std::unique_ptr<cta::ArchiveJob>> failedToReportArchiveJobs; + std::queue<std::unique_ptr<cta::SchedulerDatabase::ArchiveJob>> failedToReportArchiveJobs; try{ reportPacker.m_archiveMount->reportJobsBatchTransferred(reportPacker.m_successfulArchiveJobs, reportPacker.m_skippedFiles, failedToReportArchiveJobs, reportPacker.m_lc); } catch(const cta::ArchiveMount::FailedMigrationRecallResult &ex){ while(!failedToReportArchiveJobs.empty()){ auto archiveJob = std::move(failedToReportArchiveJobs.front()); - archiveJob->transferFailed(ex.getMessageValue(),reportPacker.m_lc); + archiveJob->failTransfer(ex.getMessageValue(),reportPacker.m_lc); failedToReportArchiveJobs.pop(); } throw ex; -- GitLab