diff --git a/cmdline/CtaAdminTextFormatter.cpp b/cmdline/CtaAdminTextFormatter.cpp index 86d104242e6c5c4279d906c28bee64f8837b076d..0d9b54ccb56b6f4cb7a1c4ca2fd2248ba0923f80 100644 --- a/cmdline/CtaAdminTextFormatter.cpp +++ b/cmdline/CtaAdminTextFormatter.cpp @@ -634,7 +634,7 @@ void TextFormatter::print(const RepackLsItem &rels_item) { rels_item.failed_to_retrieve_files(), dataSizeToStr(rels_item.failed_to_retrieve_bytes()), rels_item.failed_to_archive_files(), - dataSizeToStr(rels_item.failed_to_retrieve_bytes()), + dataSizeToStr(rels_item.failed_to_archive_bytes()), rels_item.last_expanded_fseq(), rels_item.status() ); diff --git a/objectstore/RepackRequest.cpp b/objectstore/RepackRequest.cpp index b7bb2a6d9fbb39717037fe1fc14018f66a832dea..3f3714fbd61b3d51720399fa27f273d55c5db8f9 100644 --- a/objectstore/RepackRequest.cpp +++ b/objectstore/RepackRequest.cpp @@ -72,6 +72,7 @@ void RepackRequest::initialize() { m_payload.set_archivedbytes(0); m_payload.set_failedtoretrievefiles(0); m_payload.set_failedtoretrievebytes(0); + m_payload.set_failedtocreatearchivereq(0); m_payload.set_failedtoarchivefiles(0); m_payload.set_failedtoarchivebytes(0); m_payload.set_lastexpandedfseq(0); @@ -196,9 +197,9 @@ void RepackRequest::setStatus(){ checkPayloadReadable(); if(m_payload.is_expand_started()){ - //The expansion of the Repack Request have started + //The expansion of the Repack Request have started if(m_payload.is_expand_finished()){ - if( (m_payload.retrievedfiles() + m_payload.failedtoretrievefiles() >= m_payload.totalfilestoretrieve()) && (m_payload.archivedfiles() + m_payload.failedtoarchivefiles() >= m_payload.totalfilestoarchive()) ){ + if( (m_payload.retrievedfiles() + m_payload.failedtoretrievefiles() >= m_payload.totalfilestoretrieve()) && (m_payload.archivedfiles() + m_payload.failedtoarchivefiles() + m_payload.failedtocreatearchivereq() >= m_payload.totalfilestoarchive()) ){ //We reached the end if (m_payload.failedtoretrievefiles() || 
m_payload.failedtoarchivefiles()) {
           //At least one retrieve or archive has failed
@@ -520,13 +521,34 @@ auto RepackRequest::getStats() -> std::map<StatsType, StatsValues> {
 //------------------------------------------------------------------------------
 // RepackRequest::reportRetrieveCreationFailures()
 //------------------------------------------------------------------------------
-void RepackRequest::reportRetrieveCreationFailures(const StatsValues& failedRetrieveCreation){
+void RepackRequest::reportRetrieveCreationFailures(const std::list<cta::SchedulerDatabase::RepackRequest::Subrequest>& notCreatedSubrequests){
   checkPayloadWritable();
-  m_payload.set_failedtoretrievebytes(m_payload.failedtoretrievebytes() + failedRetrieveCreation.bytes);
-  m_payload.set_failedtoretrievefiles(m_payload.failedtoretrievefiles() + failedRetrieveCreation.files);
+  // One initialized declaration per counter: in `T a, b, c = 0;` only c would be zeroed.
+  uint64_t failedToRetrieveFiles = 0;
+  uint64_t failedToRetrieveBytes = 0;
+  uint64_t failedToCreateArchiveReq = 0;
+  for(auto & subreq: notCreatedSubrequests){
+    failedToRetrieveFiles++;
+    failedToRetrieveBytes+=subreq.archiveFile.fileSize;
+    for(auto & copyNb: subreq.copyNbsToRearchive){
+      (void) copyNb;
+      failedToCreateArchiveReq++;
+    }
+  }
+  m_payload.set_failedtoretrievebytes(m_payload.failedtoretrievebytes() + failedToRetrieveBytes);
+  m_payload.set_failedtoretrievefiles(m_payload.failedtoretrievefiles() + failedToRetrieveFiles);
+  reportArchiveCreationFailures(failedToCreateArchiveReq);
   setStatus();
 }
 
+//------------------------------------------------------------------------------
+// RepackRequest::reportArchiveCreationFailures()
+//------------------------------------------------------------------------------
+void RepackRequest::reportArchiveCreationFailures(uint64_t nbFailedToCreateArchiveRequests){
+  checkPayloadWritable();
+  m_payload.set_failedtocreatearchivereq(m_payload.failedtocreatearchivereq() + nbFailedToCreateArchiveRequests);
+}
+
 //------------------------------------------------------------------------------
 // RepackRequest::garbageCollect()
 //------------------------------------------------------------------------------
diff --git a/objectstore/RepackRequest.hpp b/objectstore/RepackRequest.hpp
index 
70dac1f871a13ccb9aef43b0f470ea8d24093ba7..bfa7c812d8e30ba3b8d8a062fef6009b7493685c 100644 --- a/objectstore/RepackRequest.hpp +++ b/objectstore/RepackRequest.hpp @@ -126,7 +126,9 @@ public: }; std::map<StatsType, StatsValues> getStats(); - void reportRetrieveCreationFailures(const StatsValues &failedRetrieveCreated); + void reportRetrieveCreationFailures(const std::list<cta::SchedulerDatabase::RepackRequest::Subrequest>& notCreatedSubrequests); + + void reportArchiveCreationFailures(uint64_t nbFailedToCreateArchiveRequests); void garbageCollect(const std::string &presumedOwner, AgentReference & agentReference, log::LogContext & lc, cta::catalogue::Catalogue & catalogue) override; diff --git a/objectstore/cta.proto b/objectstore/cta.proto index 55a327cac44482c5126eb8c448f86b1ce226e06b..734e0fb1b4bbed7f3a393cbd34d220527d2781c3 100644 --- a/objectstore/cta.proto +++ b/objectstore/cta.proto @@ -568,6 +568,7 @@ message RepackRequest { required uint64 archivedbytes = 11510; required uint64 failedtoretrievefiles = 11520; required uint64 failedtoretrievebytes = 11530; + required uint64 failedtocreatearchivereq = 11535; required uint64 failedtoarchivefiles = 11540; required uint64 failedtoarchivebytes = 11550; required uint64 lastexpandedfseq = 11560; diff --git a/scheduler/OStoreDB/OStoreDB.cpp b/scheduler/OStoreDB/OStoreDB.cpp index 9c30521bea7a9d0a82671585d2c54d51c18f3a47..8c95269d4e101fd0570648c2f706365943d2d8b7 100644 --- a/scheduler/OStoreDB/OStoreDB.cpp +++ b/scheduler/OStoreDB/OStoreDB.cpp @@ -2086,12 +2086,17 @@ void OStoreDB::RepackRetrieveFailureReportBatch::report(log::LogContext& lc){ { // Prepare the report objectstore::RepackRequest::SubrequestStatistics::List ssl; + uint64_t failedToCreateArchiveReq = 0; for (auto &rr: m_subrequestList) { ssl.push_back(objectstore::RepackRequest::SubrequestStatistics()); ssl.back().bytes = rr.archiveFile.fileSize; ssl.back().files = 1; ssl.back().fSeq = rr.repackInfo.fSeq; fSeqsToDelete.push_back(rr.repackInfo.fSeq); + 
for(auto& copyNb: rr.repackInfo.copyNbsToRearchive){ + (void) copyNb; + failedToCreateArchiveReq++; + } } // Record it. timingList.insertAndReset("failureStatsPrepareTime", t); @@ -2101,6 +2106,8 @@ void OStoreDB::RepackRetrieveFailureReportBatch::report(log::LogContext& lc){ timingList.insertAndReset("failureStatsFetchTime", t); m_repackRequest.reportSubRequestsForDeletion(fSeqsToDelete); timingList.insertAndReset("failureStatsReportSubRequestsForDeletionTime", t); + m_repackRequest.reportArchiveCreationFailures(failedToCreateArchiveReq); + timingList.insertAndReset("failureArchiveCreationStatsUpdateTime",t); m_repackRequest.reportRetriveFailures(ssl); timingList.insertAndReset("failureStatsUpdateTime", t); m_repackRequest.commit(); @@ -2198,10 +2205,8 @@ void OStoreDB::RepackRequest::addSubrequestsAndUpdateStats(std::list<Subrequest> for (auto &rn: subrequestsNames) { subReqInfoMap[rn.fSeq] = rn; } // Try to create the retrieve subrequests (owned by this process, to be queued in a second step) // subrequests can already fail at that point if we cannot find a copy on a valid tape. - std::list<uint64_t> failedFSeqs; + std::list<Subrequest> notCreatedSubrequests; objectstore::RepackRequest::StatsValues failedCreationStats; - uint64_t failedFiles = 0; - uint64_t failedBytes = 0; // First loop: we will issue the async insertions of the subrequests. 
struct AsyncInsertionInfo { Subrequest & rsr; @@ -2233,9 +2238,9 @@ void OStoreDB::RepackRequest::addSubrequestsAndUpdateStats(std::list<Subrequest> rRRepackInfo.archiveRouteMap[ar.second.copyNb] = ar.second.tapePoolName; } } catch (std::out_of_range &) { - failedFSeqs.emplace_back(rsr.fSeq); + notCreatedSubrequests.emplace_back(rsr); failedCreationStats.files++; - failedCreationStats.bytes += rsr.archiveFile.fileSize; + failedCreationStats.bytes+=rsr.archiveFile.fileSize; log::ScopedParamContainer params(lc); params.add("fileID", rsr.archiveFile.archiveFileID) .add("diskInstance", rsr.archiveFile.diskInstance) @@ -2286,9 +2291,9 @@ void OStoreDB::RepackRequest::addSubrequestsAndUpdateStats(std::list<Subrequest> bestVid = Helpers::selectBestRetrieveQueue(candidateVids, m_oStoreDB.m_catalogue, m_oStoreDB.m_objectStore); } catch (Helpers::NoTapeAvailableForRetrieve &) { // Count the failure for this subrequest. - failedFSeqs.emplace_back(rsr.fSeq); - failedFiles++; - failedBytes += rsr.archiveFile.fileSize; + notCreatedSubrequests.emplace_back(rsr); + failedCreationStats.files++; + failedCreationStats.bytes += rsr.archiveFile.fileSize; log::ScopedParamContainer params(lc); params.add("fileId", rsr.archiveFile.archiveFileID) .add("repackVid", repackInfo.vid); @@ -2304,7 +2309,7 @@ void OStoreDB::RepackRequest::addSubrequestsAndUpdateStats(std::list<Subrequest> } { // Count the failure for this subrequest. - failedFSeqs.emplace_back(rsr.fSeq); + notCreatedSubrequests.emplace_back(rsr); failedCreationStats.files++; failedCreationStats.bytes += rsr.archiveFile.fileSize; log::ScopedParamContainer params(lc); @@ -2336,10 +2341,9 @@ void OStoreDB::RepackRequest::addSubrequestsAndUpdateStats(std::list<Subrequest> } catch (exception::Exception & ex) { // We can fail to serialize here... // Count the failure for this subrequest. 
- failedFSeqs.emplace_back(rsr.fSeq); + notCreatedSubrequests.emplace_back(rsr); failedCreationStats.files++; failedCreationStats.bytes += rsr.archiveFile.fileSize; - failedFSeqs.emplace_back(rsr.fSeq); log::ScopedParamContainer params(lc); params.add("fileId", rsr.archiveFile.archiveFileID) .add("repackVid", repackInfo.vid) @@ -2374,7 +2378,7 @@ void OStoreDB::RepackRequest::addSubrequestsAndUpdateStats(std::list<Subrequest> asyncInsertedSubrequestInfoList.emplace_back(AsyncInsertedSubrequestInfo{aii.rsr, aii.bestVid, aii.activeCopyNb, aii.request}); } catch (exception::Exception & ex) { // Count the failure for this subrequest. - failedFSeqs.emplace_back(aii.rsr.fSeq); + notCreatedSubrequests.emplace_back(aii.rsr); failedCreationStats.files++; failedCreationStats.bytes += aii.rsr.archiveFile.fileSize; log::ScopedParamContainer params(lc); @@ -2387,11 +2391,11 @@ void OStoreDB::RepackRequest::addSubrequestsAndUpdateStats(std::list<Subrequest> "In OStoreDB::RepackRequest::addSubrequests(): could not asyncInsert the subrequest."); } } - if(failedFSeqs.size()){ + if(notCreatedSubrequests.size()){ log::ScopedParamContainer params(lc); params.add("files", failedCreationStats.files); params.add("bytes", failedCreationStats.bytes); - m_repackRequest.reportRetrieveCreationFailures(failedCreationStats); + m_repackRequest.reportRetrieveCreationFailures(notCreatedSubrequests); m_repackRequest.commit(); lc.log(log::ERR, "In OStoreDB::RepackRequest::addSubRequests(), reported the failed creation of Retrieve Requests to the Repack request"); }