From 208b7459ac679b712571657af868fbc53a2e0a5b Mon Sep 17 00:00:00 2001
From: David Smith <david.smith@cern.ch>
Date: Wed, 1 Feb 2023 16:26:04 +0100
Subject: [PATCH] Resolve "Couple of improvements to reduce time of
 sortAndGetTapesForMountInfo (reported as "getTapeInfoTime")"

---
 ReleaseNotes.md         |   1 +
 scheduler/Scheduler.cpp | 107 ++++++++++++++++++++++++----------------
 2 files changed, 65 insertions(+), 43 deletions(-)

diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index a89f0c2bda..23aa9cee78 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -23,6 +23,7 @@
 - cta/CTA#238 - Compilation fails when using cta::common::Configuration::getConfEntInt(...)
 - cta/CTA#273 - Fix tape state change command idempotency when resetting REPACKING/BROKEN/PENDING
 - cta/CTA#280 - Add a timeout to tape server global lock on the object store
+- cta/CTA#289 - Avoid a DB query and improve filtering time in sortAndGetTapesForMountInfo
 - cta/CTA#292 - Problem with cppcheck
 - cta/CTA#288 - Do not allow tape server to transition from REPACKING_DISABLED to DISABLED
 - cta/CTA#290 - Remove temporary counters used to track single-vid-GetTapesByVid calls
diff --git a/scheduler/Scheduler.cpp b/scheduler/Scheduler.cpp
index 4d24a799cc..00fac344af 100644
--- a/scheduler/Scheduler.cpp
+++ b/scheduler/Scheduler.cpp
@@ -998,53 +998,61 @@ void Scheduler::sortAndGetTapesForMountInfo(std::unique_ptr<SchedulerDatabase::T
   ExistingMountSummaryPerVo& existingMountsBasicTypeSummaryPerVo,
   std::set<std::string>& tapesInUse, std::list<catalogue::TapeForWriting>& tapeList,
   double& getTapeInfoTime, double& candidateSortingTime, double& getTapeForWriteTime,
   log::LogContext& lc) {
-  // Neither the library information nor the tape status is known for tapes involved in retrieves.
-  // Build the eligible set of tapes in the library and with required status so
-  // we can filter the potential mounts to the ones that this tape server can serve.
-  std::set<std::string> eligibleTapeSet;
+  // Check if there are any potential mounts for retrieve
+  bool anyRetr;
   {
-    catalogue::TapeSearchCriteria searchCriteria;
-    searchCriteria.logicalLibrary = logicalLibraryName;
-    searchCriteria.state = common::dataStructures::Tape::ACTIVE;
-    auto eligibleTapesList = m_catalogue.Tape()->getTapes(searchCriteria);
-    for(auto& t : eligibleTapesList) {
-      eligibleTapeSet.insert(t.vid);
-    }
-    searchCriteria.state = common::dataStructures::Tape::REPACKING;
-    eligibleTapesList = m_catalogue.Tape()->getTapes(searchCriteria);
-    for(auto& t : eligibleTapesList) {
-      eligibleTapeSet.insert(t.vid);
-    }
+    auto &v = mountInfo->potentialMounts;
+    anyRetr = std::any_of(v.begin(), v.end(),
+      [](const SchedulerDatabase::PotentialMount &pm)
+      {
+        return pm.type == common::dataStructures::MountType::Retrieve;
+      });
   }
-
-  // Filter the potential mounts to keep only the ones that match the logical library for retrieves,
-  // and build the list of tapes that can potentially be mounted by this tape server
-  std::set<std::string> retrieveTapeSet;
-  for(auto m_it = mountInfo->potentialMounts.begin(); m_it != mountInfo->potentialMounts.end(); ) {
-    if(m_it->type == common::dataStructures::MountType::Retrieve) {
-      if(eligibleTapeSet.count(m_it->vid) == 0) {
-        m_it = mountInfo->potentialMounts.erase(m_it);
-        continue;
-      } else {
-        retrieveTapeSet.insert(m_it->vid);
+  if (anyRetr) {
+    // Neither the library information nor the tape status is known for tapes involved in retrieves.
+    // Build the eligible set of tapes in the library and with required status so
+    // we can filter the potential retrieve mounts to the ones that this tape server can serve.
+    common::dataStructures::VidToTapeMap eligibleTapeMap;
+    {
+      catalogue::TapeSearchCriteria searchCriteria;
+      searchCriteria.logicalLibrary = logicalLibraryName;
+      searchCriteria.state = common::dataStructures::Tape::ACTIVE;
+      auto eligibleTapesList = m_catalogue.Tape()->getTapes(searchCriteria);
+      for(auto& t : eligibleTapesList) {
+        eligibleTapeMap[t.vid] = t;
+      }
+      searchCriteria.state = common::dataStructures::Tape::REPACKING;
+      eligibleTapesList = m_catalogue.Tape()->getTapes(searchCriteria);
+      for(auto& t : eligibleTapesList) {
+        eligibleTapeMap[t.vid] = t;
       }
     }
-    m_it++;
-  }
-  common::dataStructures::VidToTapeMap retrieveTapesInfo;
-  if(!retrieveTapeSet.empty()) {
-    retrieveTapesInfo = m_catalogue.Tape()->getTapesByVid(retrieveTapeSet);
+    // Filter the potential retrieve mounts to keep only the ones that match
+    {
+      auto &v = mountInfo->potentialMounts;
+      v.erase(std::remove_if(v.begin(), v.end(),
+        [&eligibleTapeMap](const SchedulerDatabase::PotentialMount &pm)
+        {
+          return (pm.type == common::dataStructures::MountType::Retrieve &&
+            eligibleTapeMap.count(pm.vid) == 0);
+        }), v.end());
+    }
+
+    // fill in some of the information for the potentialMounts that getMountInfo
+    // may not have provided
     getTapeInfoTime = timer.secs(utils::Timer::resetCounter);
     for(auto& m : mountInfo->potentialMounts) {
       if(m.type == common::dataStructures::MountType::Retrieve) {
-        m.logicalLibrary = retrieveTapesInfo[m.vid].logicalLibraryName;
-        m.tapePool = retrieveTapesInfo[m.vid].tapePoolName;
-        m.vendor = retrieveTapesInfo[m.vid].vendor;
-        m.mediaType = retrieveTapesInfo[m.vid].mediaType;
-        m.vo = retrieveTapesInfo[m.vid].vo;
-        m.capacityInBytes = retrieveTapesInfo[m.vid].capacityInBytes;
-        m.labelFormat = retrieveTapesInfo[m.vid].labelFormat;
+        const auto &tp = eligibleTapeMap.at(m.vid);
+
+        m.logicalLibrary = tp.logicalLibraryName;
+        m.tapePool = tp.tapePoolName;
+        m.vendor = tp.vendor;
+        m.mediaType = tp.mediaType;
+        m.vo = tp.vo;
+        m.capacityInBytes = tp.capacityInBytes;
+        m.labelFormat = tp.labelFormat;
       }
     }
   }
@@ -1095,7 +1103,8 @@ void Scheduler::sortAndGetTapesForMountInfo(std::unique_ptr<SchedulerDatabase::T
   // is not yet met, filter out the potential mounts for which the maximum mount
   // quota is already reached, filter out the retrieve requests put to sleep for lack of disk space,
   // and weight the remaining by how much of their quota is reached.
-  for (auto m = mountInfo->potentialMounts.begin(); m!= mountInfo->potentialMounts.end();) {
+  std::set<std::vector<SchedulerDatabase::PotentialMount>::iterator> toFilterSet;
+  for (auto m = mountInfo->potentialMounts.begin(); m!= mountInfo->potentialMounts.end(); ++m) {
     // Get summary data
     uint32_t existingMountsDistinctTypesForThisTapepool = 0;
     uint32_t existingMountsBasicTypeForThisVo = 0;
@@ -1160,7 +1169,7 @@ void Scheduler::sortAndGetTapesForMountInfo(std::unique_ptr<SchedulerDatabase::T
         .add("maxDrives", maxDrives);
       if (sleepingMount) params.add("fullDiskSystem", m->diskSystemSleptFor);
       lc.log(log::DEBUG, "In Scheduler::sortAndGetTapesForMountInfo(): Removing potential mount not passing criteria");
-      m = mountInfo->potentialMounts.erase(m);
+      toFilterSet.insert(m);
     } else {
       // For the implementation of this ticket: https://gitlab.cern.ch/cta/CTA/-/issues/948
       // The max drives allowed is not per-tapepool anymore and is not set by the mount policy neither
@@ -1193,8 +1202,20 @@ void Scheduler::sortAndGetTapesForMountInfo(std::unique_ptr<SchedulerDatabase::T
         .add("voWriteMaxDrives",voOfThisPotentialMount.writeMaxDrives)
         .add("ratioOfMountQuotaUsed", m->ratioOfMountQuotaUsed);
       lc.log(log::DEBUG, "In Scheduler::sortAndGetTapesForMountInfo(): Will consider potential mount");
-      m++;
-    }
+    }
+  }
+
+  // keep those not filtered out
+  if (toFilterSet.size() > 0) {
+    auto &v = mountInfo->potentialMounts;
+    std::vector<SchedulerDatabase::PotentialMount> tmpPm;
+    tmpPm.swap(v);
+    v.reserve(tmpPm.size());
+    for(auto it = tmpPm.begin(); it != tmpPm.end(); ++it) {
+      if (toFilterSet.count(it) == 0) {
+        v.push_back(std::move(*it));
+      }
+    }
   }
 
   // We can now sort the potential mounts in decreasing priority order.
-- 
GitLab
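
For reference, below is a minimal standalone sketch of the deferred-filtering pattern the Scheduler.cpp change adopts: iterators of elements to drop are collected during a single pass, and the vector is rebuilt once afterwards, instead of calling erase() inside the loop. The Mount struct, field names, and sample values are illustrative stand-ins, not code from the repository.

#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

// Reduced stand-in for SchedulerDatabase::PotentialMount (illustrative only).
struct Mount {
  std::string vid;
  int priority;
};

int main() {
  std::vector<Mount> mounts{{"V00001", 1}, {"V00002", 5}, {"V00003", 2}};

  // Pass 1: mark elements to drop by iterator; nothing is erased yet,
  // so the iterators stay valid for the whole loop.
  std::set<std::vector<Mount>::iterator> toFilterSet;
  for (auto m = mounts.begin(); m != mounts.end(); ++m) {
    if (m->priority < 2) {
      toFilterSet.insert(m);
    }
  }

  // Pass 2: rebuild the vector once, keeping only the unmarked elements.
  // swap() does not invalidate the stored iterators; they now refer to tmp.
  if (!toFilterSet.empty()) {
    std::vector<Mount> tmp;
    tmp.swap(mounts);
    mounts.reserve(tmp.size());
    for (auto it = tmp.begin(); it != tmp.end(); ++it) {
      if (toFilterSet.count(it) == 0) {
        mounts.push_back(std::move(*it));
      }
    }
  }

  for (const auto& m : mounts) {
    std::cout << m.vid << " priority " << m.priority << "\n";
  }
  return 0;
}

The swap step mirrors the tmpPm.swap(v) in the patch: because std::vector::swap does not invalidate iterators, the membership test against toFilterSet remains valid after the contents have been moved to the temporary vector.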