diff --git a/ReleaseNotes.md b/ReleaseNotes.md index 7183860897d597022125f5a2845e34eebe88eaf2..8ba19cf6219d3b57f5a7f212df12afe05929a9ba 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -1,13 +1,19 @@ -# v4.NEXT +# v4.8.0-1 ## Summary +This CTA release contains significant changes related to repacking, including the addition of new final and temporary states. +It may be incompatible with pre-existing operational tools that relied on the old repacking behaviour. ### Features +- cta/CTA#83 - Setup new tape state REPACKING +- cta/CTA#226 - Setup new tape state EXPORTED +- cta/CTA#77 - Add maintenance runner for cleaning-up (retrieve) queue requests and managing new internal states - cta/CTA#211 - Add functionality for reading encrypted tapes with cta-readtp - cta/CTA#214 - Update manual page for cta-admin to include info about user defined config files. ### Bug fixes - cta/CTA#93 - Refactor Frontend code to allow code sharing between SSI and gRPC implementations - cta/CTA#221 - Change option in cta-send-event from instance to eos.instance - cta/CTA#223 - Remove vid check to improve run time of cta-verify-file, fix possible _S_construct null not valid error +- cta/CTA#13 - Fix `cta-catalogue-schema-verify` checking of NOT NULL constraints in Postgres # v4.7.14-1 @@ -17,7 +23,6 @@ - cta/CTA#201 - Improve error message when oracle configured without oracle support - cta/CTA#203 - Refactor cta-restore-deletes-files by using the connection configuration class in standalone_cli_tools/common ### Bug fixes -- cta/CTA#13 - Fix `cta-catalogue-schema-verify` checking of NOT NULL constraints in Postgres - cta/CTA#209 - handle if $HOME is not defined when choosing config file for cta-admin # v4.7.13-3 diff --git a/catalogue/Catalogue.hpp b/catalogue/Catalogue.hpp index 805e1cbf73f46ff5837f04838725289dae3cd92e..9137469264ea4e84fb82b327d7f5367fd2b35c3f 100644 --- a/catalogue/Catalogue.hpp +++ b/catalogue/Catalogue.hpp @@ -708,9 +708,11 @@ public: * @param admin, the person or the system who modified the state of the tape * @param vid the VID of the tape to change the state * @param state the new state + * @param prev_state the previous state; if not std::nullopt, it will be used to validate that the state transition proceeds as expected * @param stateReason the reason why the state changes, if the state is ACTIVE and the stateReason is std::nullopt, the state will be reset to null */ - virtual void modifyTapeState(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, const common::dataStructures::Tape::State & state, const std::optional<std::string> & stateReason) = 0; + virtual void modifyTapeState(const common::dataStructures::SecurityIdentity &admin,const std::string &vid, const common::dataStructures::Tape::State & state, const std::optional<common::dataStructures::Tape::State> & prev_state, const std::optional<std::string> & stateReason) = 0; + /** * Sets the full status of the specified tape.
* @@ -744,6 +746,7 @@ public: virtual void setTapeIsFromCastorInUnitTests(const std::string &vid) = 0; virtual void setTapeDisabled(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, const std::string & reason) = 0; + virtual void setTapeRepackingDisabled(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, const std::string & reason) = 0; virtual void setTapeDirty(const std::string & vid) = 0; diff --git a/catalogue/CatalogueRetryWrapper.cpp b/catalogue/CatalogueRetryWrapper.cpp index 0e1010ce869fd7f760440cca089de135145a2a65..a609c73e7c30473b501db055c923fd88984394ec 100644 --- a/catalogue/CatalogueRetryWrapper.cpp +++ b/catalogue/CatalogueRetryWrapper.cpp @@ -388,8 +388,8 @@ void CatalogueRetryWrapper::modifyTapeVerificationStatus(const common::dataStruc return retryOnLostConnection(m_log, [&]{return m_catalogue->modifyTapeVerificationStatus(admin, vid, verificationStatus);}, m_maxTriesToConnect); } -void CatalogueRetryWrapper::modifyTapeState(const common::dataStructures::SecurityIdentity &admin,const std::string &vid, const common::dataStructures::Tape::State & state, const std::optional<std::string> & stateReason) { - return retryOnLostConnection(m_log, [&]{return m_catalogue->modifyTapeState(admin,vid, state, stateReason);}, m_maxTriesToConnect); +void CatalogueRetryWrapper::modifyTapeState(const common::dataStructures::SecurityIdentity &admin,const std::string &vid, const common::dataStructures::Tape::State & state, const std::optional<common::dataStructures::Tape::State> & prev_state, const std::optional<std::string> & stateReason) { + return retryOnLostConnection(m_log, [&]{return m_catalogue->modifyTapeState(admin,vid, state, prev_state, stateReason);}, m_maxTriesToConnect); } void CatalogueRetryWrapper::setTapeFull(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, const bool fullValue) { @@ -408,6 +408,10 @@ void CatalogueRetryWrapper::setTapeDisabled(const common::dataStructures::Securi return retryOnLostConnection(m_log, [&]{return m_catalogue->setTapeDisabled(admin, vid, reason);}, m_maxTriesToConnect); } +void CatalogueRetryWrapper::setTapeRepackingDisabled(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, const std::string & reason) { + return retryOnLostConnection(m_log, [&]{return m_catalogue->setTapeRepackingDisabled(admin, vid, reason);}, m_maxTriesToConnect); +} + void CatalogueRetryWrapper::setTapeDirty(const std::string & vid) { return retryOnLostConnection(m_log,[&]{ return m_catalogue->setTapeDirty(vid);}, m_maxTriesToConnect); } diff --git a/catalogue/CatalogueRetryWrapper.hpp b/catalogue/CatalogueRetryWrapper.hpp index 4e1ae122fc3a80b4c733fc7ab4b51d9d27c884a4..55702751fb139b1040bb55ed91faca2106e36f35 100644 --- a/catalogue/CatalogueRetryWrapper.hpp +++ b/catalogue/CatalogueRetryWrapper.hpp @@ -219,7 +219,7 @@ public: void modifyTapeVerificationStatus(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, const std::string &verificationStatus) override; - void modifyTapeState(const common::dataStructures::SecurityIdentity &admin,const std::string &vid, const common::dataStructures::Tape::State & state, const std::optional<std::string> & stateReason) override; + void modifyTapeState(const common::dataStructures::SecurityIdentity &admin,const std::string &vid, const common::dataStructures::Tape::State & state, const std::optional<common::dataStructures::Tape::State> & prev_state, const std::optional<std::string> & stateReason) 
override; void setTapeFull(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, const bool fullValue) override; @@ -229,6 +229,8 @@ public: void setTapeDisabled(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, const std::string & reason) override; + void setTapeRepackingDisabled(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, const std::string & reason) override; + void setTapeDirty(const std::string & vid) override; void modifyTapeComment(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, const std::optional<std::string> &comment) override; diff --git a/catalogue/CatalogueTest.cpp b/catalogue/CatalogueTest.cpp index c9ed269d8ab399274ab48df868f960df58f9f236..c1ee3ac8f4fed26b6fe505fc61cb4a8bea782bd8 100644 --- a/catalogue/CatalogueTest.cpp +++ b/catalogue/CatalogueTest.cpp @@ -2217,6 +2217,12 @@ TEST_P(cta_catalogue_CatalogueTest, createTapePool_tapes_of_mixed_state) { tape_broken_01.stateReason = "unit Test"; m_catalogue->createTape(m_admin, tape_broken_01); + auto tape_exported_01 = m_tape1; + tape_exported_01.vid = "E000001"; + tape_exported_01.state = common::dataStructures::Tape::EXPORTED; + tape_exported_01.stateReason = "unit Test"; + m_catalogue->createTape(m_admin, tape_exported_01); + auto tape_full_01 = m_tape1; tape_full_01.vid = "F000001"; tape_full_01.full = true; @@ -2239,6 +2245,13 @@ TEST_P(cta_catalogue_CatalogueTest, createTapePool_tapes_of_mixed_state) { tape_broken_full_01.full = true; m_catalogue->createTape(m_admin, tape_broken_full_01); + auto tape_exported_full_01 = m_tape1; + tape_exported_full_01.vid = "EFO001"; + tape_exported_full_01.state = common::dataStructures::Tape::EXPORTED; + tape_exported_full_01.stateReason = "unit Test"; + tape_exported_full_01.full = true; + m_catalogue->createTape(m_admin, tape_exported_full_01); + auto tape_disabled_full_01 = m_tape1; tape_disabled_full_01.vid = "DFO001"; tape_disabled_full_01.state = common::dataStructures::Tape::DISABLED; @@ -2255,7 +2268,7 @@ TEST_P(cta_catalogue_CatalogueTest, createTapePool_tapes_of_mixed_state) { const auto tapes = m_catalogue->getTapes(); - ASSERT_EQ(10, tapes.size()); + ASSERT_EQ(12, tapes.size()); { const auto pools = m_catalogue->getTapePools(); @@ -2264,12 +2277,12 @@ TEST_P(cta_catalogue_CatalogueTest, createTapePool_tapes_of_mixed_state) { const auto &pool = pools.front(); ASSERT_EQ(m_tape1.tapePoolName, pool.name); ASSERT_EQ(m_vo.name, pool.vo.name); - ASSERT_EQ(10, pool.nbTapes); - ASSERT_EQ(10, pool.nbEmptyTapes); + ASSERT_EQ(12, pool.nbTapes); + ASSERT_EQ(12, pool.nbEmptyTapes); ASSERT_EQ(4, pool.nbDisabledTapes); - ASSERT_EQ(6, pool.nbFullTapes); + ASSERT_EQ(7, pool.nbFullTapes); ASSERT_EQ(1, pool.nbWritableTapes); - ASSERT_EQ(10 * m_mediaType.capacityInBytes, pool.capacityBytes); + ASSERT_EQ(12 * m_mediaType.capacityInBytes, pool.capacityBytes); ASSERT_EQ(0, pool.dataBytes); ASSERT_EQ(0, pool.nbPhysicalFiles); } @@ -2280,12 +2293,12 @@ TEST_P(cta_catalogue_CatalogueTest, createTapePool_tapes_of_mixed_state) { ASSERT_EQ(m_tape1.tapePoolName, pool->name); ASSERT_EQ(m_vo.name, pool->vo.name); - ASSERT_EQ(10, pool->nbTapes); - ASSERT_EQ(10, pool->nbEmptyTapes); + ASSERT_EQ(12, pool->nbTapes); + ASSERT_EQ(12, pool->nbEmptyTapes); ASSERT_EQ(4, pool->nbDisabledTapes); - ASSERT_EQ(6, pool->nbFullTapes); + ASSERT_EQ(7, pool->nbFullTapes); ASSERT_EQ(1, pool->nbWritableTapes); - ASSERT_EQ(10 * m_mediaType.capacityInBytes, pool->capacityBytes); + ASSERT_EQ(12 * 
m_mediaType.capacityInBytes, pool->capacityBytes); ASSERT_EQ(0, pool->dataBytes); ASSERT_EQ(0, pool->nbPhysicalFiles); } @@ -4993,15 +5006,23 @@ TEST_P(cta_catalogue_CatalogueTest, createTape_StateNotActiveWithoutReasonShould m_catalogue->createVirtualOrganization(m_admin, m_vo); m_catalogue->createTapePool(m_admin, m_tape1.tapePoolName, m_vo.name, nbPartialTapes, isEncrypted, supply, "Create tape pool"); - auto tape = m_tape1; - tape.state = cta::common::dataStructures::Tape::DISABLED; - ASSERT_THROW(m_catalogue->createTape(m_admin, tape),cta::catalogue::UserSpecifiedAnEmptyStringReasonWhenTapeStateNotActive); + auto tape1 = m_tape1; + tape1.state = cta::common::dataStructures::Tape::DISABLED; + ASSERT_THROW(m_catalogue->createTape(m_admin, tape1),cta::catalogue::UserSpecifiedAnEmptyStringReasonWhenTapeStateNotActive); - tape.state = cta::common::dataStructures::Tape::BROKEN; - ASSERT_THROW(m_catalogue->createTape(m_admin, tape),cta::catalogue::UserSpecifiedAnEmptyStringReasonWhenTapeStateNotActive); + auto tape2 = m_tape2; + tape2.state = cta::common::dataStructures::Tape::BROKEN; + ASSERT_THROW(m_catalogue->createTape(m_admin, tape2),cta::catalogue::UserSpecifiedAnEmptyStringReasonWhenTapeStateNotActive); + + tape2.stateReason = "Tape broken"; + ASSERT_NO_THROW(m_catalogue->createTape(m_admin, tape2)); - tape.stateReason = "Tape broken"; - ASSERT_NO_THROW(m_catalogue->createTape(m_admin, tape)); + auto tape3 = m_tape3; + tape3.state = cta::common::dataStructures::Tape::EXPORTED; + ASSERT_THROW(m_catalogue->createTape(m_admin, tape3),cta::catalogue::UserSpecifiedAnEmptyStringReasonWhenTapeStateNotActive); + + tape3.stateReason = "Tape exported"; + ASSERT_NO_THROW(m_catalogue->createTape(m_admin, tape3)); } TEST_P(cta_catalogue_CatalogueTest, createTape_many_tapes) { @@ -6398,7 +6419,7 @@ TEST_P(cta_catalogue_CatalogueTest, modifyTapeState_nonExistentTape) { using namespace cta; common::dataStructures::Tape::State state = common::dataStructures::Tape::State::ACTIVE; - ASSERT_THROW(m_catalogue->modifyTapeState(m_admin,"DOES_NOT_EXIST",state,std::nullopt),cta::catalogue::UserSpecifiedANonExistentTape); + ASSERT_THROW(m_catalogue->modifyTapeState(m_admin,"DOES_NOT_EXIST",state,std::nullopt,std::nullopt),cta::catalogue::UserSpecifiedANonExistentTape); } TEST_P(cta_catalogue_CatalogueTest, setTapeDisabled_nonExistentTape) { @@ -6425,7 +6446,54 @@ TEST_P(cta_catalogue_CatalogueTest, modifyTapeState_nonExistentState) { m_catalogue->createTape(m_admin, m_tape1); common::dataStructures::Tape::State state = (common::dataStructures::Tape::State)42; - ASSERT_THROW(m_catalogue->modifyTapeState(m_admin, m_tape1.vid,state,std::nullopt),cta::catalogue::UserSpecifiedANonExistentTapeState); + ASSERT_THROW(m_catalogue->modifyTapeState(m_admin, m_tape1.vid,state,std::nullopt,std::nullopt),cta::catalogue::UserSpecifiedANonExistentTapeState); +} + +TEST_P(cta_catalogue_CatalogueTest, modifyTapeState_nonExistentPrevState) { + using namespace cta; + + const bool logicalLibraryIsDisabled= false; + const uint64_t nbPartialTapes = 2; + const bool isEncrypted = true; + const std::optional<std::string> supply("value for the supply pool mechanism"); + + m_catalogue->createMediaType(m_admin, m_mediaType); + m_catalogue->createLogicalLibrary(m_admin, m_tape1.logicalLibraryName, logicalLibraryIsDisabled, "Create logical library"); + + m_catalogue->createDiskInstance(m_admin, m_diskInstance.name, m_diskInstance.comment); + m_catalogue->createVirtualOrganization(m_admin, m_vo); + m_catalogue->createTapePool(m_admin, 
m_tape1.tapePoolName, m_vo.name, nbPartialTapes, isEncrypted, supply, "Create tape pool"); + + m_catalogue->createTape(m_admin, m_tape1); + + common::dataStructures::Tape::State state = common::dataStructures::Tape::State::ACTIVE; + common::dataStructures::Tape::State prevState = (common::dataStructures::Tape::State)42; + ASSERT_THROW(m_catalogue->modifyTapeState(m_admin, m_tape1.vid,state,prevState,std::nullopt),cta::catalogue::UserSpecifiedANonExistentTapeState); +} + +TEST_P(cta_catalogue_CatalogueTest, modifyTapeState_wrongPrevState) { + using namespace cta; + + const bool logicalLibraryIsDisabled= false; + const uint64_t nbPartialTapes = 2; + const bool isEncrypted = true; + const std::optional<std::string> supply("value for the supply pool mechanism"); + + m_catalogue->createMediaType(m_admin, m_mediaType); + m_catalogue->createLogicalLibrary(m_admin, m_tape1.logicalLibraryName, logicalLibraryIsDisabled, "Create logical library"); + + m_catalogue->createDiskInstance(m_admin, m_diskInstance.name, m_diskInstance.comment); + m_catalogue->createVirtualOrganization(m_admin, m_vo); + m_catalogue->createTapePool(m_admin, m_tape1.tapePoolName, m_vo.name, nbPartialTapes, isEncrypted, supply, "Create tape pool"); + + m_catalogue->createTape(m_admin, m_tape1); + + common::dataStructures::Tape::State prevState = common::dataStructures::Tape::State::ACTIVE; + common::dataStructures::Tape::State prevStateGuess = common::dataStructures::Tape::State::REPACKING; + common::dataStructures::Tape::State nextState = common::dataStructures::Tape::State::DISABLED; + std::string reason = "modify for testing"; + m_catalogue->modifyTapeState(m_admin, m_tape1.vid,prevState,std::nullopt,std::nullopt); + ASSERT_THROW(m_catalogue->modifyTapeState(m_admin, m_tape1.vid,nextState,prevStateGuess,reason),cta::catalogue::UserSpecifiedANonExistentTape); } TEST_P(cta_catalogue_CatalogueTest, modifyTapeState_noReasonWhenNotActive) { @@ -6446,9 +6514,9 @@ TEST_P(cta_catalogue_CatalogueTest, modifyTapeState_noReasonWhenNotActive) { m_catalogue->createTape(m_admin, m_tape1); std::string reason = ""; - ASSERT_THROW(m_catalogue->modifyTapeState(m_admin,m_tape1.vid,common::dataStructures::Tape::State::BROKEN,reason),cta::catalogue::UserSpecifiedAnEmptyStringReasonWhenTapeStateNotActive); + ASSERT_THROW(m_catalogue->modifyTapeState(m_admin,m_tape1.vid,common::dataStructures::Tape::State::BROKEN,std::nullopt,reason),cta::catalogue::UserSpecifiedAnEmptyStringReasonWhenTapeStateNotActive); - ASSERT_THROW(m_catalogue->modifyTapeState(m_admin,m_tape1.vid,common::dataStructures::Tape::State::DISABLED,std::nullopt),cta::catalogue::UserSpecifiedAnEmptyStringReasonWhenTapeStateNotActive); + ASSERT_THROW(m_catalogue->modifyTapeState(m_admin,m_tape1.vid,common::dataStructures::Tape::State::DISABLED,std::nullopt,std::nullopt),cta::catalogue::UserSpecifiedAnEmptyStringReasonWhenTapeStateNotActive); } TEST_P(cta_catalogue_CatalogueTest, modifyTapeState) { @@ -6471,7 +6539,7 @@ TEST_P(cta_catalogue_CatalogueTest, modifyTapeState) { std::string reason = "tape broken"; std::string vid = m_tape1.vid; - ASSERT_NO_THROW(m_catalogue->modifyTapeState(m_admin,vid,common::dataStructures::Tape::State::BROKEN,reason)); + ASSERT_NO_THROW(m_catalogue->modifyTapeState(m_admin,vid,common::dataStructures::Tape::State::BROKEN,std::nullopt,reason)); { //catalogue getTapesByVid test (single VID) @@ -6530,9 +6598,9 @@ TEST_P(cta_catalogue_CatalogueTest, modifyTapeStateResetReasonWhenBackToActiveSt std::string vid = m_tape1.vid; std::string reason = "Broken tape"; 
- ASSERT_NO_THROW(m_catalogue->modifyTapeState(m_admin,vid,common::dataStructures::Tape::State::BROKEN,reason)); + ASSERT_NO_THROW(m_catalogue->modifyTapeState(m_admin,vid,common::dataStructures::Tape::State::BROKEN,std::nullopt,reason)); - ASSERT_NO_THROW(m_catalogue->modifyTapeState(m_admin,vid,common::dataStructures::Tape::State::ACTIVE,std::nullopt)); + ASSERT_NO_THROW(m_catalogue->modifyTapeState(m_admin,vid,common::dataStructures::Tape::State::ACTIVE,std::nullopt,std::nullopt)); { auto vidToTapeMap = m_catalogue->getTapesByVid(vid); @@ -6580,7 +6648,7 @@ TEST_P(cta_catalogue_CatalogueTest, getTapesSearchCriteriaByState) { } std::string reason = "Broken tape"; - ASSERT_NO_THROW(m_catalogue->modifyTapeState(m_admin,vidTape1,common::dataStructures::Tape::State::BROKEN,reason)); + ASSERT_NO_THROW(m_catalogue->modifyTapeState(m_admin,vidTape1,common::dataStructures::Tape::State::BROKEN,std::nullopt,reason)); { cta::catalogue::TapeSearchCriteria criteria; @@ -9693,6 +9761,7 @@ TEST_P(cta_catalogue_CatalogueTest, prepareToRetrieveFileUsingArchiveFileId_disa ASSERT_EQ(file2Written.fSeq, tapeFile2.fSeq); ASSERT_EQ(file2Written.blockId, tapeFile2.blockId); ASSERT_EQ(file2Written.checksumBlob, tapeFile2.checksumBlob); + ASSERT_EQ(file2Written.copyNb, tapeFile2.copyNb); } m_catalogue->setTapeDisabled(m_admin, m_tape1.vid, disabledReason); @@ -9704,6 +9773,279 @@ TEST_P(cta_catalogue_CatalogueTest, prepareToRetrieveFileUsingArchiveFileId_disa ASSERT_EQ(archivePriority, queueCriteria.mountPolicy.archivePriority); ASSERT_EQ(minArchiveRequestAge, queueCriteria.mountPolicy.archiveMinRequestAge); + ASSERT_EQ(2, queueCriteria.archiveFile.tapeFiles.size()); + + const auto copyNbToTapeFile1Itor = queueCriteria.archiveFile.tapeFiles.find(1); + ASSERT_NE(copyNbToTapeFile1Itor, queueCriteria.archiveFile.tapeFiles.end()); + const common::dataStructures::TapeFile &tapeFile1 = *copyNbToTapeFile1Itor; + ASSERT_EQ(file1Written.vid, tapeFile1.vid); + ASSERT_EQ(file1Written.fSeq, tapeFile1.fSeq); + ASSERT_EQ(file1Written.blockId, tapeFile1.blockId); + ASSERT_EQ(file1Written.checksumBlob, tapeFile1.checksumBlob); + ASSERT_EQ(file1Written.copyNb, tapeFile1.copyNb); + + const auto copyNbToTapeFile2Itor = queueCriteria.archiveFile.tapeFiles.find(2); + ASSERT_NE(copyNbToTapeFile2Itor, queueCriteria.archiveFile.tapeFiles.end()); + const common::dataStructures::TapeFile &tapeFile2 = *copyNbToTapeFile2Itor; + ASSERT_EQ(file2Written.vid, tapeFile2.vid); + ASSERT_EQ(file2Written.fSeq, tapeFile2.fSeq); + ASSERT_EQ(file2Written.blockId, tapeFile2.blockId); + ASSERT_EQ(file2Written.checksumBlob, tapeFile2.checksumBlob); + ASSERT_EQ(file2Written.copyNb, tapeFile2.copyNb); + } +} + +TEST_P(cta_catalogue_CatalogueTest, prepareToRetrieveFileUsingArchiveFileId_repackingTapes) { + using namespace cta; + + const std::string diskInstanceName1 = m_diskInstance.name; + + const bool logicalLibraryIsDisabled= false; + const uint64_t nbPartialTapes = 2; + const bool isEncrypted = true; + const std::optional<std::string> supply("value for the supply pool mechanism"); + + std::string repackingReason = "repackingReason"; + + m_catalogue->createMediaType(m_admin, m_mediaType); + m_catalogue->createLogicalLibrary(m_admin, m_tape1.logicalLibraryName, logicalLibraryIsDisabled, "Create logical library"); + m_catalogue->createDiskInstance(m_admin, m_diskInstance.name, m_diskInstance.comment); + m_catalogue->createVirtualOrganization(m_admin, m_vo); + m_catalogue->createTapePool(m_admin, m_tape1.tapePoolName, m_vo.name, nbPartialTapes, isEncrypted, 
supply, "Create tape pool"); + + m_catalogue->createTape(m_admin, m_tape1); + m_catalogue->createTape(m_admin, m_tape2); + + const std::list<common::dataStructures::Tape> tapes = m_catalogue->getTapes(); + const std::map<std::string, common::dataStructures::Tape> vidToTape = tapeListToMap(tapes); + { + auto it = vidToTape.find(m_tape1.vid); + ASSERT_TRUE(it != vidToTape.end()); + const common::dataStructures::Tape &tape = it->second; + ASSERT_EQ(m_tape1.vid, tape.vid); + ASSERT_EQ(m_tape1.mediaType, tape.mediaType); + ASSERT_EQ(m_tape1.vendor, tape.vendor); + ASSERT_EQ(m_tape1.logicalLibraryName, tape.logicalLibraryName); + ASSERT_EQ(m_tape1.tapePoolName, tape.tapePoolName); + ASSERT_EQ(m_vo.name, tape.vo); + ASSERT_EQ(m_mediaType.capacityInBytes, tape.capacityInBytes); + ASSERT_EQ(m_tape1.full, tape.full); + + ASSERT_FALSE(tape.isFromCastor); + ASSERT_EQ(m_tape1.comment, tape.comment); + ASSERT_FALSE(tape.labelLog); + ASSERT_FALSE(tape.lastReadLog); + ASSERT_FALSE(tape.lastWriteLog); + + const common::dataStructures::EntryLog creationLog = tape.creationLog; + ASSERT_EQ(m_admin.username, creationLog.username); + ASSERT_EQ(m_admin.host, creationLog.host); + + const common::dataStructures::EntryLog lastModificationLog = + tape.lastModificationLog; + ASSERT_EQ(creationLog, lastModificationLog); + } + { + auto it = vidToTape.find(m_tape2.vid); + ASSERT_TRUE(it != vidToTape.end()); + const common::dataStructures::Tape &tape = it->second; + ASSERT_EQ(m_tape2.vid, tape.vid); + ASSERT_EQ(m_tape2.mediaType, tape.mediaType); + ASSERT_EQ(m_tape2.vendor, tape.vendor); + ASSERT_EQ(m_tape2.logicalLibraryName, tape.logicalLibraryName); + ASSERT_EQ(m_tape2.tapePoolName, tape.tapePoolName); + ASSERT_EQ(m_vo.name, tape.vo); + ASSERT_EQ(m_mediaType.capacityInBytes, tape.capacityInBytes); + ASSERT_EQ(m_tape2.full, tape.full); + + ASSERT_FALSE(tape.isFromCastor); + ASSERT_EQ(m_tape2.comment, tape.comment); + ASSERT_FALSE(tape.labelLog); + ASSERT_FALSE(tape.lastReadLog); + ASSERT_FALSE(tape.lastWriteLog); + + const common::dataStructures::EntryLog creationLog = tape.creationLog; + ASSERT_EQ(m_admin.username, creationLog.username); + ASSERT_EQ(m_admin.host, creationLog.host); + + const common::dataStructures::EntryLog lastModificationLog = + tape.lastModificationLog; + ASSERT_EQ(creationLog, lastModificationLog); + } + + const uint64_t archiveFileId = 1234; + + ASSERT_FALSE(m_catalogue->getArchiveFilesItor().hasMore()); + ASSERT_THROW(m_catalogue->getArchiveFileById(archiveFileId), exception::Exception); + + m_catalogue->createStorageClass(m_admin, m_storageClassSingleCopy); + + const uint64_t archiveFileSize = 1; + const std::string tapeDrive = "tape_drive"; + + auto file1WrittenUP=std::make_unique<cta::catalogue::TapeFileWritten>(); + auto & file1Written = *file1WrittenUP; + std::set<cta::catalogue::TapeItemWrittenPointer> file1WrittenSet; + file1WrittenSet.insert(file1WrittenUP.release()); + file1Written.archiveFileId = archiveFileId; + file1Written.diskInstance = diskInstanceName1; + file1Written.diskFileId = "5678"; + + file1Written.diskFileOwnerUid = PUBLIC_DISK_USER; + file1Written.diskFileGid = PUBLIC_DISK_GROUP; + file1Written.size = archiveFileSize; + file1Written.checksumBlob.insert(checksum::ADLER32, "1234"); + file1Written.storageClassName = m_storageClassSingleCopy.name; + file1Written.vid = m_tape1.vid; + file1Written.fSeq = 1; + file1Written.blockId = 4321; + file1Written.copyNb = 1; + file1Written.tapeDrive = tapeDrive; + m_catalogue->filesWrittenToTape(file1WrittenSet); + + { + const 
common::dataStructures::ArchiveFile archiveFile = m_catalogue->getArchiveFileById(archiveFileId); + + ASSERT_EQ(file1Written.archiveFileId, archiveFile.archiveFileID); + ASSERT_EQ(file1Written.diskFileId, archiveFile.diskFileId); + ASSERT_EQ(file1Written.size, archiveFile.fileSize); + ASSERT_EQ(file1Written.checksumBlob, archiveFile.checksumBlob); + ASSERT_EQ(file1Written.storageClassName, archiveFile.storageClass); + + ASSERT_EQ(file1Written.diskInstance, archiveFile.diskInstance); + + ASSERT_EQ(file1Written.diskFileOwnerUid, archiveFile.diskFileInfo.owner_uid); + ASSERT_EQ(file1Written.diskFileGid, archiveFile.diskFileInfo.gid); + + ASSERT_EQ(1, archiveFile.tapeFiles.size()); + auto copyNbToTapeFile1Itor = archiveFile.tapeFiles.find(1); + ASSERT_NE(copyNbToTapeFile1Itor, archiveFile.tapeFiles.end()); + const common::dataStructures::TapeFile &tapeFile1 = *copyNbToTapeFile1Itor; + ASSERT_EQ(file1Written.vid, tapeFile1.vid); + ASSERT_EQ(file1Written.fSeq, tapeFile1.fSeq); + ASSERT_EQ(file1Written.blockId, tapeFile1.blockId); + ASSERT_EQ(file1Written.checksumBlob, tapeFile1.checksumBlob); + ASSERT_EQ(file1Written.copyNb, tapeFile1.copyNb); + } + + auto file2WrittenUP=std::make_unique<cta::catalogue::TapeFileWritten>(); + auto & file2Written = *file2WrittenUP; + std::set<cta::catalogue::TapeItemWrittenPointer> file2WrittenSet; + file2WrittenSet.insert(file2WrittenUP.release()); + file2Written.archiveFileId = file1Written.archiveFileId; + file2Written.diskInstance = file1Written.diskInstance; + file2Written.diskFileId = file1Written.diskFileId; + + file2Written.diskFileOwnerUid = file1Written.diskFileOwnerUid; + file2Written.diskFileGid = file1Written.diskFileGid; + file2Written.size = archiveFileSize; + file2Written.checksumBlob = file1Written.checksumBlob; + file2Written.storageClassName = m_storageClassSingleCopy.name; + file2Written.vid = m_tape2.vid; + file2Written.fSeq = 1; + file2Written.blockId = 4331; + file2Written.copyNb = 2; + file2Written.tapeDrive = tapeDrive; + m_catalogue->filesWrittenToTape(file2WrittenSet); + + { + const common::dataStructures::ArchiveFile archiveFile = m_catalogue->getArchiveFileById(archiveFileId); + + ASSERT_EQ(file2Written.archiveFileId, archiveFile.archiveFileID); + ASSERT_EQ(file2Written.diskFileId, archiveFile.diskFileId); + ASSERT_EQ(file2Written.size, archiveFile.fileSize); + ASSERT_EQ(file2Written.checksumBlob, archiveFile.checksumBlob); + ASSERT_EQ(file2Written.storageClassName, archiveFile.storageClass); + + ASSERT_EQ(file2Written.diskInstance, archiveFile.diskInstance); + + ASSERT_EQ(file2Written.diskFileOwnerUid, archiveFile.diskFileInfo.owner_uid); + ASSERT_EQ(file2Written.diskFileGid, archiveFile.diskFileInfo.gid); + + ASSERT_EQ(2, archiveFile.tapeFiles.size()); + + const auto copyNbToTapeFile1Itor = archiveFile.tapeFiles.find(1); + ASSERT_NE(copyNbToTapeFile1Itor, archiveFile.tapeFiles.end()); + const common::dataStructures::TapeFile &tapeFile1 = *copyNbToTapeFile1Itor; + ASSERT_EQ(file1Written.vid, tapeFile1.vid); + ASSERT_EQ(file1Written.fSeq, tapeFile1.fSeq); + ASSERT_EQ(file1Written.blockId, tapeFile1.blockId); + ASSERT_EQ(file1Written.checksumBlob, tapeFile1.checksumBlob); + + const auto copyNbToTapeFile2Itor = archiveFile.tapeFiles.find(2); + ASSERT_NE(copyNbToTapeFile2Itor, archiveFile.tapeFiles.end()); + const common::dataStructures::TapeFile &tapeFile2 = *copyNbToTapeFile2Itor; + ASSERT_EQ(file2Written.vid, tapeFile2.vid); + ASSERT_EQ(file2Written.fSeq, tapeFile2.fSeq); + ASSERT_EQ(file2Written.blockId, tapeFile2.blockId); + 
ASSERT_EQ(file2Written.checksumBlob, tapeFile2.checksumBlob); + } + + auto mountPolicyToAdd = getMountPolicy1(); + std::string mountPolicyName = mountPolicyToAdd.name; + uint64_t minArchiveRequestAge = mountPolicyToAdd.minArchiveRequestAge; + uint64_t archivePriority = mountPolicyToAdd.archivePriority; + m_catalogue->createMountPolicy(m_admin,mountPolicyToAdd); + + const std::string comment = "Create mount rule for requester"; + const std::string requesterName = "requester_name"; + m_catalogue->createRequesterMountRule(m_admin, mountPolicyName, diskInstanceName1, requesterName, comment); + + const std::list<common::dataStructures::RequesterMountRule> rules = m_catalogue->getRequesterMountRules(); + ASSERT_EQ(1, rules.size()); + + const common::dataStructures::RequesterMountRule rule = rules.front(); + + ASSERT_EQ(diskInstanceName1, rule.diskInstance); + ASSERT_EQ(requesterName, rule.name); + ASSERT_EQ(mountPolicyName, rule.mountPolicy); + ASSERT_EQ(comment, rule.comment); + ASSERT_EQ(m_admin.username, rule.creationLog.username); + ASSERT_EQ(m_admin.host, rule.creationLog.host); + ASSERT_EQ(rule.creationLog, rule.lastModificationLog); + + log::LogContext dummyLc(m_dummyLog); + + common::dataStructures::RequesterIdentity requesterIdentity; + requesterIdentity.name = requesterName; + requesterIdentity.group = "group"; + + { + const common::dataStructures::RetrieveFileQueueCriteria queueCriteria = + m_catalogue->prepareToRetrieveFile(diskInstanceName1, archiveFileId, requesterIdentity, std::nullopt, dummyLc); + + ASSERT_EQ(archivePriority, queueCriteria.mountPolicy.archivePriority); + ASSERT_EQ(minArchiveRequestAge, queueCriteria.mountPolicy.archiveMinRequestAge); + + ASSERT_EQ(2, queueCriteria.archiveFile.tapeFiles.size()); + + const auto copyNbToTapeFile1Itor = queueCriteria.archiveFile.tapeFiles.find(1); + ASSERT_NE(copyNbToTapeFile1Itor, queueCriteria.archiveFile.tapeFiles.end()); + const common::dataStructures::TapeFile &tapeFile1 = *copyNbToTapeFile1Itor; + ASSERT_EQ(file1Written.vid, tapeFile1.vid); + ASSERT_EQ(file1Written.fSeq, tapeFile1.fSeq); + ASSERT_EQ(file1Written.blockId, tapeFile1.blockId); + ASSERT_EQ(file1Written.checksumBlob, tapeFile1.checksumBlob); + ASSERT_EQ(file1Written.copyNb, tapeFile1.copyNb); + + const auto copyNbToTapeFile2Itor = queueCriteria.archiveFile.tapeFiles.find(2); + ASSERT_NE(copyNbToTapeFile2Itor, queueCriteria.archiveFile.tapeFiles.end()); + const common::dataStructures::TapeFile &tapeFile2 = *copyNbToTapeFile2Itor; + ASSERT_EQ(file2Written.vid, tapeFile2.vid); + ASSERT_EQ(file2Written.fSeq, tapeFile2.fSeq); + ASSERT_EQ(file2Written.blockId, tapeFile2.blockId); + ASSERT_EQ(file2Written.checksumBlob, tapeFile2.checksumBlob); + } + + m_catalogue->modifyTapeState(m_admin, m_tape1.vid, common::dataStructures::Tape::State::REPACKING, std::nullopt, repackingReason); + + { + const common::dataStructures::RetrieveFileQueueCriteria queueCriteria = + m_catalogue->prepareToRetrieveFile(diskInstanceName1, archiveFileId, requesterIdentity, std::nullopt, dummyLc); + + ASSERT_EQ(archivePriority, queueCriteria.mountPolicy.archivePriority); + ASSERT_EQ(minArchiveRequestAge, queueCriteria.mountPolicy.archiveMinRequestAge); + ASSERT_EQ(1, queueCriteria.archiveFile.tapeFiles.size()); const auto copyNbToTapeFile2Itor = queueCriteria.archiveFile.tapeFiles.find(2); @@ -9715,7 +10057,7 @@ TEST_P(cta_catalogue_CatalogueTest, prepareToRetrieveFileUsingArchiveFileId_disa ASSERT_EQ(file2Written.checksumBlob, tapeFile2.checksumBlob); } - m_catalogue->setTapeDisabled(m_admin, 
m_tape2.vid, disabledReason); + m_catalogue->modifyTapeState(m_admin, m_tape2.vid, common::dataStructures::Tape::State::REPACKING, std::nullopt, repackingReason); ASSERT_THROW(m_catalogue->prepareToRetrieveFile(diskInstanceName1, archiveFileId, requesterIdentity, std::nullopt, dummyLc), exception::UserError); @@ -9831,8 +10173,8 @@ TEST_P(cta_catalogue_CatalogueTest, prepareToRetrieveFileUsingArchiveFileId_retu ASSERT_EQ(file2Written.copyNb, tapeFile1.copyNb); } - std::string disabledReason = "disabled reason"; - m_catalogue->setTapeDisabled(m_admin, m_tape2.vid, disabledReason); + std::string repackingReason = "repackingReason"; + m_catalogue->modifyTapeState(m_admin, m_tape2.vid, common::dataStructures::Tape::State::REPACKING, std::nullopt, repackingReason); ASSERT_THROW(m_catalogue->prepareToRetrieveFile(diskInstanceName1, archiveFileId, requesterIdentity, std::nullopt, dummyLc), exception::UserError); @@ -11066,7 +11408,7 @@ TEST_P(cta_catalogue_CatalogueTest, filesWrittenToTape_many_archive_files) { ASSERT_EQ(nbArchiveFiles, pool.nbPhysicalFiles); } - m_catalogue->modifyTapeState(m_admin, tape1.vid, common::dataStructures::Tape::ACTIVE,std::nullopt); + m_catalogue->modifyTapeState(m_admin, tape1.vid, common::dataStructures::Tape::ACTIVE,std::nullopt,std::nullopt); { const auto pools = m_catalogue->getTapePools(); @@ -17414,6 +17756,120 @@ TEST_P(cta_catalogue_CatalogueTest, reclaimTapeRemovesFilesFromRecycleLog) { } } +TEST_P(cta_catalogue_CatalogueTest, reclaimTapeActiveState) { + using namespace cta; + + const bool logicalLibraryIsDisabled= false; + const std::string tapePoolName1 = "tape_pool_name_1"; + const uint64_t nbPartialTapes = 1; + const bool isEncrypted = true; + const std::optional<std::string> supply("value for the supply pool mechanism"); + const std::string diskInstance = m_diskInstance.name; + + log::LogContext dummyLc(m_dummyLog); + + m_catalogue->createMediaType(m_admin, m_mediaType); + m_catalogue->createLogicalLibrary(m_admin, m_tape1.logicalLibraryName, logicalLibraryIsDisabled, "Create logical library"); + m_catalogue->createDiskInstance(m_admin, m_diskInstance.name, m_diskInstance.comment); + m_catalogue->createVirtualOrganization(m_admin, m_vo); + m_catalogue->createTapePool(m_admin, tapePoolName1, m_vo.name, nbPartialTapes, isEncrypted, supply, "Create tape pool"); + m_catalogue->createStorageClass(m_admin, m_storageClassSingleCopy); + + auto tape1 = m_tape1; + tape1.tapePoolName = tapePoolName1; + + m_catalogue->createTape(m_admin, tape1); + m_catalogue->setTapeFull(m_admin, tape1.vid, true); + + // ACTIVE - Reclaim allowed + m_catalogue->modifyTapeState(m_admin, tape1.vid, common::dataStructures::Tape::ACTIVE, std::nullopt, "Testing"); + ASSERT_NO_THROW(m_catalogue->reclaimTape(m_admin, tape1.vid, dummyLc)); +} + +TEST_P(cta_catalogue_CatalogueTest, reclaimTapeDisabledState) { +using namespace cta; + + const bool logicalLibraryIsDisabled= false; + const std::string tapePoolName1 = "tape_pool_name_1"; + const uint64_t nbPartialTapes = 1; + const bool isEncrypted = true; + const std::optional<std::string> supply("value for the supply pool mechanism"); + const std::string diskInstance = m_diskInstance.name; + + log::LogContext dummyLc(m_dummyLog); + + m_catalogue->createMediaType(m_admin, m_mediaType); + m_catalogue->createLogicalLibrary(m_admin, m_tape1.logicalLibraryName, logicalLibraryIsDisabled, "Create logical library"); + m_catalogue->createDiskInstance(m_admin, m_diskInstance.name, m_diskInstance.comment); + 
m_catalogue->createVirtualOrganization(m_admin, m_vo); + m_catalogue->createTapePool(m_admin, tapePoolName1, m_vo.name, nbPartialTapes, isEncrypted, supply, "Create tape pool"); + m_catalogue->createStorageClass(m_admin, m_storageClassSingleCopy); + + auto tape1 = m_tape1; + tape1.tapePoolName = tapePoolName1; + + m_catalogue->createTape(m_admin, tape1); + m_catalogue->setTapeFull(m_admin, tape1.vid, true); + + // DISABLED - Reclaim allowed + m_catalogue->modifyTapeState(m_admin, tape1.vid, common::dataStructures::Tape::DISABLED, std::nullopt, "Testing"); + ASSERT_NO_THROW(m_catalogue->reclaimTape(m_admin, tape1.vid, dummyLc)); +} + +TEST_P(cta_catalogue_CatalogueTest, reclaimTapeNotAllowedStates) { + using namespace cta; + + const bool logicalLibraryIsDisabled= false; + const std::string tapePoolName1 = "tape_pool_name_1"; + const uint64_t nbPartialTapes = 1; + const bool isEncrypted = true; + const std::optional<std::string> supply("value for the supply pool mechanism"); + const std::string diskInstance = m_diskInstance.name; + + log::LogContext dummyLc(m_dummyLog); + + m_catalogue->createMediaType(m_admin, m_mediaType); + m_catalogue->createLogicalLibrary(m_admin, m_tape1.logicalLibraryName, logicalLibraryIsDisabled, "Create logical library"); + m_catalogue->createDiskInstance(m_admin, m_diskInstance.name, m_diskInstance.comment); + m_catalogue->createVirtualOrganization(m_admin, m_vo); + m_catalogue->createTapePool(m_admin, tapePoolName1, m_vo.name, nbPartialTapes, isEncrypted, supply, "Create tape pool"); + m_catalogue->createStorageClass(m_admin, m_storageClassSingleCopy); + + auto tape1 = m_tape1; + tape1.tapePoolName = tapePoolName1; + + m_catalogue->createTape(m_admin, tape1); + m_catalogue->setTapeFull(m_admin, tape1.vid, true); + + // REPACKING - Reclaim not allowed + m_catalogue->modifyTapeState(m_admin, tape1.vid, common::dataStructures::Tape::REPACKING, std::nullopt, "Testing"); + ASSERT_THROW(m_catalogue->reclaimTape(m_admin, tape1.vid, dummyLc), exception::UserError); + + // REPACKING_DISABLED - Reclaim not allowed + m_catalogue->modifyTapeState(m_admin, tape1.vid, common::dataStructures::Tape::REPACKING_DISABLED, std::nullopt, "Testing"); + ASSERT_THROW(m_catalogue->reclaimTape(m_admin, tape1.vid, dummyLc), exception::UserError); + + // REPACKING_PENDING - Reclaim not allowed + m_catalogue->modifyTapeState(m_admin, tape1.vid, common::dataStructures::Tape::REPACKING_PENDING, std::nullopt, "Testing"); + ASSERT_THROW(m_catalogue->reclaimTape(m_admin, tape1.vid, dummyLc), exception::UserError); + + // BROKEN - Reclaim not allowed + m_catalogue->modifyTapeState(m_admin, tape1.vid, common::dataStructures::Tape::BROKEN, std::nullopt, "Testing"); + ASSERT_THROW(m_catalogue->reclaimTape(m_admin, tape1.vid, dummyLc), exception::UserError); + + // BROKEN_PENDING - Reclaim not allowed + m_catalogue->modifyTapeState(m_admin, tape1.vid, common::dataStructures::Tape::BROKEN_PENDING, std::nullopt, "Testing"); + ASSERT_THROW(m_catalogue->reclaimTape(m_admin, tape1.vid, dummyLc), exception::UserError); + + // EXPORTED - Reclaim not allowed + m_catalogue->modifyTapeState(m_admin, tape1.vid, common::dataStructures::Tape::EXPORTED, std::nullopt, "Testing"); + ASSERT_THROW(m_catalogue->reclaimTape(m_admin, tape1.vid, dummyLc), exception::UserError); + + // EXPORTED_PENDING - Reclaim not allowed + m_catalogue->modifyTapeState(m_admin, tape1.vid, common::dataStructures::Tape::EXPORTED_PENDING, std::nullopt, "Testing"); + ASSERT_THROW(m_catalogue->reclaimTape(m_admin, tape1.vid, dummyLc), 
exception::UserError); +} + TEST_P(cta_catalogue_CatalogueTest, emptyFileRecycleLogItorTest) { using namespace cta; auto itor = m_catalogue->getFileRecycleLogItor(); diff --git a/catalogue/DummyCatalogue.cpp b/catalogue/DummyCatalogue.cpp index 7f07160162f4ddf0f2080c266185772e29f9b62a..12d93142a82ac725657e819c6b01fcbaf13137d5 100644 --- a/catalogue/DummyCatalogue.cpp +++ b/catalogue/DummyCatalogue.cpp @@ -161,7 +161,6 @@ void DummyCatalogue::modifyStorageClassNbCopies(const common::dataStructures::Se void DummyCatalogue::modifyTapeComment(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const std::optional<std::string> &comment) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } void DummyCatalogue::modifyTapeEncryptionKeyName(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const std::string& encryptionKeyName) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } void DummyCatalogue::modifyTapeVerificationStatus(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const std::string& verificationStatus) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } -void DummyCatalogue::modifyTapeState(const common::dataStructures::SecurityIdentity &admin,const std::string &vid, const common::dataStructures::Tape::State & state, const std::optional<std::string> & stateReason) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } void DummyCatalogue::modifyTapeMediaType(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const std::string& mediaType) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } void DummyCatalogue::modifyTapeVendor(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const std::string& vendor) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } void DummyCatalogue::modifyTapeLogicalLibraryName(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const std::string& logicalLibraryName) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } @@ -183,12 +182,12 @@ void DummyCatalogue::reclaimTape(const common::dataStructures::SecurityIdentity& void DummyCatalogue::checkTapeForLabel(const std::string& vid) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } uint64_t DummyCatalogue::getNbFilesOnTape(const std::string& vid) const { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } void DummyCatalogue::setTapeDisabled(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const std::string & reason) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } +void DummyCatalogue::setTapeRepackingDisabled(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const std::string & reason) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } void DummyCatalogue::setTapeFull(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const bool fullValue) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } void DummyCatalogue::setTapeDirty(const common::dataStructures::SecurityIdentity& admin, const 
std::string& vid, const bool dirtyValue) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } void DummyCatalogue::setTapeDirty(const std::string & vid) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } void DummyCatalogue::setTapeIsFromCastorInUnitTests(const std::string &vid) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } void DummyCatalogue::setTapePoolEncryption(const common::dataStructures::SecurityIdentity& admin, const std::string& name, const bool encryptionValue) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } -bool DummyCatalogue::tapeExists(const std::string& vid) const { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } bool DummyCatalogue::diskSystemExists(const std::string& name) const { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } void DummyCatalogue::tapeLabelled(const std::string& vid, const std::string& drive) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } void DummyCatalogue::tapeMountedForArchive(const std::string& vid, const std::string& drive) { throw exception::Exception(std::string("In ")+__PRETTY_FUNCTION__+": not implemented"); } @@ -208,6 +207,23 @@ void DummyCatalogue::addDisabledTape(const std::string & vid) { threading::MutexLocker lm(m_tapeEnablingMutex); m_tapeEnabling[vid]=common::dataStructures::Tape::DISABLED; } +void DummyCatalogue::addRepackingTape(const std::string & vid) { + threading::MutexLocker lm(m_tapeEnablingMutex); + m_tapeEnabling[vid]=common::dataStructures::Tape::REPACKING; +} +void DummyCatalogue::modifyTapeState(const common::dataStructures::SecurityIdentity &admin,const std::string &vid, const common::dataStructures::Tape::State & state, const std::optional<common::dataStructures::Tape::State> & prev_state, const std::optional<std::string> & stateReason) { + threading::MutexLocker lm(m_tapeEnablingMutex); + if (prev_state.has_value() && prev_state.value() != m_tapeEnabling[vid]) { + throw exception::Exception("Previous state mismatch"); + } + m_tapeEnabling[vid]=state; +} +bool DummyCatalogue::tapeExists(const std::string& vid) const { + return m_tapeEnabling.find(vid) != m_tapeEnabling.end(); +} +common::dataStructures::Tape::State DummyCatalogue::getTapeState(const std::string & vid) const { + return m_tapeEnabling.at(vid); +} common::dataStructures::VidToTapeMap DummyCatalogue::getTapesByVid(const std::string& vid) const { std::set<std::string> vids = {vid}; return getTapesByVid(vids); diff --git a/catalogue/DummyCatalogue.hpp b/catalogue/DummyCatalogue.hpp index 5598d78ed9aac87cb887d31aaf12bfaa0d43093a..1ffaff73bdcc3f849db9f77d4b99e644cf8d1c03 100644 --- a/catalogue/DummyCatalogue.hpp +++ b/catalogue/DummyCatalogue.hpp @@ -164,7 +164,7 @@ public: void modifyTapeComment(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const std::optional<std::string> &comment) override; void modifyTapeEncryptionKeyName(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const std::string& encryptionKeyName) override; void modifyTapeVerificationStatus(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const std::string& verificationStatus) override; - void modifyTapeState(const common::dataStructures::SecurityIdentity &admin,const std::string &vid, const 
common::dataStructures::Tape::State & state, const std::optional<std::string> & stateReason) override; + void modifyTapeState(const common::dataStructures::SecurityIdentity &admin,const std::string &vid, const common::dataStructures::Tape::State & state, const std::optional<common::dataStructures::Tape::State> & prev_state, const std::optional<std::string> & stateReason) override; void modifyTapeMediaType(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const std::string& mediaType) override; void modifyTapeVendor(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const std::string& vendor) override; void modifyTapeLogicalLibraryName(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const std::string& logicalLibraryName) override; @@ -186,6 +186,7 @@ public: void checkTapeForLabel(const std::string& vid) override; uint64_t getNbFilesOnTape(const std::string& vid) const override; void setTapeDisabled(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const std::string & reason) override; + void setTapeRepackingDisabled(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const std::string & reason) override; void setTapeFull(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const bool fullValue) override; void setTapeDirty(const common::dataStructures::SecurityIdentity& admin, const std::string& vid, const bool dirtyValue) override; void setTapeDirty(const std::string & vid) override; @@ -198,10 +199,11 @@ public: void tapeMountedForRetrieve(const std::string& vid, const std::string& drive) override; bool tapePoolExists(const std::string& tapePoolName) const; void updateDiskFileId(uint64_t archiveFileId, const std::string &diskInstance, const std::string &diskFileId) override; - void moveArchiveFileToRecycleLog(const common::dataStructures::DeleteArchiveRequest &request, - log::LogContext & lc) override; + void moveArchiveFileToRecycleLog(const common::dataStructures::DeleteArchiveRequest &request, log::LogContext & lc) override; void modifyArchiveFileStorageClassId(const uint64_t archiveFileId, const std::string &newStorageClassName) const override; + common::dataStructures::Tape::State getTapeState(const std::string & vid) const; + common::dataStructures::VidToTapeMap getTapesByVid(const std::string& vid) const override; common::dataStructures::VidToTapeMap getTapesByVid(const std::set<std::string>& vids) const override; @@ -248,6 +250,7 @@ public: // This special funcitons for unit tests should be put in private void addEnabledTape(const std::string & vid); void addDisabledTape(const std::string & vid); + void addRepackingTape(const std::string & vid); private: mutable threading::Mutex m_tapeEnablingMutex; diff --git a/catalogue/RdbmsCatalogue.cpp b/catalogue/RdbmsCatalogue.cpp index bf003e6d1be46b643531ae4057fe3cf0760cda55..77e9d1321232a56a29deeaf274b1f6f71ccf3db1 100644 --- a/catalogue/RdbmsCatalogue.cpp +++ b/catalogue/RdbmsCatalogue.cpp @@ -4802,6 +4802,7 @@ void RdbmsCatalogue::resetTapeCounters(rdbms::Conn& conn, const common::dataStru // reclaimTape //------------------------------------------------------------------------------ void RdbmsCatalogue::reclaimTape(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, cta::log::LogContext & lc) { + using namespace common::dataStructures; try{ log::TimingList tl; utils::Timer t; @@ -4814,11 +4815,12 @@ void RdbmsCatalogue::reclaimTape(const 
common::dataStructures::SecurityIdentity if (tapes.empty()) { throw exception::UserError(std::string("Cannot reclaim tape ") + vid + " because it does not exist"); - } else { - if (!tapes.front().full) { - throw exception::UserError(std::string("Cannot reclaim tape ") + vid + " because it is not FULL"); - } + } else if (tapes.front().state != Tape::State::ACTIVE && tapes.front().state != Tape::State::DISABLED) { + throw exception::UserError(std::string("Cannot reclaim tape ") + vid + " because it is not in the ACTIVE or DISABLED state"); + } else if (!tapes.front().full) { + throw exception::UserError(std::string("Cannot reclaim tape ") + vid + " because it is not FULL"); + } + // The tape exists and is full, so we can try to reclaim it if (this->getNbFilesOnTape(conn, vid) == 0) { tl.insertAndReset("getNbFilesOnTape", t); @@ -5204,7 +5206,7 @@ void RdbmsCatalogue::modifyTapeVerificationStatus(const common::dataStructures:: //------------------------------------------------------------------------------ // modifyTapeState //------------------------------------------------------------------------------ -void RdbmsCatalogue::modifyTapeState(const common::dataStructures::SecurityIdentity &admin,const std::string &vid, const common::dataStructures::Tape::State & state, const std::optional<std::string> & stateReason){ +void RdbmsCatalogue::modifyTapeState(const common::dataStructures::SecurityIdentity &admin,const std::string &vid, const common::dataStructures::Tape::State & state, const std::optional<common::dataStructures::Tape::State> & prev_state, const std::optional<std::string> & stateReason){ try { using namespace common::dataStructures; const time_t now = time(nullptr); @@ -5220,6 +5222,16 @@ void RdbmsCatalogue::modifyTapeState(const common::dataStructures::SecurityIdent throw UserSpecifiedANonExistentTapeState(errorMsg); } + std::string prevStateStr; + if (prev_state.has_value()) { + try { + prevStateStr = cta::common::dataStructures::Tape::stateToString(prev_state.value()); + } catch (cta::exception::Exception &ex) { + std::string errorMsg = "The previous state provided in parameter (" + std::to_string(prev_state.value()) + ") is not known or has not been initialized. Existing states are: " + common::dataStructures::Tape::getAllPossibleStates(); + throw UserSpecifiedANonExistentTapeState(errorMsg); + } + } + //Check the reason is set for all the status except the ACTIVE one, this is the only state that allows the reason to be set to null.
if(state != Tape::State::ACTIVE){ if(!stateReasonCopy){ @@ -5227,7 +5239,7 @@ void RdbmsCatalogue::modifyTapeState(const common::dataStructures::SecurityIdent } } - const char *const sql = + std::string sql = "UPDATE TAPE SET " "TAPE_STATE = :TAPE_STATE," "STATE_REASON = :STATE_REASON," @@ -5235,6 +5247,11 @@ void RdbmsCatalogue::modifyTapeState(const common::dataStructures::SecurityIdent "STATE_MODIFIED_BY = :STATE_MODIFIED_BY " "WHERE " "VID = :VID"; + + if (prev_state.has_value()) { + sql += " AND TAPE_STATE = :PREV_TAPE_STATE"; + } + auto conn = m_connPool.getConn(); auto stmt = conn.createStmt(sql); @@ -5243,10 +5260,13 @@ void RdbmsCatalogue::modifyTapeState(const common::dataStructures::SecurityIdent stmt.bindUint64(":STATE_UPDATE_TIME", now); stmt.bindString(":STATE_MODIFIED_BY",generateTapeStateModifiedBy(admin)); stmt.bindString(":VID",vid); + if (prev_state.has_value()) { + stmt.bindString(":PREV_TAPE_STATE",prevStateStr); + } stmt.executeNonQuery(); if (0 == stmt.getNbAffectedRows()) { - throw UserSpecifiedANonExistentTape(std::string("Cannot modify the state of the tape ") + vid + " because it does not exist"); + throw UserSpecifiedANonExistentTape(std::string("Cannot modify the state of the tape ") + vid + " because it does not exist or because a recent state change has been detected"); } } catch(exception::UserError &) { @@ -5500,7 +5520,23 @@ void RdbmsCatalogue::setTapeDisabled(const common::dataStructures::SecurityIdent const std::string &vid, const std::string & reason) { try { - modifyTapeState(admin,vid,common::dataStructures::Tape::DISABLED,reason); + modifyTapeState(admin,vid,common::dataStructures::Tape::DISABLED,std::nullopt,reason); + } catch(exception::UserError &) { + throw; + } catch(exception::Exception &ex) { + ex.getMessage().str(std::string(__FUNCTION__) + ": " + ex.getMessage().str()); + throw; + } +} + +//------------------------------------------------------------------------------ +// setTapeRepackingDisabled +//------------------------------------------------------------------------------ +void RdbmsCatalogue::setTapeRepackingDisabled(const common::dataStructures::SecurityIdentity &admin, + const std::string &vid, const std::string & reason) { + + try { + modifyTapeState(admin,vid,common::dataStructures::Tape::REPACKING_DISABLED,std::nullopt,reason); } catch(exception::UserError &) { throw; } catch(exception::Exception &ex) { @@ -9046,7 +9082,12 @@ common::dataStructures::RetrieveFileQueueCriteria RdbmsCatalogue::prepareToRetri throw ex; } const auto nonBrokenState = std::find_if(std::begin(tapeFileStateList), std::end(tapeFileStateList), - [](std::pair<std::string, std::string> state) {return state.second != "BROKEN";}); + [](std::pair<std::string, std::string> state) { + return (state.second != "BROKEN") + && (state.second != "BROKEN_PENDING") + && (state.second != "EXPORTED") + && (state.second != "EXPORTED_PENDING"); + }); if (nonBrokenState != std::end(tapeFileStateList)) { ex.getMessage() << "WARNING: File with archive file ID " << archiveFileId << @@ -9743,7 +9784,7 @@ std::unique_ptr<common::dataStructures::ArchiveFile> RdbmsCatalogue::getArchiveF "TAPE_FILE.VID = TAPE.VID " "WHERE " "ARCHIVE_FILE.ARCHIVE_FILE_ID = :ARCHIVE_FILE_ID AND " - "TAPE.TAPE_STATE = 'ACTIVE' " + "TAPE.TAPE_STATE IN ('ACTIVE', 'DISABLED') " "ORDER BY " "TAPE_FILE.CREATION_TIME ASC"; auto stmt = conn.createStmt(sql); diff --git a/catalogue/RdbmsCatalogue.hpp b/catalogue/RdbmsCatalogue.hpp index 
8cdddbf55dbb1b96bb4c27c96862130633d52010..79f2dbb1ab03d05ab4e072d8e5f07b6fc90dd049 100644 --- a/catalogue/RdbmsCatalogue.hpp +++ b/catalogue/RdbmsCatalogue.hpp @@ -639,7 +639,7 @@ public: void modifyTapeTapePoolName(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, const std::string &tapePoolName) override; void modifyTapeEncryptionKeyName(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, const std::string &encryptionKeyName) override; void modifyTapeVerificationStatus(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, const std::string &verificationStatus) override; - void modifyTapeState(const common::dataStructures::SecurityIdentity &admin,const std::string &vid, const common::dataStructures::Tape::State & state, const std::optional<std::string> & stateReason) override; + void modifyTapeState(const common::dataStructures::SecurityIdentity &admin,const std::string &vid, const common::dataStructures::Tape::State & state, const std::optional<common::dataStructures::Tape::State> & prev_state, const std::optional<std::string> & stateReason) override; static std::string generateTapeStateModifiedBy(const common::dataStructures::SecurityIdentity & admin); /** * Sets the full status of the specified tape. @@ -674,6 +674,7 @@ public: void setTapeIsFromCastorInUnitTests(const std::string &vid) override; void setTapeDisabled(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, const std::string & reason) override; + void setTapeRepackingDisabled(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, const std::string & reason) override; void setTapeDirty(const std::string & vid) override; void modifyTapeComment(const common::dataStructures::SecurityIdentity &admin, const std::string &vid, const std::optional<std::string> &comment) override; diff --git a/cmdline/CtaAdminCmdParse.hpp b/cmdline/CtaAdminCmdParse.hpp index 621c21e049a0f2b6ff1f6f082d0899a5f7ebd2b9..955df5cdfd8ecba4a9571e8291456263ffca45d6 100644 --- a/cmdline/CtaAdminCmdParse.hpp +++ b/cmdline/CtaAdminCmdParse.hpp @@ -396,8 +396,6 @@ const std::map<AdminCmd::Cmd, CmdHelp> cmdHelp = { " of the files located on the tape to repack.\n" " By default, a hardcoded mount policy is applied (all request priorities and minimum request\n" " ages = 1).\n" - " * If the --disabledtape flag is set, the tape to repack will be mounted for retrieval even if it\n" - " is disabled.\n" " * If the --no-recall flag is set, no retrieve mount will be triggered. 
Only the files that are\n" " located in the buffer will be considered for archival.\n\n" }}, @@ -534,7 +532,6 @@ const Option opt_vid { Option::OPT_STR, "--vid", const Option opt_vo { Option::OPT_STR, "--virtualorganisation", "--vo", " <virtual_organisation>" }; const Option opt_vidfile { Option::OPT_STR_LIST, "--vidfile", "-f", " <filename>" }; const Option opt_full { Option::OPT_BOOL, "--full", "-f", " <\"true\" or \"false\">" }; -const Option opt_disabled_tape { Option::OPT_FLAG, "--disabledtape", "-d", "" }; const Option opt_disksystem { Option::OPT_STR, "--disksystem", "-n", " <disk_system_name>" }; const Option opt_file_regexp { Option::OPT_STR, "--fileregexp", "-r", " <file_regexp>" }; const Option opt_free_space_query_url { Option::OPT_STR, "--freespacequeryurl", "-u", " <free_space_query_url>" }; @@ -547,7 +544,7 @@ const Option opt_object_id { Option::OPT_STR, "--objectid", const Option opt_read_max_drives { Option::OPT_UINT, "--readmaxdrives", "--rmd", " <read_max_drives>" }; const Option opt_write_max_drives { Option::OPT_UINT, "--writemaxdrives", "--wmd", " <write_max_drives>" }; -const Option opt_state { Option::OPT_STR, "--state", "-s", std::string(" <\"") + Tape::stateToString(Tape::ACTIVE) +"\"" + " or \"" + Tape::stateToString(Tape::DISABLED) + "\" or \"" + Tape::stateToString(Tape::BROKEN) + "\">" }; +const Option opt_state { Option::OPT_STR, "--state", "-s", std::string(" <\"") + Tape::stateToString(Tape::ACTIVE) +"\"" + " or \"" + Tape::stateToString(Tape::DISABLED) + "\" or \"" + Tape::stateToString(Tape::BROKEN) + "\" or \"" + Tape::stateToString(Tape::EXPORTED) + "\" or \"" + Tape::stateToString(Tape::REPACKING) + "\" or \"" + Tape::stateToString(Tape::REPACKING_DISABLED) + "\">" }; const Option opt_activityregex { Option::OPT_STR, "--activityregex", "--ar", " <activity_regex>"}; const Option opt_diskinstance { Option::OPT_STR, "--diskinstance", "--di", " <disk_instance_name>" }; const Option opt_diskinstance_alias { Option::OPT_STR, "--name", "-n", " <disk_instance_name>", "--diskinstance" }; @@ -618,7 +615,7 @@ const std::map<cmd_key_t, cmd_val_t> cmdOptions = { {{ AdminCmd::CMD_MOUNTPOLICY, AdminCmd::SUBCMD_LS }, { }}, /*----------------------------------------------------------------------------------------------------*/ {{ AdminCmd::CMD_REPACK, AdminCmd::SUBCMD_ADD }, - { opt_vid.optional(), opt_vidfile.optional(), opt_bufferurl.optional(), opt_justmove.optional(), opt_justaddcopies.optional(), opt_mountpolicy, opt_disabled_tape.optional(), opt_no_recall.optional() }}, + { opt_vid.optional(), opt_vidfile.optional(), opt_bufferurl.optional(), opt_justmove.optional(), opt_justaddcopies.optional(), opt_mountpolicy, opt_no_recall.optional() }}, {{ AdminCmd::CMD_REPACK, AdminCmd::SUBCMD_RM }, { opt_vid }}, {{ AdminCmd::CMD_REPACK, AdminCmd::SUBCMD_LS }, { opt_vid.optional() }}, {{ AdminCmd::CMD_REPACK, AdminCmd::SUBCMD_ERR }, { opt_vid }}, diff --git a/common/dataStructures/RepackInfo.hpp b/common/dataStructures/RepackInfo.hpp index 0ac94a792e5e2b829d087cd3a9d1a96e9f61a891..f0902f30b82110d83c349b2b694f2cc7cd2a0ec3 100644 --- a/common/dataStructures/RepackInfo.hpp +++ b/common/dataStructures/RepackInfo.hpp @@ -73,7 +73,6 @@ struct RepackInfo { uint64_t archivedFiles; uint64_t archivedBytes; bool isExpandFinished; - bool forceDisabledTape; bool noRecall; common::dataStructures::EntryLog creationLog; time_t repackFinishedTime = 0; diff --git a/common/dataStructures/Tape.cpp b/common/dataStructures/Tape.cpp index 
6ae2849630292a95d981e9afc2306528a4ae8fba..6375e3bd724806ab4bca21ae0bd47fb5e02049a4 100644 --- a/common/dataStructures/Tape.cpp +++ b/common/dataStructures/Tape.cpp @@ -40,18 +40,39 @@ Tape::Tape(): const std::map<Tape::State,std::string> Tape::STATE_TO_STRING_MAP = { {Tape::State::ACTIVE,"ACTIVE"}, {Tape::State::BROKEN,"BROKEN"}, - {Tape::State::DISABLED,"DISABLED"} + {Tape::State::BROKEN_PENDING,"BROKEN_PENDING"}, + {Tape::State::DISABLED,"DISABLED"}, + {Tape::State::REPACKING,"REPACKING"}, + {Tape::State::REPACKING_PENDING,"REPACKING_PENDING"}, + {Tape::State::EXPORTED,"EXPORTED"}, + {Tape::State::EXPORTED_PENDING,"EXPORTED_PENDING"}, + {Tape::State::REPACKING_DISABLED,"REPACKING_DISABLED"}, +}; + +const std::set<Tape::State> Tape::PENDING_STATES_SET = { + Tape::State::BROKEN_PENDING, + Tape::State::REPACKING_PENDING, + Tape::State::EXPORTED_PENDING, }; const std::map<std::string,Tape::State> Tape::STRING_TO_STATE_MAP = { {"ACTIVE",Tape::State::ACTIVE}, {"BROKEN",Tape::State::BROKEN}, - {"DISABLED",Tape::State::DISABLED} + {"BROKEN_PENDING",Tape::State::BROKEN_PENDING}, + {"DISABLED",Tape::State::DISABLED}, + {"REPACKING",Tape::State::REPACKING}, + {"REPACKING_PENDING",Tape::State::REPACKING_PENDING}, + {"EXPORTED", Tape::State::EXPORTED}, + {"EXPORTED_PENDING", Tape::State::EXPORTED_PENDING}, + {"REPACKING_DISABLED",Tape::State::REPACKING_DISABLED}, }; -std::string Tape::getAllPossibleStates(){ +std::string Tape::getAllPossibleStates(bool hidePendingStates){ std::string ret; for(auto &kv: STRING_TO_STATE_MAP){ + if(hidePendingStates && PENDING_STATES_SET.count(kv.second)) { + continue; + } ret += kv.first + " "; } if(ret.size()) @@ -103,20 +124,36 @@ std::string Tape::stateToString(const Tape::State & state) { } } -Tape::State Tape::stringToState(const std::string& state) { - std::string stateUpperCase = state; +Tape::State Tape::stringToState(const std::string& stateStr, bool hidePendingStates) { + std::string stateUpperCase = stateStr; cta::utils::toUpper(stateUpperCase); try { return Tape::STRING_TO_STATE_MAP.at(stateUpperCase); } catch(std::out_of_range &ex){ - throw cta::exception::Exception(std::string("The state given (") + stateUpperCase + ") does not exist. Possible values are " + Tape::getAllPossibleStates()); + throw cta::exception::Exception(std::string("The state given (") + stateUpperCase + ") does not exist. 
Possible values are " + Tape::getAllPossibleStates(hidePendingStates)); } } +bool Tape::isActive() const { + return state == Tape::State::ACTIVE; +} + bool Tape::isDisabled() const { return state == Tape::State::DISABLED; } +bool Tape::isRepacking() const { + return state == Tape::State::REPACKING; +} + +bool Tape::isBroken() const { + return state == Tape::State::BROKEN; +} + +bool Tape::isExported() const { + return state == Tape::State::EXPORTED; +} + //------------------------------------------------------------------------------ // operator<< //------------------------------------------------------------------------------ diff --git a/common/dataStructures/Tape.hpp b/common/dataStructures/Tape.hpp index eaade151e03b77fc2dd2695e884d195305a32408..46e0eafbd4d12e01a4ec780dd8aa5da664a83175 100644 --- a/common/dataStructures/Tape.hpp +++ b/common/dataStructures/Tape.hpp @@ -23,6 +23,7 @@ #include <list> #include <map> +#include <set> #include <optional> #include <stdint.h> #include <string> @@ -40,13 +41,20 @@ struct Tape { enum State { ACTIVE = 1, BROKEN = 2, - DISABLED = 3 + DISABLED = 3, + REPACKING = 4, + EXPORTED = 5, + REPACKING_DISABLED = 6, + BROKEN_PENDING = 101, + REPACKING_PENDING = 102, + EXPORTED_PENDING = 103, }; static const std::map<State,std::string> STATE_TO_STRING_MAP; static const std::map<std::string,State> STRING_TO_STATE_MAP; + static const std::set<State> PENDING_STATES_SET; - static std::string getAllPossibleStates(); + static std::string getAllPossibleStates(bool hidePendingStates = false); Tape(); @@ -72,7 +80,7 @@ struct Tape { * @return the state corresponding to the State enum value * @throws cta::exception::Exception if the state passed in parameter does not match any existing State enum value */ - static State stringToState(const std::string & state); + static State stringToState(const std::string & state, bool hidePendingStates = false); std::string vid; std::string mediaType; @@ -115,8 +123,11 @@ struct Tape { time_t stateUpdateTime; std::optional<std::string> verificationStatus; + bool isActive() const; bool isDisabled() const; - + bool isRepacking() const; + bool isBroken() const; + bool isExported() const; }; // struct Tape std::ostream &operator<<(std::ostream &os, const Tape &obj); diff --git a/continuousintegration/orchestration/tests/archive_retrieve.sh b/continuousintegration/orchestration/tests/archive_retrieve.sh index ead3ba93c0fa9a2025227a7b93c567fe74f33dfd..706073fff4f250c14febadf905a99aba0830cbe6 100755 --- a/continuousintegration/orchestration/tests/archive_retrieve.sh +++ b/continuousintegration/orchestration/tests/archive_retrieve.sh @@ -44,7 +44,7 @@ if [ ! -z "${error}" ]; then fi echo "Preparing namespace for the tests" -./prepare_tests.sh -n ${NAMESPACE} + . prepare_tests.sh -n ${NAMESPACE} if [ $? 
-ne 0 ]; then echo "ERROR: failed to prepare namespace for the tests" exit 1 @@ -111,4 +111,15 @@ kubectl -n ${NAMESPACE} exec client -- bash /root/try_evict_before_archive_compl kubectl -n ${NAMESPACE} exec ctaeos -- bash /root/grep_xrdlog_mgm_for_error.sh || exit 1 +setup_tapes_for_multicopy_test + +echo +echo "Launching retrieve_queue_cleanup.sh on client pod" +echo " Archiving file: xrdcp as user1" +echo " Retrieving it as poweruser1" +kubectl -n ${NAMESPACE} cp retrieve_queue_cleanup.sh client:/root/retrieve_queue_cleanup.sh +kubectl -n ${NAMESPACE} exec client -- bash /root/retrieve_queue_cleanup.sh || exit 1 + +kubectl -n ${NAMESPACE} exec ctaeos -- bash /root/grep_xrdlog_mgm_for_error.sh || exit 1 + exit 0 diff --git a/continuousintegration/orchestration/tests/client_helper.sh b/continuousintegration/orchestration/tests/client_helper.sh index 99f4a148bde938f150c1f4205e4614dc7d7d574e..d9401ce4cccbca33844efbc444801b97d578f56b 100644 --- a/continuousintegration/orchestration/tests/client_helper.sh +++ b/continuousintegration/orchestration/tests/client_helper.sh @@ -14,6 +14,10 @@ # submit itself to any jurisdiction. +################################################################ +# Helper functions - KRB5 +################################################################ + ### # Helper functions for tests running on client pod. # @@ -93,3 +97,149 @@ eosadmin_kdestroy() { KRB5CCNAME=/tmp/${EOSADMIN_USER}/krb5cc_0 kdestroy eosadmin_klist } + +################################################################ +# Helper functions - Requests +################################################################ + +# Pass list of files waiting for archival + +wait_for_archive () { + + EOS_INSTANCE=$1 + SECONDS_PASSED=0 + WAIT_FOR_ARCHIVED_FILE_TIMEOUT=90 + + while test $(($# - 1)) != $(echo "${@:2}" | tr " " "\n" | xargs -iFILE eos root://${EOS_INSTANCE} info FILE | awk '{print $4;}' | grep tape | wc -l); do + echo "Waiting for files to be archived to tape: seconds passed = ${SECONDS_PASSED}" + sleep 1 + let SECONDS_PASSED=SECONDS_PASSED+1 + + if test ${SECONDS_PASSED} == ${WAIT_FOR_ARCHIVED_FILE_TIMEOUT}; then + echo "ERROR: Timed out after ${WAIT_FOR_ARCHIVED_FILE_TIMEOUT} seconds waiting for files to be archived to tape" + exit 1 + fi + done + +} + +# Pass list of files waiting for retrieval + +wait_for_retrieve () { + + EOS_INSTANCE=$1 + SECONDS_PASSED=0 + WAIT_FOR_RETRIEVED_FILE_TIMEOUT=90 + while test $(($# - 1)) != $(echo "${@:2}" | tr " " "\n" | xargs -iFILE eos root://${EOS_INSTANCE} info FILE | awk '{print $4;}' | grep -F "default.0" | wc -l); do + echo "Waiting for files to be retrieved from tape: Seconds passed = ${SECONDS_PASSED}" + sleep 1 + let SECONDS_PASSED=SECONDS_PASSED+1 + + if test ${SECONDS_PASSED} == ${WAIT_FOR_RETRIEVED_FILE_TIMEOUT}; then + echo "Timed out after ${WAIT_FOR_RETRIEVED_FILE_TIMEOUT} seconds waiting for files to be retrieved from tape" + exit 1 + fi + done + +} + +# Pass list of files waiting for eviction + +wait_for_evict () { + + EOS_INSTANCE=$1 + SECONDS_PASSED=0 + WAIT_FOR_EVICTED_FILE_TIMEOUT=90 + while test 0 != $(echo "${@:2}" | tr " " "\n" | xargs -iFILE eos root://${EOS_INSTANCE} info FILE | awk '{print $4;}' | grep -F "default.0" | wc -l); do + echo "Waiting for files to be evicted from disk: Seconds passed = ${SECONDS_PASSED}" + sleep 1 + let SECONDS_PASSED=SECONDS_PASSED+1 + + if test ${SECONDS_PASSED} == ${WAIT_FOR_EVICTED_FILE_TIMEOUT}; then + echo "Timed out after ${WAIT_FOR_EVICTED_FILE_TIMEOUT} seconds waiting for files to be evicted from 
disk" + exit 1 + fi + done + +} + +# Wait for tape change + +wait_for_tape_state() { + + SECONDS_PASSED=0 + WAIT_FOR_EVICTED_FILE_TIMEOUT=90 + echo "Waiting for tape $1 state to change to $2: Seconds passed = ${SECONDS_PASSED}" + while test $2 != $(admin_cta --json tape ls --vid $1 | jq -r '.[] | .state'); do + sleep 1 + let SECONDS_PASSED=SECONDS_PASSED+1 + echo "Waiting for tape $1 state to change to $2: Seconds passed = ${SECONDS_PASSED}" + + if test ${SECONDS_PASSED} == ${WAIT_FOR_EVICTED_FILE_TIMEOUT}; then + echo "Timed out after ${WAIT_FOR_EVICTED_FILE_TIMEOUT} seconds waiting for tape $1 state to change to $2" + exit 1 + fi + done + +} + +# Pass "UP" or "DOWN" as argument + +put_all_drives () { + + NEXT_STATE=$1 + [ "$1" = "UP" ] && PREV_STATE="DOWN" || PREV_STATE="UP" + next_state=$(echo $NEXT_STATE | awk '{print tolower($0)}') + prev_state=$(echo $PREV_STATE | awk '{print tolower($0)}') + + # Put all tape drives up/down + INITIAL_DRIVES_STATE=`admin_cta --json dr ls` + echo INITIAL_DRIVES_STATE: + echo ${INITIAL_DRIVES_STATE} | jq -r '.[] | [ .driveName, .driveStatus] | @tsv' | column -t + echo -n "Will put $next_state those drives : " + drivesToModify=`echo ${INITIAL_DRIVES_STATE} | jq -r ".[].driveName"` + echo $drivesToModify + for d in `echo $drivesToModify`; do + admin_cta drive $next_state $d --reason "PUTTING DRIVE $NEXT_STATE FOR TESTS" + done + + echo "$(date +%s): Waiting for the drives to be $next_state" + SECONDS_PASSED=0 + WAIT_FOR_DRIVES_TIMEOUT=$((10)) + while [[ $SECONDS_PASSED < $WAIT_FOR_DRIVES_TIMEOUT ]]; do + sleep 1 + oneStatusRemaining=0 + for d in `echo $drivesToModify`; do + status=`admin_cta --json drive ls | jq -r ". [] | select(.driveName == \"$d\") | .driveStatus"` + if [[ $NEXT_STATE == "DOWN" ]]; then + # Anything except DOWN is not acceptable + if [[ $status != "DOWN" ]]; then + oneStatusRemaining=1 + fi; + else + # Only DOWN is not OK. Starting, Unmounting, Running == UP + if [[ $status == "DOWN" ]]; then + oneStatusRemaining=1 + fi; + fi; + done + if [[ $oneStatusRemaining -eq 0 ]]; then + echo "Drives : $drivesToModify are $next_state" + break; + fi + echo -n "." 
+ SECONDS_PASSED=$SECONDS_PASSED+1 + if [[ $SECONDS_PASSED -gt $WAIT_FOR_DRIVES_TIMEOUT ]]; then + die "ERROR: Timeout reach for trying to put all drives $next_state" + fi + done + +} + +put_all_drives_up () { + put_all_drives "UP" +} + +put_all_drives_down () { + put_all_drives "DOWN" +} diff --git a/continuousintegration/orchestration/tests/idempotent_prepare.sh b/continuousintegration/orchestration/tests/idempotent_prepare.sh index c1d96687f296dba88eef74ae2dfee35a780e5320..e0ffa7c8ec763b9febe358b8da264315fa19e952 100755 --- a/continuousintegration/orchestration/tests/idempotent_prepare.sh +++ b/continuousintegration/orchestration/tests/idempotent_prepare.sh @@ -69,129 +69,6 @@ eospower_kinit &>/dev/null admin_kdestroy &>/dev/null admin_kinit &>/dev/null -################################################################ -# Helper functions -################################################################ - -# Pass list of files waiting for archival - -wait_for_archive () { - - SECONDS_PASSED=0 - WAIT_FOR_ARCHIVED_FILE_TIMEOUT=90 - - while test $# != $(echo "$@" | tr " " "\n" | xargs -iFILE eos root://${EOS_INSTANCE} info FILE | awk '{print $4;}' | grep tape | wc -l); do - echo "Waiting for files to be archived to tape: seconds passed = ${SECONDS_PASSED}" - sleep 1 - let SECONDS_PASSED=SECONDS_PASSED+1 - - if test ${SECONDS_PASSED} == ${WAIT_FOR_ARCHIVED_FILE_TIMEOUT}; then - echo "ERROR: Timed out after ${WAIT_FOR_ARCHIVED_FILE_TIMEOUT} seconds waiting for files to be archived to tape" - exit 1 - fi - done - -} - -# Pass list of files waiting for retrieval - -wait_for_retrieve () { - - SECONDS_PASSED=0 - WAIT_FOR_RETRIEVED_FILE_TIMEOUT=90 - while test $# != $(echo "$@" | tr " " "\n" | xargs -iFILE eos root://${EOS_INSTANCE} info FILE | awk '{print $4;}' | grep -F "default.0" | wc -l); do - echo "Waiting for files to be retrieved from tape: Seconds passed = ${SECONDS_PASSED}" - sleep 1 - let SECONDS_PASSED=SECONDS_PASSED+1 - - if test ${SECONDS_PASSED} == ${WAIT_FOR_RETRIEVED_FILE_TIMEOUT}; then - echo "Timed out after ${WAIT_FOR_RETRIEVED_FILE_TIMEOUT} seconds waiting for files to be retrieved from tape" - exit 1 - fi - done - -} - -# Pass list of files waiting for eviction - -wait_for_evict () { - - SECONDS_PASSED=0 - WAIT_FOR_EVICTED_FILE_TIMEOUT=90 - while test 0 != $(echo "$@" | tr " " "\n" | xargs -iFILE eos root://${EOS_INSTANCE} info FILE | awk '{print $4;}' | grep -F "default.0" | wc -l); do - echo "Waiting for files to be evicted from disk: Seconds passed = ${SECONDS_PASSED}" - sleep 1 - let SECONDS_PASSED=SECONDS_PASSED+1 - - if test ${SECONDS_PASSED} == ${WAIT_FOR_EVICTED_FILE_TIMEOUT}; then - echo "Timed out after ${WAIT_FOR_EVICTED_FILE_TIMEOUT} seconds waiting for files to be evicted from disk" - exit 1 - fi - done - -} - -# Pass "UP" or "DOWN" as argument - -put_all_drives () { - - NEXT_STATE=$1 - [ "$1" = "UP" ] && PREV_STATE="DOWN" || PREV_STATE="UP" - next_state=$(echo $NEXT_STATE | awk '{print tolower($0)}') - prev_state=$(echo $PREV_STATE | awk '{print tolower($0)}') - - # Put all tape drives up/down - INITIAL_DRIVES_STATE=`admin_cta --json dr ls` - echo INITIAL_DRIVES_STATE: - echo ${INITIAL_DRIVES_STATE} | jq -r '.[] | [ .driveName, .driveStatus] | @tsv' | column -t - echo -n "Will put $next_state those drives : " - drivesToModify=`echo ${INITIAL_DRIVES_STATE} | jq -r ".[].driveName"` - echo $drivesToModify - for d in `echo $drivesToModify`; do - admin_cta drive $next_state $d --reason "PUTTING DRIVE $NEXT_STATE FOR TESTS" - done - - echo "$(date +%s): Waiting for 
the drives to be $next_state" - SECONDS_PASSED=0 - WAIT_FOR_DRIVES_TIMEOUT=$((10)) - while [[ $SECONDS_PASSED < $WAIT_FOR_DRIVES_TIMEOUT ]]; do - sleep 1 - oneStatusRemaining=0 - for d in `echo $drivesToModify`; do - status=`admin_cta --json drive ls | jq -r ". [] | select(.driveName == \"$d\") | .driveStatus"` - if [[ $NEXT_STATE == "DOWN" ]]; then - # Anything except DOWN is not acceptable - if [[ $status != "DOWN" ]]; then - oneStatusRemaining=1 - fi; - else - # Only DOWN is not OK. Starting, Unmounting, Running == UP - if [[ $status == "DOWN" ]]; then - oneStatusRemaining=1 - fi; - fi; - done - if [[ $oneStatusRemaining -eq 0 ]]; then - echo "Drives : $drivesToModify are $next_state" - break; - fi - echo -n "." - SECONDS_PASSED=$SECONDS_PASSED+1 - if [[ $SECONDS_PASSED -gt $WAIT_FOR_DRIVES_TIMEOUT ]]; then - die "ERROR: Timeout reach for trying to put all drives $next_state" - fi - done - -} - -put_all_drives_up () { - put_all_drives "UP" -} - -put_all_drives_down () { - put_all_drives "DOWN" -} - ################################################################ # Test preparing single file (exists on tape) @@ -207,7 +84,7 @@ echo "Testing normal 'prepare -s' request..." put_all_drives_up echo "Archiving ${TEMP_FILE_OK}..." xrdcp /etc/group root://${EOS_INSTANCE}/${TEMP_FILE_OK} -wait_for_archive ${TEMP_FILE_OK} +wait_for_archive ${EOS_INSTANCE} ${TEMP_FILE_OK} put_all_drives_down echo "Trigering EOS retrieve workflow as poweruser1:powerusers..." @@ -448,7 +325,7 @@ done put_all_drives_up cat ${TEST_FILES_TAPE_LIST} | xargs -iFILE_PATH xrdcp /etc/group root://${EOS_INSTANCE}/FILE_PATH -wait_for_archive $(cat ${TEST_FILES_TAPE_LIST} | tr "\n" " ") +wait_for_archive ${EOS_INSTANCE} $(cat ${TEST_FILES_TAPE_LIST} | tr "\n" " ") echo "Files to be written to directory with no prepare/evict permission:" for ((file_idx=0; file_idx < ${NB_FILES_NO_P}; file_idx++)); do @@ -555,7 +432,7 @@ echo "Testing 'prepare -a' request for file ${TEMP_FILE}..." put_all_drives_up echo "Archiving ${TEMP_FILE}..." xrdcp /etc/group root://${EOS_INSTANCE}/${TEMP_FILE} -wait_for_archive ${TEMP_FILE} +wait_for_archive ${EOS_INSTANCE} ${TEMP_FILE} echo "Disabling tape drives..." put_all_drives_down @@ -609,7 +486,7 @@ echo "Uploading & archiving test file ${TEMP_FILE_TAPE}." put_all_drives_up echo "Archiving ${TEMP_FILE_TAPE}..." xrdcp /etc/group root://${EOS_INSTANCE}/${TEMP_FILE_TAPE} -wait_for_archive ${TEMP_FILE_TAPE} +wait_for_archive ${EOS_INSTANCE} ${TEMP_FILE_TAPE} echo "Disabling tape drives..." put_all_drives_down @@ -661,12 +538,12 @@ put_all_drives_up echo "Archiving ${TEMP_FILE}..." xrdcp /etc/group root://${EOS_INSTANCE}/${TEMP_FILE} echo "Disabling tape drives..." -wait_for_archive ${TEMP_FILE} +wait_for_archive ${EOS_INSTANCE} ${TEMP_FILE} echo "Trigering EOS retrieve workflow as poweruser1:powerusers, for ${TEMP_FILE}..." # We need the -s as we are staging the files from tape (see xrootd prepare definition) REQUEST_ID=$(KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} prepare -s ${TEMP_FILE}) -wait_for_retrieve ${TEMP_FILE} +wait_for_retrieve ${EOS_INSTANCE} ${TEMP_FILE} echo "Trigering EOS evict workflow as poweruser1:powerusers..." KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} prepare -e ${TEMP_FILE} @@ -676,7 +553,7 @@ if [ $? 
-ne 0 ]; then exit 1 fi -wait_for_evict ${TEMP_FILE} +wait_for_evict ${EOS_INSTANCE} ${TEMP_FILE} echo "Test completed successfully" @@ -698,12 +575,12 @@ echo "Uploading & archiving test file ${TEMP_FILE_TAPE}." put_all_drives_up echo "Archiving ${TEMP_FILE_TAPE}..." xrdcp /etc/group root://${EOS_INSTANCE}/${TEMP_FILE_TAPE} -wait_for_archive ${TEMP_FILE_TAPE} +wait_for_archive ${EOS_INSTANCE} ${TEMP_FILE_TAPE} echo "Trigering EOS retrieve workflow as poweruser1:powerusers, for ${TEMP_FILE_TAPE}..." # We need the -s as we are staging the files from tape (see xrootd prepare definition) REQUEST_ID=$(KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} prepare -s ${TEMP_FILE_TAPE}) -wait_for_retrieve ${TEMP_FILE_TAPE} +wait_for_retrieve ${EOS_INSTANCE} ${TEMP_FILE_TAPE} echo "Trigering EOS abort workflow as poweruser1:powerusers..." echo "Error expected" @@ -714,7 +591,7 @@ if [ $? -eq 0 ]; then exit 1 fi -wait_for_evict ${TEMP_FILE_TAPE} +wait_for_evict ${EOS_INSTANCE} ${TEMP_FILE_TAPE} echo "Test completed successfully" diff --git a/continuousintegration/orchestration/tests/prepare_tests.sh b/continuousintegration/orchestration/tests/prepare_tests.sh index 61739e1bc07c39799bb05a53d3c1f879291d0bbf..8fbae32920e5a0f5eb4692c87563fb0963e2eab9 100755 --- a/continuousintegration/orchestration/tests/prepare_tests.sh +++ b/continuousintegration/orchestration/tests/prepare_tests.sh @@ -42,10 +42,13 @@ if [ ! -z "${error}" ]; then exit 1 fi - # eos instance identified by SSS username EOSINSTANCE=ctaeos +MULTICOPY_DIR_1=/eos/ctaeos/preprod/dir_1_copy +MULTICOPY_DIR_2=/eos/ctaeos/preprod/dir_2_copy +MULTICOPY_DIR_3=/eos/ctaeos/preprod/dir_3_copy + tempdir=$(mktemp -d) # temporary directory for system test related config echo -n "Reading library configuration from tpsrv01" SECONDS_PASSED=0 @@ -94,6 +97,9 @@ kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin --json version | jq echo "Cleaning up leftovers from potential previous runs."
kubectl --namespace ${NAMESPACE} exec ctaeos -- eos rm -rf /eos/ctaeos/cta/fail_on_closew_test/ kubectl --namespace ${NAMESPACE} exec ctaeos -- eos rm /eos/ctaeos/cta/* + kubectl --namespace ${NAMESPACE} exec ctaeos -- eos rm -rf ${MULTICOPY_DIR_1}/ + kubectl --namespace ${NAMESPACE} exec ctaeos -- eos rm -rf ${MULTICOPY_DIR_2}/ + kubectl --namespace ${NAMESPACE} exec ctaeos -- eos rm -rf ${MULTICOPY_DIR_3}/ kubectl --namespace ${NAMESPACE} exec ctaeos -- eos find -f /eos/ctaeos/preprod/ | xargs -I{} kubectl --namespace ${NAMESPACE} exec ctaeos -- eos rm -rf {} kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin --json tape ls --all | \ jq -r '.[] | .vid ' | xargs -I{} kubectl --namespace ${NAMESPACE} exec ctacli -- \ @@ -132,12 +138,7 @@ kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin --json version | jq --writemaxdrives 1 \ --diskinstance ${EOSINSTANCE} \ --comment "vo" - kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin tapepool add \ - --name ctasystest \ - --vo vo \ - --partialtapesnumber 5 \ - --encrypted false \ - --comment "ctasystest" + # add the media types of the tapes in production kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin mediatype add \ --name T10K500G \ @@ -175,11 +176,99 @@ kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin --json version | jq --primarydensitycode 94 \ --cartridge "LTO-8" \ --comment "LTO-8 cartridge formated at 12 TB" - # add all tapes + + # Setup default tapepool and storage class + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin tapepool add \ + --name ctasystest \ + --vo vo \ + --partialtapesnumber 5 \ + --encrypted false \ + --comment "ctasystest" + + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin storageclass add \ + --name ctaStorageClass \ + --numberofcopies 1 \ + --vo vo \ + --comment "ctasystest" + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin archiveroute add \ + --storageclass ctaStorageClass \ + --copynb 1 \ + --tapepool ctasystest \ + --comment "ctasystest" + + # Setup tapepools and storage classes for multiple tape copies + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin tapepool add \ + --name ctasystest_A \ + --vo vo \ + --partialtapesnumber 5 \ + --encrypted false \ + --comment "ctasystest_A" + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin tapepool add \ + --name ctasystest_B \ + --vo vo \ + --partialtapesnumber 5 \ + --encrypted false \ + --comment "ctasystest_B" + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin tapepool add \ + --name ctasystest_C \ + --vo vo \ + --partialtapesnumber 5 \ + --encrypted false \ + --comment "ctasystest_C" + + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin storageclass add \ + --name ctaStorageClass_1_copy \ + --numberofcopies 1 \ + --vo vo \ + --comment "ctasystest" + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin archiveroute add \ + --storageclass ctaStorageClass_1_copy \ + --copynb 1 \ + --tapepool ctasystest_A \ + --comment "ctasystest" + + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin storageclass add \ + --name ctaStorageClass_2_copy \ + --numberofcopies 2 \ + --vo vo \ + --comment "ctasystest" + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin archiveroute add \ + --storageclass ctaStorageClass_2_copy \ + --copynb 1 \ + --tapepool ctasystest_A \ + --comment "ctasystest" + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin archiveroute add \ + --storageclass ctaStorageClass_2_copy \ + --copynb 2 \ + --tapepool ctasystest_B \ + --comment 
"ctasystest" + + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin storageclass add \ + --name ctaStorageClass_3_copy \ + --numberofcopies 1 \ + --vo vo \ + --comment "ctasystest" + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin archiveroute add \ + --storageclass ctaStorageClass_3_copy \ + --copynb 1 \ + --tapepool ctasystest_A \ + --comment "ctasystest" + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin archiveroute add \ + --storageclass ctaStorageClass_3_copy \ + --copynb 2 \ + --tapepool ctasystest_B \ + --comment "ctasystest" + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin archiveroute add \ + --storageclass ctaStorageClass_3_copy \ + --copynb 3 \ + --tapepool ctasystest_C \ + --comment "ctasystest" + + # add all tapes to default tape pool for ((i=0; i<${#TAPES[@]}; i++)); do VID=${TAPES[${i}]} kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin tape add \ - --mediatype "T10K500G" \ + --mediatype "T10K500G" \ --vendor vendor \ --logicallibrary ${TAPEDRIVES_IN_USE[${i}%${NB_TAPEDRIVES_IN_USE}]} \ --tapepool ctasystest \ @@ -188,16 +277,6 @@ kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin --json version | jq --full false \ --comment "ctasystest" done - kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin storageclass add \ - --name ctaStorageClass \ - --numberofcopies 1 \ - --vo vo \ - --comment "ctasystest" - kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin archiveroute add \ - --storageclass ctaStorageClass \ - --copynb 1 \ - --tapepool ctasystest \ - --comment "ctasystest" kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin mountpolicy add \ --name ctasystest \ --archivepriority 1 \ @@ -291,3 +370,28 @@ done > ${TMP_HOSTS} kubectl -n ${NAMESPACE} get pods -o json | jq -r '.items[] | select(.status.phase=="Running") | {name: .metadata.name, containers: .spec.containers[].name} | {command: (.name + " -c " + .containers)}|to_entries[]|(.value)' | while read container; do cat ${TMP_HOSTS} | grep -v $(echo ${container} | awk '{print $1}')| kubectl -n ${NAMESPACE} exec ${container} -i -- bash -c "cat >> /etc/hosts" done + +setup_tapes_for_multicopy_test() { + + echo "Setting up tapes and tapepools for multi-copy test..." 
+ + kubectl --namespace ${NAMESPACE} exec ctaeos -- eos mkdir ${MULTICOPY_DIR_1} + kubectl --namespace ${NAMESPACE} exec ctaeos -- eos mkdir ${MULTICOPY_DIR_2} + kubectl --namespace ${NAMESPACE} exec ctaeos -- eos mkdir ${MULTICOPY_DIR_3} + + kubectl --namespace ${NAMESPACE} exec ctaeos -- eos attr set sys.archive.storage_class=ctaStorageClass_1_copy ${MULTICOPY_DIR_1} + kubectl --namespace ${NAMESPACE} exec ctaeos -- eos attr set sys.archive.storage_class=ctaStorageClass_2_copy ${MULTICOPY_DIR_2} + kubectl --namespace ${NAMESPACE} exec ctaeos -- eos attr set sys.archive.storage_class=ctaStorageClass_3_copy ${MULTICOPY_DIR_3} + + # Find 3 non-full tapes and assign them to each one of the 3 tapepools + mapfile -t nonFullTapes < <( kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin --json tape ls --all | jq -r '.[] | select(.full==false) | .vid' ) + if ((${#nonFullTapes[@]} < 3)); then + echo "Not enought non-full tapes" + return 1 + fi + + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin tape ch --vid ${nonFullTapes[0]} --tapepool ctasystest_A + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin tape ch --vid ${nonFullTapes[1]} --tapepool ctasystest_B + kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin tape ch --vid ${nonFullTapes[2]} --tapepool ctasystest_C + +} diff --git a/continuousintegration/orchestration/tests/repack_systemtest.sh b/continuousintegration/orchestration/tests/repack_systemtest.sh index ac6b30e1a716c44959ca7c1a51aee534c77cd2f8..dcd64faca0eac2d805220cac5f986ba2cfe01974 100755 --- a/continuousintegration/orchestration/tests/repack_systemtest.sh +++ b/continuousintegration/orchestration/tests/repack_systemtest.sh @@ -29,7 +29,7 @@ die() { } usage() { cat <<EOF 1>&2 -Usage: $0 -v <vid> -b <bufferURL> -n <mountPolicyName> [-e <eosinstance>] [-t <timeout>] [-r <reportDirectory>] [-a] [-m] [-d] +Usage: $0 -v <vid> -b <bufferURL> -n <mountPolicyName> [-e <eosinstance>] [-t <timeout>] [-r <reportDirectory>] [-a] [-m] (bufferURL example : /eos/ctaeos/repack) mountPolicyName: the name of the mountPolicy to be applied to the repack request (example: ctasystest) eosinstance : the name of the ctaeos instance to be used (default : $EOSINSTANCE) @@ -37,7 +37,6 @@ timeout : the timeout in seconds to wait for the repack to be done reportDirectory : the directory to generate the report of the repack test (default : $REPORT_DIRECTORY) -a : Launch a repack just add copies workflow -m : Launch a repack just move workflow --d : Force a repack on a disabled tape (adds --disabled to the repack add command) -p : enable backpressure test -u : recall only option flag EOF @@ -62,8 +61,7 @@ then usage fi; -DISABLED_TAPE_FLAG="" -while getopts "v:e:b:t:r:n:amdpu" o; do +while getopts "v:e:b:t:r:n:ampu" o; do case "${o}" in v) VID_TO_REPACK=${OPTARG} @@ -86,9 +84,6 @@ while getopts "v:e:b:t:r:n:amdpu" o; do r) REPORT_DIRECTORY=${OPTARG} ;; - d) - DISABLED_TAPE_FLAG="--disabledtape" - ;; p) BACKPRESSURE_TEST=1 ;; @@ -165,7 +160,7 @@ if [ ! -z $NO_RECALL ]; then NO_RECALL_FLAG="--nr" fi -admin_cta repack add --mountpolicy ${MOUNT_POLICY_NAME} --vid ${VID_TO_REPACK} ${REPACK_OPTION} --bufferurl ${FULL_REPACK_BUFFER_URL} ${DISABLED_TAPE_FLAG} ${NO_RECALL_FLAG} || exit 1 +admin_cta repack add --mountpolicy ${MOUNT_POLICY_NAME} --vid ${VID_TO_REPACK} ${REPACK_OPTION} --bufferurl ${FULL_REPACK_BUFFER_URL} ${NO_RECALL_FLAG} || exit 1 if [ ! -z $BACKPRESSURE_TEST ]; then echo "Backpressure test: waiting to see a report of sleeping retrieve queue." 
diff --git a/continuousintegration/orchestration/tests/repack_systemtest_wrapper.sh b/continuousintegration/orchestration/tests/repack_systemtest_wrapper.sh index 35d7a13aad690132c5f92f470fb0dc7ae90407af..d612506189b0483ba2a5e8ed1252eff65fb12a82 100755 --- a/continuousintegration/orchestration/tests/repack_systemtest_wrapper.sh +++ b/continuousintegration/orchestration/tests/repack_systemtest_wrapper.sh @@ -50,6 +50,31 @@ echo "Preparing namespace for the tests" kubectl -n ${NAMESPACE} cp client_helper.sh client:/root/client_helper.sh kubectl -n ${NAMESPACE} cp client_prepare_file.sh client:/root/client_prepare_file.sh +removeRepackRequest() { + kubectl -n ${NAMESPACE} exec ctacli -- cta-admin repack rm --vid $1 +} + +modifyTapeState() { + reason="${3:-Testing}" + kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape ch --state $2 --reason "$reason" --vid $1 +} + +modifyTapeStateAndWait() { + WAIT_FOR_EMPTY_QUEUE_TIMEOUT=60 + SECONDS_PASSED=0 + modifyTapeState $1 $2 $3 + echo "Waiting for tape $1 to complete transitioning to $2" + while test 0 == `kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json tape ls --state $2 --vid $1 | jq -r ". [] | select(.vid == \"$1\")" | wc -l`; do + sleep 1 + printf "." + let SECONDS_PASSED=SECONDS_PASSED+1 + if test ${SECONDS_PASSED} == ${WAIT_FOR_EMPTY_QUEUE_TIMEOUT}; then + echo "Timed out after ${WAIT_FOR_EMPTY_QUEUE_TIMEOUT} seconds waiting for tape $1 to transition to state $2. Test failed." + exit 1 + fi + done +} + archiveFiles() { NB_FILES=$1 FILE_SIZE_KB=$2 @@ -80,6 +105,8 @@ roundTripRepack() { if [ "$VID_TO_REPACK" != "null" ] then echo + echo "Marking the tape ${VID_TO_REPACK} as REPACKING" + modifyTapeStateAndWait ${VID_TO_REPACK} REPACKING echo "Launching the repack \"just move\" test on VID ${VID_TO_REPACK} (with backpressure)" kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -m -r ${BASE_REPORT_DIRECTORY}/Step1-RoundTripRepack -p -n repack_ctasystest || exit 1 else @@ -87,6 +114,9 @@ roundTripRepack() { exit 1 fi + removeRepackRequest ${VID_TO_REPACK} + echo "Setting the tape ${VID_TO_REPACK} back to ACTIVE" + modifyTapeState ${VID_TO_REPACK} ACTIVE echo "Reclaiming tape ${VID_TO_REPACK}" kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape reclaim --vid ${VID_TO_REPACK} @@ -94,6 +124,8 @@ roundTripRepack() { if [ "$VID_TO_REPACK" != "null" ] then echo + echo "Marking the tape ${VID_TO_REPACK} as REPACKING" + modifyTapeStateAndWait ${VID_TO_REPACK} REPACKING echo "Launching the repack \"just move\" test on VID ${VID_TO_REPACK}" kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -m -r ${BASE_REPORT_DIRECTORY}/Step$1-RoundTripRepack -n repack_ctasystest || exit 1 else @@ -101,47 +133,68 @@ roundTripRepack() { exit 1 fi + removeRepackRequest ${VID_TO_REPACK} + echo "Setting the tape ${VID_TO_REPACK} back to ACTIVE" + modifyTapeState ${VID_TO_REPACK} ACTIVE echo "Reclaiming tape ${VID_TO_REPACK}" kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape reclaim --vid ${VID_TO_REPACK} + echo echo "*******************************************************************" echo "STEP $1. Launching a round trip repack \"just move\" request TEST OK" echo "*******************************************************************" } -repackDisableTape() { +repackNonRepackingTape() { echo - echo "*****************************************************" - echo "STEP $1. 
Launching a Repack Request on a disabled tape" - echo "*****************************************************" + echo "***************************************************************************************" + echo "STEP $1. Launching a Repack Request on a disabled/broken/exported/active/repacking tape" + echo "***************************************************************************************" VID_TO_REPACK=$(getFirstVidContainingFiles) if [ "$VID_TO_REPACK" != "null" ] then echo "Marking the tape ${VID_TO_REPACK} as DISABLED" - kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape ch --state DISABLED --reason "Repack disabled tape test" --vid ${VID_TO_REPACK} - echo "Launching the repack request test on VID ${VID_TO_REPACK} without the disabled flag" - kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -r ${BASE_REPORT_DIRECTORY}/Step$1-RepackDisabledTape -n repack_ctasystest && echo "The repack command should have failed as the tape is disabled" && exit 1 || echo "The repack submission has failed, test OK" + modifyTapeState ${VID_TO_REPACK} DISABLED "Repack disabled tape test" + echo "Launching the repack request test on VID ${VID_TO_REPACK} with DISABLED state" + kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -r ${BASE_REPORT_DIRECTORY}/Step$1-RepackDisabledTape -n repack_ctasystest && echo "The repack command should have failed as the tape is DISABLED" && exit 1 || echo "The repack submission has failed, test OK" + + echo "Marking the tape ${VID_TO_REPACK} as BROKEN" + modifyTapeStateAndWait ${VID_TO_REPACK} BROKEN "Repack broken tape test" + echo "Launching the repack request test on VID ${VID_TO_REPACK} with BROKEN state" + kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -r ${BASE_REPORT_DIRECTORY}/Step$1-RepackDisabledTape -n repack_ctasystest && echo "The repack command should have failed as the tape is BROKEN" && exit 1 || echo "The repack submission has failed, test OK" + + echo "Marking the tape ${VID_TO_REPACK} as EXPORTED" + modifyTapeStateAndWait ${VID_TO_REPACK} EXPORTED "Repack exported tape test" + echo "Launching the repack request test on VID ${VID_TO_REPACK} with EXPORTED state" + kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -r ${BASE_REPORT_DIRECTORY}/Step$1-RepackDisabledTape -n repack_ctasystest && echo "The repack command should have failed as the tape is EXPORTED" && exit 1 || echo "The repack submission has failed, test OK" + + echo "Marking the tape ${VID_TO_REPACK} as ACTIVE" + modifyTapeState ${VID_TO_REPACK} ACTIVE "Repack active tape test" + echo "Launching the repack request test on VID ${VID_TO_REPACK} with ACTIVE state" + kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -r ${BASE_REPORT_DIRECTORY}/Step$1-RepackDisabledTape -n repack_ctasystest && echo "The repack command should have failed as the tape is ACTIVE" && exit 1 || echo "The repack submission has failed, test OK" else echo "No vid found to repack" exit 1 fi; echo - echo "Launching the repack request test on VID ${VID_TO_REPACK} with the --disabledtape flag" - kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -d -r ${BASE_REPORT_DIRECTORY}/Step$1-RepackDisabledTape -n repack_ctasystest || exit 1 + 
echo "Marking the tape ${VID_TO_REPACK} as REPACKING" + modifyTapeStateAndWait ${VID_TO_REPACK} REPACKING "Repack repacking tape test" + echo "Launching the repack request test on VID ${VID_TO_REPACK} with REPACKING state" + kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -r ${BASE_REPORT_DIRECTORY}/Step$1-RepackDisabledTape -n repack_ctasystest || exit 1 + removeRepackRequest ${VID_TO_REPACK} + echo "Setting the tape ${VID_TO_REPACK} back to ACTIVE" + modifyTapeState ${VID_TO_REPACK} ACTIVE echo "Reclaiming tape ${VID_TO_REPACK}" kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape reclaim --vid ${VID_TO_REPACK} - echo "Setting the tape ${VID_TO_REPACK} back to ACTIVE" - kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape ch --state ACTIVE --vid ${VID_TO_REPACK} - echo - echo "*************************************************************" - echo "STEP $1. Launching a Repack Request on a disabled tape TEST OK" - echo "*************************************************************" + echo "***********************************************************************************************" + echo "STEP $1. Launching a Repack Request on a disabled/broken/exported/active/repacking tape TEST OK" + echo "***********************************************************************************************" } repackJustMove() { @@ -154,6 +207,8 @@ repackJustMove() { if [ "$VID_TO_REPACK" != "null" ] then echo + echo "Marking the tape ${VID_TO_REPACK} as REPACKING" + modifyTapeStateAndWait ${VID_TO_REPACK} REPACKING echo "Launching the repack test \"just move\" on VID ${VID_TO_REPACK}" kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -m -r ${BASE_REPORT_DIRECTORY}/Step$1-RepackJustMove -n repack_ctasystest || exit 1 else @@ -161,8 +216,12 @@ repackJustMove() { exit 1 fi + removeRepackRequest ${VID_TO_REPACK} + echo "Setting the tape ${VID_TO_REPACK} back to ACTIVE" + modifyTapeState ${VID_TO_REPACK} ACTIVE echo "Reclaiming tape ${VID_TO_REPACK}" kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape reclaim --vid ${VID_TO_REPACK} + echo echo "*****************************************************" echo "STEP $1. Testing Repack \"Just move\" workflow TEST OK" @@ -178,6 +237,8 @@ repackJustAddCopies() { VID_TO_REPACK=$(getFirstVidContainingFiles) if [ "$VID_TO_REPACK" != "null" ] then + echo "Marking the tape ${VID_TO_REPACK} as REPACKING" + modifyTapeStateAndWait ${VID_TO_REPACK} REPACKING echo "Launching the repack \"just add copies\" test on VID ${VID_TO_REPACK} with all copies already on CTA" kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -a -r ${BASE_REPORT_DIRECTORY}/Step$1-JustAddCopiesAllCopiesInCTA -n repack_ctasystest else @@ -198,6 +259,10 @@ repackJustAddCopies() { exit 1 fi + removeRepackRequest ${VID_TO_REPACK} + echo "Setting the tape ${VID_TO_REPACK} back to ACTIVE" + modifyTapeState ${VID_TO_REPACK} ACTIVE + echo echo "**********************************************************************************" echo "STEP $1. 
Testing Repack \"Just Add copies\" workflow with all copies on CTA TEST OK" @@ -218,6 +283,8 @@ repackCancellation() { if [ "$VID_TO_REPACK" != "null" ] then echo + echo "Marking the tape ${VID_TO_REPACK} as REPACKING" + modifyTapeStateAndWait ${VID_TO_REPACK} REPACKING echo "Launching a repack request on VID ${VID_TO_REPACK}" kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -m -r ${BASE_REPORT_DIRECTORY}/Step$1-RepackCancellation -n repack_ctasystest & 2>/dev/null pid=$! @@ -279,6 +346,10 @@ repackCancellation() { fi done + removeRepackRequest ${VID_TO_REPACK} + echo "Setting the tape ${VID_TO_REPACK} back to ACTIVE" + modifyTapeState ${VID_TO_REPACK} ACTIVE + echo "Retrieve queue of VID ${VID_TO_REPACK} is empty, test OK" echo "*******************************************" @@ -352,6 +423,8 @@ repackMoveAndAddCopies() { VID_TO_REPACK=$(getFirstVidContainingFiles) + echo "Marking the tape ${VID_TO_REPACK} as REPACKING" + modifyTapeStateAndWait ${VID_TO_REPACK} REPACKING echo "Launching the repack \"Move and add copies\" test on VID ${VID_TO_REPACK}" kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -t 600 -r ${BASE_REPORT_DIRECTORY}/Step$1-MoveAndAddCopies -n repack_ctasystest || exit 1 @@ -377,6 +450,9 @@ repackMoveAndAddCopies() { echo "ArchivedFiles ($archivedFiles) == totalFilesToArchive ($totalFilesToArchive), OK" fi + removeRepackRequest ${VID_TO_REPACK} + echo "Setting the tape ${VID_TO_REPACK} back to ACTIVE" + modifyTapeState ${VID_TO_REPACK} ACTIVE echo "Reclaimimg tape ${VID_TO_REPACK}" kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape reclaim --vid ${VID_TO_REPACK} @@ -435,6 +511,8 @@ repackTapeRepair() { kubectl -n ${NAMESPACE} exec ctaeos -- eos cp ${pathOfFilesToInject[$i]} $bufferDirectory/`printf "%9d\n" $fseqFile | tr ' ' 0` done + echo "Marking the tape ${VID_TO_REPACK} as REPACKING" + modifyTapeStateAndWait ${VID_TO_REPACK} REPACKING echo "Launching a repack request on the vid ${VID_TO_REPACK}" kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -m -r ${BASE_REPORT_DIRECTORY}/Step$1-RepackTapeRepair -n repack_ctasystest || exit 1 @@ -468,6 +546,10 @@ repackTapeRepair() { else echo "archivedFiles ($archivedFiles) == totalFilesToArchive ($totalFilesToArchive), OK" fi + + removeRepackRequest ${VID_TO_REPACK} + echo "Setting the tape ${VID_TO_REPACK} back to ACTIVE" + modifyTapeState ${VID_TO_REPACK} ACTIVE echo "Reclaiming tape ${VID_TO_REPACK}" kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape reclaim --vid ${VID_TO_REPACK} @@ -538,6 +620,9 @@ repackTapeRepairNoRecall() { kubectl -n ${NAMESPACE} exec ctaeos -- eos cp ${pathOfFilesToInject[$i]} $bufferDirectory/`printf "%9d\n" $fseqFile | tr ' ' 0` done + echo "Marking the tape ${VID_TO_REPACK} as REPACKING" + modifyTapeStateAndWait ${VID_TO_REPACK} REPACKING + echo "Launching a repack request on the vid ${VID_TO_REPACK}" kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -m -r ${BASE_REPORT_DIRECTORY}/Step$1-RepackTapeRepairNoRecall -n repack_ctasystest -u || exit 1 @@ -572,6 +657,10 @@ repackTapeRepairNoRecall() { echo "archivedFiles ($archivedFiles) == totalFilesToArchive ($totalFilesToArchive), OK" fi + removeRepackRequest ${VID_TO_REPACK} + echo "Setting the tape ${VID_TO_REPACK} back to ACTIVE" + modifyTapeState ${VID_TO_REPACK} ACTIVE + else echo "No file 
to inject, test not OK" exit 1 @@ -586,7 +675,7 @@ repackTapeRepairNoRecall() { #Execution of each tests archiveFiles 1 15 roundTripRepack 1 -repackDisableTape 2 +repackNonRepackingTape 2 archiveFiles 1152 15 repackJustMove 3 repackTapeRepair 4 diff --git a/continuousintegration/orchestration/tests/retrieve_queue_cleanup.sh b/continuousintegration/orchestration/tests/retrieve_queue_cleanup.sh new file mode 100755 index 0000000000000000000000000000000000000000..d03283543189732b58f8ffb4b5ea40c7aa3e6506 --- /dev/null +++ b/continuousintegration/orchestration/tests/retrieve_queue_cleanup.sh @@ -0,0 +1,605 @@ +#!/bin/bash + +# @project The CERN Tape Archive (CTA) +# @copyright Copyright © 2022 CERN +# @license This program is free software, distributed under the terms of the GNU General Public +# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can +# redistribute it and/or modify it under the terms of the GPL Version 3, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# In applying this licence, CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization or +# submit itself to any jurisdiction. + +################################################################################ +# DESCRIPTION +# +# - This script tests the new behaviour of the PREPARE request, which treats +# all files independently and idempotendly. +# - If a file fails to prepare - for any reason - it should not +# affect the PREPARE of the remaining files in the list. +# +# EXPECTED BEHAVIOUR +# +# # PREPARE -s command +# +# - Both these commands should treat <file_1> the same way, regardless of the +# other files being staged: +# > prepare -s <file_1> .. <file_N> +# > prepare -s <file_1> +# - <file_1> is no longer affected if another file <file_M> fails for any reason. +# - [Edge case:] We return an error if ALL files fail to prepare. +# +# # QUERY PREPARE +# +# - If a file failed to stage, query prepare must be able to communicate back +# that it failed and the reason. +# - This error is signaled and communitated through the field "error_text". +# +# # PREPARE -e/-a commands +# +# - We should trigger prepare evict or abort for all files, even if some fail. +# - If any file failed, the prepare -e/prepare -a should return an error +# (different behaviour from 'prepare -s'). This is necessary because, for +# these commands, this is the only way to directly know that they failed. +# +################################################################################ + +EOS_INSTANCE=ctaeos + +MULTICOPY_DIR_1=/eos/ctaeos/preprod/dir_1_copy +MULTICOPY_DIR_2=/eos/ctaeos/preprod/dir_2_copy +MULTICOPY_DIR_3=/eos/ctaeos/preprod/dir_3_copy + +# get some common useful helpers for krb5 +. 
/root/client_helper.sh + +eospower_kdestroy &>/dev/null +eospower_kinit &>/dev/null + +admin_kdestroy &>/dev/null +admin_kinit &>/dev/null + +# Find tapes and tape pools + +STORAGECLASS_1=$( eos root://${EOS_INSTANCE} attr get sys.archive.storage_class ${MULTICOPY_DIR_1} | sed -n -e 's/.*="\(.*\)"/\1/p' ) +STORAGECLASS_2=$( eos root://${EOS_INSTANCE} attr get sys.archive.storage_class ${MULTICOPY_DIR_2} | sed -n -e 's/.*="\(.*\)"/\1/p' ) +STORAGECLASS_3=$( eos root://${EOS_INSTANCE} attr get sys.archive.storage_class ${MULTICOPY_DIR_3} | sed -n -e 's/.*="\(.*\)"/\1/p' ) + +mapfile -t TAPEPOOL_LIST_1 < <( admin_cta --json archiveroute ls | jq -r --arg STORAGECLASS "$STORAGECLASS_1" '.[] | select( .storageClass == $STORAGECLASS) | .tapepool' ) +mapfile -t TAPEPOOL_LIST_2 < <( admin_cta --json archiveroute ls | jq -r --arg STORAGECLASS "$STORAGECLASS_2" '.[] | select( .storageClass == $STORAGECLASS) | .tapepool' ) +mapfile -t TAPEPOOL_LIST_3 < <( admin_cta --json archiveroute ls | jq -r --arg STORAGECLASS "$STORAGECLASS_3" '.[] | select( .storageClass == $STORAGECLASS) | .tapepool' ) + +mapfile -t TAPE_LIST_1 < <( for t in "${TAPEPOOL_LIST_1[@]}" ; do admin_cta --json tape ls --all | jq -r --arg TAPEPOOL "$t" '.[] | select( .tapepool == $TAPEPOOL) | .vid' ; done ) +mapfile -t TAPE_LIST_2 < <( for t in "${TAPEPOOL_LIST_2[@]}" ; do admin_cta --json tape ls --all | jq -r --arg TAPEPOOL "$t" '.[] | select( .tapepool == $TAPEPOOL) | .vid' ; done ) +mapfile -t TAPE_LIST_3 < <( for t in "${TAPEPOOL_LIST_3[@]}" ; do admin_cta --json tape ls --all | jq -r --arg TAPEPOOL "$t" '.[] | select( .tapepool == $TAPEPOOL) | .vid' ; done ) + +if [ "${#TAPEPOOL_LIST_1[@]}" -ne "1" ] || [ "${#TAPE_LIST_1[@]}" -ne "1" ]; then + echo "ERROR: Tape pool 1 misconfigured" + exit 1 +fi +if [ "${#TAPEPOOL_LIST_2[@]}" -ne "2" ] || [ "${#TAPE_LIST_2[@]}" -ne "2" ]; then + echo "ERROR: Tape pool 2 misconfigured" + exit 1 +fi +if [ "${#TAPEPOOL_LIST_3[@]}" -ne "3" ] || [ "${#TAPE_LIST_3[@]}" -ne "3" ]; then + echo "ERROR: Tape pool 3 misconfigured" + exit 1 +fi + +# Save file with 1, 2, 3 replicas + +FILE_1_COPY=${MULTICOPY_DIR_1}/$(uuidgen) +FILE_2_COPY=${MULTICOPY_DIR_2}/$(uuidgen) +FILE_3_COPY=${MULTICOPY_DIR_3}/$(uuidgen) + +put_all_drives_up +xrdcp /etc/group root://${EOS_INSTANCE}/${FILE_1_COPY} +xrdcp /etc/group root://${EOS_INSTANCE}/${FILE_2_COPY} +xrdcp /etc/group root://${EOS_INSTANCE}/${FILE_3_COPY} + +wait_for_archive ${EOS_INSTANCE} ${FILE_1_COPY} ${FILE_2_COPY} ${FILE_3_COPY} +put_all_drives_down + +trigger_queue_cleanup() { + # Get a list of all tapes being used, without duplicates + repeatedTapeList=( "${TAPE_LIST_1[@]}" "${TAPE_LIST_2[@]}" "${TAPE_LIST_3[@]}" ) + tapeList=(); while IFS= read -r -d '' tape; do tapeList+=("$tape"); done < <(printf "%s\0" "${repeatedTapeList[@]}" | sort -uz) + for i in ${!tapeList[@]}; do + admin_cta tape ch --vid ${tapeList[$i]} --state BROKEN --reason "Trigger cleanup" + done + for i in ${!tapeList[@]}; do + wait_for_tape_state ${tapeList[$i]} BROKEN + done + for i in ${!tapeList[@]}; do + admin_cta tape ch --vid ${tapeList[$i]} --state ACTIVE + done + for i in ${!tapeList[@]}; do + wait_for_tape_state ${tapeList[$i]} ACTIVE + done +} + +wait_for_request_cancel_report() { + + SECONDS_PASSED=0 + WAIT_TIMEOUT=90 + REQUEST_ID=$1 + FILE_PATH=$2 + + echo "Waiting for request to be reported as canceled..." 
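+  # Poll 'xrdfs ... query prepare' and extract the file's "requested" field from the JSON reply
+  # with jq; once it turns false the retrieve request has been cancelled. Give up after
+  # WAIT_TIMEOUT seconds.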
+ while true; do + QUERY_RSP=$(KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} query prepare ${REQUEST_ID} ${FILE_PATH}) + REQUESTED=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").requested") + + # Check if request has finally been canceled + if [[ "false" == "${REQUESTED}" ]]; then + break + fi + + if test ${SECONDS_PASSED} == ${WAIT_TIMEOUT}; then + echo "Timed out after ${WAIT_TIMEOUT} seconds" + break + fi + + let SECONDS_PASSED=SECONDS_PASSED+1 + echo "Waiting for request to be reported as canceled: Seconds passed = ${SECONDS_PASSED}" + + done +} + +change_tape_state() { + VID=$1 + LAST_STATE=$2 + CURR_STATE=$(admin_cta --json tape ls --vid $VID | jq -r ".[] | .state") + + # If current or desired state is REPACKING_DISABLED, revert to REPACKING + # Any state transition can be done from REPACKING, but not from REPACKING_DISABLED + if [[ + "${CURR_STATE}" == "REPACKING_DISABLED" || + "${LAST_STATE}" == "REPACKING_DISABLED" ]] + then + admin_cta tape ch --vid $VID --state REPACKING --reason "Testing" + wait_for_tape_state $VID REPACKING + fi + admin_cta tape ch --vid $VID --state $LAST_STATE --reason "Testing" + wait_for_tape_state $VID $LAST_STATE +} + + +################################################################################ +# Test queueing priority between different tape states +################################################################################ + +test_tape_state_queueing_priority() { + + TEST_NR=$1 + TAPE_STATE_LIST=("$2" "$3" "$4") + EXPECTED_SELECTED_QUEUE=$5 + FILE_PATH=$FILE_3_COPY + + pid=() + ret=() + + echo + echo "########################################################################################################" + echo " ${TEST_NR}. Testing 'Tape state priority between ${TAPE_STATE_LIST[@]}'" + echo "########################################################################################################" + echo "Setting up queue ${TAPE_LIST_3[0]} as ${TAPE_STATE_LIST[0]}, ${TAPE_LIST_3[1]} as ${TAPE_STATE_LIST[1]}, ${TAPE_LIST_3[2]} as ${TAPE_STATE_LIST[2]}..." + + change_tape_state ${TAPE_LIST_3[0]} ${TAPE_STATE_LIST[0]} & pid[0]=$! + change_tape_state ${TAPE_LIST_3[1]} ${TAPE_STATE_LIST[1]} & pid[1]=$! + change_tape_state ${TAPE_LIST_3[2]} ${TAPE_STATE_LIST[2]} & pid[2]=$! + + wait ${pid[0]}; ret[0]=$? + wait ${pid[1]}; ret[1]=$? + wait ${pid[2]}; ret[2]=$? + + if [ ${ret[0]} -ne 0 ] || [ ${ret[1]} -ne 0 ] || [ ${ret[2]} -ne 0 ] + then + echo "Failed to change tape state" + exit 1 + fi + + echo "Requesting file prepare -s..." + REQUEST_ID=$(KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} prepare -s ${FILE_PATH}) + + echo "Checking if request went to ${TAPE_STATE_LIST[$EXPECTED_SELECTED_QUEUE]} queue ${TAPE_LIST_3[$EXPECTED_SELECTED_QUEUE]}..." + + for i in ${!TAPE_LIST_3[@]}; do + echo "Checking tape ${TAPE_LIST_3[$i]}..." + if [ $i -eq $EXPECTED_SELECTED_QUEUE ]; then + if test "1" != "$(admin_cta --json sq | jq -r --arg VID "${TAPE_LIST_3[$i]}" '.[] | select(.vid == $VID) | .queuedFiles')"; then + echo "ERROR: Queue ${TAPE_LIST_3[$i]} does not contain a user request, when one was expected." + exit 1 + else + echo "Request found on ${TAPE_STATE_LIST[$i]} queue ${TAPE_LIST_3[$i]}, as expected." + fi + else + if test ! -z "$(admin_cta --json sq | jq -r --arg VID "${TAPE_LIST_3[$i]}" '.[] | select(.vid == $VID) | .queuedFiles')"; then + echo "ERROR: Queue ${TAPE_LIST_3[$i]} contains a user request, when none was expected." 
+ exit 1 + else + echo "Request not found on ${TAPE_STATE_LIST[$i]} queue ${TAPE_LIST_3[$i]}, as expected." + fi + fi + done + + echo "Cleaning up request and queues..." + KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} prepare -a ${REQUEST_ID} ${FILE_PATH} + trigger_queue_cleanup > /dev/null + + echo "OK" +} + + +################################################################################ +# Test tape state change that removes queue : 1 copy only +################################################################################ + +test_tape_state_change_queue_removed() { + + TEST_NR=$1 + STATE_START=$2 + STATE_END=$3 + + # Using $FILE_1_COPY, which has 1 replica in the following tape + + FILE_PATH=$FILE_1_COPY + + TAPE_0=${TAPE_LIST_1[0]} + + echo + echo "########################################################################################################" + echo " ${TEST_NR}. Testing 'Tape state change from $STATE_START to $STATE_END - queue removed (1 copy only)" + echo "########################################################################################################" + echo "Setting up $TAPE_0 queue as ${STATE_START}..." + + change_tape_state $TAPE_0 $STATE_START + + echo "Requesting file prepare -s..." + REQUEST_ID=$(KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} prepare -s ${FILE_PATH}) + + echo "Checking that the request was queued..." + + QUERY_RSP=$(KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} query prepare ${REQUEST_ID} ${FILE_PATH}) + PATH_EXISTS=$(echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").path_exists") + REQUESTED=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").requested") + HAS_REQID=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").has_reqid") + ERROR_TEXT=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").error_text") + if [[ + "true" != "${PATH_EXISTS}" || + "true" != "${REQUESTED}" || + "true" != "${HAS_REQID}" || + "\"\"" != "${ERROR_TEXT}" ]] + then + echo "ERROR: Request for ${FILE_PATH} not configured as expected: ${QUERY_RSP}" + exit 1 + fi + + if test "1" != "$(admin_cta --json sq | jq -r --arg VID "$TAPE_0" '.[] | select(.vid == $VID) | .queuedFiles')"; then + echo "ERROR: Request non found on $TAPE_0 queue." + exit 1 + fi + + echo "Changing $TAPE_0 queue to ${STATE_END}..." + + change_tape_state $TAPE_0 $STATE_END + + echo "Checking that the request was canceled and the error reported to the user..." + + wait_for_request_cancel_report ${REQUEST_ID} ${FILE_PATH} + + QUERY_RSP=$(KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} query prepare ${REQUEST_ID} ${FILE_PATH}) + PATH_EXISTS=$(echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").path_exists") + REQUESTED=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").requested") + HAS_REQID=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").has_reqid") + ERROR_TEXT=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").error_text") + if [[ + "true" != "${PATH_EXISTS}" || + "false" != "${REQUESTED}" || + "false" != "${HAS_REQID}" || + "\"\"" == "${ERROR_TEXT}" ]] + then + echo "ERROR: Request for ${FILE_PATH} not removed as expected: ${QUERY_RSP}" + exit 1 + fi + + if test ! 
-z "$(admin_cta --json sq | jq -r --arg VID "$TAPE_0" '.[] | select(.vid == $VID) | .queuedFiles')"; then + echo "ERROR: Queue $TAPE_0 contains a user request, when none was expected." + exit 1 + fi + + echo "Request removed and error reported back to user, as expected." + + echo "Cleaning up request and queues..." + KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} prepare -a ${REQUEST_ID} ${FILE_PATH} + trigger_queue_cleanup > /dev/null + + echo "OK" +} + + +################################################################################ +# Test tape state change that preserves queue : 1 copy only +################################################################################ + +test_tape_state_change_queue_preserved() { + + TEST_NR=$1 + STATE_START=$2 + STATE_END=$3 + + # Using $FILE_1_COPY, which has 1 replica in the following tape + + FILE_PATH=$FILE_1_COPY + + TAPE_0=${TAPE_LIST_1[0]} + + echo + echo "########################################################################################################" + echo " ${TEST_NR}. Testing 'Tape state change from $STATE_START to $STATE_END - queue preserved (1 copy only)" + echo "########################################################################################################" + echo "Setting up $TAPE_0 queue as ${STATE_START}..." + + change_tape_state $TAPE_0 $STATE_START + + echo "Requesting file prepare -s..." + REQUEST_ID=$(KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} prepare -s ${FILE_PATH}) + + echo "Checking that the request was queued..." + + QUERY_RSP=$(KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} query prepare ${REQUEST_ID} ${FILE_PATH}) + PATH_EXISTS=$(echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").path_exists") + REQUESTED=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").requested") + HAS_REQID=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").has_reqid") + ERROR_TEXT=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").error_text") + if [[ + "true" != "${PATH_EXISTS}" || + "true" != "${REQUESTED}" || + "true" != "${HAS_REQID}" || + "\"\"" != "${ERROR_TEXT}" ]] + then + echo "ERROR: Request for ${FILE_PATH} not configured as expected: ${QUERY_RSP}" + exit 1 + fi + + if test "1" != "$(admin_cta --json sq | jq -r --arg VID "$TAPE_0" '.[] | select(.vid == $VID) | .queuedFiles')"; then + echo "ERROR: Request non found on $TAPE_0 queue." + exit 1 + fi + + echo "Changing $TAPE_0 queue to ${STATE_END}..." + + change_tape_state $TAPE_0 $STATE_END + + echo "Checking that the request was not modified on the queue..." 
+
+ # Wait for a bit, to take into account protocol latencies
+ sleep 1
+
+ QUERY_RSP=$(KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} query prepare ${REQUEST_ID} ${FILE_PATH})
+ PATH_EXISTS=$(echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").path_exists")
+ REQUESTED=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").requested")
+ HAS_REQID=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").has_reqid")
+ ERROR_TEXT=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").error_text")
+ if [[
+ "true" != "${PATH_EXISTS}" ||
+ "true" != "${REQUESTED}" ||
+ "true" != "${HAS_REQID}" ||
+ "\"\"" != "${ERROR_TEXT}" ]]
+ then
+ echo "ERROR: Request for ${FILE_PATH} not preserved as expected: ${QUERY_RSP}"
+ exit 1
+ fi
+
+ if test "1" != "$(admin_cta --json sq | jq -r --arg VID "$TAPE_0" '.[] | select(.vid == $VID) | .queuedFiles')"; then
+ echo "ERROR: Request not preserved on $TAPE_0 queue."
+ exit 1
+ fi
+
+ echo "Queue preserved, as expected."
+
+ echo "Cleaning up request and queues..."
+ KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} prepare -a ${REQUEST_ID} ${FILE_PATH}
+ trigger_queue_cleanup > /dev/null
+
+ echo "OK"
+}
+
+
+################################################################################
+# Test tape state change that moves queue : 2 copies
+################################################################################
+
+test_tape_state_change_queue_moved() {
+
+ TEST_NR=$1
+ TAPE_0_STATE_START=$2
+ TAPE_1_STATE_START=$3
+ EXPECTED_QUEUE_START=$4
+ TAPE_0_STATE_END=$5
+ TAPE_1_STATE_END=$6
+ EXPECTED_QUEUE_END=$7
+
+ # Using $FILE_2_COPY, which has 2 replicas in the following tapes
+
+ FILE_PATH=$FILE_2_COPY
+
+ TAPE_0=${TAPE_LIST_2[0]}
+ TAPE_1=${TAPE_LIST_2[1]}
+
+ pid=()
+ ret=()
+
+ echo
+ echo "########################################################################################################"
+ echo " ${TEST_NR}. Testing 'Queue moved on tape state changes from ($TAPE_0_STATE_START, $TAPE_1_STATE_START) to ($TAPE_0_STATE_END, $TAPE_1_STATE_END)'"
+ echo "########################################################################################################"
+ echo "Setting up ${TAPE_0} queue as ${TAPE_0_STATE_START} and ${TAPE_1} queue as ${TAPE_1_STATE_START}..."
+
+ if [[ "0" != "${EXPECTED_QUEUE_START}" && "1" != "${EXPECTED_QUEUE_START}" ]]; then
+ echo "Initial request should be put on queue 0 or 1."
+ exit 1
+ fi
+
+ change_tape_state $TAPE_0 $TAPE_0_STATE_START & pid[0]=$!
+ change_tape_state $TAPE_1 $TAPE_1_STATE_START & pid[1]=$!
+
+ wait ${pid[0]}; ret[0]=$?
+ wait ${pid[1]}; ret[1]=$?
+
+ if [ ${ret[0]} -ne 0 ] || [ ${ret[1]} -ne 0 ]
+ then
+ echo "Failed to change tape state"
+ exit 1
+ fi
+
+ echo "Requesting file prepare -s..."
+ REQUEST_ID=$(KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} prepare -s ${FILE_PATH})
+
+ echo "Checking that the request was queued..."
+
+ QUERY_RSP=$(KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} query prepare ${REQUEST_ID} ${FILE_PATH})
+ PATH_EXISTS=$(echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").path_exists")
+ REQUESTED=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").requested")
+ HAS_REQID=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").has_reqid")
+ ERROR_TEXT=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").error_text")
+ if [[
+ "true" != "${PATH_EXISTS}" ||
+ "true" != "${REQUESTED}" ||
+ "true" != "${HAS_REQID}" ||
+ "\"\"" != "${ERROR_TEXT}" ]]
+ then
+ echo "ERROR: Request for ${FILE_PATH} not configured as expected: ${QUERY_RSP}"
+ exit 1
+ fi
+
+ if test "0" == "${EXPECTED_QUEUE_START}"; then
+ if test "1" != "$(admin_cta --json sq | jq -r --arg VID "$TAPE_0" '.[] | select(.vid == $VID) | .queuedFiles')"; then
+ echo "ERROR: Request not found on $TAPE_0 queue."
+ exit 1
+ fi
+ if test ! -z "$(admin_cta --json sq | jq -r --arg VID "$TAPE_1" '.[] | select(.vid == $VID) | .queuedFiles')"; then
+ echo "ERROR: Queue $TAPE_1 contains a user request, when none was expected."
+ exit 1
+ fi
+ else
+ if test ! -z "$(admin_cta --json sq | jq -r --arg VID "$TAPE_0" '.[] | select(.vid == $VID) | .queuedFiles')"; then
+ echo "ERROR: Queue $TAPE_0 contains a user request, when none was expected."
+ exit 1
+ fi
+ if test "1" != "$(admin_cta --json sq | jq -r --arg VID "$TAPE_1" '.[] | select(.vid == $VID) | .queuedFiles')"; then
+ echo "ERROR: Request not found on $TAPE_1 queue."
+ exit 1
+ fi
+ fi
+
+ # Change tape states, starting with the tape that has no queue
+
+ if test "0" == "${EXPECTED_QUEUE_START}"; then
+ echo "Changing $TAPE_1 queue to ${TAPE_1_STATE_END}..."
+ change_tape_state $TAPE_1 $TAPE_1_STATE_END
+ echo "Changing $TAPE_0 queue to ${TAPE_0_STATE_END}..."
+ change_tape_state $TAPE_0 $TAPE_0_STATE_END
+ else
+ echo "Changing $TAPE_0 queue to ${TAPE_0_STATE_END}..."
+ change_tape_state $TAPE_0 $TAPE_0_STATE_END
+ echo "Changing $TAPE_1 queue to ${TAPE_1_STATE_END}..."
+ change_tape_state $TAPE_1 $TAPE_1_STATE_END
+ fi
+
+ if [[ "0" == "${EXPECTED_QUEUE_END}" || "1" == "${EXPECTED_QUEUE_END}" ]]; then
+
+ echo "Checking that the request was moved from the queue ${TAPE_LIST_2[$EXPECTED_QUEUE_START]} to the queue ${TAPE_LIST_2[$EXPECTED_QUEUE_END]}..."
+
+ if test "0" == "${EXPECTED_QUEUE_END}"; then
+ if test "1" != "$(admin_cta --json sq | jq -r --arg VID "$TAPE_0" '.[] | select(.vid == $VID) | .queuedFiles')"; then
+ echo "ERROR: Request not found on $TAPE_0 queue."
+ exit 1
+ fi
+ if test ! -z "$(admin_cta --json sq | jq -r --arg VID "$TAPE_1" '.[] | select(.vid == $VID) | .queuedFiles')"; then
+ echo "ERROR: Queue $TAPE_1 contains a user request, when none was expected."
+ exit 1
+ fi
+ else
+ if test ! -z "$(admin_cta --json sq | jq -r --arg VID "$TAPE_0" '.[] | select(.vid == $VID) | .queuedFiles')"; then
+ echo "ERROR: Queue $TAPE_0 contains a user request, when none was expected."
+ exit 1
+ fi
+ if test "1" != "$(admin_cta --json sq | jq -r --arg VID "$TAPE_1" '.[] | select(.vid == $VID) | .queuedFiles')"; then
+ echo "ERROR: Request not found on $TAPE_1 queue."
+ exit 1
+ fi
+ fi
+
+ echo "Request moved to new queue, as expected."
+
+ else
+
+ echo "Checking that the request queue ${TAPE_LIST_2[$EXPECTED_QUEUE_START]} was canceled and the error reported to the user..."
+ + wait_for_request_cancel_report ${REQUEST_ID} ${FILE_PATH} + + QUERY_RSP=$(KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} query prepare ${REQUEST_ID} ${FILE_PATH}) + PATH_EXISTS=$(echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").path_exists") + REQUESTED=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").requested") + HAS_REQID=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").has_reqid") + ERROR_TEXT=$( echo ${QUERY_RSP} | jq ".responses[] | select(.path == \"${FILE_PATH}\").error_text") + if [[ + "true" != "${PATH_EXISTS}" || + "false" != "${REQUESTED}" || + "false" != "${HAS_REQID}" || + "\"\"" == "${ERROR_TEXT}" ]] + then + echo "ERROR: Request for ${FILE_PATH} not removed as expected: ${QUERY_RSP}" + exit 1 + fi + + if test ! -z "$(admin_cta --json sq | jq -r --arg VID "$TAPE_0" '.[] | select(.vid == $VID) | .queuedFiles')"; then + echo "ERROR: Queue $TAPE_0 contains a user request, when none was expected." + exit 1 + fi + if test ! -z "$(admin_cta --json sq | jq -r --arg VID "$TAPE_1" '.[] | select(.vid == $VID) | .queuedFiles')"; then + echo "ERROR: Queue $TAPE_1 contains a user request, when none was expected." + exit 1 + fi + + echo "Request removed and error reported back to user, as expected." + fi + + echo "Cleaning up request and queues..." + KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOS_INSTANCE} prepare -a ${REQUEST_ID} ${FILE_PATH} + trigger_queue_cleanup > /dev/null + + echo "OK" +} + + +################################################################ +# Finalize +################################################################ + +test_tape_state_queueing_priority 1 DISABLED DISABLED ACTIVE 2 # ACTIVE queue has priority over DISABLED queue (1) +test_tape_state_queueing_priority 2 DISABLED ACTIVE DISABLED 1 # ACTIVE queue has priority over DISABLED queue (2) +test_tape_state_queueing_priority 3 REPACKING BROKEN DISABLED 2 # DISABLED queue selected when no ACTIVE queue is available (1) +test_tape_state_queueing_priority 4 BROKEN DISABLED REPACKING 1 # DISABLED queue selected when no ACTIVE queue is available (2) +test_tape_state_queueing_priority 5 BROKEN REPACKING EXPORTED 9999 # Request not queued on REPACKING or BROKEN or EXPORTED queues +test_tape_state_queueing_priority 6 REPACKING REPACKING REPACKING_DISABLED 9999 # Request not queued on REPACKING or REPACKING_DISABLED queues +test_tape_state_change_queue_removed 7 ACTIVE REPACKING # Request canceled and reported to user, after state changed from ACTIVE to REPACKING +test_tape_state_change_queue_removed 8 ACTIVE BROKEN # Request canceled and reported to user, after state changed from ACTIVE to BROKEN +test_tape_state_change_queue_removed 9 ACTIVE EXPORTED # Request canceled and reported to user, after state changed from ACTIVE to EXPORTED +test_tape_state_change_queue_removed 10 DISABLED REPACKING # Request canceled and reported to user, after state changed from DISABLED to REPACKING +test_tape_state_change_queue_removed 11 DISABLED BROKEN # Request canceled and reported to user, after state changed from DISABLED to BROKEN +test_tape_state_change_queue_removed 12 DISABLED EXPORTED # Request canceled and reported to user, after state changed from DISABLED to EXPORTED +test_tape_state_change_queue_preserved 13 ACTIVE DISABLED # Request preserved on queue, after state changed from ACTIVE to DISABLED +test_tape_state_change_queue_preserved 14 DISABLED ACTIVE # Request preserved on 
queue, after state changed from DISABLED to ACTIVE +test_tape_state_change_queue_moved 15 ACTIVE DISABLED 0 REPACKING ACTIVE 1 # State changed from ACTIVE to REPACKING, requests moved to another ACTIVE queue +test_tape_state_change_queue_moved 16 DISABLED ACTIVE 1 DISABLED BROKEN 0 # State changed from ACTIVE to BROKEN, request moved to another DISABLED queue (ACTIVE queue not available) +test_tape_state_change_queue_moved 17 ACTIVE BROKEN 0 REPACKING BROKEN 9999 # State changed from ACTIVE to REPACKING, request canceled and reported to user (ACTIVE/DISABLED queue not available)) + +echo +echo "OK: all tests passed" diff --git a/cta.spec.in b/cta.spec.in index 622c1eb7acab8ba23a1a7a01df58b6c1a5a9108d..7665a5368a5b7a3251ed89f3dc152dc25d9b2e7f 100644 --- a/cta.spec.in +++ b/cta.spec.in @@ -587,6 +587,11 @@ echo $(jq --argjson CTA_MAJOR_VERSION $cta_major_version '. += {"cta_major_versi %changelog +* Mon Nov 28 2022 Joao Afonso <joao.afonso@cern.ch> - 4.8.0-1 +- This CTA release contains significant changes related to repacking, including the addition of new final and temporary states +- Minor fixes and improvements to CTA +- See ReleaseNotes.md for details + * Wed Nov 16 2022 Joao Afonso <joao.afonso@cern.ch> - 4.7.14-1 - Fixes to CTA and cta-admin tool, see ReleaseNotes.md for details diff --git a/objectstore/Algorithms.hpp b/objectstore/Algorithms.hpp index a5f84f32e4bd929fe6c58b1a8223612a162c1606..2c6c8288a1f82b763933b483f4ed28b39b19c0ae 100644 --- a/objectstore/Algorithms.hpp +++ b/objectstore/Algorithms.hpp @@ -98,14 +98,14 @@ public: for (const auto & e: elements) { if (!failedElementsSet.count(ContainerTraits<Q,C>::getElementAddress(e))) { transferedElements.emplace_back(ContainerTraits<Q,C>::getElementAddress(e)); - } + } } if (transferedElements.size()) m_agentReference.removeBatchFromOwnership(transferedElements, m_backend); failureEx.failedElements = failedOwnershipSwitchElements; params.add("errorCount", failedOwnershipSwitchElements.size()); std::string failedElementsAddresses = ""; for(auto & failedElement: failedElementsSet){ - failedElementsAddresses += failedElement + " "; + failedElementsAddresses += failedElement + " "; } params.add("failedElementsAddresses",failedElementsAddresses); lc.log(log::WARNING, "In ContainerAlgorithms::referenceAndSwitchOwnership(): " diff --git a/objectstore/AlgorithmsTest.cpp b/objectstore/AlgorithmsTest.cpp index dcd239a22a5ab41acaa56309948d966e28687db2..32a13eaa6297d0decd7d63ada234d0aae11abce8 100644 --- a/objectstore/AlgorithmsTest.cpp +++ b/objectstore/AlgorithmsTest.cpp @@ -31,6 +31,7 @@ #include "RetrieveQueueAlgorithms.hpp" #include "RootEntry.hpp" #include "tests/TestsCompileTimeSwitches.hpp" +#include "ObjectStoreFixture.hpp" namespace unitTests { @@ -143,7 +144,7 @@ void fillArchiveRequests(typename cta::objectstore::ContainerAlgorithms<cta::obj } } -TEST(ObjectStore, ArchiveQueueAlgorithms) { +TEST_F(ObjectStore, ArchiveQueueAlgorithms) { using namespace cta::objectstore; // We will need a log object #ifdef STDOUT_LOGGING @@ -225,7 +226,7 @@ TEST(ObjectStore, ArchiveQueueAlgorithms) { } } -TEST(ObjectStore, ArchiveQueueAlgorithmsWithDeletedJobsInQueue) { +TEST_F(ObjectStore, ArchiveQueueAlgorithmsWithDeletedJobsInQueue) { using namespace cta::objectstore; // We will need a log object #ifdef STDOUT_LOGGING @@ -339,7 +340,7 @@ TEST(ObjectStore, ArchiveQueueAlgorithmsWithDeletedJobsInQueue) { } } -TEST(ObjectStore, RetrieveQueueAlgorithms) { +TEST_F(ObjectStore, RetrieveQueueAlgorithms) { using namespace cta::objectstore; // We will 
need a log object #ifdef STDOUT_LOGGING @@ -429,7 +430,7 @@ TEST(ObjectStore, RetrieveQueueAlgorithms) { } } -TEST(ObjectStore, RetrieveQueueAlgorithmsUpdatesOldestJobQueueTime) { +TEST_F(ObjectStore, RetrieveQueueAlgorithmsUpdatesOldestJobQueueTime) { using cta::common::dataStructures::JobQueueType; using cta::objectstore::RetrieveQueue; using cta::objectstore::RetrieveQueueToTransfer; @@ -518,7 +519,7 @@ TEST(ObjectStore, RetrieveQueueAlgorithmsUpdatesOldestJobQueueTime) { ASSERT_TRUE(oldestJobStartTime > firstBatchOldestJobStartTime); } -TEST(ObjectStore, ArchiveQueueAlgorithmsUpdatesOldestJobQueueTime) { +TEST_F(ObjectStore, ArchiveQueueAlgorithmsUpdatesOldestJobQueueTime) { using cta::common::dataStructures::JobQueueType; using cta::objectstore::ArchiveQueue; using cta::objectstore::ArchiveQueueToTransferForUser; diff --git a/objectstore/CMakeLists.txt b/objectstore/CMakeLists.txt index 55b917c6582021bd760f3ab2146de16048e93d2e..8e2683b85390d9810a566f08531708b7045482d2 100644 --- a/objectstore/CMakeLists.txt +++ b/objectstore/CMakeLists.txt @@ -94,6 +94,7 @@ add_library (ctaobjectstore SHARED ProtocolBuffersAlgorithms.cpp GenericObject.cpp GarbageCollector.cpp + QueueCleanupRunner.cpp SchedulerGlobalLock.cpp ValueCountMap.cpp Helpers.cpp @@ -106,10 +107,14 @@ set_source_files_properties(BackendRados.cpp PROPERTIES COMPILE_FLAGS -Wno-depre install (TARGETS ctaobjectstore DESTINATION usr/${CMAKE_INSTALL_LIBDIR}) set(ObjectStoreUnitTests + ObjectStoreFixture.cpp BackendTest.cpp RootEntryTest.cpp RetrieveQueueTest.cpp GarbageCollectorTest.cpp + QueueCleanupRunnerTestUtils.cpp + QueueCleanupRunnerTest.cpp + QueueCleanupRunnerConcurrentTest.cpp AlgorithmsTest.cpp SorterTest.cpp ) diff --git a/objectstore/GarbageCollector.cpp b/objectstore/GarbageCollector.cpp index 35c41d67fa4762cfe0cd607d047494dc13e9dbdf..57cb2b0a44b8b056486dc5d5120c8e4d84890407 100644 --- a/objectstore/GarbageCollector.cpp +++ b/objectstore/GarbageCollector.cpp @@ -349,7 +349,7 @@ void GarbageCollector::OwnedObjectSorter::sortFetchedObjects(Agent& agent, std:: obj.reset(); // Get the list of vids for non failed tape files. std::set<std::string> candidateVids; - bool disabledTape = rr->getRepackInfo().forceDisabledTape; + bool isRepack = rr->getRepackInfo().isRepack; for (auto & j: rr->dumpJobs()) { if(j.status==RetrieveJobStatus::RJS_ToTransfer) { for (auto &tf: rr->getArchiveFile().tapeFiles) { @@ -384,7 +384,7 @@ void GarbageCollector::OwnedObjectSorter::sortFetchedObjects(Agent& agent, std:: // Back to the transfer case. 
std::string vid; try { - vid=Helpers::selectBestRetrieveQueue(candidateVids, catalogue, objectStore, disabledTape); + vid=Helpers::selectBestRetrieveQueue(candidateVids, catalogue, objectStore, isRepack); } catch (Helpers::NoTapeAvailableForRetrieve & ex) { log::ScopedParamContainer params3(lc); params3.add("fileId", rr->getArchiveFile().archiveFileID); diff --git a/objectstore/GarbageCollectorTest.cpp b/objectstore/GarbageCollectorTest.cpp index 6d93c94864883099a105694cc179d84b7c606c38..e49a07d7793bd6825f7a9a8bd5c93ad31a0fb9d5 100644 --- a/objectstore/GarbageCollectorTest.cpp +++ b/objectstore/GarbageCollectorTest.cpp @@ -41,9 +41,11 @@ #include "RootEntry.hpp" #include "tests/TestsCompileTimeSwitches.hpp" +#include "ObjectStoreFixture.hpp" + namespace unitTests { -TEST(ObjectStore, GarbageCollectorBasicFuctionnality) { +TEST_F(ObjectStore, GarbageCollectorBasicFuctionnality) { // We will need a log object #ifdef STDOUT_LOGGING cta::log::StdoutLogger dl("dummy", "unitTest"); @@ -100,7 +102,7 @@ TEST(ObjectStore, GarbageCollectorBasicFuctionnality) { ASSERT_NO_THROW(re.removeIfEmpty(lc)); } -TEST(ObjectStore, GarbageCollectorRegister) { +TEST_F(ObjectStore, GarbageCollectorRegister) { // We will need a log object #ifdef STDOUT_LOGGING cta::log::StdoutLogger dl("dummy", "unitTest"); @@ -163,7 +165,7 @@ TEST(ObjectStore, GarbageCollectorRegister) { ASSERT_NO_THROW(re.removeIfEmpty(lc)); } -TEST(ObjectStore, GarbageCollectorArchiveQueue) { +TEST_F(ObjectStore, GarbageCollectorArchiveQueue) { // We will need a log object #ifdef STDOUT_LOGGING cta::log::StdoutLogger dl("dummy", "unitTest"); @@ -227,7 +229,7 @@ TEST(ObjectStore, GarbageCollectorArchiveQueue) { ASSERT_NO_THROW(re.removeIfEmpty(lc)); } -TEST(ObjectStore, GarbageCollectorDriveRegister) { +TEST_F(ObjectStore, GarbageCollectorDriveRegister) { // We will need a log object #ifdef STDOUT_LOGGING cta::log::StdoutLogger dl("dummy", "unitTest"); @@ -291,7 +293,7 @@ TEST(ObjectStore, GarbageCollectorDriveRegister) { ASSERT_NO_THROW(re.removeIfEmpty(lc)); } -TEST(ObjectStore, GarbageCollectorArchiveRequest) { +TEST_F(ObjectStore, GarbageCollectorArchiveRequest) { using cta::common::dataStructures::JobQueueType; // We will need a log object #ifdef STDOUT_LOGGING @@ -480,7 +482,7 @@ TEST(ObjectStore, GarbageCollectorArchiveRequest) { // TODO: this unit test still leaks tape pools and requests } -TEST(ObjectStore, GarbageCollectorRetrieveRequest) { +TEST_F(ObjectStore, GarbageCollectorRetrieveRequest) { using cta::common::dataStructures::JobQueueType; // We will need a log object #ifdef STDOUT_LOGGING @@ -669,7 +671,7 @@ TEST(ObjectStore, GarbageCollectorRetrieveRequest) { // TODO: this unit test still leaks tape pools and requests } -TEST(ObjectStore, GarbageCollectorRepackRequestPending) { +TEST_F(ObjectStore, GarbageCollectorRepackRequestPending) { // We will need a log object #ifdef STDOUT_LOGGING cta::log::StdoutLogger dl("dummy", "unitTest"); @@ -751,7 +753,7 @@ TEST(ObjectStore, GarbageCollectorRepackRequestPending) { } } -TEST(ObjectStore, GarbageCollectorRepackRequestToExpand) { +TEST_F(ObjectStore, GarbageCollectorRepackRequestToExpand) { // We will need a log object #ifdef STDOUT_LOGGING cta::log::StdoutLogger dl("dummy", "unitTest"); @@ -832,7 +834,7 @@ TEST(ObjectStore, GarbageCollectorRepackRequestToExpand) { } } -TEST(ObjectStore, GarbageCollectorRepackRequestRunningExpandNotFinished) { +TEST_F(ObjectStore, GarbageCollectorRepackRequestRunningExpandNotFinished) { // We will need a log object #ifdef STDOUT_LOGGING 
cta::log::StdoutLogger dl("dummy", "unitTest"); @@ -915,7 +917,7 @@ TEST(ObjectStore, GarbageCollectorRepackRequestRunningExpandNotFinished) { } } -TEST(ObjectStore, GarbageCollectorRepackRequestRunningExpandFinished) { +TEST_F(ObjectStore, GarbageCollectorRepackRequestRunningExpandFinished) { // We will need a log object #ifdef STDOUT_LOGGING cta::log::StdoutLogger dl("dummy", "unitTest"); @@ -1024,7 +1026,7 @@ TEST(ObjectStore, GarbageCollectorRepackRequestRunningExpandFinished) { ASSERT_NE(std::string::npos,logToCheck.find("MSG=\"In RepackRequest::garbageCollect(): failed to requeue the RepackRequest (leaving it as it is) : The status Running has no corresponding queue type.\"")); } -TEST(ObjectStore, GarbageCollectorRepackRequestStarting) { +TEST_F(ObjectStore, GarbageCollectorRepackRequestStarting) { // We will need a log object #ifdef STDOUT_LOGGING cta::log::StdoutLogger dl("dummy", "unitTest"); @@ -1102,7 +1104,7 @@ TEST(ObjectStore, GarbageCollectorRepackRequestStarting) { ASSERT_NE(std::string::npos,logToCheck.find("MSG=\"In RepackRequest::garbageCollect(): failed to requeue the RepackRequest (leaving it as it is) : The status Starting has no corresponding queue type.\"")); } -TEST(ObjectStore, GarbageCollectorRetrieveAllStatusesAndQueues) { +TEST_F(ObjectStore, GarbageCollectorRetrieveAllStatusesAndQueues) { using cta::common::dataStructures::JobQueueType; // We will need a log object #ifdef STDOUT_LOGGING @@ -1571,7 +1573,7 @@ TEST(ObjectStore, GarbageCollectorRetrieveAllStatusesAndQueues) { } } -TEST(ObjectStore, GarbageCollectorRetrieveRequestRepackDisabledTape) { +TEST_F(ObjectStore, GarbageCollectorRetrieveRequestRepackRepackingTape) { using cta::common::dataStructures::JobQueueType; // We will need a log object #ifdef STDOUT_LOGGING @@ -1652,7 +1654,6 @@ TEST(ObjectStore, GarbageCollectorRetrieveRequestRepackDisabledTape) { cta::objectstore::RetrieveRequest::RepackInfo ri; ri.isRepack = true; - ri.forceDisabledTape = true; ri.fSeq = 1; ri.fileBufferURL = "testFileBufferURL"; ri.repackRequestAddress = "repackRequestAddress"; @@ -1667,7 +1668,7 @@ TEST(ObjectStore, GarbageCollectorRetrieveRequestRepackDisabledTape) { gcAgent.setTimeout_us(0); gcAgent.insertAndRegisterSelf(lc); - catalogue.addDisabledTape("Tape0"); + catalogue.addRepackingTape("Tape0"); cta::objectstore::GarbageCollector gc(be, gcAgentRef, catalogue); gc.runOnePass(lc); @@ -1689,9 +1690,9 @@ TEST(ObjectStore, GarbageCollectorRetrieveRequestRepackDisabledTape) { } { - //Test the RetrieveRequest::garbageCollect method for RJS_ToTransferForUser job and a disabled tape - cta::objectstore::AgentReference agentRefToTransferDisabledTapeAutoGc("ToReportToRepackForFailureAutoGC", dl); - cta::objectstore::Agent agentToReportToRepackForFailureJobAutoGc(agentRefToTransferDisabledTapeAutoGc.getAgentAddress(), be); + //Test the RetrieveRequest::garbageCollect method for RJS_ToTransferForUser job and a repacking tape + cta::objectstore::AgentReference agentRefToTransferRepackingTapeAutoGc("ToReportToRepackForFailureAutoGC", dl); + cta::objectstore::Agent agentToReportToRepackForFailureJobAutoGc(agentRefToTransferRepackingTapeAutoGc.getAgentAddress(), be); agentToReportToRepackForFailureJobAutoGc.initialize(); agentToReportToRepackForFailureJobAutoGc.setTimeout_us(0); agentToReportToRepackForFailureJobAutoGc.insertAndRegisterSelf(lc); @@ -1706,13 +1707,13 @@ TEST(ObjectStore, GarbageCollectorRetrieveRequestRepackDisabledTape) { { cta::objectstore::ScopedExclusiveLock sel(rr); rr.fetch(); - 
rr.setOwner(agentRefToTransferDisabledTapeAutoGc.getAgentAddress()); + rr.setOwner(agentRefToTransferRepackingTapeAutoGc.getAgentAddress()); rr.setJobStatus(2,cta::objectstore::serializers::RetrieveJobStatus::RJS_ToTransfer); rr.commit(); - agentRefToTransferDisabledTapeAutoGc.addToOwnership(rr.getAddressIfSet(),be); + agentRefToTransferRepackingTapeAutoGc.addToOwnership(rr.getAddressIfSet(),be); - ASSERT_NO_THROW(rr.garbageCollect(agentRefToTransferDisabledTapeAutoGc.getAgentAddress(),agentRef,lc,catalogue)); + ASSERT_NO_THROW(rr.garbageCollect(agentRefToTransferRepackingTapeAutoGc.getAgentAddress(),agentRef,lc,catalogue)); } //The Retrieve Request should now be queued in the RetrieveQueueToTransferForUser @@ -1732,7 +1733,7 @@ TEST(ObjectStore, GarbageCollectorRetrieveRequestRepackDisabledTape) { } } -TEST(ObjectStore, GarbageCollectorArchiveAllStatusesAndQueues) { +TEST_F(ObjectStore, GarbageCollectorArchiveAllStatusesAndQueues) { using cta::common::dataStructures::JobQueueType; // We will need a log object #ifdef STDOUT_LOGGING diff --git a/objectstore/Helpers.cpp b/objectstore/Helpers.cpp index 67d3f1a5b6acdb30d9d9613f6883095999e7bd23..5c7d9a345a399ccef7ecfdf0e97ac117395777c9 100644 --- a/objectstore/Helpers.cpp +++ b/objectstore/Helpers.cpp @@ -365,9 +365,11 @@ void Helpers::getLockedAndFetchedRepackQueue(RepackQueue& queue, ScopedExclusive // Helpers::selectBestRetrieveQueue() //------------------------------------------------------------------------------ std::string Helpers::selectBestRetrieveQueue(const std::set<std::string>& candidateVids, cta::catalogue::Catalogue & catalogue, - objectstore::Backend & objectstore, bool forceDisabledTape) { - // We will build the retrieve stats of the non-disable candidate vids here + objectstore::Backend & objectstore, bool isRepack) { + // We will build the retrieve stats of the non-disabled, non-broken/exported candidate vids here std::list<SchedulerDatabase::RetrieveQueueStatistics> candidateVidsStats; + // We will build the retrieve stats of the disabled vids here, as a fallback + std::list<SchedulerDatabase::RetrieveQueueStatistics> candidateVidsStatsFallback; // A promise we create so we can make users wait on it. 
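+ // Summary of the selection below: candidateVidsStats holds the preferred tapes (ACTIVE for a
+ // user retrieve, REPACKING for a repack retrieve), while candidateVidsStatsFallback holds the
+ // disabled counterparts (DISABLED / REPACKING_DISABLED) and is only used when no preferred
+ // queue is available.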
// Take the global lock cta::threading::MutexLocker grqsmLock(g_retrieveQueueStatisticsMutex); @@ -385,9 +387,14 @@ std::string Helpers::selectBestRetrieveQueue(const std::set<std::string>& candid grqsmLock.unlock(); updateFuture.wait(); grqsmLock.lock(); - if(g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::ACTIVE || (g_retrieveQueueStatistics.at(v).tapeStatus.isDisabled() && forceDisabledTape)) { - logUpdateCacheIfNeeded(false,g_retrieveQueueStatistics.at(v),"g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::ACTIVE || (g_retrieveQueueStatistics.at(v).tapeStatus.isDisabled() && forceDisabledTape)"); + if ((g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::ACTIVE && !isRepack) || + (g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::REPACKING && isRepack)) { + logUpdateCacheIfNeeded(false,g_retrieveQueueStatistics.at(v),"(g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::ACTIVE && !isRepack) || (g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::REPACKING && isRepack)"); candidateVidsStats.emplace_back(g_retrieveQueueStatistics.at(v).stats); + } else if ((g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::DISABLED && !isRepack) || + (g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::REPACKING_DISABLED && isRepack)) { + logUpdateCacheIfNeeded(false,g_retrieveQueueStatistics.at(v),"(g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::DISABLED && !isRepack) || (g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::REPACKING_DISABLED && isRepack)"); + candidateVidsStatsFallback.emplace_back(g_retrieveQueueStatistics.at(v).stats); } } else { // We have a cache hit, check it's not stale. @@ -401,8 +408,13 @@ std::string Helpers::selectBestRetrieveQueue(const std::set<std::string>& candid logUpdateCacheIfNeeded(false,g_retrieveQueueStatistics.at(v),"Cache is not updated, timeSinceLastUpdate ("+std::to_string(timeSinceLastUpdate)+ ") <= c_retrieveQueueCacheMaxAge ("+std::to_string(c_retrieveQueueCacheMaxAge)+")"); // We're lucky: cache hit (and not stale) - if (g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::ACTIVE || (g_retrieveQueueStatistics.at(v).tapeStatus.isDisabled() && forceDisabledTape)) + if ((g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::ACTIVE && !isRepack) || + (g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::REPACKING && isRepack)) { candidateVidsStats.emplace_back(g_retrieveQueueStatistics.at(v).stats); + } else if ((g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::DISABLED && !isRepack) || + (g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::REPACKING_DISABLED && isRepack)) { + candidateVidsStatsFallback.emplace_back(g_retrieveQueueStatistics.at(v).stats); + } } } catch (std::out_of_range &) { // We need to update the entry in the cache (miss or stale, we handle the same way). @@ -442,14 +454,22 @@ std::string Helpers::selectBestRetrieveQueue(const std::set<std::string>& candid // Signal to potential waiters updatePromise.set_value(); // Update our own candidate list if needed. 
- if (g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::ACTIVE || (g_retrieveQueueStatistics.at(v).tapeStatus.isDisabled() && forceDisabledTape)) { + if ((g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::ACTIVE && !isRepack) || + (g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::REPACKING && isRepack)) { candidateVidsStats.emplace_back(g_retrieveQueueStatistics.at(v).stats); + } else if ((g_retrieveQueueStatistics.at(v).tapeStatus.state == common::dataStructures::Tape::DISABLED && !isRepack)) { + candidateVidsStatsFallback.emplace_back(g_retrieveQueueStatistics.at(v).stats); } } } // We now have all the candidates listed (if any). - if (candidateVidsStats.empty()) - throw NoTapeAvailableForRetrieve("In Helpers::selectBestRetrieveQueue(): no tape available to recall from."); + if (candidateVidsStats.empty()) { + if (candidateVidsStatsFallback.empty()) { + throw NoTapeAvailableForRetrieve("In Helpers::selectBestRetrieveQueue(): no tape available to recall from."); + } + // If `candidateVidsStats` is empty, insert the DISABLED tapes + candidateVidsStats.insert(candidateVidsStats.end(), candidateVidsStatsFallback.begin(), candidateVidsStatsFallback.end()); + } // Sort the tapes. candidateVidsStats.sort(SchedulerDatabase::RetrieveQueueStatistics::leftGreaterThanRight); // Get a list of equivalent best tapes @@ -501,6 +521,11 @@ void Helpers::flushRetrieveQueueStatisticsCache(){ g_retrieveQueueStatistics.clear(); } +void Helpers::flushRetrieveQueueStatisticsCacheForVid(const std::string & vid){ + threading::MutexLocker ml(g_retrieveQueueStatisticsMutex); + g_retrieveQueueStatistics.erase(vid); +} + //------------------------------------------------------------------------------ // Helpers::g_retrieveQueueStatistics //------------------------------------------------------------------------------ diff --git a/objectstore/Helpers.hpp b/objectstore/Helpers.hpp index 2cc5e4de7317c7085aef7f576cc5349896ddfacf..caa1c5f0251271c3a9eee9f470549612f1b9cf84 100644 --- a/objectstore/Helpers.hpp +++ b/objectstore/Helpers.hpp @@ -87,12 +87,12 @@ class Helpers { /** * Find the most appropriate queue (bid) to add the retrieve request to. The potential * VIDs (VIDs for non-failed copies) is provided by the caller. The status of the - * the tapes (disabled or not, and available queue size) are all cached to avoid + * the tapes (and available queue size) are all cached to avoid * frequent access to the object store. The caching create a small inefficiency * to the algorithm, but will help performance drastically for a very similar result */ static std::string selectBestRetrieveQueue(const std::set<std::string> & candidateVids, - cta::catalogue::Catalogue & catalogue, objectstore::Backend & objectstore, bool forceDisabledTape = false); + cta::catalogue::Catalogue & catalogue, objectstore::Backend & objectstore, bool isRepack = false); /** * Gets the retrieve queue statistics for a set of Vids (extracted from the OStoreDB @@ -114,6 +114,7 @@ class Helpers { * TO BE USED BY UNIT TESTS ! 
*/ static void flushRetrieveQueueStatisticsCache(); + static void flushRetrieveQueueStatisticsCacheForVid(const std::string & vid); private: /** Lock for the retrieve queues stats */ diff --git a/objectstore/ObjectStoreFixture.cpp b/objectstore/ObjectStoreFixture.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7a5cbf46e31f4fc957c5dcd63a00f70d8e966125 --- /dev/null +++ b/objectstore/ObjectStoreFixture.cpp @@ -0,0 +1,29 @@ + +/* + * @project The CERN Tape Archive (CTA) + * @copyright Copyright © 2021-2022 CERN + * @license This program is free software, distributed under the terms of the GNU General Public + * Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can + * redistribute it and/or modify it under the terms of the GPL Version 3, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * In applying this licence, CERN does not waive the privileges and immunities + * granted to it by virtue of its status as an Intergovernmental Organization or + * submit itself to any jurisdiction. + */ + +#include "ObjectStoreFixture.hpp" +#include "Helpers.hpp" + +namespace unitTests { + + void ObjectStore::SetUp() { + // We need to cleanup the queue statistics cache before every test + cta::objectstore::Helpers::flushRetrieveQueueStatisticsCache(); + } + +} diff --git a/objectstore/ObjectStoreFixture.hpp b/objectstore/ObjectStoreFixture.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8af16f65d8b3974ec45bb99297472ed9b2c0e7a2 --- /dev/null +++ b/objectstore/ObjectStoreFixture.hpp @@ -0,0 +1,30 @@ + +/* + * @project The CERN Tape Archive (CTA) + * @copyright Copyright © 2021-2022 CERN + * @license This program is free software, distributed under the terms of the GNU General Public + * Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can + * redistribute it and/or modify it under the terms of the GPL Version 3, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * In applying this licence, CERN does not waive the privileges and immunities + * granted to it by virtue of its status as an Intergovernmental Organization or + * submit itself to any jurisdiction. + */ + +#include <gtest/gtest.h> + +#include "Helpers.hpp" + +namespace unitTests { + + class ObjectStore : public ::testing::Test { + protected: + virtual void SetUp() override; + }; + +} \ No newline at end of file diff --git a/objectstore/QueueCleanupRunner.cpp b/objectstore/QueueCleanupRunner.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9a11c9f422123ac584dcda6b0544dfd4b2d9ab2a --- /dev/null +++ b/objectstore/QueueCleanupRunner.cpp @@ -0,0 +1,231 @@ +/* + * @project The CERN Tape Archive (CTA) + * @copyright Copyright © 2021-2022 CERN + * @license This program is free software, distributed under the terms of the GNU General Public + * Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can + * redistribute it and/or modify it under the terms of the GPL Version 3, or (at your + * option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * In applying this licence, CERN does not waive the privileges and immunities + * granted to it by virtue of its status as an Intergovernmental Organization or + * submit itself to any jurisdiction. + */ + +#include "objectstore/QueueCleanupRunner.hpp" + +namespace cta { namespace objectstore { + +QueueCleanupRunner::QueueCleanupRunner(AgentReference &agentReference, SchedulerDatabase & oStoreDb, catalogue::Catalogue &catalogue, + std::optional<double> heartBeatTimeout, std::optional<int> batchSize) : + m_catalogue(catalogue), m_db(oStoreDb), + m_batchSize(batchSize.value_or(DEFAULT_BATCH_SIZE)), m_heartBeatTimeout(heartBeatTimeout.value_or(DEFAULT_HEARTBEAT_TIMEOUT)) { +} + +void QueueCleanupRunner::runOnePass(log::LogContext &logContext) { + + cta::common::dataStructures::SecurityIdentity admin; + // TODO: Check if these parameters make sense, mainly the username + admin.username = "Queue cleanup runner"; + admin.host = cta::utils::getShortHostname(); + + std::list<QueueCleanupInfo> queuesForCleanupInfo; + auto queuesForCleanup = m_db.getRetrieveQueuesCleanupInfo(logContext); + + // Check which queues need and can be cleaned up + + for (auto queue: queuesForCleanup) { + + cta::common::dataStructures::Tape tapeToCheck; + + try { + auto vidToTapesMap = m_catalogue.getTapesByVid(queue.vid); //throws an exception if the vid is not found on the database + tapeToCheck = vidToTapesMap.at(queue.vid); + } catch (const exception::UserError &ex) { + log::ScopedParamContainer params(logContext); + params.add("tapeVid", queue.vid) + .add("cleanupFlag", queue.doCleanup) + .add("exceptionMessage", ex.getMessageValue()); + logContext.log(log::WARNING, "WARNING: In QueueCleanupRunner::runOnePass(): failed to find a tape in the database. 
Skipping it."); + continue; // Ignore queue + } + + if (!queue.doCleanup) { + // Do not clean a queue that does not have the cleanup flag set true + continue; // Ignore queue + } + + // Check heartbeat of other agents + if (queue.assignedAgent.has_value()) { + bool newEntry = false; + + // We must register all new queues that are being cleaned up + if (m_heartbeatCheck.find(queue.vid) == m_heartbeatCheck.end()) { + newEntry = true; + m_heartbeatCheck[queue.vid].agent = queue.assignedAgent.value(); + m_heartbeatCheck[queue.vid].heartbeat = queue.heartbeat; + m_heartbeatCheck[queue.vid].lastUpdateTimestamp = m_timer.secs(); + } + + auto oldHeartbeatValue = m_heartbeatCheck[queue.vid].heartbeat; + + if (newEntry || queue.heartbeat != oldHeartbeatValue) { + // If heartbeat has been updated, then the queue is being actively processed by another agent + // Record new timestamp and move on + m_heartbeatCheck[queue.vid].lastUpdateTimestamp = m_timer.secs(); + continue; // Ignore queue + } else { + // If heartbeat has not been updated, check how long ago the last update happened + // If not enough time has passed, do not consider this queue for cleanup + auto lastHeartbeatTimestamp = m_heartbeatCheck[queue.vid].lastUpdateTimestamp; + if ((m_timer.secs() - lastHeartbeatTimestamp) < m_heartBeatTimeout) { + continue; // Ignore queue + } + } + } + + if (tapeToCheck.state != common::dataStructures::Tape::REPACKING_PENDING + && tapeToCheck.state != common::dataStructures::Tape::BROKEN_PENDING + && tapeToCheck.state != common::dataStructures::Tape::EXPORTED_PENDING) { + // Do not cleanup a tape that is not in a X_PENDING state + log::ScopedParamContainer params(logContext); + params.add("tapeVid", queue.vid) + .add("cleanupFlag", queue.doCleanup) + .add("tapeState", common::dataStructures::Tape::stateToString(tapeToCheck.state)); + logContext.log( + log::WARNING, + "In QueueCleanupRunner::runOnePass(): Queue is has cleanup flag enabled but is not in the expected PENDING state. Skipping it."); + continue; + } + + queuesForCleanupInfo.push_back(QueueCleanupInfo()); + queuesForCleanupInfo.back().vid = queue.vid; + queuesForCleanupInfo.back().tapeState = tapeToCheck.state; + } + + // Cleanup queues one by one + + for (auto qForCleanup: queuesForCleanupInfo) { + + utils::Timer t; + + log::ScopedParamContainer loopParams(logContext); + loopParams.add("tapeVid", qForCleanup.vid) + .add("tapeState", common::dataStructures::Tape::stateToString(qForCleanup.tapeState)); + + try { + bool prevHeartbeatExists = (m_heartbeatCheck.find(qForCleanup.vid) != m_heartbeatCheck.end()); + m_db.reserveRetrieveQueueForCleanup( + qForCleanup.vid, + prevHeartbeatExists ? std::optional(m_heartbeatCheck[qForCleanup.vid].heartbeat) : std::nullopt); + } catch (OStoreDB::RetrieveQueueNotFound & ex) { + log::ScopedParamContainer paramsExcMsg(logContext); + paramsExcMsg.add("exceptionMessage", ex.getMessageValue()); + logContext.log(log::WARNING, + "In QueueCleanupRunner::runOnePass(): Unable to find the retrieve queue for cleanup. Queue may have already been deleted. Skipping it."); + continue; + } catch (OStoreDB::RetrieveQueueNotReservedForCleanup & ex) { + log::ScopedParamContainer paramsExcMsg(logContext); + paramsExcMsg.add("exceptionMessage", ex.getMessageValue()); + logContext.log(log::WARNING, + "In QueueCleanupRunner::runOnePass(): Unable to reserve the retrieve queue due to it not being available for cleanup. 
Skipping it."); + continue; + } catch (cta::exception::Exception & ex) { + log::ScopedParamContainer paramsExcMsg(logContext); + paramsExcMsg.add("exceptionMessage", ex.getMessageValue()); + logContext.log(log::WARNING, + "In QueueCleanupRunner::runOnePass(): Unable to reserve the retrieve queue for cleanup for unknown reasons. Skipping it."); + continue; + } + + // Transfer all the jobs to a different queue, or report to the user if no replicas exist + + while (true) { + + utils::Timer tLoop; + log::ScopedParamContainer paramsLoopMsg(logContext); + + auto dbRet = m_db.getNextRetrieveJobsToTransferBatch(qForCleanup.vid, m_batchSize, logContext); + if (dbRet.empty()) break; + std::list<cta::SchedulerDatabase::RetrieveJob *> jobPtList; + for (auto &j: dbRet) { + jobPtList.push_back(j.get()); + } + m_db.requeueRetrieveRequestJobs(jobPtList, logContext); + + double jobMovingTime = tLoop.secs(utils::Timer::resetCounter); + + paramsLoopMsg.add("numberOfJobsMoved", dbRet.size()) + .add("jobMovingTime", jobMovingTime) + .add("tapeVid", qForCleanup.vid); + logContext.log(cta::log::INFO,"In DiskReportRunner::runOnePass(): Queue jobs moved."); + + // Tick heartbeat + try { + m_db.tickRetrieveQueueCleanupHeartbeat(qForCleanup.vid); + } catch (OStoreDB::RetrieveQueueNotFound & ex) { + break; // Queue was already deleted, probably after all the requests have been removed + } catch (OStoreDB::RetrieveQueueNotReservedForCleanup & ex) { + log::ScopedParamContainer paramsExcMsg(logContext); + paramsExcMsg.add("exceptionMessage", ex.getMessageValue()); + logContext.log(log::WARNING, + "In QueueCleanupRunner::runOnePass(): Unable to update heartbeat of retrieve queue cleanup, due to it not being reserved by agent. Aborting cleanup."); + break; + } catch (cta::exception::Exception & ex) { + log::ScopedParamContainer paramsExcMsg(logContext); + paramsExcMsg.add("exceptionMessage", ex.getMessageValue()); + logContext.log(log::WARNING, + "In QueueCleanupRunner::runOnePass(): Unable to update heartbeat of retrieve queue cleanup for unknown reasons. Aborting cleanup."); + break; + } + } + + // Finally, update the tape state + + { + cta::common::dataStructures::Tape tapeToModify; + + try { + auto vidToTapesMap = m_catalogue.getTapesByVid(qForCleanup.vid); //throws an exception if the vid is not found on the database + tapeToModify = vidToTapesMap.at(qForCleanup.vid); + } catch (const exception::UserError &ex) { + log::ScopedParamContainer params(logContext); + params.add("tapeVid", qForCleanup.vid) + .add("exceptionMessage", ex.getMessageValue()); + logContext.log(log::WARNING, "WARNING: In QueueCleanupRunner::runOnePass(): Failed to find a tape in the database. Unable to update tape state."); + continue; // Ignore queue + } + + // Finally, modify tape state to REPACKING or BROKEN + // The STATE_REASON set by operator will be preserved, with just an extra message prepended. 
+ std::optional<std::string> prevReason = tapeToModify.stateReason; + switch (tapeToModify.state) { + case common::dataStructures::Tape::REPACKING_PENDING: + m_catalogue.modifyTapeState(admin, qForCleanup.vid, common::dataStructures::Tape::REPACKING, common::dataStructures::Tape::REPACKING_PENDING, prevReason.value_or("QueueCleanupRunner: changed tape state to REPACKING")); + m_db.clearRetrieveQueueStatisticsCache(qForCleanup.vid); + break; + case common::dataStructures::Tape::BROKEN_PENDING: + m_catalogue.modifyTapeState(admin, qForCleanup.vid, common::dataStructures::Tape::BROKEN, common::dataStructures::Tape::BROKEN_PENDING, prevReason.value_or("QueueCleanupRunner: changed tape state to BROKEN")); + m_db.clearRetrieveQueueStatisticsCache(qForCleanup.vid); + break; + case common::dataStructures::Tape::EXPORTED_PENDING: + m_catalogue.modifyTapeState(admin, qForCleanup.vid, common::dataStructures::Tape::EXPORTED, common::dataStructures::Tape::EXPORTED_PENDING, prevReason.value_or("QueueCleanupRunner: changed tape state to EXPORTED")); + m_db.clearRetrieveQueueStatisticsCache(qForCleanup.vid); + break; + default: + log::ScopedParamContainer paramsWarnMsg(logContext); + paramsWarnMsg.add("tapeVid", qForCleanup.vid) + .add("expectedPrevState", common::dataStructures::Tape::stateToString(qForCleanup.tapeState)) + .add("actualPrevState", common::dataStructures::Tape::stateToString(tapeToModify.state)); + logContext.log(log::WARNING, "WARNING: In QueueCleanupRunner::runOnePass(): Cleaned up tape is not in a PENDING state. Unable to change it to its corresponding final state."); + break; + } + } + } +} + +} +} \ No newline at end of file diff --git a/objectstore/QueueCleanupRunner.hpp b/objectstore/QueueCleanupRunner.hpp new file mode 100644 index 0000000000000000000000000000000000000000..dc4281ba7e400fed49ae677bfdd9697008d6d07b --- /dev/null +++ b/objectstore/QueueCleanupRunner.hpp @@ -0,0 +1,75 @@ +/* +* @project The CERN Tape Archive (CTA) +* @copyright Copyright © 2021-2022 CERN +* @license This program is free software, distributed under the terms of the GNU General Public +* Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can +* redistribute it and/or modify it under the terms of the GPL Version 3, or (at your +* option) any later version. +* +* This program is distributed in the hope that it will be useful, but WITHOUT ANY +* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +* PARTICULAR PURPOSE. See the GNU General Public License for more details. +* +* In applying this licence, CERN does not waive the privileges and immunities +* granted to it by virtue of its status as an Intergovernmental Organization or +* submit itself to any jurisdiction. +*/ + +#pragma once + +#include "objectstore/Agent.hpp" +#include "objectstore/AgentRegister.hpp" +#include "objectstore/AgentWatchdog.hpp" +#include "objectstore/GenericObject.hpp" +#include "objectstore/Sorter.hpp" +#include "catalogue/Catalogue.hpp" + +/** + * Plan => Cleanup runner keeps track of queues that need to be emptied + * If a queue is signaled for cleanup, the cleanup runner should take ownership of it, and move all the requests + * to other queues. + * If there is no other queue available, the request should be aborted and reported back to the user. 
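+ *
+ * Typical driving loop (sketch only; keepRunning, runnerPeriod and logContext are caller-defined):
+ *
+ *   cta::objectstore::QueueCleanupRunner runner(agentReference, oStoreDb, catalogue);
+ *   while (keepRunning) {
+ *     runner.runOnePass(logContext);  // requeue or abort the requests of tapes in *_PENDING states
+ *     ::sleep(runnerPeriod);
+ *   }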
+ */ + +namespace cta { namespace objectstore { + +class RetrieveRequest; + +class QueueCleanupRunner { + +public: + // We currently got for a hardcoded number of jobs batch to requeue every turn + static constexpr int DEFAULT_BATCH_SIZE = 500; + static constexpr double DEFAULT_HEARTBEAT_TIMEOUT = 120; + + QueueCleanupRunner(AgentReference &agentReference, SchedulerDatabase & oStoreDb, catalogue::Catalogue &catalogue, + std::optional<double> heartBeatTimeout = std::nullopt, std::optional<int> batchSize = std::nullopt); + + ~QueueCleanupRunner() = default; + + void runOnePass(log::LogContext &lc); + +private: + + struct QueueCleanupInfo { + std::string vid; + cta::common::dataStructures::Tape::State tapeState; + }; + + struct HeartbeatStatus { + std::string agent; + uint64_t heartbeat; + double lastUpdateTimestamp; + }; + + catalogue::Catalogue &m_catalogue; + SchedulerDatabase &m_db; + std::map<std::string, HeartbeatStatus> m_heartbeatCheck; + cta::utils::Timer m_timer; + + int m_batchSize; + double m_heartBeatTimeout; +}; + +} +} \ No newline at end of file diff --git a/objectstore/QueueCleanupRunnerConcurrentTest.cpp b/objectstore/QueueCleanupRunnerConcurrentTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f37ba9844333cd1d12119610b78396910325a42f --- /dev/null +++ b/objectstore/QueueCleanupRunnerConcurrentTest.cpp @@ -0,0 +1,419 @@ +/* + * @project The CERN Tape Archive (CTA) + * @copyright Copyright © 2021-2022 CERN + * @license This program is free software, distributed under the terms of the GNU General Public + * Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can + * redistribute it and/or modify it under the terms of the GPL Version 3, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * In applying this licence, CERN does not waive the privileges and immunities + * granted to it by virtue of its status as an Intergovernmental Organization or + * submit itself to any jurisdiction. + */ + +#include "objectstore/BackendRadosTestSwitch.hpp" +#include "tests/TestsCompileTimeSwitches.hpp" +#include "scheduler/SchedulerDatabase.hpp" +#include "scheduler/SchedulerDatabaseFactory.hpp" +#include "common/dataStructures/SecurityIdentity.hpp" +#include "catalogue/InMemoryCatalogue.hpp" +#include "objectstore/BackendRados.hpp" +#include "common/log/DummyLogger.hpp" +#ifdef STDOUT_LOGGING +#include "common/log/StdoutLogger.hpp" +#endif + +#include <exception> +#include <gtest/gtest.h> +#include <algorithm> +#include <uuid/uuid.h> + +#include "objectstore/ObjectStoreFixture.hpp" +#include "catalogue/DummyCatalogue.hpp" +#include "objectstore/BackendVFS.hpp" +#include "objectstore/GarbageCollector.hpp" +#include "objectstore/QueueCleanupRunner.hpp" +#include "scheduler/OStoreDB/OStoreDBFactory.hpp" +#include "scheduler/OStoreDB/OStoreDBWithAgent.hpp" +#include "scheduler/Scheduler.hpp" +#include "common/log/StdoutLogger.hpp" + +#include "objectstore/QueueCleanupRunnerTestUtils.hpp" + +//#define STDOUT_LOGGING + +namespace unitTests { + +using Tape = cta::common::dataStructures::Tape; + +/** + * This structure represents the state and number of jobs of a queue at a certain point. + * It is used to parameterize the tests. 
+ */ +struct ConcurrentTapeQueueSetup { + uint32_t retrieveQueueToTransferJobs; + uint32_t retrieveQueueToReportJobs; +}; + +/** + * This structure represents the initial and final setup of a queue. + * It is used to parameterize the tests. + */ +struct ConcurrentTapeQueueTransition { + std::string vid; + Tape::State initialState; // Initial tape state + Tape::State desiredState; // New desired state (`modifyTapeState`) + Tape::State expectedState; // Expected state at the end of test + ConcurrentTapeQueueSetup initialSetup; + ConcurrentTapeQueueSetup finalSetup; +}; + +/** + * This structure parameterizes the initial number of requests to insert on a queue and the existing replicas. + * It is used to parameterize the tests. + */ +struct RetrieveRequestSetup { + uint32_t numberOfRequests; + std::string activeCopyVid; + std::list<std::string> replicaCopyVids; +}; + +/** + * This structure is used to parameterize OStore database tests. + */ +struct QueueCleanupRunnerConcurrentTestParams { + cta::SchedulerDatabaseFactory &dbFactory; + std::list<RetrieveRequestSetup> &retrieveRequestSetupList; + std::list<ConcurrentTapeQueueTransition> &tapeQueueTransitionList; + double cleanupTimeout; + + QueueCleanupRunnerConcurrentTestParams( + cta::SchedulerDatabaseFactory &dbFactory, + std::list<RetrieveRequestSetup> &retrieveRequestSetupList, + std::list<ConcurrentTapeQueueTransition> &tapeQueueTransitionList, + double cleanupTimeout) : + dbFactory(dbFactory), + retrieveRequestSetupList(retrieveRequestSetupList), + tapeQueueTransitionList(tapeQueueTransitionList), + cleanupTimeout(cleanupTimeout) { + } +}; + +/** + * The OStore database test is a parameterized test. It takes an + * OStore database factory as a parameter. + */ +class QueueCleanupRunnerConcurrentTest: public + ::testing::TestWithParam<QueueCleanupRunnerConcurrentTestParams> { +public: + + QueueCleanupRunnerConcurrentTest() noexcept { + } + + class FailedToGetDatabase: public std::exception { + public: + const char *what() const noexcept override { + return "Failed to get scheduler database"; + } + }; + + class FailedToGetCatalogue: public std::exception { + public: + const char *what() const noexcept override { + return "Failed to get catalogue"; + } + }; + + class FailedToGetScheduler: public std::exception { + public: + const char *what() const noexcept override { + return "Failed to get scheduler"; + } + }; + + virtual void SetUp() { + // We do a deep reference to the member as the C++ compiler requires the function to be + // already defined if called implicitly. + const auto &factory = GetParam().dbFactory; + m_catalogue = std::make_unique<cta::catalogue::DummyCatalogue>(); + // Get the OStore DB from the factory. + auto osDb = factory.create(m_catalogue); + // Make sure the type of the SchedulerDatabase is correct (it should be an OStoreDBWrapperInterface). + dynamic_cast<cta::objectstore::OStoreDBWrapperInterface *> (osDb.get()); + // We know the cast will not fail, so we can safely do it (otherwise we could leak memory). 
+ m_db.reset(dynamic_cast<cta::objectstore::OStoreDBWrapperInterface *> (osDb.release())); + // Setup scheduler + m_scheduler = std::make_unique<cta::Scheduler>(*m_catalogue, *m_db, 5, 2 * 1000 * 1000); + } + + virtual void TearDown() { + cta::objectstore::Helpers::flushRetrieveQueueStatisticsCache(); + m_scheduler.reset(); + m_db.reset(); + m_catalogue.reset(); + } + + cta::objectstore::OStoreDBWrapperInterface &getDb() { + cta::objectstore::OStoreDBWrapperInterface *const ptr = m_db.get(); + if (nullptr == ptr) { + throw FailedToGetDatabase(); + } + return *ptr; + } + + cta::catalogue::DummyCatalogue &getCatalogue() { + cta::catalogue::DummyCatalogue *const ptr = dynamic_cast<cta::catalogue::DummyCatalogue*>(m_catalogue.get()); + if (nullptr == ptr) { + throw FailedToGetCatalogue(); + } + return *ptr; + } + + cta::Scheduler &getScheduler() { + + cta::Scheduler *const ptr = m_scheduler.get(); + if (nullptr == ptr) { + throw FailedToGetScheduler(); + } + return *ptr; + } + +private: + // Prevent copying + QueueCleanupRunnerConcurrentTest(const QueueCleanupRunnerConcurrentTest &) = delete; + + // Prevent assignment + QueueCleanupRunnerConcurrentTest & operator= (const QueueCleanupRunnerConcurrentTest &) = delete; + std::unique_ptr<cta::objectstore::OStoreDBWrapperInterface> m_db; + std::unique_ptr<cta::catalogue::Catalogue> m_catalogue; + std::unique_ptr<cta::Scheduler> m_scheduler; +}; + +class OStoreDBWithAgentBroken : public cta::OStoreDBWithAgent { + +public: + class TriggeredException: public std::exception { + public: + const char *what() const noexcept override { + return "Triggered exception"; + } + }; + + using OStoreDBWithAgent::OStoreDBWithAgent; + std::list<std::unique_ptr<SchedulerDatabase::RetrieveJob>> getNextRetrieveJobsToTransferBatch( + std::string & vid, uint64_t filesRequested, cta::log::LogContext &logContext) override { + throw TriggeredException(); + } +}; + + +TEST_P(QueueCleanupRunnerConcurrentTest, CleanupRunnerParameterizedTest) { + using cta::common::dataStructures::JobQueueType; + // We will need a log object +#ifdef STDOUT_LOGGING + cta::log::StdoutLogger dl("dummy", "unitTest"); +#else + cta::log::DummyLogger dl("dummy", "unitTest"); +#endif + cta::log::LogContext lc(dl); + // We need a dummy catalogue + cta::catalogue::DummyCatalogue & catalogue = getCatalogue(); + // Object store + cta::objectstore::OStoreDBWrapperInterface & oKOStore = getDb(); + // Broken object store, pointing to same as `oKOStore` + auto brokenOStore = OStoreDBWithAgentBroken(oKOStore.getBackend(), oKOStore.getAgentReference(), catalogue, dl); + // Backend + auto & be = dynamic_cast<cta::objectstore::BackendVFS&>(oKOStore.getBackend()); + // Remove this comment to avoid cleaning the object store files on destruction, useful for debugging + // be.noDeleteOnExit(); + // Scheduler + cta::Scheduler & scheduler = getScheduler(); + // Dummy admin + const cta::common::dataStructures::SecurityIdentity dummyAdmin; + + //AgentA for queueing + cta::objectstore::AgentReference agentForSetupRef("AgentForSetup", dl); + cta::objectstore::Agent agentForSetup(agentForSetupRef.getAgentAddress(), be); + + //AgentB for popping + cta::objectstore::AgentReference agentForCleanupRef("AgentForCleanup", dl); + cta::objectstore::Agent agentForCleanup(agentForCleanupRef.getAgentAddress(), be); + + // Create the root entry + cta::objectstore::EntryLogSerDeser el("user0", "unittesthost", time(nullptr)); + cta::objectstore::RootEntry re(be); + cta::objectstore::ScopedExclusiveLock rel(re); + re.fetch(); + + // Create 
the agent register + re.addOrGetAgentRegisterPointerAndCommit(agentForSetupRef, el, lc); + re.addOrGetAgentRegisterPointerAndCommit(agentForCleanupRef, el, lc); + rel.release(); + + agentForSetup.initialize(); + agentForSetup.insertAndRegisterSelf(lc); + agentForCleanup.initialize(); + agentForCleanup.insertAndRegisterSelf(lc); + + // Create retrieve requests and add them to the queues + // Create queues when they do not exist + for (auto & retrieveRequestSetupList : GetParam().retrieveRequestSetupList) { + + // Identify list of vids where copies exist, including active copy + std::set<std::string> allVids; + allVids.insert(retrieveRequestSetupList.replicaCopyVids.begin(), retrieveRequestSetupList.replicaCopyVids.end()); + allVids.insert(retrieveRequestSetupList.activeCopyVid); + std::string activeVid = retrieveRequestSetupList.activeCopyVid; + + // Generate requests + std::list<std::unique_ptr<cta::objectstore::RetrieveRequest> > requestsPtrs; + cta::objectstore::ContainerAlgorithms<cta::objectstore::RetrieveQueue, cta::objectstore::RetrieveQueueToTransfer>::InsertedElement::list requests; + unitTests::fillRetrieveRequestsForCleanupRunner(requests, retrieveRequestSetupList.numberOfRequests, requestsPtrs, allVids, activeVid, be, agentForSetupRef); //memory leak avoided here with 'requestsPtrs' + + // Create queue for requests to active copy + std::string agentForSetupAddr = agentForSetupRef.getAgentAddress(); + { + cta::objectstore::ScopedExclusiveLock relQ(re); + re.fetch(); + re.addOrGetRetrieveQueueAndCommit(activeVid, agentForSetupRef, JobQueueType::JobsToTransferForUser); + } + + // Insert requests into active vid queue + using RetrieveQueueAlgorithm = cta::objectstore::ContainerAlgorithms<cta::objectstore::RetrieveQueue, cta::objectstore::RetrieveQueueToTransfer>; + RetrieveQueueAlgorithm retrieveQueueAlgo(be, agentForSetupRef); + retrieveQueueAlgo.referenceAndSwitchOwnership(activeVid, agentForSetupAddr, requests, lc); + } + + // Setup initial tape states and validate number of requests + //for (TapeQueueTransition tapeQueueStateTrans : GetParam().tapeQueueTransitionList) { + for (auto & tapeQueueStateTrans : GetParam().tapeQueueTransitionList) { + + std::string vid = tapeQueueStateTrans.vid; + auto initialState = tapeQueueStateTrans.initialState; + auto initialRetrieveQueueToTransferJobs = tapeQueueStateTrans.initialSetup.retrieveQueueToTransferJobs; + auto initialRetrieveQueueToReportJobs = tapeQueueStateTrans.initialSetup.retrieveQueueToReportJobs; + + // Initial tape state + catalogue.modifyTapeState(dummyAdmin, vid, initialState, std::nullopt, "Testing"); + + // Assert initial queue setup, for pre-validation of tests + { + re.fetchNoLock(); + if (initialRetrieveQueueToTransferJobs > 0) { + auto qAddr = re.getRetrieveQueueAddress(vid, JobQueueType::JobsToTransferForUser); + cta::objectstore::RetrieveQueue rQueue(qAddr, be); + rQueue.fetchNoLock(); + ASSERT_EQ(initialRetrieveQueueToTransferJobs, rQueue.getJobsSummary().jobs); + } else { + ASSERT_THROW(re.getRetrieveQueueAddress(vid, JobQueueType::JobsToTransferForUser), cta::objectstore::RootEntry::NoSuchRetrieveQueue); + } + if (initialRetrieveQueueToReportJobs > 0) { + auto qAddr = re.getRetrieveQueueAddress(vid, JobQueueType::JobsToReportToUser); + cta::objectstore::RetrieveQueue rQueue(qAddr, be); + rQueue.fetchNoLock(); + ASSERT_EQ(initialRetrieveQueueToReportJobs, rQueue.getJobsSummary().jobs); + } else { + ASSERT_THROW(re.getRetrieveQueueAddress(vid, JobQueueType::JobsToReportToUser), 
cta::objectstore::RootEntry::NoSuchRetrieveQueue); + } + } + } + + // Trigger tape state change + for (auto & tapeQueueStateTrans : GetParam().tapeQueueTransitionList) { + + std::string vid = tapeQueueStateTrans.vid; + auto initialState = tapeQueueStateTrans.initialState; + auto desiredState = tapeQueueStateTrans.desiredState; + + if (initialState == desiredState) { + continue; // No desired tape state change, ignore + } + + scheduler.triggerTapeStateChange(dummyAdmin, vid, desiredState, "", lc); + } + + // Execute cleanup runner + { + cta::objectstore::QueueCleanupRunner qCleanupRunnerBroken(agentForCleanupRef, brokenOStore, catalogue, GetParam().cleanupTimeout); + cta::objectstore::QueueCleanupRunner qCleanupRunnerOk(agentForCleanupRef, oKOStore, catalogue, GetParam().cleanupTimeout); + + ASSERT_THROW(qCleanupRunnerBroken.runOnePass(lc), OStoreDBWithAgentBroken::TriggeredException); + ASSERT_NO_THROW(qCleanupRunnerOk.runOnePass(lc)); // Two passes are needed for the other cleanup runner to be able to track the heartbeats + ASSERT_NO_THROW(qCleanupRunnerOk.runOnePass(lc)); + } + + // Validate final setup of tapes and corresponding queues, after the cleanup runner has been executed + for (auto & tapeQueueStateTrans : GetParam().tapeQueueTransitionList) { + + std::string vid = tapeQueueStateTrans.vid; + auto expectedState = tapeQueueStateTrans.expectedState; + auto expectedRetrieveQueueToTransferJobs = tapeQueueStateTrans.finalSetup.retrieveQueueToTransferJobs; + auto expectedRetrieveQueueToReportJobs = tapeQueueStateTrans.finalSetup.retrieveQueueToReportJobs; + + // Check final tape state + ASSERT_EQ(expectedState, catalogue.getTapeState(vid)); + + // Assert final queue setup + { + re.fetchNoLock(); + if (expectedRetrieveQueueToTransferJobs > 0) { + auto qAddr = re.getRetrieveQueueAddress(vid, JobQueueType::JobsToTransferForUser); + cta::objectstore::RetrieveQueue rQueue(qAddr, be); + rQueue.fetchNoLock(); + ASSERT_EQ(expectedRetrieveQueueToTransferJobs, rQueue.getJobsSummary().jobs); + } else { + ASSERT_THROW(re.getRetrieveQueueAddress(vid, JobQueueType::JobsToTransferForUser), cta::objectstore::RootEntry::NoSuchRetrieveQueue); + } + if (expectedRetrieveQueueToReportJobs > 0) { + auto qAddr = re.getRetrieveQueueAddress(vid, JobQueueType::JobsToReportToUser); + cta::objectstore::RetrieveQueue rQueue(qAddr, be); + rQueue.fetchNoLock(); + ASSERT_EQ(expectedRetrieveQueueToReportJobs, rQueue.getJobsSummary().jobs); + } else { + ASSERT_THROW(re.getRetrieveQueueAddress(vid, JobQueueType::JobsToReportToUser), cta::objectstore::RootEntry::NoSuchRetrieveQueue); + } + } + } +} + +static cta::OStoreDBFactory<cta::objectstore::BackendVFS> OStoreDBFactoryVFS; + +static std::list<RetrieveRequestSetup> Test_retrieveRequestSetupList = { + { 10, "Tape0", { } }, +}; +static std::list<ConcurrentTapeQueueTransition> Test_tapeQueueTransitionList_Completed = { + { + "Tape0", + Tape::ACTIVE, Tape::REPACKING, Tape::REPACKING, + {10, 0}, { 0, 10 } + }, +}; +static std::list<ConcurrentTapeQueueTransition> Test_tapeQueueTransitionList_Failed = { + { + "Tape0", + Tape::ACTIVE, Tape::REPACKING, Tape::REPACKING_PENDING, + { 10, 0 }, { 10, 0 } + }, +}; + +INSTANTIATE_TEST_CASE_P(OStoreTestVFS, QueueCleanupRunnerConcurrentTest, + ::testing::Values( + // With a timeout of 0.0s the 2nd cleanup runner will be able to complete the task after the 1st has failed + QueueCleanupRunnerConcurrentTestParams( + OStoreDBFactoryVFS, + Test_retrieveRequestSetupList, + Test_tapeQueueTransitionList_Completed, + 0.0), + // With a timeout 
of 120.0s the 2nd cleanup runner will NOT immediately complete the task after the 1st has failed + QueueCleanupRunnerConcurrentTestParams( + OStoreDBFactoryVFS, + Test_retrieveRequestSetupList, + Test_tapeQueueTransitionList_Failed, + 120.0) + ) +); +} \ No newline at end of file diff --git a/objectstore/QueueCleanupRunnerTest.cpp b/objectstore/QueueCleanupRunnerTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0333a2614b6f90565b0013ee7f80a8b1e84d7c8f --- /dev/null +++ b/objectstore/QueueCleanupRunnerTest.cpp @@ -0,0 +1,503 @@ +/* + * @project The CERN Tape Archive (CTA) + * @copyright Copyright © 2021-2022 CERN + * @license This program is free software, distributed under the terms of the GNU General Public + * Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can + * redistribute it and/or modify it under the terms of the GPL Version 3, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * In applying this licence, CERN does not waive the privileges and immunities + * granted to it by virtue of its status as an Intergovernmental Organization or + * submit itself to any jurisdiction. + */ + +#include "objectstore/BackendRadosTestSwitch.hpp" +#include "tests/TestsCompileTimeSwitches.hpp" +#include "scheduler/SchedulerDatabase.hpp" +#include "scheduler/SchedulerDatabaseFactory.hpp" +#include "common/dataStructures/SecurityIdentity.hpp" +#include "catalogue/InMemoryCatalogue.hpp" +#include "objectstore/BackendRados.hpp" +#include "common/log/DummyLogger.hpp" +#ifdef STDOUT_LOGGING +#include "common/log/StdoutLogger.hpp" +#endif + +#include <exception> +#include <gtest/gtest.h> +#include <algorithm> +#include <uuid/uuid.h> + +#include "objectstore/ObjectStoreFixture.hpp" +#include "catalogue/DummyCatalogue.hpp" +#include "objectstore/BackendVFS.hpp" +#include "objectstore/GarbageCollector.hpp" +#include "objectstore/QueueCleanupRunnerTestUtils.hpp" +#include "objectstore/QueueCleanupRunner.hpp" +#include "scheduler/OStoreDB/OStoreDBFactory.hpp" +#include "scheduler/Scheduler.hpp" +#include "common/log/StdoutLogger.hpp" + +//#define STDOUT_LOGGING + +namespace unitTests { + +using Tape = cta::common::dataStructures::Tape; + +/** + * This structure represents the state and number of jobs of a queue at a certain point. + * It is used to parameterize the tests. + */ +struct TapeQueueSetup { + Tape::State state; + uint32_t retrieveQueueToTransferJobs; + uint32_t retrieveQueueToReportJobs; +}; + +/** + * This structure represents the initial and final setup of a queue. + * It is used to parameterize the tests. + */ +struct TapeQueueTransition { + std::string vid; + TapeQueueSetup initialSetup; + TapeQueueSetup finalSetup; +}; + +/** + * This structure parameterizes the initial number of requests to insert on a queue and the existing replicas. + * It is used to parameterize the tests. + */ +struct RetrieveRequestSetup { + uint32_t numberOfRequests; + std::string activeCopyVid; + std::list<std::string> replicaCopyVids; +}; + + +/** + * This structure is used to parameterize OStore database tests. 
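+ * Each instance bundles the scheduler database factory with the retrieve requests to queue and the tape/queue transitions expected once the cleanup runner has executed.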
+ */ +struct QueueCleanupRunnerTestParams { + cta::SchedulerDatabaseFactory &dbFactory; + std::list<RetrieveRequestSetup> &retrieveRequestSetupList; + std::list<TapeQueueTransition> &tapeQueueTransitionList; + + explicit QueueCleanupRunnerTestParams( + cta::SchedulerDatabaseFactory *dbFactory, + std::list<RetrieveRequestSetup> &retrieveRequestSetupList, + std::list<TapeQueueTransition> &tapeQueueTransitionList) : + dbFactory(*dbFactory), + retrieveRequestSetupList(retrieveRequestSetupList), + tapeQueueTransitionList(tapeQueueTransitionList) {} +}; + +/** + * The OStore database test is a parameterized test. It takes an + * OStore database factory as a parameter. + */ +class QueueCleanupRunnerTest: public + ::testing::TestWithParam<QueueCleanupRunnerTestParams> { +public: + + QueueCleanupRunnerTest() noexcept { + } + + class FailedToGetDatabase: public std::exception { + public: + const char *what() const noexcept override { + return "Failed to get scheduler database"; + } + }; + + class FailedToGetCatalogue: public std::exception { + public: + const char *what() const noexcept override { + return "Failed to get catalogue"; + } + }; + + class FailedToGetScheduler: public std::exception { + public: + const char *what() const noexcept override { + return "Failed to get scheduler"; + } + }; + + virtual void SetUp() { + // We do a deep reference to the member as the C++ compiler requires the function to be + // already defined if called implicitly. + const auto &factory = GetParam().dbFactory; + m_catalogue = std::make_unique<cta::catalogue::DummyCatalogue>(); + // Get the OStore DB from the factory. + auto osdb = factory.create(m_catalogue); + // Make sure the type of the SchedulerDatabase is correct (it should be an OStoreDBWrapperInterface). + dynamic_cast<cta::objectstore::OStoreDBWrapperInterface *> (osdb.get()); + // We know the cast will not fail, so we can safely do it (otherwise we could leak memory). 
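+ // Transfer ownership of the wrapper from the factory's unique_ptr to m_db.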
+ m_db.reset(dynamic_cast<cta::objectstore::OStoreDBWrapperInterface *> (osdb.release())); + // Setup scheduler + m_scheduler = std::make_unique<cta::Scheduler>(*m_catalogue, *m_db, 5, 2 * 1000 * 1000); + } + + virtual void TearDown() { + cta::objectstore::Helpers::flushRetrieveQueueStatisticsCache(); + m_scheduler.reset(); + m_db.reset(); + m_catalogue.reset(); + } + + cta::objectstore::OStoreDBWrapperInterface &getDb() { + cta::objectstore::OStoreDBWrapperInterface *const ptr = m_db.get(); + if (nullptr == ptr) { + throw FailedToGetDatabase(); + } + return *ptr; + } + + cta::catalogue::DummyCatalogue &getCatalogue() { + cta::catalogue::DummyCatalogue *const ptr = dynamic_cast<cta::catalogue::DummyCatalogue*>(m_catalogue.get()); + if (nullptr == ptr) { + throw FailedToGetCatalogue(); + } + return *ptr; + } + + cta::Scheduler &getScheduler() { + + cta::Scheduler *const ptr = m_scheduler.get(); + if (nullptr == ptr) { + throw FailedToGetScheduler(); + } + return *ptr; + } + + Tape::State m_finalTapeState; + +private: + // Prevent copying + QueueCleanupRunnerTest(const QueueCleanupRunnerTest &) = delete; + + // Prevent assignment + QueueCleanupRunnerTest & operator= (const QueueCleanupRunnerTest &) = delete; + std::unique_ptr<cta::objectstore::OStoreDBWrapperInterface> m_db; + std::unique_ptr<cta::catalogue::Catalogue> m_catalogue; + std::unique_ptr<cta::Scheduler> m_scheduler; +}; + + +TEST_P(QueueCleanupRunnerTest, CleanupRunnerParameterizedTest) { + using cta::common::dataStructures::JobQueueType; + // We will need a log object +#ifdef STDOUT_LOGGING + cta::log::StdoutLogger dl("dummy", "unitTest"); +#else + cta::log::DummyLogger dl("dummy", "unitTest"); +#endif + cta::log::LogContext lc(dl); + // We need a dummy catalogue + cta::catalogue::DummyCatalogue & catalogue = getCatalogue(); + // Object store + cta::objectstore::OStoreDBWrapperInterface & oStore = getDb(); + // Backend + auto & be = dynamic_cast<cta::objectstore::BackendVFS&>(oStore.getBackend()); + // Remove this comment to avoid cleaning the object store files on destruction, useful for debugging + // be.noDeleteOnExit(); + // Scheduler + cta::Scheduler & scheduler = getScheduler(); + // Dummy admin + const cta::common::dataStructures::SecurityIdentity dummyAdmin; + + //AgentA for queueing + cta::objectstore::AgentReference agentForSetupRef("AgentForSetup", dl); + cta::objectstore::Agent agentForSetup(agentForSetupRef.getAgentAddress(), be); + + //AgentB for popping + cta::objectstore::AgentReference agentForCleanupRef("AgentForCleanup", dl); + cta::objectstore::Agent agentForCleanup(agentForCleanupRef.getAgentAddress(), be); + + // Create the root entry + cta::objectstore::EntryLogSerDeser el("user0", "unittesthost", time(nullptr)); + cta::objectstore::RootEntry re(be); + cta::objectstore::ScopedExclusiveLock rel(re); + re.fetch(); + //re.initialize(); + + // Create the agent register + re.addOrGetAgentRegisterPointerAndCommit(agentForSetupRef, el, lc); + re.addOrGetAgentRegisterPointerAndCommit(agentForCleanupRef, el, lc); + rel.release(); + + agentForSetup.initialize(); + agentForSetup.insertAndRegisterSelf(lc); + agentForCleanup.initialize(); + agentForCleanup.insertAndRegisterSelf(lc); + + // Create retrieve requests and add them to the queues + // Create queues when they do not exist + for (auto retrieveRequestSetupList : GetParam().retrieveRequestSetupList) { + + // Identify list of vids where copies exist, including active copy + std::set<std::string> allVIds; + 
allVIds.insert(retrieveRequestSetupList.replicaCopyVids.begin(), retrieveRequestSetupList.replicaCopyVids.end()); + allVIds.insert(retrieveRequestSetupList.activeCopyVid); + std::string activeVid = retrieveRequestSetupList.activeCopyVid; + + // Generate requests + std::list<std::unique_ptr<cta::objectstore::RetrieveRequest> > requestsPtrs; + cta::objectstore::ContainerAlgorithms<cta::objectstore::RetrieveQueue, cta::objectstore::RetrieveQueueToTransfer>::InsertedElement::list requests; + fillRetrieveRequestsForCleanupRunner(requests, retrieveRequestSetupList.numberOfRequests, requestsPtrs, allVIds, activeVid, be, agentForSetupRef); //memory leak avoided here with 'requestsPtrs' + + // Create queue for requests to active copy + std::string agentForSetupAddr = agentForSetupRef.getAgentAddress(); + { + cta::objectstore::ScopedExclusiveLock relQ(re); + re.fetch(); + re.addOrGetRetrieveQueueAndCommit(activeVid, agentForSetupRef, JobQueueType::JobsToTransferForUser); + } + + // Insert requests into active vid queue + using RetrieveQueueAlgorithm = cta::objectstore::ContainerAlgorithms<cta::objectstore::RetrieveQueue, cta::objectstore::RetrieveQueueToTransfer>; + RetrieveQueueAlgorithm retrieveQueueAlgo(be, agentForSetupRef); + retrieveQueueAlgo.referenceAndSwitchOwnership(activeVid, agentForSetupAddr, requests, lc); + } + + // Setup initial tape states and validate number of requests + for (auto tapeQueueStateTrans : GetParam().tapeQueueTransitionList) { + + std::string vid = tapeQueueStateTrans.vid; + auto initialState = tapeQueueStateTrans.initialSetup.state; + auto initialRetrieveQueueToTransferJobs = tapeQueueStateTrans.initialSetup.retrieveQueueToTransferJobs; + auto initialRetrieveQueueToReportJobs = tapeQueueStateTrans.initialSetup.retrieveQueueToReportJobs; + + // Initial tape state + catalogue.modifyTapeState(dummyAdmin, vid, initialState, std::nullopt, "Testing"); + + // Assert initial queue setup, for pre-validation of tests + { + re.fetchNoLock(); + if (initialRetrieveQueueToTransferJobs > 0) { + auto qAddr = re.getRetrieveQueueAddress(vid, JobQueueType::JobsToTransferForUser); + cta::objectstore::RetrieveQueue rQueue(qAddr, be); + rQueue.fetchNoLock(); + ASSERT_EQ(initialRetrieveQueueToTransferJobs, rQueue.getJobsSummary().jobs); + } else { + ASSERT_THROW(re.getRetrieveQueueAddress(vid, JobQueueType::JobsToTransferForUser), cta::objectstore::RootEntry::NoSuchRetrieveQueue); + } + if (initialRetrieveQueueToReportJobs > 0) { + auto qAddr = re.getRetrieveQueueAddress(vid, JobQueueType::JobsToReportToUser); + cta::objectstore::RetrieveQueue rQueue(qAddr, be); + rQueue.fetchNoLock(); + ASSERT_EQ(initialRetrieveQueueToReportJobs, rQueue.getJobsSummary().jobs); + } else { + ASSERT_THROW(re.getRetrieveQueueAddress(vid, JobQueueType::JobsToReportToUser), cta::objectstore::RootEntry::NoSuchRetrieveQueue); + } + } + } + + // Trigger tape state change + for (auto tapeQueueStateTrans : GetParam().tapeQueueTransitionList) { + + std::string vid = tapeQueueStateTrans.vid; + auto initialState = tapeQueueStateTrans.initialSetup.state; + auto finalState = tapeQueueStateTrans.finalSetup.state; + + if (initialState == finalState) { + continue; // No desired tape state change, ignore + } + + scheduler.triggerTapeStateChange(dummyAdmin, vid, finalState, "", lc); + } + + // Execute cleanup runner + { + cta::objectstore::QueueCleanupRunner qcr(agentForCleanupRef, oStore, catalogue); + qcr.runOnePass(lc); // RUNNER + } + + // Validate final setup of tapes and corresponding queues, after the cleanup runner 
has been executed + for (auto tapeQueueStateTrans : GetParam().tapeQueueTransitionList) { + + std::string vid = tapeQueueStateTrans.vid; + auto finalDesiredState = tapeQueueStateTrans.finalSetup.state; + auto finalRetrieveQueueToTransferJobs = tapeQueueStateTrans.finalSetup.retrieveQueueToTransferJobs; + auto finalRetrieveQueueToReportJobs = tapeQueueStateTrans.finalSetup.retrieveQueueToReportJobs; + + // Check final tape state + ASSERT_EQ(finalDesiredState, catalogue.getTapeState(vid)); + + // Assert final queue setup + { + re.fetchNoLock(); + if (finalRetrieveQueueToTransferJobs > 0) { + auto qAddr = re.getRetrieveQueueAddress(vid, JobQueueType::JobsToTransferForUser); + cta::objectstore::RetrieveQueue rQueue(qAddr, be); + rQueue.fetchNoLock(); + ASSERT_EQ(finalRetrieveQueueToTransferJobs, rQueue.getJobsSummary().jobs); + } else { + ASSERT_THROW(re.getRetrieveQueueAddress(vid, JobQueueType::JobsToTransferForUser), cta::objectstore::RootEntry::NoSuchRetrieveQueue); + } + if (finalRetrieveQueueToReportJobs > 0) { + auto qAddr = re.getRetrieveQueueAddress(vid, JobQueueType::JobsToReportToUser); + cta::objectstore::RetrieveQueue rQueue(qAddr, be); + rQueue.fetchNoLock(); + ASSERT_EQ(finalRetrieveQueueToReportJobs, rQueue.getJobsSummary().jobs); + } else { + ASSERT_THROW(re.getRetrieveQueueAddress(vid, JobQueueType::JobsToReportToUser), cta::objectstore::RootEntry::NoSuchRetrieveQueue); + } + } + } +} + +static cta::OStoreDBFactory<cta::objectstore::BackendVFS> OStoreDBFactoryVFS; + +// Testing requests without replicas + +// Test A1: Requests removed from an ACTIVE to REPACKING queue when no replicas are available +std::list<RetrieveRequestSetup> TestA1_retrieveRequestSetupList { + { 10, "Tape0", { } } +}; +std::list<TapeQueueTransition> TestA1_tapeQueueTransitionList { + { "Tape0", { Tape::ACTIVE, 10, 0 }, { Tape::REPACKING, 0, 10 } }, +}; + +// Test A2: Requests removed from a DISABLED to REPACKING queue when no replicas are available +std::list<RetrieveRequestSetup> TestA2_retrieveRequestSetupList { + { 10, "Tape0", { } } +}; +std::list<TapeQueueTransition> TestA2_tapeQueueTransitionList { + { "Tape0", { Tape::DISABLED, 10, 0 }, { Tape::REPACKING, 0, 10 } }, +}; + +// Test A3: Requests removed from an ACTIVE to BROKEN queue when no replicas are available +std::list<RetrieveRequestSetup> TestA3_retrieveRequestSetupList { + { 10, "Tape0", { } } +}; +std::list<TapeQueueTransition> TestA3_tapeQueueTransitionList { + { "Tape0", { Tape::ACTIVE, 10, 0 }, { Tape::BROKEN, 0, 10 } }, +}; + +// Test A4: Requests removed from a DISABLED to BROKEN queue when no replicas are available +std::list<RetrieveRequestSetup> TestA4_retrieveRequestSetupList { + { 10, "Tape0", { } } +}; +std::list<TapeQueueTransition> TestA4_tapeQueueTransitionList { + { "Tape0", { Tape::DISABLED, 10, 0 }, { Tape::BROKEN, 0, 10 } }, +}; + +// Test A5: No requests removed from an ACTIVE queue +std::list<RetrieveRequestSetup> TestA5_retrieveRequestSetupList { + { 10, "Tape0", { } } +}; +std::list<TapeQueueTransition> TestA5_tapeQueueTransitionList { + { "Tape0", { Tape::ACTIVE, 10, 0 }, { Tape::ACTIVE, 10, 0 } }, +}; + +// Test A6: No requests removed from a DISABLED queue +std::list<RetrieveRequestSetup> TestA6_retrieveRequestSetupList { + { 10, "Tape0", { } } +}; +std::list<TapeQueueTransition> TestA6_tapeQueueTransitionList { + { "Tape0", { Tape::DISABLED, 10, 0 }, { Tape::DISABLED, 10, 0 } }, +}; + +// Testing requests with double replicas + +// Test B1: Requests moved from a REPACKING queue to an ACTIVE queue
+std::list<RetrieveRequestSetup> TestB1_retrieveRequestSetupList { + { 10, "Tape0", { "Tape1" } } +}; +std::list<TapeQueueTransition> TestB1_tapeQueueTransitionList { + { "Tape0", { Tape::ACTIVE, 10, 0 }, { Tape::REPACKING, 0, 0 } }, + { "Tape1", { Tape::ACTIVE, 0, 0 }, { Tape::ACTIVE, 10, 0 } } +}; + +// Test B2: Requests moved from a REPACKING queue to a DISABLED queue +std::list<RetrieveRequestSetup> TestB2_retrieveRequestSetupList { + { 10, "Tape0", { "Tape1" } } +}; +std::list<TapeQueueTransition> TestB2_tapeQueueTransitionList { + { "Tape0", { Tape::ACTIVE, 10, 0 }, { Tape::REPACKING, 0, 0 } }, + { "Tape1", { Tape::DISABLED, 0, 0 }, { Tape::DISABLED, 10, 0 } } +}; + +// Test B3: Requests not moved from a REPACKING queue to an already BROKEN queue +std::list<RetrieveRequestSetup> TestB3_retrieveRequestSetupList { + { 10, "Tape0", { "Tape1" } } +}; +std::list<TapeQueueTransition> TestB3_tapeQueueTransitionList { + { "Tape0", { Tape::ACTIVE, 10, 0 }, { Tape::REPACKING, 0, 10 } }, + { "Tape1", { Tape::BROKEN, 0, 0 }, { Tape::BROKEN, 0, 0 } } +}; + +// Test B4: Requests not moved from a REPACKING queue to an already REPACKING queue +std::list<RetrieveRequestSetup> TestB4_retrieveRequestSetupList { + { 10, "Tape0", { "Tape1" } } +}; +std::list<TapeQueueTransition> TestB4_tapeQueueTransitionList { + { "Tape0", { Tape::ACTIVE, 10, 0 }, { Tape::REPACKING, 0, 10 } }, + { "Tape1", { Tape::REPACKING, 0, 0 }, { Tape::REPACKING, 0, 0 } } +}; + +// Testing requests with multiple replicas + +// Test C1: Requests moved from a REPACKING queue to 2 ACTIVE queues +std::list<RetrieveRequestSetup> TestC1_retrieveRequestSetupList { + { 5, "Tape0", { "Tape1" } }, + { 5, "Tape0", { "Tape2" } } +}; +std::list<TapeQueueTransition> TestC1_tapeQueueTransitionList { + { "Tape0", { Tape::ACTIVE, 10, 0 }, { Tape::REPACKING, 0, 0 } }, + { "Tape1", { Tape::ACTIVE, 0, 0 }, { Tape::ACTIVE, 5, 0 } }, + { "Tape2", { Tape::ACTIVE, 0, 0 }, { Tape::ACTIVE, 5, 0 } } +}; + +// Test C2: Requests moved from a REPACKING queue to ACTIVE (higher priority compared to DISABLED) +std::list<RetrieveRequestSetup> TestC2_retrieveRequestSetupList { + { 10, "Tape0", { "Tape1", "Tape2", "Tape3" } } +}; +std::list<TapeQueueTransition> TestC2_tapeQueueTransitionList { + { "Tape0", { Tape::ACTIVE, 10, 0 }, { Tape::REPACKING, 0, 0 } }, + { "Tape1", { Tape::ACTIVE, 0, 0 }, { Tape::DISABLED, 0, 0 } }, + { "Tape2", { Tape::ACTIVE, 0, 0 }, { Tape::ACTIVE, 10, 0 } }, + { "Tape3", { Tape::ACTIVE, 0, 0 }, { Tape::DISABLED, 0, 0 } } +}; + +// Test C3: Mix of multiple requests being moved around +std::list<RetrieveRequestSetup> TestC3_retrieveRequestSetupList { + { 10, "Tape0", { "Tape1" } }, + { 10, "Tape0", { "Tape1", "Tape2" } }, + { 10, "Tape1", { "Tape2", "Tape3" } }, + { 10, "Tape2", { "Tape3", "Tape4" } } +}; +std::list<TapeQueueTransition> TestC3_tapeQueueTransitionList { + { "Tape0", { Tape::ACTIVE, 20, 0 }, { Tape::REPACKING, 0, 10 } }, + { "Tape1", { Tape::ACTIVE, 10, 0 }, { Tape::BROKEN, 0, 0 } }, + { "Tape2", { Tape::ACTIVE, 10, 0 }, { Tape::DISABLED, 20, 0 } }, + { "Tape3", { Tape::ACTIVE, 0, 0 }, { Tape::ACTIVE, 10, 0 } }, + { "Tape4", { Tape::ACTIVE, 0, 0 }, { Tape::ACTIVE, 0, 0 } } +}; + +INSTANTIATE_TEST_CASE_P(OStoreTestVFS, QueueCleanupRunnerTest, + ::testing::Values( + QueueCleanupRunnerTestParams(&OStoreDBFactoryVFS, TestA1_retrieveRequestSetupList, TestA1_tapeQueueTransitionList), + QueueCleanupRunnerTestParams(&OStoreDBFactoryVFS, TestA2_retrieveRequestSetupList, TestA2_tapeQueueTransitionList), +
QueueCleanupRunnerTestParams(&OStoreDBFactoryVFS, TestA3_retrieveRequestSetupList, TestA3_tapeQueueTransitionList), + QueueCleanupRunnerTestParams(&OStoreDBFactoryVFS, TestA4_retrieveRequestSetupList, TestA4_tapeQueueTransitionList), + QueueCleanupRunnerTestParams(&OStoreDBFactoryVFS, TestA5_retrieveRequestSetupList, TestA5_tapeQueueTransitionList), + QueueCleanupRunnerTestParams(&OStoreDBFactoryVFS, TestA6_retrieveRequestSetupList, TestA6_tapeQueueTransitionList), + + QueueCleanupRunnerTestParams(&OStoreDBFactoryVFS, TestB1_retrieveRequestSetupList, TestB1_tapeQueueTransitionList), + QueueCleanupRunnerTestParams(&OStoreDBFactoryVFS, TestB2_retrieveRequestSetupList, TestB2_tapeQueueTransitionList), + QueueCleanupRunnerTestParams(&OStoreDBFactoryVFS, TestB3_retrieveRequestSetupList, TestB3_tapeQueueTransitionList), + QueueCleanupRunnerTestParams(&OStoreDBFactoryVFS, TestB4_retrieveRequestSetupList, TestB4_tapeQueueTransitionList), + + QueueCleanupRunnerTestParams(&OStoreDBFactoryVFS, TestC1_retrieveRequestSetupList, TestC1_tapeQueueTransitionList), + QueueCleanupRunnerTestParams(&OStoreDBFactoryVFS, TestC2_retrieveRequestSetupList, TestC2_tapeQueueTransitionList), + QueueCleanupRunnerTestParams(&OStoreDBFactoryVFS, TestC3_retrieveRequestSetupList, TestC3_tapeQueueTransitionList) + ) + ); +} \ No newline at end of file diff --git a/objectstore/QueueCleanupRunnerTestUtils.cpp b/objectstore/QueueCleanupRunnerTestUtils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2c5b94ce8a209cd52cd206d5dd70896723e70655 --- /dev/null +++ b/objectstore/QueueCleanupRunnerTestUtils.cpp @@ -0,0 +1,108 @@ +/* + * @project The CERN Tape Archive (CTA) + * @copyright Copyright © 2021-2022 CERN + * @license This program is free software, distributed under the terms of the GNU General Public + * Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can + * redistribute it and/or modify it under the terms of the GPL Version 3, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * In applying this licence, CERN does not waive the privileges and immunities + * granted to it by virtue of its status as an Intergovernmental Organization or + * submit itself to any jurisdiction. 
+ */ + +#include "scheduler/SchedulerDatabaseFactory.hpp" +#include "common/dataStructures/SecurityIdentity.hpp" +#include "catalogue/InMemoryCatalogue.hpp" +#ifdef STDOUT_LOGGING +#include "common/log/StdoutLogger.hpp" +#endif + +#include <exception> +#include <gtest/gtest.h> +#include <uuid/uuid.h> + +#include "objectstore/ObjectStoreFixture.hpp" +#include "objectstore/BackendVFS.hpp" +#include "objectstore/GarbageCollector.hpp" +#include "scheduler/Scheduler.hpp" + +namespace unitTests { + +/** + * Create Objectstore RetrieveRequest and insert them in a list that could be used to queue with the Algorithms + * @param requests the list of RetrieveRequests that will be queued in the objectstore + * @param requestPtrs the pointers of the RetrieveRequests that will be queued in the objectstore + * @param be objectstore backend + * @param agentRef the current agent that queues + * @param startFseq allows to set the FSeq of the first file to be queued (in case this method is called multiple times) + */ +void fillRetrieveRequestsForCleanupRunner( + typename cta::objectstore::ContainerAlgorithms<cta::objectstore::RetrieveQueue, cta::objectstore::RetrieveQueueToTransfer>::InsertedElement::list &requests, + uint32_t requestNr, + std::list<std::unique_ptr<cta::objectstore::RetrieveRequest> > &requestPtrs, //List to avoid memory leak on ArchiveQueueAlgorithms test + std::set<std::string> & tapeNames, // List of tapes that will contain a replica + std::string & activeCopyTape, + cta::objectstore::BackendVFS &be, + cta::objectstore::AgentReference &agentRef, uint64_t startFseq) { + using namespace cta::objectstore; + for (size_t i = 0; i < requestNr; i++) { + std::string rrAddr = agentRef.nextId("RetrieveRequest"); + agentRef.addToOwnership(rrAddr, be); + cta::common::dataStructures::MountPolicy mp; + cta::common::dataStructures::RetrieveFileQueueCriteria rqc; + rqc.archiveFile.archiveFileID = 123456789L; + rqc.archiveFile.diskFileId = "eos://diskFile"; + rqc.archiveFile.checksumBlob.insert(cta::checksum::NONE, ""); + rqc.archiveFile.creationTime = 0; + rqc.archiveFile.reconciliationTime = 0; + rqc.archiveFile.diskFileInfo = cta::common::dataStructures::DiskFileInfo(); + rqc.archiveFile.diskInstance = "eoseos"; + rqc.archiveFile.fileSize = 1000 + i; + rqc.archiveFile.storageClass = "sc"; + uint32_t currentCopyNb = 0; + uint32_t activeCopyNr = 0; + for (auto & tapeName: tapeNames) { + cta::common::dataStructures::TapeFile tf; + tf.blockId = 0; + tf.fileSize = 1; + tf.copyNb = currentCopyNb; + tf.creationTime = time(nullptr); + tf.fSeq = startFseq; + tf.vid = tapeName; + rqc.archiveFile.tapeFiles.push_back(tf); + + if (activeCopyTape == tapeName) { + activeCopyNr = currentCopyNb; + } + currentCopyNb++; + } + rqc.mountPolicy.archiveMinRequestAge = 1; + rqc.mountPolicy.archivePriority = 1; + rqc.mountPolicy.creationLog.time = time(nullptr); + rqc.mountPolicy.lastModificationLog.time = time(nullptr); + rqc.mountPolicy.retrieveMinRequestAge = 1; + rqc.mountPolicy.retrievePriority = 1; + requestPtrs.emplace_back(new cta::objectstore::RetrieveRequest(rrAddr, be)); + requests.emplace_back(ContainerAlgorithms<RetrieveQueue, RetrieveQueueToTransfer>::InsertedElement{ + requestPtrs.back().get(), activeCopyNr, startFseq++, 667, mp, std::nullopt, std::nullopt + }); + auto &rr = *requests.back().retrieveRequest; + rr.initialize(); + rr.setRetrieveFileQueueCriteria(rqc); + cta::common::dataStructures::RetrieveRequest sReq; + sReq.archiveFileID = rqc.archiveFile.archiveFileID; + sReq.creationLog.time = time(nullptr); 
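+ // Attach the scheduler request, add the job for the active copy, hand ownership to the setup agent and insert the request into the object store.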
+ rr.setSchedulerRequest(sReq); + rr.addJob(activeCopyNr, 1, 1, 1); + rr.setOwner(agentRef.getAgentAddress()); + rr.setActiveCopyNumber(activeCopyNr); + rr.insert(); + } +} + +} \ No newline at end of file diff --git a/objectstore/QueueCleanupRunnerTestUtils.hpp b/objectstore/QueueCleanupRunnerTestUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..eb3fd35780e49a7204b34aa90cbc0640bdabc7b0 --- /dev/null +++ b/objectstore/QueueCleanupRunnerTestUtils.hpp @@ -0,0 +1,42 @@ +/* +* @project The CERN Tape Archive (CTA) +* @copyright Copyright © 2021-2022 CERN +* @license This program is free software, distributed under the terms of the GNU General Public +* Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can +* redistribute it and/or modify it under the terms of the GPL Version 3, or (at your +* option) any later version. +* +* This program is distributed in the hope that it will be useful, but WITHOUT ANY +* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +* PARTICULAR PURPOSE. See the GNU General Public License for more details. +* +* In applying this licence, CERN does not waive the privileges and immunities +* granted to it by virtue of its status as an Intergovernmental Organization or +* submit itself to any jurisdiction. +*/ + +#pragma once + +#include "objectstore/Sorter.hpp" +#include "objectstore/BackendVFS.hpp" + + +/** + * Plan => Cleanup runner keeps track of queues that need to be emptied + * If a queue is signaled for cleanup, the cleanup runner should take ownership of it, and move all the requests + * to other queues. + * If there is no other queue available, the request should be aborted and reported back to the user. + */ + +namespace unitTests { + +void fillRetrieveRequestsForCleanupRunner( + typename cta::objectstore::ContainerAlgorithms<cta::objectstore::RetrieveQueue, cta::objectstore::RetrieveQueueToTransfer>::InsertedElement::list &requests, + uint32_t requestNr, + std::list<std::unique_ptr<cta::objectstore::RetrieveRequest> > &requestPtrs, //List to avoid memory leak on ArchiveQueueAlgorithms test + std::set<std::string> & tapeNames, // List of tapes that will contain a replica + std::string & activeCopyTape, + cta::objectstore::BackendVFS &be, + cta::objectstore::AgentReference &agentRef, uint64_t startFseq = 0); + +} \ No newline at end of file diff --git a/objectstore/RepackRequest.cpp b/objectstore/RepackRequest.cpp index 35aa20811dd7ead63a5fdf55e32b9d4e5a468ab4..8e38a99c1a68684d6f94f2baefaa845e4cb8f521 100644 --- a/objectstore/RepackRequest.cpp +++ b/objectstore/RepackRequest.cpp @@ -80,7 +80,7 @@ void RepackRequest::initialize() { m_payload.set_lastexpandedfseq(0); m_payload.set_is_expand_finished(false); m_payload.set_is_expand_started(false); - m_payload.set_force_disabled_tape(false); + m_payload.set_force_disabled_tape(false); // TODO: To remove after REPACKING state is fully deployed m_payload.set_no_recall(false); m_payload.set_is_complete(false); m_payload.set_repack_finished_time(0); @@ -156,7 +156,6 @@ common::dataStructures::RepackInfo RepackRequest::getInfo() { ret.lastExpandedFseq = m_payload.lastexpandedfseq(); ret.userProvidedFiles = m_payload.userprovidedfiles(); ret.isExpandFinished = m_payload.is_expand_finished(); - ret.forceDisabledTape = m_payload.force_disabled_tape(); ret.noRecall = m_payload.no_recall(); EntryLogSerDeser creationLog; creationLog.deserialize(m_payload.creation_log()); @@ -298,16 +297,6 @@ common::dataStructures::EntryLog 
RepackRequest::getCreationLog() { return ret; } -void RepackRequest::setForceDisabledTape(const bool disabledTape){ - checkPayloadWritable(); - m_payload.set_force_disabled_tape(disabledTape); -} - -bool RepackRequest::getForceDisabledTape() { - checkPayloadReadable(); - return m_payload.force_disabled_tape(); -} - void RepackRequest::setNoRecall(const bool noRecall) { checkPayloadWritable(); m_payload.set_no_recall(noRecall); @@ -789,7 +778,6 @@ RepackRequest::AsyncOwnerAndStatusUpdater* RepackRequest::asyncUpdateOwnerAndSta retRef.m_repackInfo.status = (RepackInfo::Status) payload.status(); retRef.m_repackInfo.vid = payload.vid(); retRef.m_repackInfo.repackBufferBaseURL = payload.buffer_url(); - retRef.m_repackInfo.forceDisabledTape = payload.force_disabled_tape(); retRef.m_repackInfo.noRecall = payload.no_recall(); if (payload.move_mode()) { if (payload.add_copies_mode()) { diff --git a/objectstore/RepackRequest.hpp b/objectstore/RepackRequest.hpp index 4d69c9d4cddd8f91146183fc6432a87e71de4596..0e93a3b698119ca8d3c4bb8139637d5f7ff454a1 100644 --- a/objectstore/RepackRequest.hpp +++ b/objectstore/RepackRequest.hpp @@ -55,14 +55,6 @@ public: std::list<common::dataStructures::RepackInfo::RepackDestinationInfo> getRepackDestinationInfos(); void setCreationLog(const common::dataStructures::EntryLog & creationLog); common::dataStructures::EntryLog getCreationLog(); - /** - * Set the flag disabledTape to allow the mounting of a - * disabled tape for file retrieval - * @param disabledTape if true, the disabled tape will be mounted for retrieval, if false, the - * tape will not be mounted if it is disabled - */ - void setForceDisabledTape(const bool disabledTape); - bool getForceDisabledTape(); /** * Set the flag noRecall to only inject files that are in the buffer. 
This will prevent diff --git a/objectstore/RetrieveQueue.cpp b/objectstore/RetrieveQueue.cpp index ea4fbaba2c4bb49f51b15e59d327d5344c5847c0..19465fcac3220b801e916a0a9b041e710f5a7e3e 100644 --- a/objectstore/RetrieveQueue.cpp +++ b/objectstore/RetrieveQueue.cpp @@ -52,6 +52,8 @@ void RetrieveQueue::initialize(const std::string &vid) { m_payload.set_vid(vid); m_payload.set_mapsrebuildcount(0); m_payload.set_maxshardsize(m_maxShardSize); + m_payload.mutable_cleanupinfo()->set_docleanup(false); + m_payload.mutable_cleanupinfo()->set_heartbeat(0); m_payloadInterpreted = true; } @@ -789,6 +791,44 @@ void RetrieveQueue::removeJobsAndCommit(const std::list<std::string>& jobsToRemo } } +bool RetrieveQueue::getQueueCleanupDoCleanup() { + checkPayloadReadable(); + return m_payload.mutable_cleanupinfo()->docleanup(); +} + +std::optional<std::string> RetrieveQueue::getQueueCleanupAssignedAgent() { + checkPayloadReadable(); + if (m_payload.mutable_cleanupinfo()->has_assignedagent()) { + return std::optional{m_payload.mutable_cleanupinfo()->assignedagent()}; + } + return std::nullopt; +} + +uint64_t RetrieveQueue::getQueueCleanupHeartbeat() { + checkPayloadReadable(); + return m_payload.mutable_cleanupinfo()->heartbeat(); +} + +void RetrieveQueue::setQueueCleanupDoCleanup(bool value) { + checkPayloadWritable(); + m_payload.mutable_cleanupinfo()->set_docleanup(value); +} + +void RetrieveQueue::setQueueCleanupAssignedAgent(std::string agent) { + checkPayloadWritable(); + m_payload.mutable_cleanupinfo()->set_assignedagent(agent); +} + +void RetrieveQueue::clearQueueCleanupAssignedAgent() { + checkPayloadWritable(); + m_payload.mutable_cleanupinfo()->clear_assignedagent(); +} + +void RetrieveQueue::tickQueueCleanupHeartbeat() { + checkPayloadWritable(); + m_payload.mutable_cleanupinfo()->set_heartbeat(m_payload.mutable_cleanupinfo()->heartbeat() + 1); +} + void RetrieveQueue::garbageCollect(const std::string &presumedOwner, AgentReference & agentReference, log::LogContext & lc, cta::catalogue::Catalogue & catalogue) { throw cta::exception::Exception("In RetrieveQueue::garbageCollect(): not implemented"); diff --git a/objectstore/RetrieveQueue.hpp b/objectstore/RetrieveQueue.hpp index 6e79bac57f7190f6a081008c2df8d86323ef17be..f40a28155c687cb8fab8b71f05425f0581d0ba14 100644 --- a/objectstore/RetrieveQueue.hpp +++ b/objectstore/RetrieveQueue.hpp @@ -148,6 +148,17 @@ public: std::list<std::string> getMountPolicyNames(); void removeJobsAndCommit(const std::list<std::string> & jobsToRemove); + + bool getQueueCleanupDoCleanup(); + void setQueueCleanupDoCleanup(bool value = true); + + std::optional<std::string> getQueueCleanupAssignedAgent(); + void setQueueCleanupAssignedAgent(std::string agent); + void clearQueueCleanupAssignedAgent(); + + uint64_t getQueueCleanupHeartbeat(); + void tickQueueCleanupHeartbeat(); + // -- Generic parameters std::string getVid(); diff --git a/objectstore/RetrieveQueueTest.cpp b/objectstore/RetrieveQueueTest.cpp index 5e03ee008a297795ad1fa25b2d96b57421fee194..2353f29fa54a979ad5a281e35ff10204bc76dc79 100644 --- a/objectstore/RetrieveQueueTest.cpp +++ b/objectstore/RetrieveQueueTest.cpp @@ -23,9 +23,11 @@ #include <random> +#include "ObjectStoreFixture.hpp" + namespace unitTests { -TEST(ObjectStore, RetrieveQueueBasicAccess) { +TEST_F(ObjectStore, RetrieveQueueBasicAccess) { cta::objectstore::BackendVFS be; cta::log::DummyLogger dl("dummy", "dummyLogger"); cta::log::LogContext lc(dl); @@ -53,7 +55,7 @@ TEST(ObjectStore, RetrieveQueueBasicAccess) { ASSERT_FALSE(rq.exists()); } 
-TEST(ObjectStore, RetrieveQueueShardingAndOrderingTest) { +TEST_F(ObjectStore, RetrieveQueueShardingAndOrderingTest) { cta::objectstore::BackendVFS be; cta::log::DummyLogger dl("dummy", "dummyLogger"); cta::log::LogContext lc(dl); @@ -154,7 +156,7 @@ TEST(ObjectStore, RetrieveQueueShardingAndOrderingTest) { ASSERT_FALSE(rq.exists()); } -TEST(ObjectStore, RetrieveQueueActivityCounts) { +TEST_F(ObjectStore, RetrieveQueueActivityCounts) { cta::objectstore::BackendVFS be; cta::log::DummyLogger dl("dummy", "dummyLogger"); cta::log::LogContext lc(dl); diff --git a/objectstore/RetrieveRequest.cpp b/objectstore/RetrieveRequest.cpp index 85ae90e384f9a87b2a50a10a52a734345bb8e41a..87e57e9d2425a4c9e29941d0d03c895ef62c2744 100644 --- a/objectstore/RetrieveRequest.cpp +++ b/objectstore/RetrieveRequest.cpp @@ -69,16 +69,22 @@ void RetrieveRequest::initialize() { // RetrieveRequest::garbageCollect() //------------------------------------------------------------------------------ void RetrieveRequest::garbageCollect(const std::string& presumedOwner, AgentReference & agentReference, log::LogContext & lc, - cta::catalogue::Catalogue & catalogue) { + cta::catalogue::Catalogue & catalogue) { + garbageCollectRetrieveRequest(presumedOwner, agentReference, lc, catalogue, false); +} + +void RetrieveRequest::garbageCollectRetrieveRequest(const std::string& presumedOwner, AgentReference & agentReference, log::LogContext & lc, + cta::catalogue::Catalogue & catalogue, bool isQueueCleanup) { checkPayloadWritable(); utils::Timer t; + std::string logHead = std::string("In RetrieveRequest::garbageCollect()") + (isQueueCleanup ? " [queue cleanup]" : "") + ": "; // Check the request is indeed owned by the right owner. if (getOwner() != presumedOwner) { log::ScopedParamContainer params(lc); params.add("jobObject", getAddressIfSet()) .add("presumedOwner", presumedOwner) .add("owner", getOwner()); - lc.log(log::INFO, "In RetrieveRequest::garbageCollect(): no garbage collection needed."); + lc.log(log::INFO, logHead + "no garbage collection needed."); } // The owner is indeed the right one. We should requeue the request either to // the to tranfer queue for one vid, or to the to report (or failed) queue (for one arbitrary VID). @@ -97,7 +103,7 @@ void RetrieveRequest::garbageCollect(const std::string& presumedOwner, AgentRefe } { std::stringstream err; - err << "In RetrieveRequest::garbageCollect(): could not find tapefile for copynb " << j.copynb(); + err << (logHead + "could not find tapefile for copynb ") << j.copynb(); throw exception::Exception(err.str()); } break; @@ -136,7 +142,7 @@ void RetrieveRequest::garbageCollect(const std::string& presumedOwner, AgentRefe .add("copynb", tf.copynb()) .add("tapeVid", tf.vid()); tl.addToLog(params); - lc.log(log::INFO, "In RetrieveRequest::garbageCollect(): requeued the repack retrieve request."); + lc.log(log::INFO, logHead + "requeued the repack retrieve request."); return; } } @@ -153,7 +159,7 @@ void RetrieveRequest::garbageCollect(const std::string& presumedOwner, AgentRefe // filter on tape availability. try { // If we have to fetch the status of the tapes and queued for the non-disabled vids. 
- bestVid=Helpers::selectBestRetrieveQueue(candidateVids, catalogue, m_objectStore,m_payload.repack_info().force_disabled_tape()); + bestVid=Helpers::selectBestRetrieveQueue(candidateVids, catalogue, m_objectStore, m_payload.repack_info().has_repack_request_address()); goto queueForTransfer; } catch (Helpers::NoTapeAvailableForRetrieve &) {} queueForFailure:; @@ -161,10 +167,11 @@ queueForFailure:; // If there is no candidate, we fail the jobs that are not yet, and queue the request as failed (on any VID). for (auto & j: *m_payload.mutable_jobs()) { if (j.status() == RetrieveJobStatus::RJS_ToTransfer) { - j.set_status(RetrieveJobStatus::RJS_ToReportToUserForFailure); - log::ScopedParamContainer params(lc); + j.set_status(m_payload.isrepack() ? RetrieveJobStatus::RJS_ToReportToRepackForFailure : RetrieveJobStatus::RJS_ToReportToUserForFailure); + log::ScopedParamContainer params(lc); params.add("fileId", m_payload.archivefile().archivefileid()) - .add("copyNb", j.copynb()); + .add("copyNb", j.copynb()) + .add("isRepack", m_payload.isrepack()); for (auto &tf: m_payload.archivefile().tapefiles()) { if (tf.copynb() == j.copynb()) { params.add("tapeVid", tf.vid()) @@ -174,8 +181,8 @@ queueForFailure:; } // Generate the last failure for this job (tape unavailable). *j.mutable_failurelogs()->Add() = utils::getCurrentLocalTime() + " " + - utils::getShortHostname() + " In RetrieveRequest::garbageCollect(): No VID available to requeue the request. Failing it."; - lc.log(log::ERR, "In RetrieveRequest::garbageCollect(): No VID available to requeue the request. Failing all jobs."); + utils::getShortHostname() + logHead + "No VID available to requeue the request. Failing it."; + lc.log(log::ERR, logHead + "No VID available to requeue the request. Failing all jobs."); } } // Ok, the request is ready to be queued. 
We will queue it to the VID corresponding @@ -192,7 +199,7 @@ queueForFailure:; } { std::stringstream err; - err << "In RetrieveRequest::garbageCollect(): could not find tapefile for copynb " << activeCopyNb; + err << (logHead + "could not find tapefile for copynb ") << activeCopyNb; throw exception::Exception(err.str()); } failedVidFound:; @@ -224,7 +231,7 @@ queueForFailure:; .add("tapeVid", activeVid) .add("queueUpdateTime", queueUpdateTime) .add("commitUnlockQueueTime", commitUnlockQueueTime); - lc.log(log::INFO, "In RetrieveRequest::garbageCollect(): queued the request to the failed queue."); + lc.log(log::INFO, logHead + "queued the request to the failed queue."); } return; } @@ -240,7 +247,7 @@ queueForTransfer:; } { std::stringstream err; - err << "In RetrieveRequest::garbageCollect(): could not find tapefile for vid " << bestVid; + err << (logHead + "could not find tapefile for vid ") << bestVid; throw exception::Exception(err.str()); } tapeFileFound:; @@ -253,7 +260,7 @@ queueForTransfer:; } { std::stringstream err; - err << "In RetrieveRequest::garbageCollect(): could not find job for copynb " << bestTapeFile->copynb(); + err << (logHead + "could not find job for copynb ") << bestTapeFile->copynb(); throw exception::Exception(err.str()); } jobFound:; @@ -291,7 +298,7 @@ queueForTransfer:; .add("tapeSelectionTime", tapeSelectionTime) .add("queueUpdateTime", queueUpdateTime) .add("commitUnlockQueueTime", commitUnlockQueueTime); - lc.log(log::INFO, "In RetrieveRequest::garbageCollect(): requeued the request."); + lc.log(log::INFO, logHead + "requeued the request."); } timespec ts; // We will sleep a bit to make sure other processes can also access the queue @@ -316,7 +323,7 @@ queueForTransfer:; .add("queueUpdateTime", queueUpdateTime) .add("commitUnlockQueueTime", commitUnlockQueueTime) .add("sleepTime", sleepTime); - lc.log(log::INFO, "In RetrieveRequest::garbageCollect(): slept some time to not sit on the queue after GC requeueing."); + lc.log(log::INFO, logHead + "slept some time to not sit on the queue after GC requeueing."); } } } @@ -668,7 +675,7 @@ void RetrieveRequest::setRepackInfo(const RepackInfo& repackInfo) { } m_payload.mutable_repack_info()->set_has_user_provided_file(repackInfo.hasUserProvidedFile); - m_payload.mutable_repack_info()->set_force_disabled_tape(repackInfo.forceDisabledTape); + m_payload.mutable_repack_info()->set_force_disabled_tape(false); // TODO: To remove after REPACKING state is fully deployed m_payload.mutable_repack_info()->set_file_buffer_url(repackInfo.fileBufferURL); m_payload.mutable_repack_info()->set_repack_request_address(repackInfo.repackRequestAddress); m_payload.mutable_repack_info()->set_fseq(repackInfo.fSeq); @@ -966,7 +973,6 @@ auto RetrieveRequest::asyncUpdateJobOwner(uint32_t copyNumber, const std::string ri.isRepack = true; ri.repackRequestAddress = payload.repack_info().repack_request_address(); ri.fSeq = payload.repack_info().fseq(); - ri.forceDisabledTape = payload.repack_info().force_disabled_tape(); } // TODO serialization of payload maybe not necessary oh.set_payload(payload.SerializeAsString()); diff --git a/objectstore/RetrieveRequest.hpp b/objectstore/RetrieveRequest.hpp index fac709e7f740798a38843616e74af680ff3faa0f..914006646a225ff572a295f21eb792d8a7b9bd8b 100644 --- a/objectstore/RetrieveRequest.hpp +++ b/objectstore/RetrieveRequest.hpp @@ -46,6 +46,8 @@ public: void initialize(); void garbageCollect(const std::string &presumedOwner, AgentReference & agentReference, log::LogContext & lc, cta::catalogue::Catalogue & 
catalogue) override; + void garbageCollectRetrieveRequest(const std::string &presumedOwner, AgentReference & agentReference, log::LogContext & lc, + cta::catalogue::Catalogue & catalogue, bool isQueueCleanup); // Job management ============================================================ void addJob(uint32_t copyNumber, uint16_t maxRetriesWithinMount, uint16_t maxTotalRetries, uint16_t maxReportRetries); std::string getLastActiveVid(); @@ -147,7 +149,6 @@ public: }; struct RepackInfo { bool isRepack = false; - bool forceDisabledTape = false; std::map<uint32_t, std::string> archiveRouteMap; std::set<uint32_t> copyNbsToRearchive; std::string repackRequestAddress; @@ -171,10 +172,8 @@ public: rrri.set_file_buffer_url(fileBufferURL); rrri.set_repack_request_address(repackRequestAddress); rrri.set_fseq(fSeq); - rrri.set_force_disabled_tape(forceDisabledTape); - if(rrri.has_has_user_provided_file()){ - rrri.set_has_user_provided_file(hasUserProvidedFile); - } + rrri.set_force_disabled_tape(false); // TODO: To remove after REPACKING state is fully deployed + rrri.set_has_user_provided_file(hasUserProvidedFile); } void deserialize(const cta::objectstore::serializers::RetrieveRequestRepackInfo & rrri) { @@ -184,9 +183,8 @@ public: fileBufferURL = rrri.file_buffer_url(); repackRequestAddress = rrri.repack_request_address(); fSeq = rrri.fseq(); - forceDisabledTape = rrri.force_disabled_tape(); if(rrri.has_has_user_provided_file()){ - hasUserProvidedFile = rrri.has_user_provided_file(); + hasUserProvidedFile = rrri.has_user_provided_file(); } } }; diff --git a/objectstore/RootEntryTest.cpp b/objectstore/RootEntryTest.cpp index a1b0b2e8838537592c37850397e60e7c2b2f202e..ae8d00eef12011905a9f082a7f829669c7fba6e6 100644 --- a/objectstore/RootEntryTest.cpp +++ b/objectstore/RootEntryTest.cpp @@ -29,9 +29,11 @@ #include "RetrieveQueue.hpp" #include "RootEntry.hpp" +#include "ObjectStoreFixture.hpp" + namespace unitTests { -TEST(ObjectStore, RootEntryBasicAccess) { +TEST_F(ObjectStore, RootEntryBasicAccess) { cta::objectstore::BackendVFS be; cta::log::DummyLogger dl("dummy", "dummyLogger"); cta::log::LogContext lc(dl); @@ -75,7 +77,7 @@ TEST(ObjectStore, RootEntryBasicAccess) { ASSERT_FALSE(re.exists()); } -TEST(ObjectStore, RootEntryArchiveQueues) { +TEST_F(ObjectStore, RootEntryArchiveQueues) { using cta::common::dataStructures::JobQueueType; cta::objectstore::BackendVFS be; cta::objectstore::EntryLogSerDeser el("user0", @@ -138,7 +140,7 @@ TEST(ObjectStore, RootEntryArchiveQueues) { ASSERT_FALSE(re.exists()); } -TEST(ObjectStore, RootEntryDriveRegister) { +TEST_F(ObjectStore, RootEntryDriveRegister) { cta::objectstore::BackendVFS be; { // Try to create the root entry @@ -194,7 +196,7 @@ TEST(ObjectStore, RootEntryDriveRegister) { ASSERT_FALSE(re.exists()); } -TEST(ObjectStore, RootEntryAgentRegister) { +TEST_F(ObjectStore, RootEntryAgentRegister) { cta::objectstore::BackendVFS be; { // Try to create the root entry @@ -244,7 +246,7 @@ TEST(ObjectStore, RootEntryAgentRegister) { ASSERT_FALSE(re.exists()); } -TEST(ObjectStore, RootEntrySchedulerGlobalLock) { +TEST_F(ObjectStore, RootEntrySchedulerGlobalLock) { cta::objectstore::BackendVFS be; { // Try to create the root entry @@ -300,7 +302,7 @@ TEST(ObjectStore, RootEntrySchedulerGlobalLock) { ASSERT_FALSE(re.exists()); } -TEST(ObjectStore, RetrieveQueueToReportToRepackForSuccessRootEntryTest) { +TEST_F(ObjectStore, RetrieveQueueToReportToRepackForSuccessRootEntryTest) { using cta::common::dataStructures::JobQueueType; cta::objectstore::BackendVFS be; 
cta::objectstore::EntryLogSerDeser el("user0", diff --git a/objectstore/Sorter.cpp b/objectstore/Sorter.cpp index eedbe12345184bc8cba6b8059161dad3d103dea8..33b456f495e9a7129bc4c5f38191983d7cb348b3 100644 --- a/objectstore/Sorter.cpp +++ b/objectstore/Sorter.cpp @@ -337,7 +337,7 @@ std::set<std::string> Sorter::getCandidateVidsToTransfer(RetrieveRequestInfosAcc std::string Sorter::getBestVidForQueueingRetrieveRequest(RetrieveRequestInfosAccessorInterface &requestAccessor, std::set<std::string>& candidateVids, log::LogContext &lc){ std::string vid; try{ - vid = Helpers::selectBestRetrieveQueue(candidateVids,m_catalogue,m_objectstore,requestAccessor.getForceDisabledTape()); + vid = Helpers::selectBestRetrieveQueue(candidateVids,m_catalogue,m_objectstore); } catch (Helpers::NoTapeAvailableForRetrieve & ex) { log::ScopedParamContainer params(lc); params.add("fileId", requestAccessor.getArchiveFile().archiveFileID); @@ -434,8 +434,8 @@ std::string OStoreRetrieveRequestAccessor::getRepackAddress() { return m_retrieveRequest->getRepackInfo().repackRequestAddress; } -bool OStoreRetrieveRequestAccessor::getForceDisabledTape() { - return m_retrieveRequest->getRepackInfo().forceDisabledTape; +bool OStoreRetrieveRequestAccessor::getIsRepack() { + return m_retrieveRequest->getRepackInfo().isRepack; } /* END OF RetrieveRequestAccessor CLASS */ @@ -472,8 +472,8 @@ std::string SorterRetrieveRequestAccessor::getRepackAddress() { return m_retrieveRequest.repackRequestAddress; } -bool SorterRetrieveRequestAccessor::getForceDisabledTape() { - return m_retrieveRequest.forceDisabledTape; +bool SorterRetrieveRequestAccessor::getIsRepack() { + return m_retrieveRequest.isRepack; } /* END OF SorterRetrieveRequestAccessor CLASS*/ diff --git a/objectstore/Sorter.hpp b/objectstore/Sorter.hpp index 76e7f8197d074cb0c463919a263701d87979f9c6..d5931f2dfbd23eca40b481ec24f556eefa962843 100644 --- a/objectstore/Sorter.hpp +++ b/objectstore/Sorter.hpp @@ -124,7 +124,7 @@ public: common::dataStructures::ArchiveFile archiveFile; std::map<uint32_t, RetrieveJob> retrieveJobs; std::string repackRequestAddress; - bool forceDisabledTape = false; + bool isRepack = false; }; /* Retrieve-related methods */ @@ -247,7 +247,7 @@ class RetrieveRequestInfosAccessorInterface{ virtual ~RetrieveRequestInfosAccessorInterface(); virtual serializers::RetrieveJobStatus getJobStatus(const uint32_t copyNb) = 0; virtual std::string getRepackAddress() = 0; - virtual bool getForceDisabledTape() = 0; + virtual bool getIsRepack() = 0; }; class OStoreRetrieveRequestAccessor: public RetrieveRequestInfosAccessorInterface{ @@ -260,7 +260,7 @@ class OStoreRetrieveRequestAccessor: public RetrieveRequestInfosAccessorInterfac const uint32_t copyNb, const uint64_t fSeq, AgentReferenceInterface* previousOwner); serializers::RetrieveJobStatus getJobStatus(const uint32_t copyNb); std::string getRepackAddress(); - bool getForceDisabledTape(); + bool getIsRepack(); private: std::shared_ptr<RetrieveRequest> m_retrieveRequest; }; @@ -275,7 +275,7 @@ class SorterRetrieveRequestAccessor: public RetrieveRequestInfosAccessorInterfac const uint32_t copyNb, const uint64_t fSeq, AgentReferenceInterface* previousOwner); serializers::RetrieveJobStatus getJobStatus(const uint32_t copyNb); std::string getRepackAddress(); - bool getForceDisabledTape(); + bool getIsRepack(); private: Sorter::SorterRetrieveRequest& m_retrieveRequest; }; diff --git a/objectstore/SorterTest.cpp b/objectstore/SorterTest.cpp index 
632501402873566d9688d3bb757a246141b1e455..122a1b4abefa1ccdb9ea069ba77e0b1579c58b61 100644 --- a/objectstore/SorterTest.cpp +++ b/objectstore/SorterTest.cpp @@ -39,9 +39,11 @@ #include "catalogue/DummyCatalogue.hpp" #include "Sorter.hpp" +#include "ObjectStoreFixture.hpp" + namespace unitTests { -TEST(ObjectStore,SorterInsertArchiveRequest){ +TEST_F(ObjectStore,SorterInsertArchiveRequest){ //cta::log::StdoutLogger dl("dummy", "unitTest"); cta::log::DummyLogger dl("dummy", "unitTest"); cta::log::LogContext lc(dl); @@ -187,7 +189,7 @@ TEST(ObjectStore,SorterInsertArchiveRequest){ ASSERT_EQ(sorter.getAllArchive().size(),0); } -TEST(ObjectStore,SorterInsertRetrieveRequest){ +TEST_F(ObjectStore,SorterInsertRetrieveRequest){ using namespace cta::objectstore; @@ -392,7 +394,7 @@ TEST(ObjectStore,SorterInsertRetrieveRequest){ } } -TEST(ObjectStore,SorterInsertDifferentTypesOfRequests){ +TEST_F(ObjectStore,SorterInsertDifferentTypesOfRequests){ using namespace cta::objectstore; @@ -695,7 +697,7 @@ TEST(ObjectStore,SorterInsertDifferentTypesOfRequests){ } } -TEST(ObjectStore,SorterInsertArchiveRequestNotFetched){ +TEST_F(ObjectStore,SorterInsertArchiveRequestNotFetched){ using namespace cta::objectstore; @@ -839,7 +841,7 @@ TEST(ObjectStore,SorterInsertArchiveRequestNotFetched){ } -TEST(ObjectStore,SorterInsertRetrieveRequestNotFetched){ +TEST_F(ObjectStore,SorterInsertRetrieveRequestNotFetched){ using namespace cta::objectstore; //cta::log::StdoutLogger dl("dummy", "unitTest"); diff --git a/objectstore/cta.proto b/objectstore/cta.proto index 058e99df1a7ea0aea384e4c2dddb8a2f8c12af06..9efaed027ab28b73cd0dab7d676de594aa4761c6 100644 --- a/objectstore/cta.proto +++ b/objectstore/cta.proto @@ -428,7 +428,7 @@ message RetrieveRequestRepackInfo { required string repack_request_address = 9520; required string file_buffer_url = 9530; required uint64 fseq = 9540; - required bool force_disabled_tape = 9560; + required bool force_disabled_tape = 9560 [deprecated=true]; optional bool has_user_provided_file = 9163 [default = false]; } @@ -450,7 +450,7 @@ message RetrieveActivityWeight { message RetrieveRequest { required SchedulerRetrieveRequest schedulerrequest = 9150; required MountPolicy mountpolicy = 9151; - optional string mountpolicyname = 9165; //TODO: Once it has been deployed, make it required and remove mountpolicy + optional string mountpolicyname = 9165; optional RetrieveActivityWeight activity_weight = 9160 [deprecated=true]; // Deprecated for cta/CTA#1077 optional string activity = 9164; required ArchiveFile archivefile = 9152; @@ -546,6 +546,12 @@ message RetrieveActivityCountPair { required uint64 count = 10601; } +message RetrieveQueueCleanupInfo { + required bool doCleanup = 14100; + optional string assignedAgent = 14110; + required uint64 heartbeat = 14120; +} + message RetrieveQueue { required string vid = 10100; repeated RetrieveQueueShardPointer retrievequeueshards = 10111; @@ -562,6 +568,7 @@ message RetrieveQueue { optional string disk_system_slept_for = 10190; optional uint64 sleep_time = 10200; optional uint64 youngestjobcreationtime = 10210 [default = 0]; + required RetrieveQueueCleanupInfo cleanupInfo = 10220; } // ------------- Repack data strcutures ---------------------------------------- @@ -634,7 +641,7 @@ message RepackRequest { required bool is_expand_started = 11562; required MountPolicy mount_policy = 11563; optional string mountpolicyname = 11575; //TODO: Once it has been deployed, make it required and remove mountpolicy - required bool force_disabled_tape = 11564; + required 
bool force_disabled_tape = 11564 [deprecated=true]; required bool is_complete = 11565; required bool no_recall = 11566; repeated RepackSubRequestPointer subrequests = 11570; diff --git a/scheduler/GenericSchedulerTest.cpp b/scheduler/GenericSchedulerTest.cpp index acdaef77cea7197c9b0c7e600d557004203407d8..8d46afe61dad045ff054aceee18333b45fb2fb94 100644 --- a/scheduler/GenericSchedulerTest.cpp +++ b/scheduler/GenericSchedulerTest.cpp @@ -2281,7 +2281,7 @@ TEST_P(SchedulerTest, getNextMountEmptyArchiveForRepackIfNbFilesQueuedIsLessThan ASSERT_EQ(0,1); } -TEST_P(SchedulerTest, getNextMountBrokenOrDisabledTapeShouldNotReturnAMount) { +TEST_P(SchedulerTest, getNextMountTapeStatesThatShouldNotReturnAMount) { //Queue 2 archive requests in two different logical libraries using namespace cta; @@ -2346,14 +2346,24 @@ TEST_P(SchedulerTest, getNextMountBrokenOrDisabledTapeShouldNotReturnAMount) { scheduler.waitSchedulerDbSubthreadsComplete(); - catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::BROKEN,std::string("Test")); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::BROKEN,std::nullopt,std::string("Test")); ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); - catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,std::nullopt); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,common::dataStructures::Tape::BROKEN, std::nullopt); ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); - catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::DISABLED,std::string("Test")); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::EXPORTED,std::nullopt,std::string("Test")); ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); - catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,std::nullopt); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,common::dataStructures::Tape::EXPORTED, std::nullopt); + ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); + + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::REPACKING_DISABLED,std::nullopt,std::string("Test")); + ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,common::dataStructures::Tape::REPACKING_DISABLED,std::nullopt); + ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); + + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::DISABLED,std::nullopt,std::string("Test")); + ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,common::dataStructures::Tape::DISABLED,std::nullopt); ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); { @@ -2402,14 +2412,24 @@ TEST_P(SchedulerTest, getNextMountBrokenOrDisabledTapeShouldNotReturnAMount) { scheduler.queueRetrieve(s_diskInstance, request, lc); scheduler.waitSchedulerDbSubthreadsComplete(); } - catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::BROKEN,std::string("Test")); + 
catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::BROKEN,std::nullopt,std::string("Test")); + ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,common::dataStructures::Tape::BROKEN,std::nullopt); + ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); + + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::EXPORTED,std::nullopt,std::string("Test")); + ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,common::dataStructures::Tape::EXPORTED,std::nullopt); + ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); + + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::REPACKING_DISABLED,std::nullopt,std::string("Test")); ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); - catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,std::nullopt); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,common::dataStructures::Tape::REPACKING_DISABLED,std::nullopt); ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); - catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::DISABLED,std::string("Test")); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::DISABLED,std::nullopt,std::string("Test")); ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); - catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,std::nullopt); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,common::dataStructures::Tape::DISABLED,std::nullopt); ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); } diff --git a/scheduler/OStoreDB/OStoreDB.cpp b/scheduler/OStoreDB/OStoreDB.cpp index 79c1988476799ba8d41452d36a27c12d9715b0e3..83f7a79f6b02411b9c7db9198ec4eab703ac0b3b 100644 --- a/scheduler/OStoreDB/OStoreDB.cpp +++ b/scheduler/OStoreDB/OStoreDB.cpp @@ -185,6 +185,61 @@ void OStoreDB::ping() { re.fetchNoLock(); } +//------------------------------------------------------------------------------ +// OStoreDB::fetchRetrieveQueueCleanupInfo() +//------------------------------------------------------------------------------ +std::list<SchedulerDatabase::RetrieveQueueCleanupInfo> OStoreDB::getRetrieveQueuesCleanupInfo(log::LogContext& logContext) { + + utils::Timer t; + assertAgentAddressSet(); + std::list<SchedulerDatabase::RetrieveQueueCleanupInfo> ret; + + // Get all tapes with retrieve queues + objectstore::RootEntry re(m_objectStore); + re.fetchNoLock(); + auto rootFetchNoLockTime = t.secs(utils::Timer::resetCounter); + + // Walk the retrieve queues for cleanup flag + for (auto &rqp: re.dumpRetrieveQueues(common::dataStructures::JobQueueType::JobsToTransferForUser)) { + RetrieveQueue rqueue(rqp.address, m_objectStore); + double queueLockTime = 0; + double queueFetchTime = 0; + + try { + rqueue.fetchNoLock(); + queueFetchTime = t.secs(utils::Timer::resetCounter); + } catch (cta::exception::Exception &ex) { + log::ScopedParamContainer params(logContext); + params.add("queueObject", rqp.address) + .add("tapeVid", rqp.vid) + .add("exceptionMessage", ex.getMessageValue()); + 
logContext.log(log::DEBUG, + "WARNING: In OStoreDB::getRetrieveQueuesCleanupInfo(): failed to lock/fetch a retrieve queue. Skipping it."); + continue; + } + + ret.push_back(SchedulerDatabase::RetrieveQueueCleanupInfo()); + ret.back().vid = rqueue.getVid(); + ret.back().doCleanup = rqueue.getQueueCleanupDoCleanup(); + ret.back().assignedAgent = rqueue.getQueueCleanupAssignedAgent(); + ret.back().heartbeat = rqueue.getQueueCleanupHeartbeat(); + + auto processingTime = t.secs(utils::Timer::resetCounter); + log::ScopedParamContainer params(logContext); + params.add("queueObject", rqp.address) + .add("tapeVid", rqp.vid) + .add("rootFetchNoLockTime", rootFetchNoLockTime) + .add("queueLockTime", queueLockTime) + .add("queueFetchTime", queueFetchTime) + .add("processingTime", processingTime); + if (queueLockTime > 1 || queueFetchTime > 1) { + logContext.log(log::WARNING, + "In OStoreDB::getRetrieveQueuesCleanupInfo(): fetched a retrieve queue and that lasted more than 1 second."); + } + } + return ret; +} + //------------------------------------------------------------------------------ // OStoreDB::fetchMountInfo() //------------------------------------------------------------------------------ @@ -349,30 +404,10 @@ void OStoreDB::fetchMountInfo(SchedulerDatabase::TapeMountDecisionInfo& tmdi, Ro bool isPotentialMount = false; auto vidToTapeMap = m_catalogue.getTapesByVid(rqp.vid); common::dataStructures::Tape::State tapeState = vidToTapeMap.at(rqp.vid).state; - bool tapeIsDisabled = tapeState == common::dataStructures::Tape::DISABLED; - bool tapeIsActive = tapeState == common::dataStructures::Tape::ACTIVE; - if (tapeIsActive) { + if (tapeState == common::dataStructures::Tape::ACTIVE || + tapeState == common::dataStructures::Tape::REPACKING) { isPotentialMount = true; } - else if(tapeIsDisabled){ - //In the case there are Repack Retrieve Requests with the force disabled flag set - //on a disabled tape, we will trigger a mount. - //Mount policies that begin with repack are used for repack requests with force disabled flag - //set to true. We only look for those to avoid looping through the retrieve queue - //while holding the global scheduler lock. - //In the case there are only deleted Retrieve Request on a DISABLED or BROKEN tape - //we will no longer trigger a mount. 
Eventually the oldestRequestAge will pass the configured - //threshold and the queue will be flushed - - auto queueMountPolicyNames = rqueue.getMountPolicyNames(); - auto mountPolicyItor = std::find_if(queueMountPolicyNames.begin(),queueMountPolicyNames.end(), [](const std::string &mountPolicyName){ - return mountPolicyName.rfind("repack", 0) == 0; - }); - - if(mountPolicyItor != queueMountPolicyNames.end()){ - isPotentialMount = true; - } - } if (rqSummary.jobs && (isPotentialMount || purpose == SchedulerDatabase::PurposeGetMountInfo::SHOW_QUEUES)) { //Getting the default mountPolicies parameters from the queue summary uint64_t minRetrieveRequestAge = rqSummary.minRetrieveRequestAge; @@ -1320,6 +1355,13 @@ std::list<SchedulerDatabase::RetrieveQueueStatistics> OStoreDB::getRetrieveQueue return Helpers::getRetrieveQueueStatistics(criteria, vidsToConsider, m_objectStore); } +//------------------------------------------------------------------------------ +// OStoreDB::clearRetrieveQueueStatisticsCache() +//------------------------------------------------------------------------------ +void OStoreDB::clearRetrieveQueueStatisticsCache(const std::string & vid) { + return Helpers::flushRetrieveQueueStatisticsCacheForVid(vid); +} + //------------------------------------------------------------------------------ // OStoreDB::queueRetrieve() //------------------------------------------------------------------------------ @@ -1808,7 +1850,6 @@ std::string OStoreDB::queueRepack(const SchedulerDatabase::QueueRepackRequest & common::dataStructures::RepackInfo::Type repackType = repackRequest.m_repackType; std::string bufferURL = repackRequest.m_repackBufferURL; common::dataStructures::MountPolicy mountPolicy = repackRequest.m_mountPolicy; - bool forceDisabledTape = repackRequest.m_forceDisabledTape; // Prepare the repack request object in memory. assertAgentAddressSet(); cta::utils::Timer t; @@ -1820,7 +1861,6 @@ std::string OStoreDB::queueRepack(const SchedulerDatabase::QueueRepackRequest & rr->setType(repackType); rr->setBufferURL(bufferURL); rr->setMountPolicy(mountPolicy); - rr->setForceDisabledTape(forceDisabledTape); rr->setNoRecall(repackRequest.m_noRecall); rr->setCreationLog(repackRequest.m_creationLog); // Try to reference the object in the index (will fail if there is already a request with this VID. 
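The change above reduces retrieve-mount eligibility in fetchMountInfo() to a plain tape-state check. As a minimal illustration (not part of the change itself; the helper name and include path are assumptions), the new rule amounts to:

#include "common/dataStructures/Tape.hpp"  // assumed include path for the Tape data structure

// Hypothetical helper, for illustration only: a tape is a retrieve-mount candidate
// only when ACTIVE or REPACKING; the old special case that allowed DISABLED tapes
// carrying "repack*" mount policies no longer exists.
bool isRetrieveMountCandidate(cta::common::dataStructures::Tape::State state) {
  using Tape = cta::common::dataStructures::Tape;
  return state == Tape::ACTIVE || state == Tape::REPACKING;
}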
@@ -1922,6 +1962,96 @@ void OStoreDB::requeueRetrieveJobs(std::list<cta::SchedulerDatabase::RetrieveJob sorter.flushAll(logContext); } +//------------------------------------------------------------------------------ +// OStoreDB::requeueRetrieveRequestJobs() +//------------------------------------------------------------------------------ +void OStoreDB::requeueRetrieveRequestJobs(std::list<cta::SchedulerDatabase::RetrieveJob *> &jobs, log::LogContext& logContext) { + std::list<std::shared_ptr<objectstore::RetrieveRequest>> rrlist; + std::list<objectstore::ScopedExclusiveLock> locks; + for (auto &job: jobs) { + auto oStoreJob = dynamic_cast<OStoreDB::RetrieveJob *>(job); + auto rr = std::make_shared<objectstore::RetrieveRequest>(oStoreJob->m_retrieveRequest.getAddressIfSet(), m_objectStore); + rrlist.push_back(rr); + locks.emplace_back(*rr); + rr->fetch(); + rr->garbageCollectRetrieveRequest(m_agentReference->getAgentAddress(), *m_agentReference, logContext, m_catalogue, true); + } + locks.clear(); + rrlist.clear(); +} + +//------------------------------------------------------------------------------ +// OStoreDB::reserveRetrieveQueueForCleanup() +//------------------------------------------------------------------------------ +void OStoreDB::reserveRetrieveQueueForCleanup(std::string & vid, std::optional<uint64_t> cleanupHeartBeatValue) { + + RootEntry re(m_objectStore); + RetrieveQueue rq(m_objectStore); + ScopedExclusiveLock rql; + re.fetchNoLock(); + + try { + rq.setAddress(re.getRetrieveQueueAddress(vid, common::dataStructures::JobQueueType::JobsToTransferForUser)); + rql.lock(rq); + rq.fetch(); + } catch (cta::objectstore::RootEntry::NoSuchRetrieveQueue & ex) { + throw RetrieveQueueNotFound("Retrieve queue of vid " + vid + " not found. " + ex.getMessageValue()); + } catch (cta::exception::NoSuchObject &ex) { + throw RetrieveQueueNotFound("Retrieve queue of vid " + vid + " not found. " + ex.getMessageValue()); + } catch (cta::exception::Exception &ex) { + throw; + } + + // After locking a queue, check again if the cleanup flag is still true + if (!rq.getQueueCleanupDoCleanup()) { + throw RetrieveQueueNotReservedForCleanup("Queue no longer has the cleanup flag enabled after fetching. Skipping it."); + } + + // Check if heartbeat has been updated, which means that another agent is still tracking it + if (rq.getQueueCleanupAssignedAgent().has_value() && cleanupHeartBeatValue.has_value()) { + if (cleanupHeartBeatValue.value() != rq.getQueueCleanupHeartbeat()) { + throw RetrieveQueueNotReservedForCleanup("Another agent is alive and cleaning up the queue.
Skipping it."); + } + } + + // Otherwise, carry on with cleanup of this queue + rq.setQueueCleanupAssignedAgent(m_agentReference->getAgentAddress()); + rq.tickQueueCleanupHeartbeat(); + rq.commit(); +} + +//------------------------------------------------------------------------------ +// OStoreDB::tickRetrieveQueueCleanupHeartbeat() +//------------------------------------------------------------------------------ +void OStoreDB::tickRetrieveQueueCleanupHeartbeat(std::string & vid) { + + RootEntry re(m_objectStore); + RetrieveQueue rq(m_objectStore); + ScopedExclusiveLock rql; + re.fetchNoLock(); + + try { + rq.setAddress(re.getRetrieveQueueAddress(vid, common::dataStructures::JobQueueType::JobsToTransferForUser)); + rql.lock(rq); + rq.fetch(); + if (rq.getQueueCleanupAssignedAgent().has_value() && + (rq.getQueueCleanupAssignedAgent() != m_agentReference->getAgentAddress())) { + throw RetrieveQueueNotReservedForCleanup( + "Another agent is alive and cleaning up the retrieve queue of tape " + vid + ". Heartbeat not ticked."); + } + rq.tickQueueCleanupHeartbeat(); + rq.commit(); + } catch (RetrieveQueueNotReservedForCleanup & ex) { + throw; // Just pass this exception to the outside + } catch (cta::objectstore::RootEntry::NoSuchRetrieveQueue & ex) { + throw RetrieveQueueNotFound("Retrieve queue of vid " + vid + " not found. " + ex.getMessageValue()); + } catch (cta::exception::NoSuchObject & ex) { + throw RetrieveQueueNotFound("Retrieve queue of vid " + vid + " not found. " + ex.getMessageValue()); + } catch (cta::exception::Exception & ex) { + throw; + } +} + //------------------------------------------------------------------------------ // OStoreDB::RepackRequestPromotionStatistics::RepackRequestPromotionStatistics() //------------------------------------------------------------------------------ @@ -2044,6 +2174,106 @@ auto OStoreDB::getRepackStatisticsNoLock() -> std::unique_ptr<SchedulerDatabase: return ret; } +//------------------------------------------------------------------------------ +// OStoreDB::getNextRetrieveJobsToTransferBatch() +//------------------------------------------------------------------------------ +std::list<std::unique_ptr<SchedulerDatabase::RetrieveJob>> OStoreDB::getNextRetrieveJobsToTransferBatch( + std::string & vid, uint64_t filesRequested, log::LogContext &logContext) { + + using RQTTAlgo = objectstore::ContainerAlgorithms<RetrieveQueue, RetrieveQueueToTransfer>; + RQTTAlgo rqttAlgo(m_objectStore, *m_agentReference); + std::list<std::unique_ptr<SchedulerDatabase::RetrieveJob>> ret; + + // Try to get jobs from the queue. If it is empty, it will be trimmed. 
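+  // Pop criteria (set just below): cap the batch by the requested number of files only;
+  // the byte limit is set to the maximum representable value so it never constrains the pop.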
+ RQTTAlgo::PopCriteria criteria; + criteria.files = filesRequested; + criteria.bytes = std::numeric_limits<decltype(criteria.bytes)>::max(); + + // Pop the objects + auto jobs = rqttAlgo.popNextBatch(vid, criteria, logContext); + + for (auto &j : jobs.elements) { + std::unique_ptr<OStoreDB::RetrieveJob> rj(new OStoreDB::RetrieveJob(j.retrieveRequest->getAddressIfSet(), *this, nullptr)); + rj->archiveFile = j.archiveFile; + rj->retrieveRequest = j.rr; + rj->selectedCopyNb = j.copyNb; + rj->errorReportURL = j.errorReportURL; + rj->reportType = j.reportType; + rj->m_repackInfo = j.repackInfo; + rj->setJobOwned(); + ret.emplace_back(std::move(rj)); + } + return ret; +} + +//------------------------------------------------------------------------------ +// OStoreDB::setRetrieveQueueCleanupFlag() +//------------------------------------------------------------------------------ +void OStoreDB::setRetrieveQueueCleanupFlag(const std::string& vid, bool val, log::LogContext& logContext) { + + cta::utils::Timer t; + double rootFetchNoLockTime = 0; + double rootRelockExclusiveTime = 0; + double rootRefetchTime = 0; + double addOrGetQueueandCommitTime = 0; + double queueLockTime = 0; + double queueFetchTime = 0; + + std::string qAddress; + RetrieveQueue rqueue(m_objectStore); + ScopedExclusiveLock rqlock; + + { + RootEntry re(m_objectStore); + re.fetchNoLock(); + rootFetchNoLockTime = t.secs(utils::Timer::resetCounter); + try { + qAddress = re.getRetrieveQueueAddress(vid, common::dataStructures::JobQueueType::JobsToTransferForUser); + rqueue.setAddress(qAddress); + } catch (cta::exception::Exception & ex) { + ScopedExclusiveLock rexl(re); + rootRelockExclusiveTime = t.secs(utils::Timer::resetCounter); + re.fetch(); + rootRefetchTime = t.secs(utils::Timer::resetCounter); + qAddress = re.addOrGetRetrieveQueueAndCommit(vid, *m_agentReference, common::dataStructures::JobQueueType::JobsToTransferForUser); + rqueue.setAddress(qAddress); + addOrGetQueueandCommitTime = t.secs(utils::Timer::resetCounter); + } + } + + try { + rqlock.lock(rqueue); + queueLockTime = t.secs(utils::Timer::resetCounter); + rqueue.fetch(); + queueFetchTime = t.secs(utils::Timer::resetCounter); + rqueue.setQueueCleanupDoCleanup(val); + rqueue.commit(); + } catch (cta::exception::Exception &ex) { + log::ScopedParamContainer params(logContext); + params.add("queueObject", qAddress) + .add("tapeVid", vid) + .add("cleanupFlagValue", val) + .add("exceptionMessage", ex.getMessageValue()); + logContext.log(log::DEBUG, "WARNING: In OStoreDB::setRetrieveQueueCleanupFlag(): failed to set cleanup flag value on retrieve queue."); + } + + double processingTime = t.secs(utils::Timer::resetCounter); + log::ScopedParamContainer params (logContext); + params.add("queueObject", qAddress) + .add("tapeVid", vid) + .add("cleanupFlagValue", val) + .add("rootFetchNoLockTime", rootFetchNoLockTime) + .add("rootRelockExclusiveTime", rootRelockExclusiveTime) + .add("rootRefetchTime", rootRefetchTime) + .add("addOrGetQueueandCommitTime", addOrGetQueueandCommitTime) + .add("queueLockTime", queueLockTime) + .add("queueFetchTime", queueFetchTime) + .add("processingTime", processingTime); + if(queueLockTime > 1 || queueFetchTime > 1){ + logContext.log(log::WARNING, "In OStoreDB::setRetrieveQueueCleanupFlag(): fetched a retrieve queue and that lasted more than 1 second."); + } +} + //------------------------------------------------------------------------------ // OStoreDB::getNextRepackJobToExpand() 
//------------------------------------------------------------------------------ @@ -2069,7 +2299,6 @@ std::unique_ptr<SchedulerDatabase::RepackRequest> OStoreDB::getNextRepackJobToEx ret->repackInfo.type = repackInfo.type; ret->repackInfo.status = repackInfo.status; ret->repackInfo.repackBufferBaseURL = repackInfo.repackBufferBaseURL; - ret->repackInfo.forceDisabledTape = repackInfo.forceDisabledTape; ret->repackInfo.noRecall = repackInfo.noRecall; return ret; } @@ -2675,7 +2904,6 @@ uint64_t OStoreDB::RepackRequest::addSubrequestsAndUpdateStats(std::list<Subrequ m_repackRequest.setTotalStats(totalStatsFiles); uint64_t fSeq = std::max(maxFSeqLowBound + 1, maxAddedFSeq + 1); common::dataStructures::MountPolicy mountPolicy = m_repackRequest.getMountPolicy(); - bool forceDisabledTape = repackInfo.forceDisabledTape; bool noRecall = repackInfo.noRecall; // We make sure the references to subrequests exist persistently before creating them. m_repackRequest.commit(); @@ -2761,7 +2989,6 @@ uint64_t OStoreDB::RepackRequest::addSubrequestsAndUpdateStats(std::list<Subrequ rRRepackInfo.fileBufferURL = rsr.fileBufferURL; rRRepackInfo.fSeq = rsr.fSeq; rRRepackInfo.isRepack = true; - rRRepackInfo.forceDisabledTape = forceDisabledTape; rRRepackInfo.repackRequestAddress = m_repackRequest.getAddressIfSet(); if(rsr.hasUserProvidedFile){ rRRepackInfo.hasUserProvidedFile = true; @@ -2784,7 +3011,7 @@ uint64_t OStoreDB::RepackRequest::addSubrequestsAndUpdateStats(std::list<Subrequ if (tc.vid == repackInfo.vid) { try { // Try to select the repack VID from a one-vid list. - Helpers::selectBestRetrieveQueue({repackInfo.vid}, m_oStoreDB.m_catalogue, m_oStoreDB.m_objectStore,forceDisabledTape); + Helpers::selectBestRetrieveQueue({repackInfo.vid}, m_oStoreDB.m_catalogue, m_oStoreDB.m_objectStore, true); bestVid = repackInfo.vid; activeCopyNumber = tc.copyNb; } catch (Helpers::NoTapeAvailableForRetrieve &) {} @@ -2797,7 +3024,7 @@ uint64_t OStoreDB::RepackRequest::addSubrequestsAndUpdateStats(std::list<Subrequ std::set<std::string> candidateVids; for (auto & tc: rsr.archiveFile.tapeFiles) candidateVids.insert(tc.vid); try { - bestVid = Helpers::selectBestRetrieveQueue(candidateVids, m_oStoreDB.m_catalogue, m_oStoreDB.m_objectStore,forceDisabledTape); + bestVid = Helpers::selectBestRetrieveQueue(candidateVids, m_oStoreDB.m_catalogue, m_oStoreDB.m_objectStore, true); } catch (Helpers::NoTapeAvailableForRetrieve &) { // Count the failure for this subrequest. notCreatedSubrequests.emplace_back(rsr); @@ -2805,7 +3032,6 @@ uint64_t OStoreDB::RepackRequest::addSubrequestsAndUpdateStats(std::list<Subrequ failedCreationStats.bytes += rsr.archiveFile.fileSize; log::ScopedParamContainer params(lc); params.add("fileId", rsr.archiveFile.archiveFileID) - .add("wasRepackSubmittedWithForceDisabledTape",forceDisabledTape) .add("repackVid", repackInfo.vid); lc.log(log::ERR, "In OStoreDB::RepackRequest::addSubrequests(): could not queue a retrieve subrequest. Subrequest failed. Maybe the tape to repack is disabled ?"); @@ -4864,13 +5090,13 @@ void OStoreDB::RetrieveJob::failTransfer(const std::string &failureReason, log:: "In OStoreDB::RetrieveJob::failTransfer(): no active job after addJobFailure() returned false." 
); } - bool disabledTape = m_retrieveRequest.getRepackInfo().forceDisabledTape; + bool isRepack = m_retrieveRequest.getRepackInfo().isRepack; m_retrieveRequest.commit(); rel.release(); // Check that the requested retrieve job (for the provided VID) exists, and record the copy number std::string bestVid = Helpers::selectBestRetrieveQueue(candidateVids, m_oStoreDB.m_catalogue, - m_oStoreDB.m_objectStore,disabledTape); + m_oStoreDB.m_objectStore, isRepack); auto tf_it = af.tapeFiles.begin(); for( ; tf_it != af.tapeFiles.end() && tf_it->vid != bestVid; ++tf_it) ; diff --git a/scheduler/OStoreDB/OStoreDB.hpp b/scheduler/OStoreDB/OStoreDB.hpp index 3dee520ef5925576fac3166529ceb493a044c683..9045f01ba4e9d642a3456f84ea4a0f2831edb70e 100644 --- a/scheduler/OStoreDB/OStoreDB.hpp +++ b/scheduler/OStoreDB/OStoreDB.hpp @@ -412,9 +412,14 @@ class OStoreDB: public SchedulerDatabase { log::TimingList & timingList, utils::Timer & t, log::LogContext & lc) override; /* === Retrieve requests handling ======================================== */ + std::list<RetrieveQueueCleanupInfo> getRetrieveQueuesCleanupInfo(log::LogContext & logContext) override; + void setRetrieveQueueCleanupFlag(const std::string& vid, bool val, log::LogContext& logContext) override; + std::list<RetrieveQueueStatistics> getRetrieveQueueStatistics( const cta::common::dataStructures::RetrieveFileQueueCriteria& criteria, const std::set<std::string>& vidsToConsider) override; + void clearRetrieveQueueStatisticsCache(const std::string & vid) override; + CTA_GENERATE_EXCEPTION_CLASS(RetrieveRequestHasNoCopies); CTA_GENERATE_EXCEPTION_CLASS(TapeCopyNumberOutOfRange); SchedulerDatabase::RetrieveRequestInfo queueRetrieve(cta::common::dataStructures::RetrieveRequest& rqst, @@ -466,6 +471,14 @@ class OStoreDB: public SchedulerDatabase { // RetrieveQueueItor_t* getRetrieveJobItorPtr(const std::string &vid, // common::dataStructures::JobQueueType queueType = common::dataStructures::JobQueueType::JobsToTransferForUser) const; + std::list<std::unique_ptr<SchedulerDatabase::RetrieveJob>> getNextRetrieveJobsToTransferBatch( + std::string & vid, uint64_t filesRequested, log::LogContext &logContext) override; + void requeueRetrieveRequestJobs(std::list<cta::SchedulerDatabase::RetrieveJob *> &jobs, log::LogContext& logContext) override; + void reserveRetrieveQueueForCleanup(std::string & vid, std::optional<uint64_t> cleanupHeartBeatValue) override; + void tickRetrieveQueueCleanupHeartbeat(std::string & vid) override; + CTA_GENERATE_EXCEPTION_CLASS(RetrieveQueueNotReservedForCleanup); + CTA_GENERATE_EXCEPTION_CLASS(RetrieveQueueNotFound); + std::list<std::unique_ptr<SchedulerDatabase::RetrieveJob>> getNextRetrieveJobsToReportBatch( uint64_t filesRequested, log::LogContext &logContext) override; diff --git a/scheduler/OStoreDB/OStoreDBFactory.hpp b/scheduler/OStoreDB/OStoreDBFactory.hpp index ab44b19aaeb6f43058beecf39669a801f35e8ebe..96b4f265874ab8e161af79f0bec7f6c7026bd2f8 100644 --- a/scheduler/OStoreDB/OStoreDBFactory.hpp +++ b/scheduler/OStoreDB/OStoreDBFactory.hpp @@ -115,7 +115,7 @@ template <> OStoreDBWrapper<cta::objectstore::BackendVFS>::OStoreDBWrapper( const std::string &context, std::unique_ptr<cta::catalogue::Catalogue> & catalogue, const std::string &URL) : OStoreDBWrapperInterface(m_OStoreDB), -m_logger(new cta::log::DummyLogger("", "")), m_backend(new cta::objectstore::BackendVFS()), +m_logger(new cta::log::DummyLogger("", "")), m_backend(URL.empty() ? 
new cta::objectstore::BackendVFS() : new cta::objectstore::BackendVFS(URL)), m_catalogue(catalogue), m_OStoreDB(*m_backend, *m_catalogue, *m_logger), m_agentReferencePtr(new objectstore::AgentReference("OStoreDBFactory", *m_logger)) diff --git a/scheduler/OStoreDB/OStoreDBInit.hpp b/scheduler/OStoreDB/OStoreDBInit.hpp index c90c564e99f5a4485851bc74ae396b33e0192eac..fd8ad2f936c7505345bf37513241c4cf13dd748f 100644 --- a/scheduler/OStoreDB/OStoreDBInit.hpp +++ b/scheduler/OStoreDB/OStoreDBInit.hpp @@ -22,6 +22,7 @@ #include <objectstore/AgentHeartbeatThread.hpp> #include <objectstore/BackendVFS.hpp> #include <objectstore/GarbageCollector.hpp> +#include <objectstore/QueueCleanupRunner.hpp> #include <scheduler/OStoreDB/OStoreDBWithAgent.hpp> namespace cta { @@ -60,6 +61,10 @@ public: return objectstore::GarbageCollector(*m_backend, m_backendPopulator->getAgentReference(), catalogue); } + objectstore::QueueCleanupRunner getQueueCleanupRunner(catalogue::Catalogue& catalogue, SchedulerDatabase& oStoreDb) { + return objectstore::QueueCleanupRunner(m_backendPopulator->getAgentReference(), oStoreDb, catalogue); + } + private: /*! * Deleter for instances of the AgentHeartbeatThread class. diff --git a/scheduler/PostgresSchedDB/PostgresSchedDB.cpp b/scheduler/PostgresSchedDB/PostgresSchedDB.cpp index 3e83fe49718ddb727ea75b8ed346d77b82be84ef..d6a01f9cff2f2400f621c0ec2e752c2eafea66a4 100644 --- a/scheduler/PostgresSchedDB/PostgresSchedDB.cpp +++ b/scheduler/PostgresSchedDB/PostgresSchedDB.cpp @@ -75,6 +75,26 @@ SchedulerDatabase::JobsFailedSummary PostgresSchedDB::getArchiveJobsFailedSummar throw cta::exception::Exception("Not implemented"); } +std::list<std::unique_ptr<RetrieveJob>> PostgresSchedDB::getNextRetrieveJobsToTransferBatch(std::string & vid, uint64_t filesRequested, log::LogContext &lc) +{ + throw cta::exception::Exception("Not implemented"); +} + +void PostgresSchedDB::requeueRetrieveRequestJobs(std::list<cta::SchedulerDatabase::RetrieveJob *> &jobs, log::LogContext &lc) +{ +throw cta::exception::Exception("Not implemented"); +} + +void PostgresSchedDB::reserveRetrieveQueueForCleanup(std::string & vid, std::optional<uint64_t> cleanupHeartBeatValue) +{ + throw cta::exception::Exception("Not implemented"); +} + +void PostgresSchedDB::tickRetrieveQueueCleanupHeartbeat(std::string & vid) +{ + throw cta::exception::Exception("Not implemented"); +} + void PostgresSchedDB::setArchiveJobBatchReported(std::list<SchedulerDatabase::ArchiveJob*> & jobsBatch, log::TimingList & timingList, utils::Timer & t, log::LogContext & lc) { @@ -87,6 +107,11 @@ std::list<SchedulerDatabase::RetrieveQueueStatistics> PostgresSchedDB::getRetrie throw cta::exception::Exception("Not implemented"); } +void PostgresSchedDB::clearRetrieveQueueStatisticsCache(const std::string & vid) +{ + throw cta::exception::Exception("Not implemented"); +} + SchedulerDatabase::RetrieveRequestInfo PostgresSchedDB::queueRetrieve(cta::common::dataStructures::RetrieveRequest& rqst, const cta::common::dataStructures::RetrieveFileQueueCriteria &criteria, const std::optional<std::string> diskSystemName, log::LogContext &logContext) diff --git a/scheduler/PostgresSchedDB/PostgresSchedDB.hpp b/scheduler/PostgresSchedDB/PostgresSchedDB.hpp index 8412758b8b82a5a1c175a909b9ce018ff27b50e1..a19e91673522f4c572f8a1c8efd826e4d1369030 100644 --- a/scheduler/PostgresSchedDB/PostgresSchedDB.hpp +++ b/scheduler/PostgresSchedDB/PostgresSchedDB.hpp @@ -82,6 +82,17 @@ class PostgresSchedDB: public SchedulerDatabase { JobsFailedSummary 
getArchiveJobsFailedSummary(log::LogContext &logContext) override; + std::list<std::unique_ptr<SchedulerDatabase::RetrieveJob>> getNextRetrieveJobsToTransferBatch(std::string & vid, uint64_t filesRequested, log::LogContext &lc) override; + + void requeueRetrieveRequestJobs(std::list<cta::SchedulerDatabase::RetrieveJob *> &jobs, log::LogContext &lc) override; + + void reserveRetrieveQueueForCleanup(std::string & vid, std::optional<uint64_t> cleanupHeartBeatValue) override; + + void tickRetrieveQueueCleanupHeartbeat(std::string & vid) override; + + void setArchiveJobBatchReported(std::list<SchedulerDatabase::ArchiveJob*> & jobsBatch, + log::TimingList & timingList, utils::Timer & t, log::LogContext & lc) override; + void setArchiveJobBatchReported(std::list<SchedulerDatabase::ArchiveJob*> & jobsBatch, log::TimingList & timingList, utils::Timer & t, log::LogContext & lc) override; @@ -92,6 +103,8 @@ class PostgresSchedDB: public SchedulerDatabase { const cta::common::dataStructures::RetrieveFileQueueCriteria &criteria, const std::optional<std::string> diskSystemName, log::LogContext &logContext) override; + void clearRetrieveQueueStatisticsCache(const std::string & vid) override; + void cancelRetrieve(const std::string& instanceName, const cta::common::dataStructures::CancelRetrieveRequest& rqst, log::LogContext& lc) override; diff --git a/scheduler/Scheduler.cpp b/scheduler/Scheduler.cpp index c53d5e7a79bbb05bf81417bdb3860b211eb152e3..7addc8f834a772d0948e86259217e4e85b508dc9 100644 --- a/scheduler/Scheduler.cpp +++ b/scheduler/Scheduler.cpp @@ -27,6 +27,7 @@ #include <iostream> #include <random> #include <sstream> +#include <regex> #include "catalogue/Catalogue.hpp" #include "catalogue/CatalogueItor.hpp" @@ -303,15 +304,47 @@ void Scheduler::checkTapeCanBeRepacked(const std::string & vid, const SchedulerD try{ auto vidToTapesMap = m_catalogue.getTapesByVid(vid); //throws an exception if the vid is not found on the database cta::common::dataStructures::Tape tapeToCheck = vidToTapesMap.at(vid); + if(!tapeToCheck.full){ throw exception::UserError("You must set the tape as full before repacking it."); } - if(tapeToCheck.state == common::dataStructures::Tape::BROKEN){ - throw exception::UserError(std::string("You cannot repack a tape that is ") + common::dataStructures::Tape::stateToString(common::dataStructures::Tape::BROKEN) + "."); - } - if(tapeToCheck.isDisabled() && !repackRequest.m_forceDisabledTape){ - throw exception::UserError(std::string("You cannot repack a ") + common::dataStructures::Tape::stateToString(common::dataStructures::Tape::DISABLED)+ " tape. You can force it by using the flag --disabledtape."); + + switch (tapeToCheck.state) { + + case common::dataStructures::Tape::DISABLED: + case common::dataStructures::Tape::ACTIVE: + throw exception::UserError( + std::string("You cannot repack a tape that is ") + + common::dataStructures::Tape::stateToString(tapeToCheck.state) + + ". 
You must first change its state to " + + common::dataStructures::Tape::stateToString(common::dataStructures::Tape::REPACKING) + "."); + + case common::dataStructures::Tape::BROKEN: + case common::dataStructures::Tape::BROKEN_PENDING: + case common::dataStructures::Tape::EXPORTED: + case common::dataStructures::Tape::EXPORTED_PENDING: + throw exception::UserError( + std::string("You cannot repack a tape that is ") + + common::dataStructures::Tape::stateToString(tapeToCheck.state) + "."); + + case common::dataStructures::Tape::REPACKING_PENDING: + throw exception::UserError( + std::string("You cannot repack a tape that is ") + + common::dataStructures::Tape::stateToString(common::dataStructures::Tape::REPACKING_PENDING) + + ". You must wait for its state to complete the transition to " + + common::dataStructures::Tape::stateToString(common::dataStructures::Tape::REPACKING) + "."); + + case common::dataStructures::Tape::REPACKING: + case common::dataStructures::Tape::REPACKING_DISABLED: + break; // OK to repack! + + default: + throw exception::UserError( + std::string("You cannot repack the tape with VID ") + + vid + + ". The current state is unknown."); } + } catch(const exception::UserError& userEx){ throw userEx; } catch(const cta::exception::Exception & ex){ @@ -338,7 +371,6 @@ void Scheduler::queueRepack(const common::dataStructures::SecurityIdentity &cliI log::ScopedParamContainer params(lc); params.add("tapeVid", vid) .add("repackType", toString(repackRequest.m_repackType)) - .add("forceDisabledTape", repackRequest.m_forceDisabledTape) .add("mountPolicy", repackRequest.m_mountPolicy.name) .add("noRecall", repackRequest.m_noRecall) .add("creationHostName",repackRequestToQueue.m_creationLog.host) @@ -1777,7 +1809,7 @@ std::list<common::dataStructures::QueueAndMountSummary> Scheduler::getQueuesAndM mountOrQueue.filesOnTapes += t.lastFSeq; mountOrQueue.dataOnTapes += t.dataOnTapeInBytes; if (t.full) mountOrQueue.fullTapes++; - if (!t.full && !t.isDisabled()) mountOrQueue.writableTapes++; + if (!t.full && t.isActive()) mountOrQueue.writableTapes++; mountOrQueue.tapePool = t.tapePoolName; } } @@ -1791,6 +1823,118 @@ std::list<common::dataStructures::QueueAndMountSummary> Scheduler::getQueuesAndM return ret; } +//------------------------------------------------------------------------------ +// triggerTapeStateChange +//------------------------------------------------------------------------------ +void Scheduler::triggerTapeStateChange(const common::dataStructures::SecurityIdentity &admin,const std::string &vid, const common::dataStructures::Tape::State &new_state, const std::optional<std::string> &stateReason, log::LogContext& logContext) { + + using Tape = common::dataStructures::Tape; + + // Tape must exist on catalogue + if (!m_catalogue.tapeExists(vid)) { + throw cta::exception::UserError("The VID " + vid + " does not exist"); + } + + // Validate tape state change based on previous state + auto prev_state = m_catalogue.getTapesByVid(vid)[vid].state; + + // If previous and desired states are the same, do nothing + if (prev_state == new_state) return; + + // If previous state is already in transition (temporary state) to the desired state, do nothing + if (prev_state == Tape::BROKEN_PENDING && new_state == Tape::BROKEN) return; + if (prev_state == Tape::EXPORTED_PENDING && new_state == Tape::EXPORTED) return; + if (prev_state == Tape::REPACKING_PENDING && new_state == Tape::REPACKING) return; + + // If previous state is temporary, user should wait for it to complete + if ( + prev_state == 
Tape::BROKEN_PENDING + || prev_state == Tape::EXPORTED_PENDING + || prev_state == Tape::REPACKING_PENDING + ) { + throw cta::exception::UserError("Cannot modify tape " + vid + " state while it is in a temporary internal state"); + } + + // User is not allowed to select explicitly a temporary state + if ( + new_state == Tape::BROKEN_PENDING + || new_state == Tape::EXPORTED_PENDING + || new_state == Tape::REPACKING_PENDING + ) { + throw cta::exception::UserError("Internal states cannot be set directly by the user"); + } + + // Moving out of REPACKING/REPACKING_DISABLED is only allowed if there is no repacking ongoing + if ( + (prev_state == Tape::REPACKING || prev_state == Tape::REPACKING_DISABLED) + && !(new_state == Tape::REPACKING || new_state == Tape::REPACKING_DISABLED) + && isBeingRepacked(vid) + ) { + throw cta::exception::UserError("Cannot modify tape " + vid + " state because there is a repack for that tape"); + } + + // REPACKING_DISABLED can only be set while in REPACKING + if (prev_state != Tape::REPACKING && new_state == Tape::REPACKING_DISABLED) { + throw cta::exception::UserError("Cannot modify tape " + vid + " state from " + Tape::stateToString(prev_state) + " to " + Tape::stateToString(new_state)); + } + + // REPACKING_DISABLED can only be modified to REPACKING, BROKEN or EXPORTED + if ( + prev_state == Tape::REPACKING_DISABLED + && !(new_state == Tape::REPACKING || new_state == Tape::BROKEN || new_state == Tape::EXPORTED) + ) { + throw cta::exception::UserError("Cannot modify tape " + vid + " state from " + Tape::stateToString(prev_state) + " to " + Tape::stateToString(new_state)); + } + + // Validation of tape state change request is complete + // Proceed with tape state change... + + switch (new_state) { + case Tape::ACTIVE: + case Tape::DISABLED: + case Tape::REPACKING_DISABLED: + // Simply set the new tape state + m_catalogue.modifyTapeState(admin, vid, new_state, prev_state, stateReason); + break; + case Tape::BROKEN: + try { + m_catalogue.modifyTapeState(admin, vid, Tape::BROKEN_PENDING, prev_state, stateReason); + } catch (catalogue::UserSpecifiedAnEmptyStringReasonWhenTapeStateNotActive & ex) { + throw catalogue::UserSpecifiedAnEmptyStringReasonWhenTapeStateNotActive( + std::regex_replace(ex.getMessageValue(), std::regex(Tape::stateToString(Tape::BROKEN_PENDING)), Tape::stateToString(Tape::BROKEN))); + } + m_db.setRetrieveQueueCleanupFlag(vid, true, logContext); + break; + case Tape::REPACKING: + if (prev_state == Tape::REPACKING_DISABLED) { + // If tape is on REPACKING_DISABLED state, move it directly to REPACKING + m_catalogue.modifyTapeState(admin, vid, new_state, prev_state, stateReason); + } else { + try { + m_catalogue.modifyTapeState(admin, vid, Tape::REPACKING_PENDING, prev_state, stateReason); + } catch (catalogue::UserSpecifiedAnEmptyStringReasonWhenTapeStateNotActive & ex) { + throw catalogue::UserSpecifiedAnEmptyStringReasonWhenTapeStateNotActive( + std::regex_replace(ex.getMessageValue(), std::regex(Tape::stateToString(Tape::REPACKING_PENDING)), Tape::stateToString(Tape::REPACKING))); + } + m_db.setRetrieveQueueCleanupFlag(vid, true, logContext); + } + break; + case Tape::EXPORTED: + try { + m_catalogue.modifyTapeState(admin, vid, Tape::EXPORTED_PENDING, prev_state, stateReason); + } catch (catalogue::UserSpecifiedAnEmptyStringReasonWhenTapeStateNotActive & ex) { + throw catalogue::UserSpecifiedAnEmptyStringReasonWhenTapeStateNotActive( + std::regex_replace(ex.getMessageValue(), std::regex(Tape::stateToString(Tape::EXPORTED_PENDING)), 
Tape::stateToString(Tape::EXPORTED))); + } + m_db.setRetrieveQueueCleanupFlag(vid, true, logContext); + break; + default: + throw cta::exception::UserError("Unknown procedure to change tape state to " + Tape::stateToString(new_state)); + } + + m_db.clearRetrieveQueueStatisticsCache(vid); +} + //------------------------------------------------------------------------------ // getNextArchiveJobsToReportBatch //------------------------------------------------------------------------------ diff --git a/scheduler/Scheduler.hpp b/scheduler/Scheduler.hpp index 769367c067eb367305275c917b300fbb7a806dd7..78765e622fe09a2b4b8d4afa3f4827861c35c26b 100644 --- a/scheduler/Scheduler.hpp +++ b/scheduler/Scheduler.hpp @@ -340,6 +340,16 @@ public: */ std::list<common::dataStructures::QueueAndMountSummary> getQueuesAndMountSummaries(log::LogContext & lc); + /** + * Modify the state of the specified tape. Intermediate states may be temporarily applied + * until the final desired state is achieved. + * @param admin, the person or the system who modified the state of the tape + * @param vid the VID of the tape to change the state + * @param state the desired final state + * @param stateReason the reason why the state changes, if the state is ACTIVE and the stateReason is std::nullopt, the state will be reset to null + */ + void triggerTapeStateChange(const common::dataStructures::SecurityIdentity &admin,const std::string &vid, const common::dataStructures::Tape::State & state, const std::optional<std::string> & stateReason, log::LogContext& logContext); + /*======================== Archive reporting support =======================*/ /** * Batch job factory diff --git a/scheduler/SchedulerDatabase.hpp b/scheduler/SchedulerDatabase.hpp index 8addbc2a6b568eebbd1df09b7258dfd8982540ae..6801ae0b4590af927997d9a3f09ed5f9c187bc96 100644 --- a/scheduler/SchedulerDatabase.hpp +++ b/scheduler/SchedulerDatabase.hpp @@ -153,23 +153,21 @@ class SchedulerDatabase { * @param repackBufferURL * @param repackType * @param mountPolicy - * @param forceDisabledTape * @param noRecall */ class QueueRepackRequest { public: QueueRepackRequest(const std::string & vid, const std::string& repackBufferURL, const common::dataStructures::RepackInfo::Type& repackType, - const common::dataStructures::MountPolicy & mountPolicy, const bool forceDisabledTape, + const common::dataStructures::MountPolicy & mountPolicy, const bool noRecall) - : m_vid(vid), m_repackBufferURL(repackBufferURL), m_repackType(repackType), m_mountPolicy(mountPolicy), - m_forceDisabledTape(forceDisabledTape), m_noRecall(noRecall) {} + : m_vid(vid), m_repackBufferURL(repackBufferURL), m_repackType(repackType), + m_mountPolicy(mountPolicy), m_noRecall(noRecall) {} std::string m_vid; std::string m_repackBufferURL; common::dataStructures::RepackInfo::Type m_repackType; common::dataStructures::MountPolicy m_mountPolicy; - bool m_forceDisabledTape; bool m_noRecall; common::dataStructures::EntryLog m_creationLog; }; @@ -274,6 +272,24 @@ class SchedulerDatabase { /*============ Retrieve management: user side ============================*/ + /** + * A representation of the cleanup request status of a retrieve queue. + */ + struct RetrieveQueueCleanupInfo { + std::string vid; + bool doCleanup; + std::optional<std::string> assignedAgent; + uint64_t heartbeat; + }; + + /** + * Get the retrieve queue cleanup status. + * @param vidsToConsider list of vids to considers. If empty, all vids will be considered. + * @return the list of cleanup request status. 
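+   * Each entry mirrors the cleanup state persisted on the queue: the doCleanup flag, the agent
+   * currently assigned to the cleanup (if any) and its heartbeat counter.
+   * Rough usage sketch for a cleanup agent (illustrative only; 'db' and 'lc' are placeholder names):
+   *   for (auto & info : db.getRetrieveQueuesCleanupInfo(lc)) {
+   *     if (info.doCleanup) { db.reserveRetrieveQueueForCleanup(info.vid, info.heartbeat); }
+   *   }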
+ */ + virtual std::list<RetrieveQueueCleanupInfo> getRetrieveQueuesCleanupInfo(log::LogContext& logContext) = 0; + virtual void setRetrieveQueueCleanupFlag(const std::string&vid, bool val, log::LogContext& logContext) = 0; + /** * A representation of an existing retrieve queue. This is a (simpler) relative * to the PotentialMount used for mount scheduling. This summary will be used to @@ -310,17 +326,24 @@ class SchedulerDatabase { virtual std::list<RetrieveQueueStatistics> getRetrieveQueueStatistics( const cta::common::dataStructures::RetrieveFileQueueCriteria &criteria, const std::set<std::string> & vidsToConsider) = 0; + /** - * Queues the specified request. As the object store has access to the catalogue, - * the best queue (most likely to go, and not disabled can be chosen directly there). - * - * @param rqst The request. - * @param criteria The criteria retrieved from the CTA catalogue to be used to - * decide how to quue the request. - * @param diskSystemName optional disk system name if the destination matches a declared one. - * @param logContext context allowing logging db operation - * @return the selected vid (mostly for logging) + * Clear the retrieve queue statistics cache. + * @param vid the queue vid */ + virtual void clearRetrieveQueueStatisticsCache(const std::string & vid) = 0; + + /** + * Queues the specified request. As the object store has access to the catalogue, + * the best queue (most likely to go, and not disabled can be chosen directly there). + * + * @param rqst The request. + * @param criteria The criteria retrieved from the CTA catalogue to be used to + * decide how to quue the request. + * @param diskSystemName optional disk system name if the destination matches a declared one. + * @param logContext context allowing logging db operation + * @return the selected vid (mostly for logging) + */ struct RetrieveRequestInfo { std::string selectedVid; std::string requestId; @@ -583,6 +606,10 @@ class SchedulerDatabase { /***/ virtual std::unique_ptr<RepackRequest> getNextRepackJobToExpand() = 0; + virtual std::list<std::unique_ptr<RetrieveJob>> getNextRetrieveJobsToTransferBatch(std::string & vid, uint64_t filesRequested, log::LogContext &logContext) = 0; + virtual void requeueRetrieveRequestJobs(std::list<cta::SchedulerDatabase::RetrieveJob *> &jobs, log::LogContext& logContext) = 0; + virtual void reserveRetrieveQueueForCleanup(std::string & vid, std::optional<uint64_t> cleanupHeartBeatValue) = 0; + virtual void tickRetrieveQueueCleanupHeartbeat(std::string & vid) = 0; /*============ Repack management: maintenance process side =========================*/ diff --git a/scheduler/SchedulerDatabaseFactory.hpp b/scheduler/SchedulerDatabaseFactory.hpp index 59e387c317550531fc4e18e3e28f4ce9a8788be6..2556c04aaa584de1c5f9cb49995161064306f766 100644 --- a/scheduler/SchedulerDatabaseFactory.hpp +++ b/scheduler/SchedulerDatabaseFactory.hpp @@ -115,6 +115,30 @@ public: return m_SchedDB->getArchiveJobsFailedSummary(lc); } + std::list<std::unique_ptr<RetrieveJob>> getNextRetrieveJobsToTransferBatch(std::string & vid, uint64_t filesRequested, log::LogContext &lc) override { + return m_SchedDB->getNextRetrieveJobsToTransferBatch(vid, filesRequested, lc); + } + + void requeueRetrieveRequestJobs(std::list<cta::SchedulerDatabase::RetrieveJob *> &jobs, log::LogContext &lc) override { + m_SchedDB->requeueRetrieveRequestJobs(jobs, lc); + } + + void reserveRetrieveQueueForCleanup(std::string & vid, std::optional<uint64_t> cleanupHeartBeatValue) override { + 
m_SchedDB->reserveRetrieveQueueForCleanup(vid, cleanupHeartBeatValue); + } + + void tickRetrieveQueueCleanupHeartbeat(std::string & vid) override { + m_SchedDB->tickRetrieveQueueCleanupHeartbeat(vid); + } + + std::list<RetrieveQueueCleanupInfo> getRetrieveQueuesCleanupInfo(log::LogContext& logContext) override { + return m_SchedDB->getRetrieveQueuesCleanupInfo(logContext); + } + + void setRetrieveQueueCleanupFlag(const std::string& vid, bool val, log::LogContext& lc) override { + m_SchedDB->setRetrieveQueueCleanupFlag(vid, val, lc); + } + std::list<std::unique_ptr<RetrieveJob>> getNextRetrieveJobsToReportBatch(uint64_t filesRequested, log::LogContext &lc) override { return m_SchedDB->getNextRetrieveJobsToReportBatch(filesRequested, lc); } @@ -186,6 +210,10 @@ public: return m_SchedDB->getRetrieveQueueStatistics(criteria, vidsToConsider); } + void clearRetrieveQueueStatisticsCache(const std::string & vid) override { + return m_SchedDB->clearRetrieveQueueStatisticsCache(vid); + } + SchedulerDatabase::RetrieveRequestInfo queueRetrieve(common::dataStructures::RetrieveRequest& rqst, const common::dataStructures::RetrieveFileQueueCriteria &criteria, const std::optional<std::string> diskSystemName, log::LogContext &logContext) override { diff --git a/scheduler/SchedulerTest.cpp b/scheduler/SchedulerTest.cpp index 455872aedd8416d9dcc4a858321d17e3c4a33409..f7330547000443eb2aee46d25eb281e1e86b01d5 100644 --- a/scheduler/SchedulerTest.cpp +++ b/scheduler/SchedulerTest.cpp @@ -67,18 +67,51 @@ const uint32_t PUBLIC_GID = 9754; namespace { +/** + * This structure is used to describe a tape state change during the 'triggerTapeStateChangeValidScenarios' test + */ +struct TriggerTapeStateChangeBehaviour { + cta::common::dataStructures::Tape::State fromState; + cta::common::dataStructures::Tape::State toState; + cta::common::dataStructures::Tape::State observedState; + bool changeRaisedException; + bool cleanupFlagActivated; +}; + /** * This structure is used to parameterize scheduler tests. */ struct SchedulerTestParam { - cta::SchedulerDatabaseFactory &dbFactory; + cta::SchedulerDatabaseFactory &m_dbFactory; + std::optional<TriggerTapeStateChangeBehaviour> m_triggerTapeStateChangeBehaviour; SchedulerTestParam( cta::SchedulerDatabaseFactory &dbFactory): - dbFactory(dbFactory) { - } + m_dbFactory(dbFactory) { + } + + SchedulerTestParam( + cta::SchedulerDatabaseFactory &dbFactory, + TriggerTapeStateChangeBehaviour triggerTapeStateChangeBehaviour): + m_dbFactory(dbFactory), + m_triggerTapeStateChangeBehaviour(triggerTapeStateChangeBehaviour) { + } }; // struct SchedulerTestParam +std::ostream& operator<<(std::ostream& os, const SchedulerTestParam& c) { + if (!c.m_triggerTapeStateChangeBehaviour.has_value()) { + return os << "Test"; + } else { + auto & params = c.m_triggerTapeStateChangeBehaviour.value(); + return os << "{ " + << "\"from\": " << "\"" << cta::common::dataStructures::Tape::stateToString(params.fromState) << "\"" << ", " + << "\"to\": " << "\"" << cta::common::dataStructures::Tape::stateToString(params.toState) << "\"" << ", " + << "\"expected_state\": " << "\"" << cta::common::dataStructures::Tape::stateToString(params.observedState) << "\"" << ", " + << "\"expected_exception\": " << "\"" << (params.changeRaisedException ? "yes" : "no") << "\"" << ", " + << "\"expected_cleanup\": " << "\"" << (params.cleanupFlagActivated ? 
"yes" : "no") << "\"" << " }"; + } +} + } /** @@ -116,7 +149,7 @@ public: using namespace cta; // We do a deep reference to the member as the C++ compiler requires the function to be already defined if called implicitly - const auto &factory = GetParam().dbFactory; + const auto &factory = GetParam().m_dbFactory; const uint64_t nbConns = 1; const uint64_t nbArchiveFileListingConns = 1; //m_catalogue = std::make_unique<catalogue::SchemaCreatingSqliteCatalogue>(m_tempSqliteFile.path(), nbConns); @@ -313,7 +346,6 @@ protected: const std::string s_vendor = "TestVendor"; const std::string s_mountPolicyName = "mount_group"; const std::string s_repackMountPolicyName = "repack_mount_group"; - const bool s_defaultRepackDisabledTapeFlag = false; const bool s_defaultRepackNoRecall = false; const uint64_t s_minFilesToWarrantAMount = 5; const uint64_t s_minBytesToWarrantAMount = 2*1000*1000; @@ -327,6 +359,12 @@ protected: }; // class SchedulerTest +/** + * The trigger tape state change is a parameterized test. In addition to the default parameters, + * it should take a 'TriggerTapeStateChangeBehaviour' reference. + */ +class SchedulerTestTriggerTapeStateChangeBehaviour : public SchedulerTest {}; + TEST_P(SchedulerTest, archive_to_new_file) { using namespace cta; @@ -1998,12 +2036,14 @@ TEST_P(SchedulerTest, repack) { { auto tape = getDefaultTape(); tape.vid = tape1; + tape.state = common::dataStructures::Tape::REPACKING; + tape.stateReason = "Test"; catalogue.createTape(cliId, tape); } //The queueing of a repack request should fail if the tape to repack is not full cta::SchedulerDatabase::QueueRepackRequest qrr(tape1,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackDisabledTapeFlag,s_defaultRepackNoRecall); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); ASSERT_THROW(scheduler.queueRepack(cliId, qrr, lc),cta::exception::UserError); //The queueing of a repack request in a vid that does not exist should throw an exception qrr.m_vid = "NOT_EXIST"; @@ -2028,6 +2068,8 @@ TEST_P(SchedulerTest, repack) { auto tape = getDefaultTape(); catalogue.createTape(s_adminOnAdminHost, tape); tape.vid = tape2; + tape.state = common::dataStructures::Tape::REPACKING; + tape.stateReason = "Test"; tape.full = true; catalogue.createTape(cliId, tape); } @@ -2073,12 +2115,14 @@ TEST_P(SchedulerTest, getNextRepackRequestToExpand) { auto tape = getDefaultTape(); tape.vid = tape1; tape.full = true; + tape.state = common::dataStructures::Tape::REPACKING; + tape.stateReason = "Test"; catalogue.createTape(cliId, tape); } //Queue the first repack request cta::SchedulerDatabase::QueueRepackRequest qrr(tape1,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackDisabledTapeFlag,s_defaultRepackNoRecall); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); scheduler.queueRepack(cliId, qrr, lc); std::string tape2 = "Tape2"; @@ -2087,6 +2131,8 @@ TEST_P(SchedulerTest, getNextRepackRequestToExpand) { auto tape = getDefaultTape(); tape.vid = tape2; tape.full = true; + tape.state = common::dataStructures::Tape::REPACKING; + tape.stateReason = "Test"; catalogue.createTape(cliId, tape); } @@ -2173,6 +2219,8 @@ TEST_P(SchedulerTest, expandRepackRequest) { auto tape = getDefaultTape(); tape.vid = vid; tape.full = true; + tape.state 
= common::dataStructures::Tape::REPACKING; + tape.stateReason = "Test"; catalogue.createTape(s_adminOnAdminHost, tape); } @@ -2227,7 +2275,7 @@ TEST_P(SchedulerTest, expandRepackRequest) { for(uint64_t i = 0; i < nbTapesToRepack ; ++i) { //Queue the first repack request cta::SchedulerDatabase::QueueRepackRequest qrr(allVid.at(i),"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackDisabledTapeFlag,s_defaultRepackNoRecall); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); scheduler.queueRepack(admin,qrr,lc); } scheduler.waitSchedulerDbSubthreadsComplete(); @@ -2494,8 +2542,9 @@ TEST_P(SchedulerTest, expandRepackRequestRetrieveFailed) { { auto tape = getDefaultTape(); tape.vid = vid; - tape.full = true; + tape.state = common::dataStructures::Tape::REPACKING; + tape.stateReason = "Test"; catalogue.createTape(s_adminOnAdminHost, tape); } @@ -2547,7 +2596,7 @@ TEST_P(SchedulerTest, expandRepackRequestRetrieveFailed) { { cta::SchedulerDatabase::QueueRepackRequest qrr(vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackDisabledTapeFlag,s_defaultRepackNoRecall); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); scheduler.queueRepack(admin,qrr,lc); scheduler.waitSchedulerDbSubthreadsComplete(); @@ -2733,6 +2782,8 @@ TEST_P(SchedulerTest, expandRepackRequestArchiveSuccess) { auto tape = getDefaultTape(); tape.vid = vid; tape.full = true; + tape.state = common::dataStructures::Tape::REPACKING; + tape.stateReason = "Test"; catalogue.createTape(s_adminOnAdminHost, tape); } @@ -2793,7 +2844,7 @@ TEST_P(SchedulerTest, expandRepackRequestArchiveSuccess) { { cta::SchedulerDatabase::QueueRepackRequest qrr(vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackDisabledTapeFlag,s_defaultRepackNoRecall); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); scheduler.queueRepack(admin,qrr,lc); scheduler.waitSchedulerDbSubthreadsComplete(); //scheduler.waitSchedulerDbSubthreadsComplete(); @@ -2990,6 +3041,8 @@ TEST_P(SchedulerTest, expandRepackRequestArchiveFailed) { auto tape = getDefaultTape(); tape.vid = vid; tape.full = true; + tape.state = common::dataStructures::Tape::REPACKING; + tape.stateReason = "Test"; catalogue.createTape(s_adminOnAdminHost, tape); } @@ -3049,7 +3102,7 @@ TEST_P(SchedulerTest, expandRepackRequestArchiveFailed) { { cta::SchedulerDatabase::QueueRepackRequest qrr(vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackDisabledTapeFlag,s_defaultRepackNoRecall); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); scheduler.queueRepack(admin,qrr, lc); scheduler.waitSchedulerDbSubthreadsComplete(); @@ -3253,7 +3306,7 @@ TEST_P(SchedulerTest, expandRepackRequestArchiveFailed) { } } -TEST_P(SchedulerTest, expandRepackRequestDisabledTape) { +TEST_P(SchedulerTest, expandRepackRequestRepackingTape) { using namespace cta; using namespace cta::objectstore; unitTests::TempDirectory tempDirectory; @@ -3293,7 +3346,7 @@ TEST_P(SchedulerTest, 
expandRepackRequestDisabledTape) { auto tape = getDefaultTape(); tape.vid = vid; tape.full = true; - tape.state = common::dataStructures::Tape::DISABLED; + tape.state = common::dataStructures::Tape::REPACKING; tape.stateReason = "Test"; catalogue.createTape(s_adminOnAdminHost, tape); } @@ -3341,14 +3394,12 @@ TEST_P(SchedulerTest, expandRepackRequestDisabledTape) { catalogue.filesWrittenToTape(tapeFilesWrittenCopy1); tapeFilesWrittenCopy1.clear(); } - //Test the expanding requeue the Repack after the creation of - //one retrieve request - scheduler.waitSchedulerDbSubthreadsComplete(); + // Queue the repack request for a repacking tape + // Should work { - bool forceDisableTape = false; cta::SchedulerDatabase::QueueRepackRequest qrr(vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,forceDisableTape,s_defaultRepackNoRecall); - ASSERT_THROW(scheduler.queueRepack(admin,qrr,lc),cta::exception::UserError); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); + ASSERT_NO_THROW(scheduler.queueRepack(admin,qrr,lc)); scheduler.waitSchedulerDbSubthreadsComplete(); log::TimingList tl; @@ -3358,13 +3409,103 @@ TEST_P(SchedulerTest, expandRepackRequestDisabledTape) { scheduler.waitSchedulerDbSubthreadsComplete(); auto repackRequestToExpand = scheduler.getNextRepackRequestToExpand(); - ASSERT_EQ(nullptr,repackRequestToExpand); + ASSERT_NE(nullptr,repackRequestToExpand); } - //Queue the repack request with the force disabled tape flag +} + +TEST_P(SchedulerTest, expandRepackRequestRepackingDisabledTape) { + using namespace cta; + using namespace cta::objectstore; + unitTests::TempDirectory tempDirectory; + auto &catalogue = getCatalogue(); + auto &scheduler = getScheduler(); + auto &schedulerDB = getSchedulerDB(); + + cta::objectstore::Backend& backend = schedulerDB.getBackend(); + setupDefaultCatalogue(); +#ifdef STDOUT_LOGGING + log::StdoutLogger dl("dummy", "unitTest"); +#else + log::DummyLogger dl("", ""); +#endif + log::LogContext lc(dl); + + //Create an agent to represent this test process + cta::objectstore::AgentReference agentReference("expandRepackRequestTest", dl); + cta::objectstore::Agent agent(agentReference.getAgentAddress(), backend); + agent.initialize(); + agent.setTimeout_us(0); + agent.insertAndRegisterSelf(lc); + + cta::common::dataStructures::SecurityIdentity admin; + admin.username = "admin_user_name"; + admin.host = "admin_host"; + + //Create a logical library in the catalogue + const bool logicalLibraryIsDisabled = false; + catalogue.createLogicalLibrary(admin, s_libraryName, logicalLibraryIsDisabled, "Create logical library"); + + std::ostringstream ossVid; + ossVid << s_vid << "_" << 1; + std::string vid = ossVid.str(); + + { + auto tape = getDefaultTape(); + tape.vid = vid; + tape.full = true; + tape.state = common::dataStructures::Tape::REPACKING_DISABLED; + tape.stateReason = "Test"; + catalogue.createTape(s_adminOnAdminHost, tape); + } + + //Create a storage class in the catalogue + common::dataStructures::StorageClass storageClass; + storageClass.name = s_storageClassName; + storageClass.nbCopies = 2; + storageClass.comment = "Create storage class"; + + const std::string tapeDrive = "tape_drive"; + const uint64_t nbArchiveFilesPerTape = 10; + const uint64_t archiveFileSize = 2 * 1000 * 1000 * 1000; + + //Simulate the writing of 10 files in 1 tape in the catalogue + std::set<catalogue::TapeItemWrittenPointer> 
tapeFilesWrittenCopy1; + { + uint64_t archiveFileId = 1; + std::string currentVid = vid; + for(uint64_t j = 1; j <= nbArchiveFilesPerTape; ++j) { + std::ostringstream diskFileId; + diskFileId << (12345677 + archiveFileId); + std::ostringstream diskFilePath; + diskFilePath << "/public_dir/public_file_"<<1<<"_"<< j; + auto fileWrittenUP=std::make_unique<cta::catalogue::TapeFileWritten>(); + auto & fileWritten = *fileWrittenUP; + fileWritten.archiveFileId = archiveFileId++; + fileWritten.diskInstance = s_diskInstance; + fileWritten.diskFileId = diskFileId.str(); + + fileWritten.diskFileOwnerUid = PUBLIC_OWNER_UID; + fileWritten.diskFileGid = PUBLIC_GID; + fileWritten.size = archiveFileSize; + fileWritten.checksumBlob.insert(cta::checksum::ADLER32,"1234"); + fileWritten.storageClassName = s_storageClassName; + fileWritten.vid = currentVid; + fileWritten.fSeq = j; + fileWritten.blockId = j * 100; + fileWritten.size = archiveFileSize; + fileWritten.copyNb = 1; + fileWritten.tapeDrive = tapeDrive; + tapeFilesWrittenCopy1.emplace(fileWrittenUP.release()); + } + //update the DB tape + catalogue.filesWrittenToTape(tapeFilesWrittenCopy1); + tapeFilesWrittenCopy1.clear(); + } + // Queue the repack request for a tape in REPACKING_DISABLED state + // Should work { - bool forceDisableTape = true; cta::SchedulerDatabase::QueueRepackRequest qrr(vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,forceDisableTape,s_defaultRepackNoRecall); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); ASSERT_NO_THROW(scheduler.queueRepack(admin,qrr,lc)); scheduler.waitSchedulerDbSubthreadsComplete(); @@ -3425,9 +3566,8 @@ TEST_P(SchedulerTest, expandRepackRequestBrokenTape) { } { - bool forceDisableTape = false; cta::SchedulerDatabase::QueueRepackRequest qrr(vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,forceDisableTape,s_defaultRepackNoRecall); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); ASSERT_THROW(scheduler.queueRepack(admin,qrr,lc),cta::exception::UserError); scheduler.waitSchedulerDbSubthreadsComplete(); @@ -3442,6 +3582,130 @@ } } +TEST_P(SchedulerTest, expandRepackRequestDisabledTape) { + using namespace cta; + using namespace cta::objectstore; + unitTests::TempDirectory tempDirectory; + auto &catalogue = getCatalogue(); + auto &scheduler = getScheduler(); + auto &schedulerDB = getSchedulerDB(); + + cta::objectstore::Backend& backend = schedulerDB.getBackend(); + setupDefaultCatalogue(); +#ifdef STDOUT_LOGGING + log::StdoutLogger dl("dummy", "unitTest"); +#else + log::DummyLogger dl("", ""); +#endif + log::LogContext lc(dl); + + //Create an agent to represent this test process + cta::objectstore::AgentReference agentReference("expandRepackRequestTest", dl); + cta::objectstore::Agent agent(agentReference.getAgentAddress(), backend); + agent.initialize(); + agent.setTimeout_us(0); + agent.insertAndRegisterSelf(lc); + + cta::common::dataStructures::SecurityIdentity admin; + admin.username = "admin_user_name"; + admin.host = "admin_host"; + + //Create a logical library in the catalogue + const bool logicalLibraryIsDisabled = false; + catalogue.createLogicalLibrary(admin, s_libraryName, logicalLibraryIsDisabled, "Create logical library"); + + std::ostringstream ossVid; + ossVid 
<< s_vid << "_" << 1; + std::string vid = ossVid.str(); + + { + auto tape = getDefaultTape(); + tape.vid = vid; + tape.full = true; + tape.state = common::dataStructures::Tape::DISABLED; + tape.stateReason = "Test"; + catalogue.createTape(s_adminOnAdminHost, tape); + } + + { + cta::SchedulerDatabase::QueueRepackRequest qrr(vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly, + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); + ASSERT_THROW(scheduler.queueRepack(admin,qrr,lc),cta::exception::UserError); + scheduler.waitSchedulerDbSubthreadsComplete(); + + log::TimingList tl; + utils::Timer t; + + scheduler.promoteRepackRequestsToToExpand(lc); + scheduler.waitSchedulerDbSubthreadsComplete(); + + auto repackRequestToExpand = scheduler.getNextRepackRequestToExpand(); + ASSERT_EQ(nullptr,repackRequestToExpand); + } +} + +TEST_P(SchedulerTest, expandRepackRequestActiveTape) { + using namespace cta; + using namespace cta::objectstore; + unitTests::TempDirectory tempDirectory; + auto &catalogue = getCatalogue(); + auto &scheduler = getScheduler(); + auto &schedulerDB = getSchedulerDB(); + + cta::objectstore::Backend& backend = schedulerDB.getBackend(); + setupDefaultCatalogue(); +#ifdef STDOUT_LOGGING + log::StdoutLogger dl("dummy", "unitTest"); +#else + log::DummyLogger dl("", ""); +#endif + log::LogContext lc(dl); + + //Create an agent to represent this test process + cta::objectstore::AgentReference agentReference("expandRepackRequestTest", dl); + cta::objectstore::Agent agent(agentReference.getAgentAddress(), backend); + agent.initialize(); + agent.setTimeout_us(0); + agent.insertAndRegisterSelf(lc); + + cta::common::dataStructures::SecurityIdentity admin; + admin.username = "admin_user_name"; + admin.host = "admin_host"; + + //Create a logical library in the catalogue + const bool logicalLibraryIsDisabled = false; + catalogue.createLogicalLibrary(admin, s_libraryName, logicalLibraryIsDisabled, "Create logical library"); + + std::ostringstream ossVid; + ossVid << s_vid << "_" << 1; + std::string vid = ossVid.str(); + + { + auto tape = getDefaultTape(); + tape.vid = vid; + tape.full = true; + tape.state = common::dataStructures::Tape::ACTIVE; + tape.stateReason = "Test"; + catalogue.createTape(s_adminOnAdminHost, tape); + } + + { + cta::SchedulerDatabase::QueueRepackRequest qrr(vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly, + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); + ASSERT_THROW(scheduler.queueRepack(admin,qrr,lc),cta::exception::UserError); + scheduler.waitSchedulerDbSubthreadsComplete(); + + log::TimingList tl; + utils::Timer t; + + scheduler.promoteRepackRequestsToToExpand(lc); + scheduler.waitSchedulerDbSubthreadsComplete(); + + auto repackRequestToExpand = scheduler.getNextRepackRequestToExpand(); + ASSERT_EQ(nullptr,repackRequestToExpand); + } +} +/* Disabled tapes should be ok to be mounted, because it is a transient state TEST_P(SchedulerTest, noMountIsTriggeredWhenTapeIsDisabled) { using namespace cta; using namespace cta::objectstore; @@ -3550,7 +3814,7 @@ TEST_P(SchedulerTest, noMountIsTriggeredWhenTapeIsDisabled) { ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); //enable the tape - catalogue.modifyTapeState(admin,vid,common::dataStructures::Tape::ACTIVE,std::nullopt); + catalogue.modifyTapeState(admin,vid,common::dataStructures::Tape::ACTIVE,std::nullopt,std::nullopt); //A mount should be 
returned by getNextMount ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName,driveName,lc)); @@ -3585,9 +3849,8 @@ TEST_P(SchedulerTest, noMountIsTriggeredWhenTapeIsDisabled) { ASSERT_NE(mountPolicyItor, mountPolicies.end()); - //Queue a Repack Request with --disabledtape flag set to force Retrieve Mount for disabled tape with repack prefix in the mount policy cta::SchedulerDatabase::QueueRepackRequest qrr(vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly, - *mountPolicyItor,true,s_defaultRepackNoRecall); + *mountPolicyItor,s_defaultRepackNoRecall); scheduler.queueRepack(admin,qrr, lc); scheduler.waitSchedulerDbSubthreadsComplete(); @@ -3602,19 +3865,17 @@ TEST_P(SchedulerTest, noMountIsTriggeredWhenTapeIsDisabled) { scheduler.expandRepackRequest(repackRequestToExpand,tl,t,lc); scheduler.waitSchedulerDbSubthreadsComplete(); - /* - * Test expected behaviour for NOW: - * The getNextMount should return a mount as the tape is disabled, there are repack --disabledtape retrieve jobs in it - * and the mount policy name begins with repack - * We will then get the Repack AND USER jobs from the getNextJobBatch - */ + //Test expected behaviour for NOW: + //The getNextMount should return a mount as the tape is disabled, there are repack --disabledtape retrieve jobs in it + //and the mount policy name begins with repack + //We will then get the Repack AND USER jobs from the getNextJobBatch auto nextMount = scheduler.getNextMount(s_libraryName,driveName,lc); ASSERT_NE(nullptr,nextMount); std::unique_ptr<cta::RetrieveMount> retrieveMount; retrieveMount.reset(dynamic_cast<cta::RetrieveMount*>(nextMount.release())); auto jobBatch = retrieveMount->getNextJobBatch(20,20*archiveFileSize,lc); ASSERT_EQ(11,jobBatch.size()); //1 user job + 10 Repack jobs = 11 jobs in the batch -} +}*/ /* TEST_P(SchedulerTest, emptyMountIsTriggeredWhenCancelledRetrieveRequest) { @@ -4011,6 +4272,8 @@ TEST_P(SchedulerTest, expandRepackRequestAddCopiesOnly) { auto tape = getDefaultTape(); tape.vid = vid; tape.full = true; + tape.state = common::dataStructures::Tape::REPACKING; + tape.stateReason = "Test"; catalogue.createTape(s_adminOnAdminHost, tape); } @@ -4092,7 +4355,7 @@ TEST_P(SchedulerTest, expandRepackRequestAddCopiesOnly) { scheduler.waitSchedulerDbSubthreadsComplete(); { cta::SchedulerDatabase::QueueRepackRequest qrr(vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::AddCopiesOnly, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackDisabledTapeFlag,s_defaultRepackNoRecall); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); scheduler.queueRepack(admin,qrr,lc); scheduler.waitSchedulerDbSubthreadsComplete(); @@ -4261,6 +4524,8 @@ TEST_P(SchedulerTest, expandRepackRequestShouldFailIfArchiveRouteMissing) { auto tape = getDefaultTape(); tape.vid = vidCopyNb1; tape.full = true; + tape.state = common::dataStructures::Tape::REPACKING; + tape.stateReason = "Test"; catalogue.createTape(s_adminOnAdminHost, tape); } @@ -4285,6 +4550,8 @@ TEST_P(SchedulerTest, expandRepackRequestShouldFailIfArchiveRouteMissing) { auto tape = getDefaultTape(); tape.vid = vidCopyNb2_source; tape.tapePoolName = tapepool2Name; + tape.state = common::dataStructures::Tape::REPACKING; + tape.stateReason = "Test"; catalogue.createTape(s_adminOnAdminHost, tape); } @@ -4371,7 +4638,7 @@ TEST_P(SchedulerTest, expandRepackRequestShouldFailIfArchiveRouteMissing) { { std::string vid = vidCopyNb2_source; 
cta::SchedulerDatabase::QueueRepackRequest qrr(vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveAndAddCopies, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackDisabledTapeFlag,s_defaultRepackNoRecall); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); scheduler.queueRepack(admin,qrr,lc); scheduler.waitSchedulerDbSubthreadsComplete(); @@ -4439,6 +4706,8 @@ TEST_P(SchedulerTest, expandRepackRequestMoveAndAddCopies){ auto tape = getDefaultTape(); tape.vid = vid; tape.full = true; + tape.state = common::dataStructures::Tape::REPACKING; + tape.stateReason = "Test"; catalogue.createTape(s_adminOnAdminHost, tape); } @@ -4528,7 +4797,7 @@ TEST_P(SchedulerTest, expandRepackRequestMoveAndAddCopies){ scheduler.waitSchedulerDbSubthreadsComplete(); { cta::SchedulerDatabase::QueueRepackRequest qrr(vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveAndAddCopies, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackDisabledTapeFlag,s_defaultRepackNoRecall); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); scheduler.queueRepack(admin,qrr, lc); scheduler.waitSchedulerDbSubthreadsComplete(); @@ -4718,6 +4987,8 @@ TEST_P(SchedulerTest, cancelRepackRequest) { auto tape = getDefaultTape(); tape.vid = vid; tape.full = true; + tape.state = common::dataStructures::Tape::REPACKING; + tape.stateReason = "Test"; catalogue.createTape(s_adminOnAdminHost, tape); } //Create a repack destination tape @@ -4776,7 +5047,7 @@ TEST_P(SchedulerTest, cancelRepackRequest) { { cta::SchedulerDatabase::QueueRepackRequest qrr(vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackDisabledTapeFlag,s_defaultRepackNoRecall); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); scheduler.queueRepack(admin,qrr,lc); scheduler.waitSchedulerDbSubthreadsComplete(); } @@ -4828,7 +5099,7 @@ TEST_P(SchedulerTest, cancelRepackRequest) { //Do another test to check the deletion of ArchiveSubrequests { cta::SchedulerDatabase::QueueRepackRequest qrr(vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackDisabledTapeFlag,s_defaultRepackNoRecall); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); scheduler.queueRepack(admin,qrr,lc); scheduler.waitSchedulerDbSubthreadsComplete(); } @@ -5032,7 +5303,7 @@ TEST_P(SchedulerTest, getNextMountEmptyArchiveForRepackIfNbFilesQueuedIsLessThan ASSERT_EQ(2 * s_minFilesToWarrantAMount,tapeMount->getNbFiles()); } -TEST_P(SchedulerTest, getNextMountBrokenOrDisabledTapeShouldNotReturnAMount) { +TEST_P(SchedulerTest, getNextMountTapeStatesThatShouldNotReturnAMount) { //Queue 2 archive requests in two different logical libraries using namespace cta; @@ -5097,14 +5368,24 @@ TEST_P(SchedulerTest, getNextMountBrokenOrDisabledTapeShouldNotReturnAMount) { scheduler.waitSchedulerDbSubthreadsComplete(); - catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::BROKEN,std::string("Test")); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::BROKEN,std::nullopt,std::string("Test")); + ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, 
driveName, lc)); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,common::dataStructures::Tape::BROKEN,std::nullopt); + ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); + + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::EXPORTED,std::nullopt,std::string("Test")); + ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,common::dataStructures::Tape::EXPORTED,std::nullopt); + ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); + + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::REPACKING_DISABLED,std::nullopt,std::string("Test")); ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); - catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,std::nullopt); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,common::dataStructures::Tape::REPACKING_DISABLED,std::nullopt); ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); - catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::DISABLED,std::string("Test")); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::DISABLED,std::nullopt,std::string("Test")); ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); - catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,std::nullopt); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,common::dataStructures::Tape::DISABLED,std::nullopt); ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); { @@ -5153,14 +5434,24 @@ TEST_P(SchedulerTest, getNextMountBrokenOrDisabledTapeShouldNotReturnAMount) { scheduler.queueRetrieve(s_diskInstance, request, lc); scheduler.waitSchedulerDbSubthreadsComplete(); } - catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::BROKEN,std::string("Test")); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::BROKEN,std::nullopt,std::string("Test")); ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); - catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,std::nullopt); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,common::dataStructures::Tape::BROKEN,std::nullopt); ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); - catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::DISABLED,std::string("Test")); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::EXPORTED,std::nullopt,std::string("Test")); ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); - catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,std::nullopt); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,common::dataStructures::Tape::EXPORTED,std::nullopt); + ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); + + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::REPACKING_DISABLED,std::nullopt,std::string("Test")); + 
ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,common::dataStructures::Tape::REPACKING_DISABLED,std::nullopt); + ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); + + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::DISABLED,std::nullopt,std::string("Test")); + ASSERT_EQ(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); + catalogue.modifyTapeState(s_adminOnAdminHost,tape.vid,common::dataStructures::Tape::ACTIVE,common::dataStructures::Tape::DISABLED,std::nullopt); ASSERT_NE(nullptr,scheduler.getNextMount(s_libraryName, driveName, lc)); } @@ -5201,6 +5492,8 @@ TEST_P(SchedulerTest, repackRetrieveRequestsFailToFetchDiskSystem){ { auto tape = getDefaultTape(); tape.full = true; + tape.state = common::dataStructures::Tape::REPACKING; + tape.stateReason = "Test"; catalogue.createTape(s_adminOnAdminHost, tape); } @@ -5250,7 +5543,7 @@ TEST_P(SchedulerTest, repackRetrieveRequestsFailToFetchDiskSystem){ scheduler.waitSchedulerDbSubthreadsComplete(); cta::SchedulerDatabase::QueueRepackRequest qrr(s_vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackDisabledTapeFlag,s_defaultRepackNoRecall); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackNoRecall); scheduler.queueRepack(admin,qrr, lc); scheduler.waitSchedulerDbSubthreadsComplete(); @@ -5348,6 +5641,8 @@ TEST_P(SchedulerTest, expandRepackRequestShouldThrowIfUseBufferNotRecallButNoDir { auto tape = getDefaultTape(); tape.full = true; + tape.state = common::dataStructures::Tape::REPACKING; + tape.stateReason = "Test"; catalogue.createTape(s_adminOnAdminHost, tape); } @@ -5399,7 +5694,7 @@ TEST_P(SchedulerTest, expandRepackRequestShouldThrowIfUseBufferNotRecallButNoDir bool noRecall = true; cta::SchedulerDatabase::QueueRepackRequest qrr(s_vid,"file://DOES_NOT_EXIST",common::dataStructures::RepackInfo::Type::MoveOnly, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackDisabledTapeFlag,noRecall); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,noRecall); scheduler.queueRepack(admin,qrr, lc); scheduler.waitSchedulerDbSubthreadsComplete(); @@ -5445,6 +5740,8 @@ TEST_P(SchedulerTest, expandRepackRequestShouldNotThrowIfTapeDisabledButNoRecall { auto tape = getDefaultTape(); tape.full = true; + tape.state = common::dataStructures::Tape::REPACKING; + tape.stateReason = "Test"; catalogue.createTape(s_adminOnAdminHost, tape); } @@ -5498,7 +5795,7 @@ TEST_P(SchedulerTest, expandRepackRequestShouldNotThrowIfTapeDisabledButNoRecall tempDirectory.append("/"+s_vid); tempDirectory.mkdir(); cta::SchedulerDatabase::QueueRepackRequest qrr(s_vid,pathRepackBuffer,common::dataStructures::RepackInfo::Type::MoveOnly, - common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,s_defaultRepackDisabledTapeFlag,noRecall); + common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack,noRecall); scheduler.queueRepack(admin,qrr, lc); scheduler.waitSchedulerDbSubthreadsComplete(); @@ -5736,7 +6033,6 @@ TEST_P(SchedulerTest, retrieveArchiveAllTypesMaxDrivesVoInFlightChangeScheduleMo //Two tapes for ArchiveForUser and ArchiveForRepack mounts std::string vid2 = "vid_2"; - std::string vid3 = "vid_3"; auto tape2 = tape1; tape2.vid = vid2; catalogue.createTape(s_adminOnAdminHost, 
tape2); @@ -5746,6 +6042,7 @@ TEST_P(SchedulerTest, retrieveArchiveAllTypesMaxDrivesVoInFlightChangeScheduleMo catalogue.createTapePool(s_adminOnAdminHost,newTapepool,s_vo,1,false,std::nullopt,"Test"); //Create the third tape in the new tapepool + std::string vid3 = "vid_3"; auto tape3 = tape1; tape3.vid = vid3; tape3.tapePoolName = newTapepool; @@ -5862,7 +6159,7 @@ TEST_P(SchedulerTest, retrieveArchiveAllTypesMaxDrivesVoInFlightChangeScheduleMo //Allow one drive for write and trigger the mount catalogue.modifyVirtualOrganizationWriteMaxDrives(s_adminOnAdminHost,s_vo,1); - //Disable the tape 1 to prevent the mount in it (should be the Retrieve) + //Set the tape 1 to disabled state to prevent the mount in it (should be the Retrieve) catalogue.setTapeDisabled(s_adminOnAdminHost,tape1.vid,"test"); ASSERT_TRUE(scheduler.getNextMountDryRun(s_libraryName,drive1,lc)); { @@ -5911,10 +6208,10 @@ TEST_P(SchedulerTest, retrieveArchiveAllTypesMaxDrivesVoInFlightChangeScheduleMo ASSERT_FALSE(scheduler.getNextMountDryRun(s_libraryName,drive3,lc)); //Now allocate one drive for Retrieve catalogue.modifyVirtualOrganizationReadMaxDrives(s_adminOnAdminHost,s_vo,1); - //The retrieve mount should not be triggered as the tape 1 is disabled + //The retrieve mount should not be triggered as the tape 1 state is broken ASSERT_FALSE(scheduler.getNextMountDryRun(s_libraryName,drive3,lc)); //Setting the state of the tape back to active - catalogue.modifyTapeState(s_adminOnAdminHost,tape1.vid,common::dataStructures::Tape::ACTIVE,std::nullopt); + catalogue.modifyTapeState(s_adminOnAdminHost,tape1.vid,common::dataStructures::Tape::ACTIVE,std::nullopt,std::nullopt); //The mount should be triggered on tape 1 ASSERT_TRUE(scheduler.getNextMountDryRun(s_libraryName,drive3,lc)); //The mount should be a Retrieve mount @@ -6291,6 +6588,56 @@ TEST_P(SchedulerTest, getNextMountWithArchiveForUserAndArchiveForRepackShouldRet ASSERT_FALSE(scheduler.getNextMountDryRun(s_libraryName,drive2,lc)); } +// This checks the behaviour of tape state change triggers, covering both allowed and rejected transitions +TEST_P(SchedulerTestTriggerTapeStateChangeBehaviour, triggerTapeStateChangeValidScenarios){ +// Each scenario sets an initial tape state, triggers a state change and checks the resulting state and cleanup flag + using namespace cta; + + Scheduler &scheduler = getScheduler(); + auto &catalogue = getCatalogue(); + auto &schedulerDB = getSchedulerDB(); + + setupDefaultCatalogue(); +#ifdef STDOUT_LOGGING + log::StdoutLogger dl("dummy", "unitTest"); +#else + log::DummyLogger dl("", ""); +#endif + log::LogContext lc(dl); + + if (!GetParam().m_triggerTapeStateChangeBehaviour.has_value()) { + throw exception::Exception("Test needs 'TriggerTapeStateChangeBehaviour' parameters"); + } + + auto triggerTapeStateChangeBehaviour = GetParam().m_triggerTapeStateChangeBehaviour.value(); + + // Create the environment for the test (library + tape) + const std::string libraryComment = "Library comment"; + const bool libraryIsDisabled = false; + catalogue.createLogicalLibrary(s_adminOnAdminHost, s_libraryName, + libraryIsDisabled, libraryComment); + + auto tape = getDefaultTape(); + { + catalogue.createTape(s_adminOnAdminHost, tape); + } + + // Setup initial conditions + schedulerDB.setRetrieveQueueCleanupFlag(tape.vid, false, lc); + catalogue.modifyTapeState(s_adminOnAdminHost, tape.vid,triggerTapeStateChangeBehaviour.fromState,std::nullopt,"Test"); + + // Trigger change + if (triggerTapeStateChangeBehaviour.changeRaisedException) { + ASSERT_THROW(scheduler.triggerTapeStateChange(s_adminOnAdminHost, tape.vid, triggerTapeStateChangeBehaviour.toState, "Test", 
lc), exception::UserError); + } else { + ASSERT_NO_THROW(scheduler.triggerTapeStateChange(s_adminOnAdminHost, tape.vid, triggerTapeStateChangeBehaviour.toState, "Test", lc)); + } + + // Observe results + ASSERT_EQ(catalogue.getTapesByVid(tape.vid).at(tape.vid).state, triggerTapeStateChangeBehaviour.observedState); + ASSERT_EQ(schedulerDB.getRetrieveQueuesCleanupInfo(lc).front().doCleanup, triggerTapeStateChangeBehaviour.cleanupFlagActivated); +} + #undef TEST_MOCK_DB #ifdef TEST_MOCK_DB static cta::MockSchedulerDatabaseFactory mockDbFactory; @@ -6304,6 +6651,62 @@ static cta::OStoreDBFactory<cta::objectstore::BackendVFS> OStoreDBFactoryVFS; INSTANTIATE_TEST_CASE_P(OStoreDBPlusMockSchedulerTestVFS, SchedulerTest, ::testing::Values(SchedulerTestParam(OStoreDBFactoryVFS))); + +using Tape = cta::common::dataStructures::Tape; + +INSTANTIATE_TEST_CASE_P(OStoreDBPlusMockSchedulerTestVFS, SchedulerTestTriggerTapeStateChangeBehaviour, + ::testing::Values( + /* { fromState, toState, observedState, changeRaisedException, cleanupFlagActivated } */ + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::ACTIVE, Tape::ACTIVE, Tape::ACTIVE, false, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::ACTIVE, Tape::DISABLED, Tape::DISABLED, false, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::ACTIVE, Tape::REPACKING, Tape::REPACKING_PENDING, false, true }), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::ACTIVE, Tape::REPACKING_PENDING, Tape::ACTIVE, true, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::ACTIVE, Tape::REPACKING_DISABLED, Tape::ACTIVE, true, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::ACTIVE, Tape::BROKEN, Tape::BROKEN_PENDING, false, true }), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::ACTIVE, Tape::BROKEN_PENDING, Tape::ACTIVE, true, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::ACTIVE, Tape::EXPORTED, Tape::EXPORTED_PENDING, false, true }), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::ACTIVE, Tape::EXPORTED_PENDING, Tape::ACTIVE, true, false}), + + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::DISABLED, Tape::ACTIVE, Tape::ACTIVE, false, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::DISABLED, Tape::DISABLED, Tape::DISABLED, false, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::DISABLED, Tape::REPACKING, Tape::REPACKING_PENDING, false, true }), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::DISABLED, Tape::REPACKING_DISABLED, Tape::DISABLED, true, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::DISABLED, Tape::BROKEN, Tape::BROKEN_PENDING, false, true }), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::DISABLED, Tape::EXPORTED, Tape::EXPORTED_PENDING, false, true }), + + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::REPACKING, Tape::ACTIVE, Tape::ACTIVE, false, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::REPACKING, Tape::DISABLED, Tape::DISABLED, false, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::REPACKING, Tape::REPACKING, Tape::REPACKING, false, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::REPACKING, Tape::REPACKING_DISABLED, Tape::REPACKING_DISABLED, false, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::REPACKING, Tape::BROKEN, Tape::BROKEN_PENDING, false, true }), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::REPACKING, Tape::EXPORTED, Tape::EXPORTED_PENDING, false, true }), + + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::REPACKING_DISABLED, Tape::ACTIVE, Tape::REPACKING_DISABLED, true, false}), + SchedulerTestParam(OStoreDBFactoryVFS, 
{Tape::REPACKING_DISABLED, Tape::DISABLED, Tape::REPACKING_DISABLED, true, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::REPACKING_DISABLED, Tape::REPACKING, Tape::REPACKING, false, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::REPACKING_DISABLED, Tape::REPACKING_DISABLED, Tape::REPACKING_DISABLED, false, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::REPACKING_DISABLED, Tape::BROKEN, Tape::BROKEN_PENDING, false, true }), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::REPACKING_DISABLED, Tape::EXPORTED, Tape::EXPORTED_PENDING, false, true }), + + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::BROKEN, Tape::ACTIVE, Tape::ACTIVE, false, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::BROKEN, Tape::DISABLED, Tape::DISABLED, false, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::BROKEN, Tape::REPACKING, Tape::REPACKING_PENDING, false, true }), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::BROKEN, Tape::REPACKING_DISABLED, Tape::BROKEN, true, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::BROKEN, Tape::BROKEN, Tape::BROKEN, false, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::BROKEN, Tape::EXPORTED, Tape::EXPORTED_PENDING, false, true }), + + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::EXPORTED, Tape::ACTIVE, Tape::ACTIVE, false, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::EXPORTED, Tape::DISABLED, Tape::DISABLED, false, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::EXPORTED, Tape::REPACKING, Tape::REPACKING_PENDING, false, true }), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::EXPORTED, Tape::REPACKING_DISABLED, Tape::EXPORTED, true, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::EXPORTED, Tape::BROKEN, Tape::BROKEN_PENDING, false, true }), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::EXPORTED, Tape::EXPORTED, Tape::EXPORTED, false, false}), + + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::REPACKING_PENDING, Tape::ACTIVE, Tape::REPACKING_PENDING, true, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::BROKEN_PENDING, Tape::ACTIVE, Tape::BROKEN_PENDING, true, false}), + SchedulerTestParam(OStoreDBFactoryVFS, {Tape::EXPORTED_PENDING, Tape::ACTIVE, Tape::EXPORTED_PENDING, true, false}) + )); + #endif #ifdef TEST_RADOS diff --git a/tapeserver/castor/tape/tapeserver/daemon/CleanerSession.cpp b/tapeserver/castor/tape/tapeserver/daemon/CleanerSession.cpp index fbc5171c05dbe1b4819a2d611cab05427b5f9ceb..1f2d09585f3847ec3f0f84532ff888b6412e45d1 100644 --- a/tapeserver/castor/tape/tapeserver/daemon/CleanerSession.cpp +++ b/tapeserver/castor/tape/tapeserver/daemon/CleanerSession.cpp @@ -159,7 +159,12 @@ castor::tape::tapeserver::daemon::Session::EndOfSessionAction try { std::string disabledReason = cta::utils::getCurrentLocalTime("%F %T") + ":" + currentExceptionMsg; - m_catalogue.setTapeDisabled(admin, m_vid, disabledReason); + auto curr_state = m_catalogue.getTapesByVid(m_vid).at(m_vid).state; + if (curr_state == cta::common::dataStructures::Tape::REPACKING) { + m_catalogue.setTapeRepackingDisabled(admin, m_vid, disabledReason); + } else { + m_catalogue.setTapeDisabled(admin, m_vid, disabledReason); + } } catch(cta::exception::Exception &ex) { cta::log::Param param("exceptionMsg", ex.getMessageValue()); params.push_back(param); diff --git a/tapeserver/daemon/MaintenanceHandler.cpp b/tapeserver/daemon/MaintenanceHandler.cpp index e9c97e525bb97aa4a009a2caf74983f2b6f2d449..57057885e8153ab8512fafde38cc305932ce8d81 100644 --- a/tapeserver/daemon/MaintenanceHandler.cpp +++ 
b/tapeserver/daemon/MaintenanceHandler.cpp @@ -26,6 +26,7 @@ #include "rdbms/Login.hpp" #include "scheduler/DiskReportRunner.hpp" #include "scheduler/RepackRequestManager.hpp" +#include "objectstore/QueueCleanupRunner.hpp" #include "scheduler/Scheduler.hpp" #include "tapeserver/daemon/MaintenanceHandler.hpp" @@ -303,6 +304,7 @@ void MaintenanceHandler::exceptionThrowingRunChild(){ // Create the garbage collector and the disk reporter auto gc = sched_db_init.getGarbageCollector(*catalogue); + auto cleanupRunner = sched_db_init.getQueueCleanupRunner(*catalogue, *sched_db); DiskReportRunner diskReportRunner(*scheduler); RepackRequestManager repackRequestManager(*scheduler); @@ -311,7 +313,7 @@ void MaintenanceHandler::exceptionThrowingRunChild(){ "In MaintenanceHandler::exceptionThrowingRunChild(): Repack management is disabled. No repack-related operations will run on this tapeserver."); } - // Run the maintenance in a loop: garbage collector and disk reporter + // Run the maintenance in a loop: queue cleanup, garbage collector and disk reporter try { server::SocketPair::pollMap pollList; pollList["0"]=m_socketPair.get(); @@ -320,6 +322,7 @@ void MaintenanceHandler::exceptionThrowingRunChild(){ utils::Timer t; m_processManager.logContext().log(log::DEBUG, "In MaintenanceHandler::exceptionThrowingRunChild(): About to do a maintenance pass."); + cleanupRunner.runOnePass(m_processManager.logContext()); gc.runOnePass(m_processManager.logContext()); diskReportRunner.runOnePass(m_processManager.logContext()); if(runRepackRequestManager()){ diff --git a/xroot_plugins/XrdCtaTapeLs.hpp b/xroot_plugins/XrdCtaTapeLs.hpp index e98f0c0f77fde0401384694a4dc209f1b5d28afd..df99d6936fea49f3e5a602b0f7bd0db5e97046a8 100644 --- a/xroot_plugins/XrdCtaTapeLs.hpp +++ b/xroot_plugins/XrdCtaTapeLs.hpp @@ -80,7 +80,7 @@ TapeLsStream::TapeLsStream(const RequestMessage &requestMsg, cta::catalogue::Cat searchCriteria.diskFileIds = requestMsg.getOptional(OptionStrList::FILE_ID, &has_any); auto stateOpt = requestMsg.getOptional(OptionString::STATE, &has_any); if(stateOpt){ - searchCriteria.state = common::dataStructures::Tape::stringToState(stateOpt.value()); + searchCriteria.state = common::dataStructures::Tape::stringToState(stateOpt.value(), true); } if(!(requestMsg.has_flag(OptionBoolean::ALL) || has_any)) { throw cta::exception::UserError("Must specify at least one search option, or --all"); diff --git a/xroot_plugins/XrdSsiCtaRequestMessage.cpp b/xroot_plugins/XrdSsiCtaRequestMessage.cpp index 93ab24173fd8e83f91964b365de37ce427dc5a6a..16051ad9a0d880de2e7d0010fa06b953a1c51879 100644 --- a/xroot_plugins/XrdSsiCtaRequestMessage.cpp +++ b/xroot_plugins/XrdSsiCtaRequestMessage.cpp @@ -1618,20 +1618,11 @@ void RequestMessage::processRepack_Add(cta::xrd::Response &response) type = cta::common::dataStructures::RepackInfo::Type::MoveAndAddCopies; } - bool forceDisabledTape = has_flag(OptionBoolean::DISABLED); - - if (forceDisabledTape) { - //repacks on a disabled tape must be from a mount policy whose name starts with repack - if (mountPolicy.name.rfind("repack", 0) != 0) { - throw cta::exception::UserError("--disabledtape requires a mount policy whose name starts with repack"); - } - } - bool noRecall = has_flag(OptionBoolean::NO_RECALL); // Process each item in the list for(auto it = vid_list.begin(); it != vid_list.end(); ++it) { - SchedulerDatabase::QueueRepackRequest repackRequest(*it,bufferURL,type,mountPolicy,forceDisabledTape, noRecall); + SchedulerDatabase::QueueRepackRequest 
repackRequest(*it,bufferURL,type,mountPolicy, noRecall); m_scheduler.queueRepack(m_cliIdentity, repackRequest, m_lc); } @@ -1917,7 +1908,7 @@ void RequestMessage::processTape_Add(cta::xrd::Response &response) tape.state = common::dataStructures::Tape::ACTIVE; } else { //State has been provided by the user, assign it. Will throw an exception if the state provided does not exist. - tape.state = common::dataStructures::Tape::stringToState(state.value()); + tape.state = common::dataStructures::Tape::stringToState(state.value(), true); } tape.stateReason = stateReason; m_catalogue.createTape(m_cliIdentity, tape); @@ -1974,8 +1965,8 @@ void RequestMessage::processTape_Ch(cta::xrd::Response &response) m_catalogue.setTapeFull(m_cliIdentity, vid, full.value()); } if(state){ - auto stateEnumValue = common::dataStructures::Tape::stringToState(state.value()); - m_catalogue.modifyTapeState(m_cliIdentity,vid,stateEnumValue,stateReason); + auto stateEnumValue = common::dataStructures::Tape::stringToState(state.value(), true); + m_scheduler.triggerTapeStateChange(m_cliIdentity,vid,stateEnumValue,stateReason, m_lc); } if (dirty) { m_catalogue.setTapeDirty(m_cliIdentity, vid, dirty.value());