diff --git a/continuousintegration/orchestration/create_instance.sh b/continuousintegration/orchestration/create_instance.sh
index 493e33368433a1691116d0209c0d83820306d670..811f0fd582d5e4db85fd62c35e5e8d5b918996fe 100755
--- a/continuousintegration/orchestration/create_instance.sh
+++ b/continuousintegration/orchestration/create_instance.sh
@@ -287,11 +287,11 @@ echo -n "Waiting for other pods"
 for ((i=0; i<240; i++)); do
   echo -n "."
   # exit loop when all pods are in Running state
-  kubectl get pods -a --namespace=${instance} | grep -v init | tail -n+2 | grep -q -v Running || break
+  kubectl get pods -a --namespace=${instance} | grep -v init | grep -v oracleunittests | tail -n+2 | grep -q -v Running || break
   sleep 1
 done

-if [[ $(kubectl get pods -a --namespace=${instance} | grep -v init | tail -n+2 | grep -q -v Running) ]]; then
+if kubectl get pods -a --namespace=${instance} | grep -v init | grep -v oracleunittests | tail -n+2 | grep -q -v Running; then
   echo "TIMED OUT"
   echo "Some pods have not been initialized properly:"
   kubectl get pods -a --namespace=${instance}
diff --git a/continuousintegration/orchestration/tests/prepare_tests.sh b/continuousintegration/orchestration/tests/prepare_tests.sh
index 35ebeeebd7cbc9561089541cb84eb6c536ea405b..38be1e888496aa2003084ed98486206da87c3ad5 100755
--- a/continuousintegration/orchestration/tests/prepare_tests.sh
+++ b/continuousintegration/orchestration/tests/prepare_tests.sh
@@ -75,6 +75,7 @@ echo "Preparing CTA configuration for tests"
   echo "Cleaning up leftovers from potential previous runs."
   kubectl --namespace ${NAMESPACE} exec ctaeos -- eos rm /eos/ctaeos/cta/*
+  kubectl --namespace ${NAMESPACE} exec ctaeos -- eos find -f /eos/ctaeos/preprod/ | xargs -I{} kubectl --namespace ${NAMESPACE} exec ctaeos -- eos rm -rf {}
   kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin --json tape ls --all | \
     jq -r '.[] | .vid ' | xargs -I{} kubectl --namespace ${NAMESPACE} exec ctacli -- \
     cta-admin tape rm -v {}
diff --git a/continuousintegration/orchestration/tests/repack_systemtest_wrapper.sh b/continuousintegration/orchestration/tests/repack_systemtest_wrapper.sh
index 7fa1929a28c8cea8f5d8c0de4a50f9092b260102..c39b6487a872f5b3dcb158ab287e7ee1a4573b4d 100755
--- a/continuousintegration/orchestration/tests/repack_systemtest_wrapper.sh
+++ b/continuousintegration/orchestration/tests/repack_systemtest_wrapper.sh
@@ -53,198 +53,304 @@ source ./repack_helper.sh
 kubectl -n ${NAMESPACE} cp repack_systemtest.sh client:/root/repack_systemtest.sh
 kubectl -n ${NAMESPACE} cp repack_generate_report.sh client:/root/repack_generate_report.sh

-echo
-echo "***********************************************************"
-echo "STEP 1. Launching a round trip repack \"just move\" request"
-echo "***********************************************************"
+roundTripRepack() {
+  echo
+  echo "***********************************************************"
+  echo "STEP 1. 
Launching a round trip repack \"just move\" request" + echo "***********************************************************" + + VID_TO_REPACK=$(getFirstVidContainingFiles) + if [ "$VID_TO_REPACK" != "null" ] + then + echo + echo "Launching the repack \"just move\" test on VID ${VID_TO_REPACK}" + kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -m -r ${BASE_REPORT_DIRECTORY}/Step1-RoundTripRepack || exit 1 + else + echo "No vid found to repack" + exit 1 + fi -VID_TO_REPACK=$(getFirstVidContainingFiles) -if [ "$VID_TO_REPACK" != "null" ] -then -echo - echo "Launching the repack \"just move\" test on VID ${VID_TO_REPACK}" - kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -m -r ${BASE_REPORT_DIRECTORY}/Step1-RoundTripRepack -p || exit 1 -else - echo "No vid found to repack" - exit 1 -fi + echo "Reclaiming tape ${VID_TO_REPACK}" + kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape reclaim --vid ${VID_TO_REPACK} -echo "Reclaiming tape ${VID_TO_REPACK}" -kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape reclaim --vid ${VID_TO_REPACK} + VID_TO_REPACK=$(getFirstVidContainingFiles) + if [ "$VID_TO_REPACK" != "null" ] + then + echo + echo "Launching the repack \"just move\" test on VID ${VID_TO_REPACK}" + kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -m -r ${BASE_REPORT_DIRECTORY}/Step1-RoundTripRepack || exit 1 + else + echo "No vid found to repack" + exit 1 + fi -VID_TO_REPACK=$(getFirstVidContainingFiles) -if [ "$VID_TO_REPACK" != "null" ] -then -echo - echo "Launching the repack \"just move\" test on VID ${VID_TO_REPACK}" - kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -m -r ${BASE_REPORT_DIRECTORY}/Step1-RoundTripRepack -p || exit 1 -else - echo "No vid found to repack" - exit 1 -fi + echo "Reclaiming tape ${VID_TO_REPACK}" + kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape reclaim --vid ${VID_TO_REPACK} + echo + echo "*******************************************************************" + echo "STEP 1. Launching a round trip repack \"just move\" request TEST OK" + echo "*******************************************************************" +} -echo "Reclaiming tape ${VID_TO_REPACK}" -kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape reclaim --vid ${VID_TO_REPACK} -echo -echo "*******************************************************************" -echo "STEP 1. Launching a round trip repack \"just move\" request TEST OK" -echo "*******************************************************************" -echo -echo "*****************************************************" -echo "STEP 2. 
Launching a Repack Request on a disabled tape"
-echo "*****************************************************"
-
-VID_TO_REPACK=$(getFirstVidContainingFiles)
-
-if [ "$VID_TO_REPACK" != "null" ]
-then
-  echo "Marking the tape ${VID_TO_REPACK} as disabled"
-  kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape ch --disabled true --vid ${VID_TO_REPACK}
-  echo "Waiting 20 seconds so that the RetrieveQueueStatisticsCache is flushed"
-  sleep 20
-  echo "Launching the repack request test on VID ${VID_TO_REPACK}"
-  kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -r ${BASE_REPORT_DIRECTORY}/Step2-RepackDisabledTape -p && echo "The repack request is Complete instead of Failed, it should be failed as the tape is disabled" && exit 1 || echo "REPACK FAILED, the tape is disabled so, Test OK"
-else
-  echo "No vid found to repack"
-  exit 1
-fi;
+repackDisableTape() {
+  echo
+  echo "*****************************************************"
+  echo "STEP 2. Launching a Repack Request on a disabled tape"
+  echo "*****************************************************"

-echo
-echo "Launching the repack request test on VID ${VID_TO_REPACK} with the --disabledtape flag"
-kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -d -r ${BASE_REPORT_DIRECTORY}/Step2-RepackDisabledTape -p || exit 1
+  VID_TO_REPACK=$(getFirstVidContainingFiles)

-echo "Reclaiming tape ${VID_TO_REPACK}"
-kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape reclaim --vid ${VID_TO_REPACK}
+  if [ "$VID_TO_REPACK" != "null" ]
+  then
+    echo "Marking the tape ${VID_TO_REPACK} as disabled"
+    kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape ch --disabled true --vid ${VID_TO_REPACK}
+    echo "Waiting 20 seconds so that the RetrieveQueueStatisticsCache is flushed"
+    sleep 20
+    echo "Launching the repack request test on VID ${VID_TO_REPACK}"
+    kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -r ${BASE_REPORT_DIRECTORY}/Step2-RepackDisabledTape && echo "The repack request is Complete instead of Failed; it should have failed because the tape is disabled" && exit 1 || echo "REPACK FAILED as expected, the tape is disabled. Test OK"
+  else
+    echo "No vid found to repack"
+    exit 1
+  fi

-echo
-echo "*************************************************************"
-echo "STEP 2. Launching a Repack Request on a disabled tape TEST OK"
-echo "*************************************************************"
-echo
-echo "*********************************************"
-echo "STEP 3. 
Testing Repack \"Just move\" workflow" -echo "*********************************************" - -NB_FILES=1152 -kubectl -n ${NAMESPACE} exec client -- bash /root/client_ar.sh -n ${NB_FILES} -s ${FILE_SIZE_KB} -p 100 -d /eos/ctaeos/preprod -v -A || exit 1 + echo + echo "Launching the repack request test on VID ${VID_TO_REPACK} with the --disabledtape flag" + kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -d -r ${BASE_REPORT_DIRECTORY}/Step2-RepackDisabledTape || exit 1 -VID_TO_REPACK=$(getFirstVidContainingFiles) -if [ "$VID_TO_REPACK" != "null" ] -then -echo - echo "Launching the repack test \"just move\" on VID ${VID_TO_REPACK}" - kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -m -r ${BASE_REPORT_DIRECTORY}/Step3-RepackJustMove -p || exit 1 -else - echo "No vid found to repack" - exit 1 -fi + echo "Reclaiming tape ${VID_TO_REPACK}" + kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape reclaim --vid ${VID_TO_REPACK} -echo "Reclaiming tape ${VID_TO_REPACK}" -kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape reclaim --vid ${VID_TO_REPACK} -echo -echo "*****************************************************" -echo "STEP 3. Testing Repack \"Just move\" workflow TEST OK" -echo "*****************************************************" -echo -echo "**************************************************************************" -echo "STEP 4. Testing Repack \"Just Add copies\" workflow with all copies on CTA" -echo "**************************************************************************" - -VID_TO_REPACK=$(getFirstVidContainingFiles) -if [ "$VID_TO_REPACK" != "null" ] -then - echo "Launching the repack \"just add copies\" test on VID ${VID_TO_REPACK} with all copies already on CTA" - kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -a -r ${BASE_REPORT_DIRECTORY}/Step4-JustAddCopiesAllCopiesInCTA || exit 1 -else - echo "No vid found to repack" - exit 1 -fi + echo + echo "*************************************************************" + echo "STEP 2. Launching a Repack Request on a disabled tape TEST OK" + echo "*************************************************************" +} -repackJustAddCopiesResult=`kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json re ls | jq -r ". [] | select (.vid == \"${VID_TO_REPACK}\")"` +archiveFiles() { + NB_FILES=1152 + kubectl -n ${NAMESPACE} exec client -- bash /root/client_ar.sh -n ${NB_FILES} -s ${FILE_SIZE_KB} -p 100 -d /eos/ctaeos/preprod -v -A || exit 1 +} -nbRetrievedFiles=`echo ${repackJustAddCopiesResult} | jq -r ".retrievedFiles"` -nbArchivedFiles=`echo ${repackJustAddCopiesResult} | jq -r ".archivedFiles"` +repackJustMove() { + echo + echo "*********************************************" + echo "STEP 3. Testing Repack \"Just move\" workflow" + echo "*********************************************" -if [ $nbArchivedFiles == 0 ] && [ $nbRetrievedFiles == 0 ] -then - echo "Nb retrieved files = 0 and nb archived files = 0. 
Test OK" -else - echo "Repack \"just add copies\" on VID ${VID_TO_REPACK} failed : nbRetrievedFiles = $nbRetrievedFiles, nbArchivedFiles = $nbArchivedFiles" - exit 1 -fi + VID_TO_REPACK=$(getFirstVidContainingFiles) + if [ "$VID_TO_REPACK" != "null" ] + then + echo + echo "Launching the repack test \"just move\" on VID ${VID_TO_REPACK}" + kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -m -r ${BASE_REPORT_DIRECTORY}/Step3-RepackJustMove || exit 1 + else + echo "No vid found to repack" + exit 1 + fi -echo -echo "**********************************************************************************" -echo "STEP 4. Testing Repack \"Just Add copies\" workflow with all copies on CTA TEST OK" -echo "**********************************************************************************" + echo "Reclaiming tape ${VID_TO_REPACK}" + kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape reclaim --vid ${VID_TO_REPACK} + echo + echo "*****************************************************" + echo "STEP 3. Testing Repack \"Just move\" workflow TEST OK" + echo "*****************************************************" +} -echo -echo "*******************************************************" -echo "STEP 5. Testing Repack \"Move and Add copies\" workflow" -echo "*******************************************************" +repackJustAddCopies() { + echo + echo "**************************************************************************" + echo "STEP 4. Testing Repack \"Just Add copies\" workflow with all copies on CTA" + echo "**************************************************************************" -tapepoolDestination1="ctasystest2" -tapepoolDestination2="ctasystest3" + VID_TO_REPACK=$(getFirstVidContainingFiles) + if [ "$VID_TO_REPACK" != "null" ] + then + echo "Launching the repack \"just add copies\" test on VID ${VID_TO_REPACK} with all copies already on CTA" + kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -a -r ${BASE_REPORT_DIRECTORY}/Step4-JustAddCopiesAllCopiesInCTA || exit 1 + else + echo "No vid found to repack" + exit 1 + fi -echo "Creating two destination tapepool : $tapepoolDestination1 and $tapepoolDestination2" -kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tapepool add --name $tapepoolDestination1 --vo vo --partialtapesnumber 2 --encrypted false --comment "$tapepoolDestination1 tapepool" -kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tapepool add --name $tapepoolDestination2 --vo vo --partialtapesnumber 2 --encrypted false --comment "$tapepoolDestination2 tapepool" -echo "OK" + repackJustAddCopiesResult=`kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json re ls | jq -r ". 
[] | select (.vid == \"${VID_TO_REPACK}\")"`

+  nbRetrievedFiles=`echo ${repackJustAddCopiesResult} | jq -r ".retrievedFiles"`
+  nbArchivedFiles=`echo ${repackJustAddCopiesResult} | jq -r ".archivedFiles"`

+  if [ $nbArchivedFiles == 0 ] && [ $nbRetrievedFiles == 0 ]
+  then
+    echo "Nb retrieved files = 0 and nb archived files = 0. Test OK"
+  else
+    echo "Repack \"just add copies\" on VID ${VID_TO_REPACK} failed: nbRetrievedFiles = $nbRetrievedFiles, nbArchivedFiles = $nbArchivedFiles"
+    exit 1
+  fi

+  echo
+  echo "**********************************************************************************"
+  echo "STEP 4. Testing Repack \"Just Add copies\" workflow with all copies on CTA TEST OK"
+  echo "**********************************************************************************"
+}

+repackCancellation() {
+  echo
+  echo "***********************************"
+  echo "STEP 5. Testing Repack cancellation"
+  echo "***********************************"

+  echo "Putting all drives down"
+  echo 'kubectl -n ${NAMESPACE} exec ctacli -- cta-admin drive down VD.*'
+  kubectl -n ${NAMESPACE} exec ctacli -- cta-admin drive down VD.*

+  VID_TO_REPACK=$(getFirstVidContainingFiles)
+  if [ "$VID_TO_REPACK" != "null" ]
+  then
+    echo
+    echo "Launching a repack request on VID ${VID_TO_REPACK}"
+    kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -m -r ${BASE_REPORT_DIRECTORY}/Step5-RepackCancellation 2>/dev/null &
+    pid=$!
+  else
+    echo "No vid found to repack"
+    exit 1
+  fi

+  echo "Waiting for the launch of the repack request..."
+  returnCode=1
+  while [[ $returnCode != 0 ]]
+  do
+    kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json re ls --vid $VID_TO_REPACK 1> /dev/null 2> /dev/null
+    returnCode=$?
+  done
+  echo "Repack request launched"
+  echo
+  echo "Waiting for the expansion of the repack request..."
+
+  lastFSeqTapeToRepack=`kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json tape ls --vid ${VID_TO_REPACK} | jq -r ".[0] | .lastFseq"`
+  lastExpandedFSeq=0
+  while [[ $lastExpandedFSeq != $lastFSeqTapeToRepack ]]
+  do
+    lastExpandedFSeq=`kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json repack ls --vid ${VID_TO_REPACK} | jq -r ".[0] | .lastExpandedFseq" || echo 0`
+  done
+
+  echo "Expansion finished, deleting the Repack Request"
+  kill $pid
+  kubectl -n ${NAMESPACE} exec ctacli -- cta-admin repack rm --vid ${VID_TO_REPACK} || echo "Error while removing the Repack Request. 
Test FAILED" + + echo + echo "Checking that the Retrieve queue of the VID ${VID_TO_REPACK} contains the Retrieve Requests created from the Repack Request expansion" + nbFilesOnTapeToRepack=`kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json archivefile ls --vid ${VID_TO_REPACK} | jq "length"` + echo "Nb files on tape = $nbFilesOnTapeToRepack" + + nbFilesOnQueue=`kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json showqueues | jq -r ". [] | select(.vid == \"${VID_TO_REPACK}\") | .queuedFiles"` + echo "Nb files on the queue ${VID_TO_REPACK} = $nbFilesOnQueue" + + if [[ $nbFilesOnTapeToRepack != $nbFilesOnQueue ]] + then + echo "Nb files on tape != nb files queued, test Failed" + exit 1 + fi -nbTapePerTapepool=$(($nbVid / $nbTapepool)) + echo "Putting all drives up" + echo 'kubectl -n ${NAMESPACE} exec ctacli -- cta-admin drive up VD.*' + kubectl -n ${NAMESPACE} exec ctacli -- cta-admin drive up VD.* + + WAIT_FOR_EMPTY_QUEUE_TIMEOUT=100 + + SECONDS_PASSED=0 + while test 0 != `kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json showqueues | jq -r ". [] | select(.vid == \"${VID_TO_REPACK}\")" | wc -l`; do + echo "Waiting for the Retrieve queue ${VID_TO_REPACK} to be empty: Seconds passed = $SECONDS_PASSED" + sleep 1 + let SECONDS_PASSED=SECONDS_PASSED+1 + if test ${SECONDS_PASSED} == ${WAIT_FOR_EMPTY_QUEUE_TIMEOUT}; then + echo "Timed out after ${ WAIT_FOR_EMPTY_QUEUE_TIMEOUT} seconds waiting for retrieve queue ${VID_TO_REPACK} to be emptied. Test failed." + exit 1 + fi + done + + echo "Retrieve queue of VID ${VID_TO_REPACK} is empty, test OK" + + echo "*******************************************" + echo "STEP 5. Testing Repack cancellation TEST OK" + echo "*******************************************" +} -allTapepool=`kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json tapepool ls | jq -r ". [] .name"` -read -a allTapepoolTable <<< $allTapepool +repackMoveAndAddCopies() { + echo + echo "*******************************************************" + echo "STEP 6. 
Testing Repack \"Move and Add copies\" workflow" + echo "*******************************************************" + tapepoolDestination1="ctasystest2" + tapepoolDestination2="ctasystest3" -countChanging=0 -tapepoolIndice=1 #We only change the vid of the remaining other tapes + echo "Creating two destination tapepool : $tapepoolDestination1 and $tapepoolDestination2" + kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tapepool add --name $tapepoolDestination1 --vo vo --partialtapesnumber 2 --encrypted false --comment "$tapepoolDestination1 tapepool" + kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tapepool add --name $tapepoolDestination2 --vo vo --partialtapesnumber 2 --encrypted false --comment "$tapepoolDestination2 tapepool" + echo "OK" -for ((i=$(($nbTapePerTapepool+$(($nbVid%$nbTapepool)))); i<$nbVid; i++)); -do - echo "kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape ch --vid ${allVIDTable[$i]} --tapepool ${allTapepoolTable[$tapepoolIndice]}" - kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape ch --vid ${allVIDTable[$i]} --tapepool ${allTapepoolTable[$tapepoolIndice]} - countChanging=$((countChanging + 1)) - if [ $countChanging != 0 ] && [ $((countChanging % nbTapePerTapepool)) == 0 ] - then - tapepoolIndice=$((tapepoolIndice + 1)) - fi -done + echo "Creating archive routes for adding two copies of the file" + kubectl -n ${NAMESPACE} exec ctacli -- cta-admin archiveroute add --instance ctaeos --storageclass ctaStorageClass --copynb 2 --tapepool $tapepoolDestination1 --comment "ArchiveRoute2" + kubectl -n ${NAMESPACE} exec ctacli -- cta-admin archiveroute add --instance ctaeos --storageclass ctaStorageClass --copynb 3 --tapepool $tapepoolDestination2 --comment "ArchiveRoute3" + echo "OK" -echo "OK" + echo "Will change the tapepool of the tapes" -storageClassName=`kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json storageclass ls | jq -r ". [0] | .name"` -instanceName=`kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json storageclass ls | jq -r ". [0] | .diskInstance"` + allVID=`kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json tape ls --all | jq -r ". [] | .vid"` + read -a allVIDTable <<< $allVID -echo "Changing the storage class $storageClassName nb copies" -kubectl -n ${NAMESPACE} exec ctacli -- cta-admin storageclass ch --instance $instanceName --name $storageClassName --copynb 3 -echo "OK" + nbVid=${#allVIDTable[@]} -echo "Putting all drives up" -kubectl -n ${NAMESPACE} exec ctacli -- cta-admin dr up VD.* -echo "OK" + allTapepool=`kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json tapepool ls | jq -r ". [] .name"` -echo "Launching the repack \"Move and add copies\" test on VID ${VID_TO_REPACK}" -kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -t 600 -r ${BASE_REPORT_DIRECTORY}/Step5-MoveAndAddCopies || exit 1 -echo -echo "***************************************************************" -echo "STEP 5. Testing Repack \"Move and Add copies\" workflow TEST OK" -echo "***************************************************************" + read -a allTapepoolTable <<< $allTapepool + + nbTapepool=${#allTapepoolTable[@]} + + nbTapePerTapepool=$(($nbVid / $nbTapepool)) + + allTapepool=`kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json tapepool ls | jq -r ". 
[] .name"` + read -a allTapepoolTable <<< $allTapepool + + + countChanging=0 + tapepoolIndice=1 #We only change the vid of the remaining other tapes + + for ((i=$(($nbTapePerTapepool+$(($nbVid%$nbTapepool)))); i<$nbVid; i++)); + do + echo "kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape ch --vid ${allVIDTable[$i]} --tapepool ${allTapepoolTable[$tapepoolIndice]}" + kubectl -n ${NAMESPACE} exec ctacli -- cta-admin tape ch --vid ${allVIDTable[$i]} --tapepool ${allTapepoolTable[$tapepoolIndice]} + countChanging=$((countChanging + 1)) + if [ $countChanging != 0 ] && [ $((countChanging % nbTapePerTapepool)) == 0 ] + then + tapepoolIndice=$((tapepoolIndice + 1)) + fi + done + + echo "OK" + + storageClassName=`kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json storageclass ls | jq -r ". [0] | .name"` + instanceName=`kubectl -n ${NAMESPACE} exec ctacli -- cta-admin --json storageclass ls | jq -r ". [0] | .diskInstance"` + + echo "Changing the storage class $storageClassName nb copies" + kubectl -n ${NAMESPACE} exec ctacli -- cta-admin storageclass ch --instance $instanceName --name $storageClassName --copynb 3 + echo "OK" + + echo "Putting all drives up" + kubectl -n ${NAMESPACE} exec ctacli -- cta-admin dr up VD.* + echo "OK" + + echo "Launching the repack \"Move and add copies\" test on VID ${VID_TO_REPACK}" + kubectl -n ${NAMESPACE} exec client -- bash /root/repack_systemtest.sh -v ${VID_TO_REPACK} -b ${REPACK_BUFFER_URL} -t 600 -r ${BASE_REPORT_DIRECTORY}/Step6-MoveAndAddCopies || exit 1 + echo + echo "***************************************************************" + echo "STEP 6. Testing Repack \"Move and Add copies\" workflow TEST OK" + echo "***************************************************************" +} + +#Execution of each tests +roundTripRepack +repackDisableTape +archiveFiles +repackJustMove +repackJustAddCopies +repackCancellation +repackMoveAndAddCopies diff --git a/objectstore/GarbageCollector.cpp b/objectstore/GarbageCollector.cpp index 052f4bb2098d62aec6c2664243bc89c31c6499da..bdaebb2e70ad0c19a3a432b132323ddd60fa05d9 100644 --- a/objectstore/GarbageCollector.cpp +++ b/objectstore/GarbageCollector.cpp @@ -441,7 +441,8 @@ void GarbageCollector::OwnedObjectSorter::executeArchiveAlgorithm(std::list<std: .add("copyNb", arup.copyNb) .add("fileId", arup.archiveRequest->getArchiveFile().archiveFileID) .add("exceptionType", debugType); - lc.log(log::ERR, + int logLevel = typeid(e) == typeid(Backend::NoSuchObject) ? log::WARNING : log::ERR; + lc.log(logLevel, "In GarbageCollector::OwnedObjectSorter::lockFetchAndUpdateArchiveJobs(): " "failed to requeue gone/not owned archive job. 
Removed from queue."); } else { diff --git a/objectstore/RepackRequest.cpp b/objectstore/RepackRequest.cpp index a766a86f2cc367be61ff2905bfc5499254c77721..178e7a86a97318ba2f438134416df6b3fc803233 100644 --- a/objectstore/RepackRequest.cpp +++ b/objectstore/RepackRequest.cpp @@ -79,6 +79,7 @@ void RepackRequest::initialize() { m_payload.set_is_expand_finished(false); m_payload.set_is_expand_started(false); m_payload.set_force_disabled_tape(false); + m_payload.set_is_complete(false); // This object is good to go (to storage) m_payloadInterpreted = true; } @@ -194,6 +195,29 @@ common::dataStructures::MountPolicy RepackRequest::getMountPolicy(){ return mpSerDeser; } +void RepackRequest::deleteAllSubrequests() { + checkPayloadWritable(); + std::list<std::unique_ptr<Backend::AsyncDeleter>> deleters; + if(!m_payload.is_complete()){ + try{ + for(auto itor = m_payload.mutable_subrequests()->begin(); itor != m_payload.mutable_subrequests()->end(); ++itor){ + //Avoid the race condition that can happen during expansion of the RepackRequest + auto & subrequest = *itor; + subrequest.set_subrequest_deleted(true); + deleters.emplace_back(m_objectStore.asyncDelete(subrequest.address())); + } + for(auto & deleter: deleters){ + deleter->wait(); + } + } catch(objectstore::Backend::NoSuchObject & ){ /* If object already deleted, do nothing */ } + } +} + +void RepackRequest::setIsComplete(const bool isComplete){ + checkPayloadWritable(); + m_payload.set_is_complete(isComplete); +} + void RepackRequest::setForceDisabledTape(const bool disabledTape){ checkPayloadWritable(); m_payload.set_force_disabled_tape(disabledTape); diff --git a/objectstore/RepackRequest.hpp b/objectstore/RepackRequest.hpp index 9f2601da0f6427db14b1b57ef6dbd9524f10a048..c049551a21cde43154d0bbdcb992d12b4f797b57 100644 --- a/objectstore/RepackRequest.hpp +++ b/objectstore/RepackRequest.hpp @@ -50,6 +50,8 @@ public: cta::SchedulerDatabase::RepackRequest::TotalStatsFiles getTotalStatsFile(); void setMountPolicy(const common::dataStructures::MountPolicy &mp); common::dataStructures::MountPolicy getMountPolicy(); + void deleteAllSubrequests(); + void setIsComplete(const bool complete); /** * Set the flag disabledTape to allow the mounting of a * disabled tape for file retrieval @@ -159,6 +161,6 @@ public: // An owner updater factory. The owner MUST be previousOwner for the update to be executed. 
AsyncOwnerAndStatusUpdater *asyncUpdateOwnerAndStatus(const std::string &owner, const std::string &previousOwner, cta::optional<serializers::RepackRequestStatus> newStatus); -}; + }; }} // namespace cta::objectstore \ No newline at end of file diff --git a/objectstore/Sorter.cpp b/objectstore/Sorter.cpp index 54b0809cca7cadb23be59a19fb58a675ccd16b3d..124cd089a0b70e2abd244d98a8d87b1a09967c05 100644 --- a/objectstore/Sorter.cpp +++ b/objectstore/Sorter.cpp @@ -50,6 +50,8 @@ void Sorter::executeArchiveAlgorithm(const std::string tapePool, std::string& qu for(auto &failedAR: failure.failedElements){ try{ std::rethrow_exception(failedAR.failure); + } catch (const cta::objectstore::Backend::NoSuchObject &ex) { + lc.log(log::WARNING,"In Sorter::executeArchiveAlgorithm(), queueing impossible, jobs do not exist in the objectstore."); } catch(const cta::exception::Exception &e){ uint32_t copyNb = failedAR.element->copyNb; std::get<1>(succeededJobs[copyNb]->jobToQueue).set_exception(std::current_exception()); @@ -167,6 +169,8 @@ void Sorter::executeRetrieveAlgorithm(const std::string vid, std::string& queueA for(auto& failedRR: failure.failedElements){ try{ std::rethrow_exception(failedRR.failure); + } catch (const cta::objectstore::Backend::NoSuchObject &ex) { + lc.log(log::WARNING,"In Sorter::executeRetrieveAlgorithm(), queueing impossible, jobs do not exist in the objectstore."); } catch (const cta::exception::Exception&){ uint32_t copyNb = failedRR.element->copyNb; std::get<1>(succeededJobs[copyNb]->jobToQueue).set_exception(std::current_exception()); diff --git a/objectstore/cta.proto b/objectstore/cta.proto index 9523face634a1920c4781bc165d36f688880a188..25af2633accb4de8cdf1c05ed41ec7f6c58f889f 100644 --- a/objectstore/cta.proto +++ b/objectstore/cta.proto @@ -603,6 +603,7 @@ message RepackRequest { required bool is_expand_started = 11562; required MountPolicy mount_policy = 11563; required bool force_disabled_tape = 11564; + required bool is_complete = 11565; repeated RepackSubRequestPointer subrequests = 11570; } diff --git a/scheduler/ArchiveMount.cpp b/scheduler/ArchiveMount.cpp index 6578013c0b4347288dbd8e9bf40a486c1ae17d10..e6a0ff4ef9db233245438d206bb24fcc551fa93f 100644 --- a/scheduler/ArchiveMount.cpp +++ b/scheduler/ArchiveMount.cpp @@ -18,6 +18,7 @@ #include "scheduler/ArchiveMount.hpp" #include "common/make_unique.hpp" +#include "objectstore/Backend.hpp" //------------------------------------------------------------------------------ // constructor @@ -218,6 +219,18 @@ void cta::ArchiveMount::reportJobsBatchTransferred(std::queue<std::unique_ptr<ct .add("schedulerDbTime", schedulerDbTime) .add("totalTime", catalogueTime + schedulerDbTime + clientReportingTime); logContext.log(log::INFO, "In ArchiveMount::reportJobsBatchWritten(): recorded a batch of archive jobs in metadata."); + } catch (const cta::objectstore::Backend::NoSuchObject& ex){ + cta::log::ScopedParamContainer params(logContext); + params.add("exceptionMessageValue", ex.getMessageValue()); + if (job.get()) { + params.add("fileId", job->archiveFile.archiveFileID) + .add("diskInstance", job->archiveFile.diskInstance) + .add("diskFileId", job->archiveFile.diskFileId) + .add("lastKnownDiskPath", job->archiveFile.diskFileInfo.path) + .add("reportURL", job->reportURL()); + } + const std::string msg_error="In ArchiveMount::reportJobsBatchWritten(): job does not exist in the objectstore."; + logContext.log(cta::log::WARNING, msg_error); } catch(const cta::exception::Exception& e){ cta::log::ScopedParamContainer 
params(logContext); params.add("exceptionMessageValue", e.getMessageValue()); diff --git a/scheduler/OStoreDB/OStoreDB.cpp b/scheduler/OStoreDB/OStoreDB.cpp index 464e73e6efeb2d1f0ae739c3b98c21e2abc7f298..c89bd8115843fdb4afa208f181890f8073dbfa05 100644 --- a/scheduler/OStoreDB/OStoreDB.cpp +++ b/scheduler/OStoreDB/OStoreDB.cpp @@ -1981,6 +1981,12 @@ void OStoreDB::RepackRetrieveSuccessesReportBatch::report(log::LogContext& lc) { rr.subrequest->asyncTransformToArchiveRequest(*m_oStoreDb.m_agentReference) ) }); + } catch (const cta::objectstore::Backend::NoSuchObject &ex){ + log::ScopedParamContainer params(lc); + params.add("fileId", rr.archiveFile.archiveFileID) + .add("subrequestAddress", rr.subrequest->getAddressIfSet()) + .add("exceptionMsg", ex.getMessageValue()); + lc.log(log::WARNING, "In OStoreDB::RepackRetrieveSuccessesReportBatch::report(): failed to asyncTransformToArchiveRequest(), object does not exist in the objectstore."); } catch (exception::Exception & ex) { // We failed to archive the file (to create the request, in fact). So all the copyNbs // can be counted as failed. @@ -2036,6 +2042,12 @@ void OStoreDB::RepackRetrieveSuccessesReportBatch::report(log::LogContext& lc) { atar.subrequestInfo, sorterArchiveRequest }); + } catch (const cta::objectstore::Backend::NoSuchObject &ex){ + log::ScopedParamContainer params(lc); + params.add("fileId", atar.subrequestInfo.archiveFile.archiveFileID) + .add("subrequestAddress", atar.subrequestInfo.subrequest->getAddressIfSet()) + .add("exceptionMsg", ex.getMessageValue()); + lc.log(log::WARNING, "In OStoreDB::RepackRetrieveSuccessesReportBatch::report(): async transformation failed on wait(). Object does not exist in the objectstore"); } catch (exception::Exception & ex) { // We failed to archive the file (to create the request, in fact). So all the copyNbs // can be counted as failed. @@ -2087,12 +2099,18 @@ void OStoreDB::RepackRetrieveSuccessesReportBatch::report(log::LogContext& lc) { try { asyncDeleterAndReqs.push_back({*fs, std::unique_ptr<RetrieveRequest::AsyncJobDeleter>(fs->subrequest->asyncDeleteJob())}); + } catch (const cta::objectstore::Backend::NoSuchObject &ex) { + log::ScopedParamContainer params(lc); + params.add("fileId", fs->archiveFile.archiveFileID) + .add("subrequestAddress", fs->subrequest->getAddressIfSet()) + .add("exceptionMsg", ex.getMessageValue()); + lc.log(log::WARNING, "In OStoreDB::RepackRetrieveSuccessesReportBatch::report(): failed to asyncDelete() retrieve request. Object does not exist in the objectstore."); } catch (cta::exception::Exception &ex) { // Log the failure to delete. log::ScopedParamContainer params(lc); params.add("fileId", fs->archiveFile.archiveFileID) .add("subrequestAddress", fs->subrequest->getAddressIfSet()) - .add("excepitonMsg", ex.getMessageValue()); + .add("exceptionMsg", ex.getMessageValue()); lc.log(log::ERR, "In OStoreDB::RepackRetrieveSuccessesReportBatch::report(): failed to asyncDelete() retrieve request."); } } @@ -2105,6 +2123,13 @@ void OStoreDB::RepackRetrieveSuccessesReportBatch::report(log::LogContext& lc) { params.add("fileId", adar.subrequestInfo.archiveFile.archiveFileID) .add("subrequestAddress", adar.subrequestInfo.subrequest->getAddressIfSet()); lc.log(log::INFO, "In OStoreDB::RepackRetrieveSuccessesReportBatch::report(): deleted retrieve request after failure to transform in archive request."); + } catch (const cta::objectstore::Backend::NoSuchObject & ex) { + // Log the failure to delete. 
+ log::ScopedParamContainer params(lc); + params.add("fileId", adar.subrequestInfo.archiveFile.archiveFileID) + .add("subrequestAddress", adar.subrequestInfo.subrequest->getAddressIfSet()) + .add("exceptionMsg", ex.getMessageValue()); + lc.log(log::WARNING, "In OStoreDB::RepackRetrieveSuccessesReportBatch::report(): async deletion of retrieve request failed on wait(). Object does not exist in the objectstore."); } catch (cta::exception::Exception & ex) { // Log the failure to delete. log::ScopedParamContainer params(lc); @@ -2194,6 +2219,12 @@ void OStoreDB::RepackRetrieveFailureReportBatch::report(log::LogContext& lc){ retrieveRequestsToUnown.push_back(fs.subrequest->getAddressIfSet()); try{ asyncDeleterAndReqs.push_back({fs,std::unique_ptr<RetrieveRequest::AsyncJobDeleter>(fs.subrequest->asyncDeleteJob())}); + } catch (cta::objectstore::Backend::NoSuchObject &ex) { + log::ScopedParamContainer params(lc); + params.add("fileId", fs.archiveFile.archiveFileID) + .add("subrequestAddress", fs.subrequest->getAddressIfSet()) + .add("exceptionMsg", ex.getMessageValue()); + lc.log(log::WARNING, "In OStoreDB::RepackRetrieveFailureReportBatch::report(): failed to asyncDelete() retrieve request. Object does not exist in the objectstore."); } catch (cta::exception::Exception &ex) { // Log the failure to delete. log::ScopedParamContainer params(lc); @@ -2215,6 +2246,13 @@ void OStoreDB::RepackRetrieveFailureReportBatch::report(log::LogContext& lc){ timingList.addToLog(params); lc.log(log::INFO, "In OStoreDB::RepackRetrieveFailureReportBatch::report(): deleted retrieve request after multiple failures"); timingList.clear(); + } catch (const cta::objectstore::Backend::NoSuchObject & ex) { + // Log the failure to delete. + log::ScopedParamContainer params(lc); + params.add("fileId", adar.subrequestInfo.archiveFile.archiveFileID) + .add("subrequestAddress", adar.subrequestInfo.subrequest->getAddressIfSet()) + .add("exceptionMsg", ex.getMessageValue()); + lc.log(log::WARNING, "In OStoreDB::RepackRetrieveFailureReportBatch::report(): async deletion of retrieve request failed on wait(). Object does not exist in the objectstore."); } catch (cta::exception::Exception & ex) { // Log the failure to delete. log::ScopedParamContainer params(lc); @@ -2594,7 +2632,7 @@ void OStoreDB::cancelRepack(const std::string& vid, log::LogContext & lc) { throw exception::Exception("In OStoreDB::getRepackInfo(): unexpected vid when reading request"); // We now have a hold of the repack request. // We should delete all the file level subrequests. 
- // TODO + rr.deleteAllSubrequests(); // And then delete the request std::string repackRequestOwner = rr.getOwner(); rr.remove(); @@ -3933,6 +3971,13 @@ void OStoreDB::RetrieveMount::flushAsyncSuccessReports(std::list<cta::SchedulerD } mountPolicy = osdbJob->m_jobSucceedForRepackReporter->m_MountPolicy; jobsToRequeueForRepackMap[osdbJob->m_repackInfo.repackRequestAddress].emplace_back(osdbJob); + } catch (cta::objectstore::Backend::NoSuchObject &ex){ + log::ScopedParamContainer params(lc); + params.add("fileId", osdbJob->archiveFile.archiveFileID) + .add("requestObject", osdbJob->m_retrieveRequest.getAddressIfSet()) + .add("exceptionMessage", ex.getMessageValue()); + lc.log(log::WARNING, + "In OStoreDB::RetrieveMount::flushAsyncSuccessReports(): async status update failed, job does not exist in the objectstore."); } catch (cta::exception::Exception & ex) { log::ScopedParamContainer params(lc); params.add("fileId", osdbJob->archiveFile.archiveFileID) @@ -4006,16 +4051,21 @@ void OStoreDB::RetrieveMount::flushAsyncSuccessReports(std::list<cta::SchedulerD params.add("fileId", requestToJobMap.at(fe.element->retrieveRequest)->archiveFile.archiveFileID) .add("copyNb", fe.element->copyNb) .add("requestObject", fe.element->retrieveRequest->getAddressIfSet()); + std::string logMessage = "In OStoreDB::RetrieveMount::flushAsyncSuccessReports(): failed to queue request to report for repack." + " Leaving request to be garbage collected."; + int priority = log::ERR; try { std::rethrow_exception(fe.failure); + } catch (cta::objectstore::Backend::NoSuchObject &ex) { + priority=log::WARNING; + logMessage = "In OStoreDB::RetrieveMount::flushAsyncSuccessReports(): failed to queue request to report for repack, job does not exist in the objectstore."; } catch (cta::exception::Exception & ex) { params.add("exeptionMessage", ex.getMessageValue()); } catch (std::exception & ex) { params.add("exceptionWhat", ex.what()) .add("exceptionTypeName", typeid(ex).name()); } - lc.log(log::ERR, "In OStoreDB::RetrieveMount::flushAsyncSuccessReports(): failed to queue request to report for repack." - "Leaving request to be garbage collected."); + lc.log(priority, logMessage); // Add the failed request to the set. 
failedElements.insert(fe.element->retrieveRequest->getAddressIfSet()); } @@ -4168,6 +4218,11 @@ void OStoreDB::ArchiveMount::setJobBatchTransferred(std::list<std::unique_ptr<ct .add("jobs", list.second.size()) .add("enqueueTime", t.secs()); lc.log(log::INFO, "In OStoreDB::ArchiveMount::setJobBatchTransferred(): queued a batch of requests for reporting to repack."); + } catch (cta::objectstore::Backend::NoSuchObject &ex) { + log::ScopedParamContainer params(lc); + params.add("tapeVid", list.first) + .add("exceptionMSG", ex.getMessageValue()); + lc.log(log::WARNING, "In OStoreDB::ArchiveMount::setJobBatchTransferred(): failed to queue a batch of requests for reporting to repack, jobs do not exist in the objectstore."); } catch (cta::exception::Exception & ex) { log::ScopedParamContainer params(lc); params.add("tapeVid", list.first) @@ -4632,6 +4687,13 @@ void OStoreDB::RepackArchiveReportBatch::report(log::LogContext& lc){ newStatus)), sri}); } + } catch(const cta::objectstore::Backend::NoSuchObject &ex) { + // Log the error + log::ScopedParamContainer params(lc); + params.add("fileId", sri.archiveFile.archiveFileID) + .add("subrequestAddress", sri.subrequest->getAddressIfSet()) + .add("exceptionMsg", ex.getMessageValue()); + lc.log(log::WARNING, "In OStoreDB::RepackArchiveReportBatch::report(): failed to asyncUpdateJobOwner(), object does not exist in the objectstore."); } catch (cta::exception::Exception & ex) { // Log the error log::ScopedParamContainer params(lc); @@ -4643,6 +4705,13 @@ void OStoreDB::RepackArchiveReportBatch::report(log::LogContext& lc){ } else { try { deletersList.push_back({std::unique_ptr<objectstore::ArchiveRequest::AsyncRequestDeleter>(ar.asyncDeleteRequest()), sri}); + } catch(const cta::objectstore::Backend::NoSuchObject &ex) { + // Log the error + log::ScopedParamContainer params(lc); + params.add("fileId", sri.archiveFile.archiveFileID) + .add("subrequestAddress", sri.subrequest->getAddressIfSet()) + .add("exceptionMsg", ex.getMessageValue()); + lc.log(log::WARNING, "In OStoreDB::RepackArchiveReportBatch::report(): failed to asyncDelete(), object does not exist in the objectstore."); } catch (cta::exception::Exception & ex) { // Log the error log::ScopedParamContainer params(lc); @@ -4680,6 +4749,13 @@ void OStoreDB::RepackArchiveReportBatch::report(log::LogContext& lc){ .add("exceptionMsg", ex.getMessageValue()); lc.log(log::ERR, "In OStoreDB::RepackArchiveReportBatch::report(): async deletion of disk file failed."); } + } catch(const cta::objectstore::Backend::NoSuchObject &ex){ + // Log the error + log::ScopedParamContainer params(lc); + params.add("fileId", d.subrequestInfo.archiveFile.archiveFileID) + .add("subrequestAddress", d.subrequestInfo.subrequest->getAddressIfSet()) + .add("exceptionMsg", ex.getMessageValue()); + lc.log(log::WARNING, "In OStoreDB::RepackArchiveReportBatch::report(): async deletion failed. 
Object does not exist in the objectstore."); } catch (cta::exception::Exception & ex) { // Log the error log::ScopedParamContainer params(lc); @@ -4707,22 +4783,32 @@ void OStoreDB::RepackArchiveReportBatch::report(log::LogContext& lc){ lc.log(log::ERR, "In OStoreDB::RepackArchiveReportBatch::report(): async file not deleted."); } } - if(repackRequestStatus == objectstore::serializers::RepackRequestStatus::RRS_Complete){ - //Repack Request is complete, delete the directory in the buffer - cta::disk::DirectoryFactory directoryFactory; - std::string directoryPath = cta::utils::getEnclosingPath(bufferURL); - std::unique_ptr<cta::disk::Directory> directory; - try{ - directory.reset(directoryFactory.createDirectory(directoryPath)); - directory->rmdir(); - log::ScopedParamContainer params(lc); - params.add("repackRequestAddress", m_repackRequest.getAddressIfSet()); - lc.log(log::INFO, "In OStoreDB::RepackArchiveReportBatch::report(): deleted the "+directoryPath+" directory"); - } catch (const cta::exception::Exception &ex){ - log::ScopedParamContainer params(lc); - params.add("repackRequestAddress", m_repackRequest.getAddressIfSet()) - .add("exceptionMsg", ex.getMessageValue()); - lc.log(log::ERR, "In OStoreDB::RepackArchiveReportBatch::report(): failed to remove the "+directoryPath+" directory"); + if(repackRequestStatus == objectstore::serializers::RepackRequestStatus::RRS_Complete || repackRequestStatus == objectstore::serializers::RepackRequestStatus::RRS_Failed) + { + //Repack request is Complete or Failed, set it as complete + //to tell the cta-admin repack rm command not to try to delete all the subrequests again + cta::ScopedExclusiveLock sel(m_repackRequest); + m_repackRequest.fetch(); + m_repackRequest.setIsComplete(true); + m_repackRequest.commit(); + + if(repackRequestStatus == objectstore::serializers::RepackRequestStatus::RRS_Complete){ + //Repack Request is complete, delete the directory in the buffer + cta::disk::DirectoryFactory directoryFactory; + std::string directoryPath = cta::utils::getEnclosingPath(bufferURL); + std::unique_ptr<cta::disk::Directory> directory; + try{ + directory.reset(directoryFactory.createDirectory(directoryPath)); + directory->rmdir(); + log::ScopedParamContainer params(lc); + params.add("repackRequestAddress", m_repackRequest.getAddressIfSet()); + lc.log(log::INFO, "In OStoreDB::RepackArchiveReportBatch::report(): deleted the "+directoryPath+" directory"); + } catch (const cta::exception::Exception &ex){ + log::ScopedParamContainer params(lc); + params.add("repackRequestAddress", m_repackRequest.getAddressIfSet()) + .add("exceptionMsg", ex.getMessageValue()); + lc.log(log::ERR, "In OStoreDB::RepackArchiveReportBatch::report(): failed to remove the "+directoryPath+" directory"); + } } } for (auto & jou: jobOwnerUpdatersList) { @@ -4732,6 +4818,13 @@ void OStoreDB::RepackArchiveReportBatch::report(log::LogContext& lc){ params.add("fileId", jou.subrequestInfo.archiveFile.archiveFileID) .add("subrequestAddress", jou.subrequestInfo.subrequest->getAddressIfSet()); lc.log(log::INFO, "In OStoreDB::RepackArchiveReportBatch::report(): async updated job."); + } catch(const cta::objectstore::Backend::NoSuchObject &ex){ + // Log the error + log::ScopedParamContainer params(lc); + params.add("fileId", jou.subrequestInfo.archiveFile.archiveFileID) + .add("subrequestAddress", jou.subrequestInfo.subrequest->getAddressIfSet()) + .add("exceptionMsg", ex.getMessageValue()); + lc.log(log::WARNING, "In OStoreDB::RepackArchiveReportBatch::report(): async job update failed. 
Object does not exist in the objectstore."); } catch (cta::exception::Exception & ex) { // Log the error log::ScopedParamContainer params(lc); diff --git a/scheduler/RepackRequestManager.cpp b/scheduler/RepackRequestManager.cpp index 7f7146793d46699bcea90dc380e284149840d772..b87352e0b39b75371bf45dd2aa138f39f80848d6 100644 --- a/scheduler/RepackRequestManager.cpp +++ b/scheduler/RepackRequestManager.cpp @@ -40,14 +40,19 @@ void RepackRequestManager::runOnePass(log::LogContext& lc) { if(repackRequest != nullptr){ //We have a RepackRequest that has the status ToExpand, expand it try{ - m_scheduler.expandRepackRequest(repackRequest,timingList,t,lc); - } catch (const ExpandRepackRequestException& ex){ - lc.log(log::ERR,ex.what()); - repackRequest->fail(); - } catch (const cta::exception::Exception &e){ - lc.log(log::ERR,e.what()); - repackRequest->fail(); - throw(e); + try{ + m_scheduler.expandRepackRequest(repackRequest,timingList,t,lc); + } catch (const ExpandRepackRequestException& ex){ + lc.log(log::ERR,ex.what()); + repackRequest->fail(); + } catch (const cta::exception::Exception &e){ + lc.log(log::ERR,e.what()); + repackRequest->fail(); + throw(e); + } + } catch (const cta::objectstore::Backend::NoSuchObject &ex){ + //In case the repack request is deleted during expansion, avoid a segmentation fault of the tapeserver + lc.log(log::WARNING,"In RepackRequestManager::runOnePass(), RepackRequest object does not exist in the objectstore"); } } } diff --git a/scheduler/RetrieveMount.cpp b/scheduler/RetrieveMount.cpp index 6ed0f8f5ec0356e2fa900057d1b840fda2496e9e..11f5b0a1fa539c6e2fa084f5787a7ee638fb1718 100644 --- a/scheduler/RetrieveMount.cpp +++ b/scheduler/RetrieveMount.cpp @@ -19,6 +19,7 @@ #include "scheduler/RetrieveMount.hpp" #include "common/Timer.hpp" #include "common/log/TimingList.hpp" +#include "objectstore/Backend.hpp" #include "disk/DiskSystem.hpp" //------------------------------------------------------------------------------ @@ -199,8 +200,19 @@ void cta::RetrieveMount::flushAsyncSuccessReports(std::queue<std::unique_ptr<cta .add("bytes", bytes); tl.addToLog(params); //TODO : if repack, add log to say that the jobs were marked as RJS_Succeeded - logContext.log(cta::log::DEBUG,"In RetrieveMout::waitAndFinishSettingJobsBatchRetrieved(): deleted complete retrieve jobs."); + logContext.log(cta::log::DEBUG,"In cta::RetrieveMount::flushAsyncSuccessReports(): deleted complete retrieve jobs."); } + } catch(const cta::objectstore::Backend::NoSuchObject &ex){ + cta::log::ScopedParamContainer params(logContext); + params.add("exceptionMessageValue", ex.getMessageValue()); + if (job.get()) { + params.add("fileId", job->archiveFile.archiveFileID) + .add("diskInstance", job->archiveFile.diskInstance) + .add("diskFileId", job->archiveFile.diskFileId) + .add("lastKnownDiskPath", job->archiveFile.diskFileInfo.path); + } + const std::string msg_error="In cta::RetrieveMount::flushAsyncSuccessReports(): unable to access jobs, they do not exist in the objectstore."; + logContext.log(cta::log::WARNING, msg_error); } catch(const cta::exception::Exception& e){ cta::log::ScopedParamContainer params(logContext); params.add("exceptionMessageValue", e.getMessageValue()); @@ -210,7 +222,7 @@ void cta::RetrieveMount::flushAsyncSuccessReports(std::queue<std::unique_ptr<cta .add("diskFileId", job->archiveFile.diskFileId) .add("lastKnownDiskPath", job->archiveFile.diskFileInfo.path); } - const std::string msg_error="In RetrieveMount::waitAndFinishSettingJobsBatchRetrieved(): got an exception"; + const 
std::string msg_error="In cta::RetrieveMount::flushAsyncSuccessReports(): got an exception"; logContext.log(cta::log::ERR, msg_error); logContext.logBacktrace(cta::log::ERR, e.backtrace()); // Failing here does not really affect the session so we can carry on. Reported jobs are reported, non-reported ones @@ -224,7 +236,7 @@ void cta::RetrieveMount::flushAsyncSuccessReports(std::queue<std::unique_ptr<cta .add("diskFileId", job->archiveFile.diskFileId) .add("lastKnownDiskPath", job->archiveFile.diskFileInfo.path); } - const std::string msg_error="In RetrieveMount::reportJobsBatchWritten(): got an standard exception"; + const std::string msg_error="In cta::RetrieveMount::flushAsyncSuccessReports(): got an standard exception"; logContext.log(cta::log::ERR, msg_error); // Failing here does not really affect the session so we can carry on. Reported jobs are reported, non-reported ones // will be retried. diff --git a/scheduler/SchedulerTest.cpp b/scheduler/SchedulerTest.cpp index 0d35428ba4da9a1f735bab6af76b7e52c73509b1..3f27f275606bb96b3036d2c569a6109854763f19 100644 --- a/scheduler/SchedulerTest.cpp +++ b/scheduler/SchedulerTest.cpp @@ -3482,6 +3482,240 @@ TEST_P(SchedulerTest, expandRepackRequestMoveAndAddCopies){ } } +TEST_P(SchedulerTest, cancelRepackRequest) { + using namespace cta; + using namespace cta::objectstore; + unitTests::TempDirectory tempDirectory; + auto &catalogue = getCatalogue(); + auto &scheduler = getScheduler(); + auto &schedulerDB = getSchedulerDB(); + cta::objectstore::Backend& backend = schedulerDB.getBackend(); + setupDefaultCatalogue(); + +#ifdef STDOUT_LOGGING + log::StdoutLogger dl("dummy", "unitTest"); +#else + log::DummyLogger dl("", ""); +#endif + log::LogContext lc(dl); + + //Create an agent to represent this test process + cta::objectstore::AgentReference agentReference("expandRepackRequestTest", dl); + cta::objectstore::Agent agent(agentReference.getAgentAddress(), backend); + agent.initialize(); + agent.setTimeout_us(0); + agent.insertAndRegisterSelf(lc); + + const uint64_t capacityInBytes = (uint64_t)10 * 1000 * 1000 * 1000 * 1000; + const bool disabledValue = false; + const bool fullValue = true; + const bool readOnlyValue = false; + const std::string comment = "Create tape"; + cta::common::dataStructures::SecurityIdentity admin; + admin.username = "admin_user_name"; + admin.host = "admin_host"; + + //Create a logical library in the catalogue + const bool libraryIsDisabled = false; + catalogue.createLogicalLibrary(admin, s_libraryName, libraryIsDisabled, "Create logical library"); + + std::ostringstream ossVid; + ossVid << s_vid << "_" << 1; + std::string vid = ossVid.str(); + catalogue.createTape(s_adminOnAdminHost,vid, s_mediaType, s_vendor, s_libraryName, s_tapePoolName, capacityInBytes, + disabledValue, fullValue, readOnlyValue, comment); + //Create a repack destination tape + std::string vidDestination = "vidDestination"; + catalogue.createTape(s_adminOnAdminHost,vidDestination, s_mediaType, s_vendor, s_libraryName, s_tapePoolName, capacityInBytes, + disabledValue, false, readOnlyValue, comment); + + //Create a storage class in the catalogue + common::dataStructures::StorageClass storageClass; + storageClass.diskInstance = s_diskInstance; + storageClass.name = s_storageClassName; + storageClass.nbCopies = 2; + storageClass.comment = "Create storage class"; + + const std::string tapeDrive = "tape_drive"; + const uint64_t nbArchiveFilesPerTape = 10; + const uint64_t archiveFileSize = 2 * 1000 * 1000 * 1000; + + //Simulate the writing of 10 files per 
tape in the catalogue + std::set<catalogue::TapeItemWrittenPointer> tapeFilesWrittenCopy1; + { + uint64_t archiveFileId = 1; + std::string currentVid = vid; + for(uint64_t j = 1; j <= nbArchiveFilesPerTape; ++j) { + std::ostringstream diskFileId; + diskFileId << (12345677 + archiveFileId); + std::ostringstream diskFilePath; + diskFilePath << "/public_dir/public_file_"<<1<<"_"<< j; + auto fileWrittenUP=cta::make_unique<cta::catalogue::TapeFileWritten>(); + auto & fileWritten = *fileWrittenUP; + fileWritten.archiveFileId = archiveFileId++; + fileWritten.diskInstance = storageClass.diskInstance; + fileWritten.diskFileId = diskFileId.str(); + fileWritten.diskFilePath = diskFilePath.str(); + fileWritten.diskFileOwnerUid = PUBLIC_OWNER_UID; + fileWritten.diskFileGid = PUBLIC_GID; + fileWritten.size = archiveFileSize; + fileWritten.checksumBlob.insert(cta::checksum::ADLER32,"1234"); + fileWritten.storageClassName = s_storageClassName; + fileWritten.vid = currentVid; + fileWritten.fSeq = j; + fileWritten.blockId = j * 100; + fileWritten.size = archiveFileSize; + fileWritten.copyNb = 1; + fileWritten.tapeDrive = tapeDrive; + tapeFilesWrittenCopy1.emplace(fileWrittenUP.release()); + } + //update the DB tape + catalogue.filesWrittenToTape(tapeFilesWrittenCopy1); + tapeFilesWrittenCopy1.clear(); + } + //Test the expandRepackRequest method + scheduler.waitSchedulerDbSubthreadsComplete(); + + { + scheduler.queueRepack(admin,vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly,common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack, s_defaultRepackDisabledTapeFlag,lc); + scheduler.waitSchedulerDbSubthreadsComplete(); + } + + { + cta::objectstore::RootEntry re(backend); + re.fetchNoLock(); + + std::string repackQueueAddress = re.getRepackQueueAddress(RepackQueueType::Pending); + + cta::objectstore::RepackQueuePending repackQueuePending(repackQueueAddress,backend); + repackQueuePending.fetchNoLock(); + + std::string repackRequestAddress = repackQueuePending.getCandidateList(1,{}).candidates.front().address; + + log::TimingList tl; + utils::Timer t; + + scheduler.promoteRepackRequestsToToExpand(lc); + scheduler.waitSchedulerDbSubthreadsComplete(); + + auto repackRequestToExpand = scheduler.getNextRepackRequestToExpand(); + + scheduler.waitSchedulerDbSubthreadsComplete(); + + ASSERT_EQ(vid,repackRequestToExpand->getRepackInfo().vid); + + scheduler.expandRepackRequest(repackRequestToExpand,tl,t,lc); + + scheduler.waitSchedulerDbSubthreadsComplete(); + re.fetchNoLock(); + //Get all retrieve subrequests in the RetrieveQueue + cta::objectstore::RetrieveQueue rq(re.getRetrieveQueueAddress(vid, cta::objectstore::JobQueueType::JobsToTransferForUser),backend); + rq.fetchNoLock(); + for(auto & job: rq.dumpJobs()){ + //Check that subrequests exist in the objectstore + cta::objectstore::RetrieveRequest retrieveReq(job.address,backend); + ASSERT_NO_THROW(retrieveReq.fetchNoLock()); + } + scheduler.cancelRepack(s_adminOnAdminHost,vid,lc); + //Check that the subrequests are deleted from the objectstore + for(auto & job: rq.dumpJobs()){ + cta::objectstore::RetrieveRequest retrieveReq(job.address,backend); + ASSERT_THROW(retrieveReq.fetchNoLock(),cta::objectstore::Backend::NoSuchObject); + } + //Check that the RepackRequest is deleted from the objectstore + ASSERT_THROW(cta::objectstore::RepackRequest(repackRequestAddress,backend).fetchNoLock(),cta::objectstore::Backend::NoSuchObject); + } + //Do another test to check the deletion of ArchiveSubrequests + { + 
scheduler.queueRepack(admin,vid,"file://"+tempDirectory.path(),common::dataStructures::RepackInfo::Type::MoveOnly,common::dataStructures::MountPolicy::s_defaultMountPolicyForRepack, s_defaultRepackDisabledTapeFlag,lc); + scheduler.waitSchedulerDbSubthreadsComplete(); + } + + { + cta::objectstore::RootEntry re(backend); + re.fetchNoLock(); + + std::string repackQueueAddress = re.getRepackQueueAddress(RepackQueueType::Pending); + + cta::objectstore::RepackQueuePending repackQueuePending(repackQueueAddress,backend); + repackQueuePending.fetchNoLock(); + + std::string repackRequestAddress = repackQueuePending.getCandidateList(1,{}).candidates.front().address; + + log::TimingList tl; + utils::Timer t; + + scheduler.promoteRepackRequestsToToExpand(lc); + scheduler.waitSchedulerDbSubthreadsComplete(); + + auto repackRequestToExpand = scheduler.getNextRepackRequestToExpand(); + + scheduler.waitSchedulerDbSubthreadsComplete(); + + scheduler.expandRepackRequest(repackRequestToExpand,tl,t,lc); + + scheduler.waitSchedulerDbSubthreadsComplete(); + { + std::unique_ptr<cta::TapeMount> mount; + mount.reset(scheduler.getNextMount(s_libraryName, "drive0", lc).release()); + std::unique_ptr<cta::RetrieveMount> retrieveMount; + retrieveMount.reset(dynamic_cast<cta::RetrieveMount*>(mount.release())); + std::unique_ptr<cta::RetrieveJob> retrieveJob; + + std::list<std::unique_ptr<cta::RetrieveJob>> executedJobs; + //For each tape we will see if the retrieve jobs are not null + for(uint64_t j = 1; j<=nbArchiveFilesPerTape; ++j) + { + auto jobBatch = retrieveMount->getNextJobBatch(1,archiveFileSize,lc); + retrieveJob.reset(jobBatch.front().release()); + executedJobs.push_back(std::move(retrieveJob)); + } + //Now, report the retrieve jobs to be completed + castor::tape::tapeserver::daemon::RecallReportPacker rrp(retrieveMount.get(),lc); + + rrp.startThreads(); + + //Report all jobs as succeeded + for(auto it = executedJobs.begin(); it != executedJobs.end(); ++it) + { + rrp.reportCompletedJob(std::move(*it)); + } + + rrp.setDiskDone(); + rrp.setTapeDone(); + + rrp.reportDriveStatus(cta::common::dataStructures::DriveStatus::Unmounting); + + rrp.reportEndOfSession(); + rrp.waitThread(); + } + { + //Do the reporting of RetrieveJobs, will transform the Retrieve request in Archive requests + while (true) { + auto rep = schedulerDB.getNextRepackReportBatch(lc); + if (nullptr == rep) break; + rep->report(lc); + } + } + re.fetchNoLock(); + //Get all archive subrequests in the ArchiveQueue + cta::objectstore::ArchiveQueue aq(re.getArchiveQueueAddress(s_tapePoolName, cta::objectstore::JobQueueType::JobsToTransferForRepack),backend); + aq.fetchNoLock(); + for(auto & job: aq.dumpJobs()){ + cta::objectstore::ArchiveRequest archiveReq(job.address,backend); + ASSERT_NO_THROW(archiveReq.fetchNoLock()); + } + scheduler.cancelRepack(s_adminOnAdminHost,vid,lc); + //Check that the subrequests are deleted from the objectstore + for(auto & job: aq.dumpJobs()){ + cta::objectstore::ArchiveRequest archiveReq(job.address,backend); + ASSERT_THROW(archiveReq.fetchNoLock(),cta::objectstore::Backend::NoSuchObject); + } + //Check that the RepackRequest is deleted from the objectstore + ASSERT_THROW(cta::objectstore::RepackRequest(repackRequestAddress,backend).fetchNoLock(),cta::objectstore::Backend::NoSuchObject); + } +} + #undef TEST_MOCK_DB #ifdef TEST_MOCK_DB static cta::MockSchedulerDatabaseFactory mockDbFactory; diff --git a/tapeserver/castor/tape/tapeserver/daemon/MigrationReportPacker.cpp 
diff --git a/tapeserver/castor/tape/tapeserver/daemon/MigrationReportPacker.cpp b/tapeserver/castor/tape/tapeserver/daemon/MigrationReportPacker.cpp
index 6acac09a9650eb38ad7cb6d8ce7e7cb2431718f3..47b885f3c22ccdfc499dbdc7aa733e58fcee35bd 100644
--- a/tapeserver/castor/tape/tapeserver/daemon/MigrationReportPacker.cpp
+++ b/tapeserver/castor/tape/tapeserver/daemon/MigrationReportPacker.cpp
@@ -26,6 +26,7 @@
 #include "castor/tape/tapeserver/drive/DriveInterface.hpp"
 #include "catalogue/TapeFileWritten.hpp"
 #include "common/utils/utils.hpp"
+#include "objectstore/Backend.hpp"
 
 #include <memory>
 #include <numeric>
@@ -197,6 +198,11 @@ void MigrationReportPacker::ReportSkipped::execute(MigrationReportPacker& report
   }
   try {
     m_skippedArchiveJob->transferFailed(m_failureLog, reportPacker.m_lc);
+  } catch (cta::objectstore::Backend::NoSuchObject & ex){
+    cta::log::ScopedParamContainer params(reportPacker.m_lc);
+    params.add("ExceptionMSG", ex.getMessageValue())
+          .add("fileId", m_skippedArchiveJob->archiveFile.archiveFileID);
+    reportPacker.m_lc.log(cta::log::WARNING, "In MigrationReportPacker::ReportSkipped::execute(): call to m_skippedArchiveJob->transferFailed() failed: job does not exist in the objectstore.");
   } catch (cta::exception::Exception & ex) {
     cta::log::ScopedParamContainer params(reportPacker.m_lc);
     params.add("ExceptionMSG", ex.getMessageValue())
@@ -346,7 +352,13 @@ void MigrationReportPacker::ReportError::execute(MigrationReportPacker& reportPa
   }
   try {
     m_failedArchiveJob->transferFailed(m_failureLog, reportPacker.m_lc);
-  } catch (cta::exception::Exception & ex) {
+  } catch (cta::objectstore::Backend::NoSuchObject & ex){
+    cta::log::ScopedParamContainer params(reportPacker.m_lc);
+    params.add("ExceptionMSG", ex.getMessageValue())
+          .add("fileId", m_failedArchiveJob->archiveFile.archiveFileID);
+    reportPacker.m_lc.log(cta::log::WARNING, "In MigrationReportPacker::ReportError::execute(): call to m_failedArchiveJob->transferFailed() failed: job does not exist in the objectstore.");
+  }
+  catch (cta::exception::Exception & ex) {
     cta::log::ScopedParamContainer params(reportPacker.m_lc);
     params.add("ExceptionMSG", ex.getMessageValue())
       .add("fileId", m_failedArchiveJob->archiveFile.archiveFileID);
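The RecallReportPacker diff below additionally guards the success path, which previously had no try/catch at all. Note the ordering inside the new try block: m_successfulRetrieveJob is only moved into the parent's queue after asyncSetSuccessful() returns, so when NoSuchObject is thrown the job pointer is still valid and the catch clause can safely log its archiveFileID. A minimal sketch of that ordering constraint (toy types, not the CTA classes):

    #include <iostream>
    #include <memory>
    #include <queue>
    #include <stdexcept>

    struct NoSuchObject : std::runtime_error { using std::runtime_error::runtime_error; };

    // Toy job: asyncSetSuccessful() throws if the objectstore object is gone.
    struct Job {
      int fileId;
      bool deleted;
      void asyncSetSuccessful() const {
        if (deleted) throw NoSuchObject("no such object");
      }
    };

    void reportSuccess(std::unique_ptr<Job> &job, std::queue<std::unique_ptr<Job>> &successful) {
      try {
        job->asyncSetSuccessful();        // throws *before* the move below
        successful.push(std::move(job));  // only reached on success
      } catch (NoSuchObject &ex) {
        // Safe: the throw happened before the move, so "job" still owns the object.
        std::cout << "WARNING fileId=" << job->fileId << ": " << ex.what() << '\n';
      }
    }

    int main() {
      std::queue<std::unique_ptr<Job>> successful;
      auto job = std::make_unique<Job>(Job{42, true});
      reportSuccess(job, successful);   // logs a warning instead of crashing
      return 0;
    }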
diff --git a/tapeserver/castor/tape/tapeserver/daemon/RecallReportPacker.cpp b/tapeserver/castor/tape/tapeserver/daemon/RecallReportPacker.cpp
index f58f6a0ad72bf6134f976aac761431a912e62089..2e0ddfcded3cefb58551c6e1b9c21c498a383c79 100644
--- a/tapeserver/castor/tape/tapeserver/daemon/RecallReportPacker.cpp
+++ b/tapeserver/castor/tape/tapeserver/daemon/RecallReportPacker.cpp
@@ -25,6 +25,7 @@
 #include "castor/tape/tapeserver/daemon/TaskWatchDog.hpp"
 #include "common/log/Logger.hpp"
 #include "common/utils/utils.hpp"
+#include "objectstore/Backend.hpp"
 
 #include <signal.h>
 #include <iostream>
@@ -115,8 +116,15 @@ void RecallReportPacker::reportTestGoingToEnd(){
 //ReportSuccessful::execute
 //------------------------------------------------------------------------------
 void RecallReportPacker::ReportSuccessful::execute(RecallReportPacker& parent){
-  m_successfulRetrieveJob->asyncSetSuccessful();
-  parent.m_successfulRetrieveJobs.push(std::move(m_successfulRetrieveJob));
+  try{
+    m_successfulRetrieveJob->asyncSetSuccessful();
+    parent.m_successfulRetrieveJobs.push(std::move(m_successfulRetrieveJob));
+  } catch (const cta::objectstore::Backend::NoSuchObject &ex){
+    cta::log::ScopedParamContainer params(parent.m_lc);
+    params.add("ExceptionMSG", ex.getMessageValue())
+          .add("fileId", m_successfulRetrieveJob->archiveFile.archiveFileID);
+    parent.m_lc.log(cta::log::WARNING, "In RecallReportPacker::ReportSuccessful::execute(): call to m_successfulRetrieveJob->asyncSetSuccessful() failed: job does not exist in the objectstore.");
+  }
 }
 
 //------------------------------------------------------------------------------
@@ -212,6 +220,11 @@ void RecallReportPacker::ReportError::execute(RecallReportPacker& reportPacker){
   }
   try {
     m_failedRetrieveJob->transferFailed(m_failureLog, reportPacker.m_lc);
+  } catch (const cta::objectstore::Backend::NoSuchObject &ex){
+    cta::log::ScopedParamContainer params(reportPacker.m_lc);
+    params.add("ExceptionMSG", ex.getMessageValue())
+          .add("fileId", m_failedRetrieveJob->archiveFile.archiveFileID);
+    reportPacker.m_lc.log(cta::log::WARNING, "In RecallReportPacker::ReportError::execute(): call to m_failedRetrieveJob->transferFailed() failed: job does not exist in the objectstore.");
   } catch (cta::exception::Exception & ex) {
     cta::log::ScopedParamContainer params(reportPacker.m_lc);
     params.add("ExceptionMSG", ex.getMessageValue())
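Since essentially the same five-line catch block now appears in four places across the two report packers, the guard could plausibly be factored into a small helper. A sketch of one way to do that follows; the helper name and signature are hypothetical and not part of this patch, and it assumes only the cta::log::LogContext and Backend APIs already used by the diffs above.

    #include <cstdint>
    #include <functional>
    #include <string>

    #include "common/log/LogContext.hpp"
    #include "objectstore/Backend.hpp"

    // Hypothetical helper (not in the patch): run a reporting action and downgrade
    // a missing-object error to a WARNING, mirroring the catch blocks added above.
    // Any other cta::exception::Exception still propagates to the caller's handler.
    inline void warnOnNoSuchObject(const std::function<void()> &reportAction,
                                   cta::log::LogContext &lc,
                                   const std::string &where, uint64_t fileId) {
      try {
        reportAction();
      } catch (cta::objectstore::Backend::NoSuchObject &ex) {
        cta::log::ScopedParamContainer params(lc);
        params.add("ExceptionMSG", ex.getMessageValue()).add("fileId", fileId);
        lc.log(cta::log::WARNING, "In " + where + ": job does not exist in the objectstore.");
      }
    }

    // Usage in ReportError::execute() would then reduce to:
    //   warnOnNoSuchObject([&]{ m_failedRetrieveJob->transferFailed(m_failureLog, reportPacker.m_lc); },
    //                      reportPacker.m_lc, "RecallReportPacker::ReportError::execute()",
    //                      m_failedRetrieveJob->archiveFile.archiveFileID);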