From 8aa83eac71e4d88aabdc0a7129af641336acf0df Mon Sep 17 00:00:00 2001 From: Pablo Oliver Cortes <pablo.oliver.cortes@cern.ch> Date: Fri, 28 Apr 2023 13:21:55 +0200 Subject: [PATCH] Add gfal2 tests to CI. --- .gitlab/ci/kube-tests.gitlab-ci.yml | 28 +- CMakeLists.txt | 2 - ReleaseNotes.md | 2 + .../ctafrontend/cc7/opt/run/bin/client.sh | 3 +- .../orchestration/create_instance.sh | 3 +- .../tests/client-ar-abortPrepare.cpp | 201 ------ .../{CMakeLists.txt => client-gfal2_ar.sh} | 30 +- .../tests/client-gfal2_delete.sh | 131 ++++ .../orchestration/tests/client-gfal2_evict.sh | 58 ++ .../tests/client-gfal2_retrieve.sh | 116 ++++ .../tests/client_abortPrepare.sh | 184 +++++ .../orchestration/tests/client_ar.sh | 635 +----------------- .../tests/client_ar_abortPrepare.py | 108 --- .../orchestration/tests/client_archive.sh | 139 ++++ .../orchestration/tests/client_delete.sh | 137 ++++ .../orchestration/tests/client_evict.sh | 57 ++ .../orchestration/tests/client_helper.sh | 103 ++- ...etrieve.sh => client_multiple_retrieve.sh} | 0 .../orchestration/tests/client_results.sh | 69 ++ .../orchestration/tests/client_retrieve.sh | 114 ++++ .../orchestration/tests/client_setup.sh | 232 +++++++ .../orchestration/tests/client_simple_ar.sh | 101 +++ .../tests/client_zero_length_copy.sh | 60 ++ .../orchestration/tests/idempotent_prepare.sh | 2 +- .../orchestration/tests/prepare_tests.sh | 3 +- .../tests/repack_systemtest_wrapper.sh | 6 +- .../orchestration/tests/simple_client_ar.sh | 129 ---- .../orchestration/tests/test_client-gfal2.sh | 115 ++++ .../{archive_retrieve.sh => test_client.sh} | 93 ++- cta.spec.in | 11 - 30 files changed, 1734 insertions(+), 1138 deletions(-) delete mode 100644 continuousintegration/orchestration/tests/client-ar-abortPrepare.cpp rename continuousintegration/orchestration/tests/{CMakeLists.txt => client-gfal2_ar.sh} (62%) mode change 100644 => 100755 create mode 100755 continuousintegration/orchestration/tests/client-gfal2_delete.sh create mode 100755 
continuousintegration/orchestration/tests/client-gfal2_evict.sh create mode 100755 continuousintegration/orchestration/tests/client-gfal2_retrieve.sh create mode 100755 continuousintegration/orchestration/tests/client_abortPrepare.sh mode change 100644 => 100755 continuousintegration/orchestration/tests/client_ar.sh delete mode 100755 continuousintegration/orchestration/tests/client_ar_abortPrepare.py create mode 100755 continuousintegration/orchestration/tests/client_archive.sh create mode 100755 continuousintegration/orchestration/tests/client_delete.sh create mode 100755 continuousintegration/orchestration/tests/client_evict.sh mode change 100644 => 100755 continuousintegration/orchestration/tests/client_helper.sh rename continuousintegration/orchestration/tests/{multiple_retrieve.sh => client_multiple_retrieve.sh} (100%) create mode 100755 continuousintegration/orchestration/tests/client_results.sh create mode 100755 continuousintegration/orchestration/tests/client_retrieve.sh create mode 100755 continuousintegration/orchestration/tests/client_setup.sh create mode 100755 continuousintegration/orchestration/tests/client_simple_ar.sh create mode 100755 continuousintegration/orchestration/tests/client_zero_length_copy.sh delete mode 100644 continuousintegration/orchestration/tests/simple_client_ar.sh create mode 100755 continuousintegration/orchestration/tests/test_client-gfal2.sh rename continuousintegration/orchestration/tests/{archive_retrieve.sh => test_client.sh} (52%) diff --git a/.gitlab/ci/kube-tests.gitlab-ci.yml b/.gitlab/ci/kube-tests.gitlab-ci.yml index 7b6c05bea3..b87bdb4bcd 100644 --- a/.gitlab/ci/kube-tests.gitlab-ci.yml +++ b/.gitlab/ci/kube-tests.gitlab-ci.yml @@ -38,25 +38,39 @@ variables: EXTENDED_OPTIONS: "-O -D -t 2400 -C -e eos5-config-quarkdb.yaml" -archiveretrieve: +client: stage: test extends: - .kubernetes_test except: - tags variables: - TEST_SCRIPT: "tests/archive_retrieve.sh" + TEST_SCRIPT: "tests/test_client.sh" EXTENDED_OPTIONS: "-O 
-D -t 2400 -C" -archiveretrieve-eos5: +client-eos5: stage: test extends: - .eos5 variables: - TEST_SCRIPT: "tests/archive_retrieve.sh" - # I give the name "ar-eos5", because "archiveretrieve-eos5" is too long and it produce an error - # of connection with XRootD - JOB_NAME: "ar-eos5" + TEST_SCRIPT: "tests/test_client.sh" + +client-gfal2: + stage: test + extends: + - .kubernetes_test + rules: + - !reference [.manual_rules, rules] + variables: + TEST_SCRIPT: "tests/test_client-gfal2.sh" + EXTENDED_OPTIONS: "-O -D -t 2400 -C" + +client-gfal2-eos5: + stage: test + extends: + - .eos5 + variables: + TEST_SCRIPT: "tests/test_client-gfal2.sh" repack: stage: test diff --git a/CMakeLists.txt b/CMakeLists.txt index cd0172120f..847c2ec0c3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -165,8 +165,6 @@ ELSE(DEFINED PackageOnly) add_subdirectory(statistics) - add_subdirectory(continuousintegration/orchestration/tests) - add_subdirectory(cta-release) #Generate version information diff --git a/ReleaseNotes.md b/ReleaseNotes.md index 5817632350..509d2c9e5f 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -8,6 +8,8 @@ ### Continuous Integration - cta/CTA#262 - Manage CI repositories with `cta-release` code +- cta/CTA#297 - Add gfal2 tests to CI +- cta/CTA#297 - Refactor client pod tests and switched test tracking status to sqlite db # v4.8.7-1 diff --git a/continuousintegration/docker/ctafrontend/cc7/opt/run/bin/client.sh b/continuousintegration/docker/ctafrontend/cc7/opt/run/bin/client.sh index b63f6c53cb..1e4822fd62 100755 --- a/continuousintegration/docker/ctafrontend/cc7/opt/run/bin/client.sh +++ b/continuousintegration/docker/ctafrontend/cc7/opt/run/bin/client.sh @@ -22,7 +22,7 @@ if [ ! 
-e /etc/buildtreeRunner ]; then yum-config-manager --enable ceph # Install missing RPMs - yum -y install cta-cli cta-immutable-file-test cta-systemtest-helpers cta-debuginfo xrootd-client eos-client jq python36 + yum -y install cta-cli cta-immutable-file-test cta-debuginfo xrootd-client eos-client jq python36 ## Keep this temporary fix that may be needed if going to protobuf3-3.5.1 for CTA # Install eos-protobuf3 separately as eos is OK with protobuf3 but cannot use it.. @@ -36,6 +36,7 @@ cat <<EOF > /etc/cta/cta-cli.conf cta.endpoint ctafrontend:10955 EOF + if [ "-${CI_CONTEXT}-" == '-nosystemd-' ]; then # sleep forever but exit immediately when pod is deleted exec /bin/bash -c "trap : TERM INT; sleep infinity & wait" diff --git a/continuousintegration/orchestration/create_instance.sh b/continuousintegration/orchestration/create_instance.sh index 5f85e689b0..e41cc6bb7d 100755 --- a/continuousintegration/orchestration/create_instance.sh +++ b/continuousintegration/orchestration/create_instance.sh @@ -374,7 +374,7 @@ echo OK echo -n "Configuring KDC clients (frontend, cli...) 
" kubectl --namespace=${instance} exec kdc -- cat /etc/krb5.conf | kubectl --namespace=${instance} exec -i client -- bash -c "cat > /etc/krb5.conf" -kubectl --namespace=${instance} exec kdc -- cat /etc/krb5.conf | kubectl --namespace=${instance} exec -i ctacli -- bash -c "cat > /etc/krb5.conf" +kubectl --namespace=${instance} exec kdc -- cat /etc/krb5.conf | kubectl --namespace=${instance} exec -i ctacli -- bash -c "cat > /etc/krb5.conf" kubectl --namespace=${instance} exec kdc -- cat /etc/krb5.conf | kubectl --namespace=${instance} exec -i ctafrontend -- bash -c "cat > /etc/krb5.conf" kubectl --namespace=${instance} exec kdc -- cat /etc/krb5.conf | kubectl --namespace=${instance} exec -i ctaeos -- bash -c "cat > /etc/krb5.conf" kubectl --namespace=${instance} exec kdc -- cat /root/ctaadmin1.keytab | kubectl --namespace=${instance} exec -i ctacli -- bash -c "cat > /root/ctaadmin1.keytab" @@ -410,6 +410,7 @@ echo OK echo "klist for client:" kubectl --namespace=${instance} exec client -- klist + echo "klist for ctacli:" kubectl --namespace=${instance} exec ctacli -- klist diff --git a/continuousintegration/orchestration/tests/client-ar-abortPrepare.cpp b/continuousintegration/orchestration/tests/client-ar-abortPrepare.cpp deleted file mode 100644 index add63dc496..0000000000 --- a/continuousintegration/orchestration/tests/client-ar-abortPrepare.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* - * @project The CERN Tape Archive (CTA) - * @copyright Copyright © 2021-2022 CERN - * @license This program is free software, distributed under the terms of the GNU General Public - * Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can - * redistribute it and/or modify it under the terms of the GPL Version 3, or (at your - * option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A - * PARTICULAR PURPOSE. 
See the GNU General Public License for more details. - * - * In applying this licence, CERN does not waive the privileges and immunities - * granted to it by virtue of its status as an Intergovernmental Organization or - * submit itself to any jurisdiction. - */ - -#include <getopt.h> -#include <string> -#include <iostream> -#include <memory> -#include <fstream> -#include <XrdCl/XrdClFileSystem.hh> -#include <XrdCl/XrdClDefaultEnv.hh> -#include "common/utils/Regex.hpp" -#include "common/exception/XrootCl.hpp" - - -// No short options. -const char short_options[] = ""; -// We require a handful of long options. - -enum class OptionIds: int { - eos_instance = 1, - eos_poweruser = 2, - eos_dir = 3, - subdir = 4, - file = 5, - error_dir = 6 -}; - -const struct ::option long_options[] = { - { "eos-instance", required_argument, nullptr, (int)OptionIds::eos_instance }, - { "eos-poweruser", required_argument, nullptr, (int)OptionIds::eos_poweruser }, - { "eos-dir", required_argument, nullptr, (int)OptionIds::eos_dir }, - { "subdir", required_argument, nullptr, (int)OptionIds::subdir }, - { "file", required_argument, nullptr, (int)OptionIds::file }, - { "error-dir", required_argument, nullptr, (int)OptionIds::error_dir }, - { nullptr, 0, nullptr, 0 } -}; - -void help() { - std::cerr << "Expected parameters are: "; - const struct ::option * pOpt = long_options; - while (pOpt->name) { - std::cerr << "--" << pOpt->name << " "; - ++pOpt; - } - std::cerr << std::endl; -} - -// We make these variables global as they will be part of the process's environment. 
-std::unique_ptr<char[]> envKRB5CCNAME; -char envXrdSecPROTOCOL[] = "XrdSecPROTOCOL=krb5"; - - -int main(int argc, char **argv) { - - struct { - std::string eos_instance; - std::string eos_poweruser; - std::string eos_dir; - std::string subdir; - std::string file; - std::string error_dir; - } options; - try { - int opt_ret; - while (-1 != (opt_ret = getopt_long(argc, argv, short_options, long_options, nullptr))) { - switch (opt_ret) { - case (int)OptionIds::eos_instance: - options.eos_instance = optarg; - break; - case (int)OptionIds::eos_poweruser: - options.eos_poweruser = optarg; - break; - case (int)OptionIds::eos_dir: - options.eos_dir = optarg; - break; - case (int)OptionIds::subdir: - options.subdir = optarg; - break; - case (int)OptionIds::file: - options.file = optarg; - break; - case (int)OptionIds::error_dir: - options.error_dir = optarg; - break; - case '?': - default: - std::cerr << "Unexpected option or missing argument." << std::endl; - exit(EXIT_FAILURE); - break; - } - } - - if (options.eos_instance.empty() || options.eos_poweruser.empty() || options.error_dir.empty() || options.subdir.empty() || - options.file.empty() || options.error_dir.empty()) { - std::cerr << "At least one option missing." << std::endl; - help(); - exit (EXIT_FAILURE); - } - - // std::cout << "To run again: " << argv[0] - // << " --eos-instance=" << options.eos_instance - // << " --eos-poweruser=" << options.eos_poweruser - // << " --eos-dir=" << options.eos_dir - // << " --subdir=" << options.subdir - // << " --file=" << options.file - // << " --error_dir=" << options.error_dir << std::endl; - - // Get the extended attribute for the retrieve request id - std::string retrieveRequestId, fileName(options.eos_dir + "/" + options.subdir + "/" + options.file); - XrdCl::FileSystem xrdfs(options.eos_instance); - std::string errFileName; - try { - // Prepare Xrootd environment. 
- errFileName = options.error_dir + '/' + "XATTRGET2_" + options.subdir + '_' + options.file; - XrdCl::DefaultEnv::SetLogLevel("Dump"); - XrdCl::DefaultEnv::SetLogFile(errFileName); - std::string envKRB5CCNAMEvalue = std::string("KRB5CCNAME=/tmp/") + options.eos_poweruser + "/krb5cc_0"; - // We need to copy to an array because of putenv's lack of const correctness. - envKRB5CCNAME.reset(new char[envKRB5CCNAMEvalue.size() + 1]); - strncpy(envKRB5CCNAME.get(), envKRB5CCNAMEvalue.c_str(), envKRB5CCNAMEvalue.size() + 1); - putenv(envKRB5CCNAME.get()); - putenv(envXrdSecPROTOCOL); - - std::string query = fileName + "?mgm.pcmd=xattr&mgm.subcmd=get&mgm.xattrname=sys.retrieve.req_id"; - auto qcOpaque = XrdCl::QueryCode::OpaqueFile; - XrdCl::Buffer xrdArg; - xrdArg.FromString(query); - XrdCl::Buffer *respPtr = nullptr; - auto status = xrdfs.Query(qcOpaque, xrdArg, respPtr, (uint16_t)0 /*timeout=default*/); - // Ensure proper memory management for the response buffer (it is our responsilibity to free it, we delegate to the unique_ptr). 
- std::unique_ptr<XrdCl::Buffer> respUP(respPtr); - respPtr = nullptr; - cta::exception::XrootCl::throwOnError(status, "Error during XrdCl::Query"); - cta::utils::Regex re("value=(.*)"); - std::string respStr(respUP->GetBuffer(), respUP->GetSize()); - auto reResult = re.exec(respStr); - if (reResult.size() != 2) { - // We did not receive the expected structure - throw cta::exception::Exception(std::string("Unexpected result from xattr query: ") + respStr); - } - retrieveRequestId = reResult[1]; - unlink(errFileName.c_str()); - } catch (cta::exception::Exception & ex) { - std::cerr << "ERROR: failed to get request Id for file " << fileName << " full logs in " << errFileName << std::endl; - std::ofstream errFile(errFileName, std::ios::out | std::ios::app); - errFile << ex.what(); - return 1; - } catch (std::exception & ex) { - std::cerr << "ERROR: a standard exception occurred" << " full logs in " << errFileName << std::endl; - std::ofstream errFile(errFileName, std::ios::out | std::ios::app); - errFile << ex.what(); - return 1; - } - try { - // Prepare Xrootd environment. - errFileName = options.error_dir + '/' + "PREPAREABORT_" + options.subdir + '_' + options.file; - XrdCl::DefaultEnv::SetLogLevel("Dump"); - XrdCl::DefaultEnv::SetLogFile(errFileName); - std::vector<std::string> files = { retrieveRequestId, fileName }; - XrdCl::PrepareFlags::Flags flags = XrdCl::PrepareFlags::Cancel; - XrdCl::Buffer *respPtr = nullptr; - auto abortStatus = xrdfs.Prepare(files, flags, 0, respPtr, 0 /* timeout */); - // Ensure proper memory management for the response buffer (it is our responsilibity to free it, we delegate to the unique_ptr). 
- std::unique_ptr<XrdCl::Buffer> respUP(respPtr); - respPtr = nullptr; - cta::exception::XrootCl::throwOnError(abortStatus, "Error during XrdCl::Prepare"); - unlink(errFileName.c_str()); - } catch (cta::exception::Exception & ex) { - std::cerr << "ERROR: failed to get request Id for file " << fileName << " full logs in " << errFileName << std::endl; - std::ofstream errFile(errFileName, std::ios::out | std::ios::app); - errFile << ex.what(); - return 1; - } catch (std::exception & ex) { - std::cerr << "ERROR: a standard exception occurred" << " full logs in " << errFileName << std::endl; - std::ofstream errFile(errFileName, std::ios::out | std::ios::app); - errFile << ex.what(); - return 1; - } - return 0; - } catch (std::exception &ex) { - std::cerr << "ERROR: a standard exception occurred " << ex.what() << std::endl; - return 1; - } catch (...) { - std::cerr << "ERROR: an unknown general exception occurred " << std::endl; - return 1; - } -} diff --git a/continuousintegration/orchestration/tests/CMakeLists.txt b/continuousintegration/orchestration/tests/client-gfal2_ar.sh old mode 100644 new mode 100755 similarity index 62% rename from continuousintegration/orchestration/tests/CMakeLists.txt rename to continuousintegration/orchestration/tests/client-gfal2_ar.sh index 515bc3ab4a..3c06657d47 --- a/continuousintegration/orchestration/tests/CMakeLists.txt +++ b/continuousintegration/orchestration/tests/client-gfal2_ar.sh @@ -1,5 +1,7 @@ +#!/bin/bash + # @project The CERN Tape Archive (CTA) -# @copyright Copyright © 2015-2022 CERN +# @copyright Copyright © 2022 CERN # @license This program is free software, distributed under the terms of the GNU General Public # Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can # redistribute it and/or modify it under the terms of the GPL Version 3, or (at your @@ -13,15 +15,23 @@ # granted to it by virtue of its status as an Intergovernmental Organization or # submit itself to any jurisdiction. 
-cmake_minimum_required (VERSION 3.17) -find_package(xrootd REQUIRED) -find_package(xrootdclient REQUIRED) -find_package(Protobuf3 REQUIRED) +if [[ $DONOTARCHIVE == 0 ]]; then + . /root/client_archive.sh +fi + +if [[ $ARCHIVEONLY == 1 ]]; then + echo "Archiveonly mode: exiting" + test -z $TAILPID || kill ${TAILPID} &> /dev/null + exit 0 +fi + +. /root/client-gfal2_retrieve.sh + +. /root/client-gfal2_evict.sh -include_directories(${XROOTD_INCLUDE_DIR} ${CMAKE_SOURCE_DIR}) +if [[ $REMOVE == 1 ]]; then + . /root/client-gfal2_delete.sh +fi -add_executable(cta-client-ar-abortPrepare client-ar-abortPrepare.cpp) -target_link_libraries(cta-client-ar-abortPrepare XrdCl ctacommon) -install(TARGETS cta-client-ar-abortPrepare DESTINATION usr/bin) -set_property(TARGET cta-client-ar-abortPrepare APPEND PROPERTY INSTALL_RPATH ${PROTOBUF3_RPATH}) +. /root/client_results.sh diff --git a/continuousintegration/orchestration/tests/client-gfal2_delete.sh b/continuousintegration/orchestration/tests/client-gfal2_delete.sh new file mode 100755 index 0000000000..fe59cf80d0 --- /dev/null +++ b/continuousintegration/orchestration/tests/client-gfal2_delete.sh @@ -0,0 +1,131 @@ +#!/bin/bash + +# @project The CERN Tape Archive (CTA) +# @copyright Copyright © 2022 CERN +# @license This program is free software, distributed under the terms of the GNU General Public +# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can +# redistribute it and/or modify it under the terms of the GPL Version 3, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+#
+# In applying this licence, CERN does not waive the privileges and immunities
+# granted to it by virtue of its status as an Intergovernmental Organization or
+# submit itself to any jurisdiction.
+
+
+# Provide an EOS directory (default: ${EOS_BASEDIR}) and print the VIDs of the tapes containing files under that directory
+nsls_tapes()
+{
+  local EOS_DIR=${1:-${EOS_BASEDIR}}
+
+  # 1. Query EOS namespace to get a list of file IDs
+  # 2. Pipe to "tape ls" to get the list of tapes where those files are archived
+  eos root://${EOSINSTANCE} find --fid ${EOS_DIR} |\
+  admin_cta --json tape ls --fxidfile /dev/stdin |\
+  jq '.[] | .vid' | sed 's/"//g'
+}
+
+
+# Provide a list of tape VIDs and print the path of every file stored on those tapes (one per line)
+tapefile_ls()
+{
+  for vid in $*
+  do
+    admin_cta --json tapefile ls --lookupnamespace --vid ${vid} |\
+    jq '.[] | .df.path'
+  done
+}
+
+# Get list of files currently on tape (written as <subdir>/<file> to $initial_files_on_tape).
+tmp_file=$(mktemp)
+initial_files_on_tape=$(mktemp)
+for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do
+  eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep '^d[0-9][0-9]*::t1' | awk '{print $10}' > $tmp_file
+  cat $tmp_file | xargs -iFILE_NAME echo ${subdir}/FILE_NAME >> $initial_files_on_tape
+done
+
+# We can now delete the files
+echo "Waiting for files to be removed from EOS and tapes"
+admin_kdestroy &>/dev/null
+admin_kinit &>/dev/null
+if $(admin_cta admin ls &>/dev/null); then
+  echo "Got cta admin privileges, can proceed with the workflow"
+else
+  # displays what failed and fail
+  admin_cta admin ls
+  die "Could not launch cta-admin command."
+fi
+# recount the files on tape as the workflows may have gone further...
+VIDLIST=$(nsls_tapes ${EOS_DIR})
+INITIALFILESONTAPE=$(tapefile_ls ${VIDLIST} | wc -l)
+echo "Before starting deletion there are ${INITIALFILESONTAPE} files on tape."
+#XrdSecPROTOCOL=sss eos -r 0 0 root://${EOSINSTANCE} rm -Fr ${EOS_DIR} &
+KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 gfal-rm -r ${GFAL2_PROTOCOL}://${EOSINSTANCE}/${EOS_DIR} 1>/dev/null &
+EOSRMPID=$!
+# wait a bit in case the background gfal-rm prematurely fails...
+sleep 0.1
+if test ! -d /proc/${EOSRMPID}; then
+  # gfal-rm process already exited, get its status
+  wait ${EOSRMPID}
+  test $? -ne 0 && die "Could not launch gfal-rm"
+fi
+
+# Now we can start to do something...
+# deleted files are the ones that made it on tape minus the ones that are still on tapes...
+echo "Waiting for files to be deleted:"
+SECONDS_PASSED=0
+WAIT_FOR_DELETED_FILE_TIMEOUT=$((5+${NB_FILES}/9))
+FILESONTAPE=${INITIALFILESONTAPE}
+
+while test 0 != ${FILESONTAPE}; do
+  echo "Waiting for files to be deleted from tape: Seconds passed = ${SECONDS_PASSED}"
+  sleep 1
+  let SECONDS_PASSED=SECONDS_PASSED+1
+
+  if test ${SECONDS_PASSED} == ${WAIT_FOR_DELETED_FILE_TIMEOUT}; then
+    echo "Timed out after ${WAIT_FOR_DELETED_FILE_TIMEOUT} seconds waiting for file to be deleted from tape"
+    break
+  fi
+
+  FILESONTAPE=$(tapefile_ls ${VIDLIST} > >(wc -l) 2> >(cat > /tmp/ctaerr))
+
+  if [[ $(cat /tmp/ctaerr | wc -l) -gt 0 ]]; then
+    echo "cta-admin COMMAND FAILED!!"
+ echo "ERROR CTA ERROR MESSAGE:" + cat /tmp/ctaerr + break + fi + + DELETED=$((${INITIALFILESONTAPE} - ${FILESONTAPE})) + + echo "${DELETED}/${INITIALFILESONTAPE} deleted" +done + + +# kill eos rm command that may run in the background +kill ${EOSRMPID} &> /dev/null + + +LASTCOUNT=0 + +if [[ ${RETRIEVED} -gt ${DELETED} ]]; then + LASTCOUNT=${DELETED} + echo "Some files have not been deleted:" + tapefile_ls ${VIDLIST} +else + echo "All files have been deleted" + LASTCOUNT=${RETRIEVED} + db_begin_transaction + db_update_col "deleted" "+" "1" + db_commit_transaction +fi + +if [ ${LASTCOUNT} -ne $((${NB_FILES} * ${NB_DIRS})) ]; then + #((RC++)) + echo "ERROR there were some lost files during the archive/retrieve test with ${NB_FILES} files" >> /tmp/RC + echo "ERROR there were some lost files during the archive/retrieve test with ${NB_FILES} files (first 10):" + grep -v retrieved ${STATUS_FILE} | sed -e "s;^;${EOS_DIR}/;" | head -10 +fi diff --git a/continuousintegration/orchestration/tests/client-gfal2_evict.sh b/continuousintegration/orchestration/tests/client-gfal2_evict.sh new file mode 100755 index 0000000000..4b59120c0f --- /dev/null +++ b/continuousintegration/orchestration/tests/client-gfal2_evict.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# @project The CERN Tape Archive (CTA) +# @copyright Copyright © 2022 CERN +# @license This program is free software, distributed under the terms of the GNU General Public +# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can +# redistribute it and/or modify it under the terms of the GPL Version 3, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+#
+# In applying this licence, CERN does not waive the privileges and immunities
+# granted to it by virtue of its status as an Intergovernmental Organization or
+# submit itself to any jurisdiction.
+
+
+echo "$(date +%s): Trigerring EOS evict workflow as poweruser1:powerusers (12001:1200)"
+
+# Build, per subdir, the list of files with at least 1 disk copy that have been archived before (ie d>=1::t1)
+TMP_FILE=$(mktemp)
+TO_EVICT=0
+for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do
+  eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep 'd[1-9][0-9]*::t1' | sed -e "s%\s\+% %g;s%.* \([^ ]\+\)$%\1%" > "${TMP_FILE}${subdir}"
+  #cat ${TMP_FILE} | sed -e "s%\s\+% %g;s%.* \([^ ]\+\)$%${subdir}/\1%" >> "${TMP_FILE}${subdir}"
+  TO_EVICT=$(( ${TO_EVICT} + $(cat ${TMP_FILE}${subdir} | wc -l ) ))
+done
+rm -f ${TMP_FILE}
+
+# Value written to the 'evicted' column for files found evicted below.
+current_evict_val=0
+NEW_EVICT_VAL=$(( ${current_evict_val} + 1 ))
+
+echo "$(date +%s): $TO_EVICT files to be evicted from EOS using 'gfal-evict SURL'"
+
+for (( subdir=0; subdir < ${NB_DIRS}; subdir++ )); do
+  cat "${TMP_FILE}${subdir}" | xargs -iFILE --max-procs=10 bash -c "XrdSecPROTOCOL=krb5 KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 gfal-evict ${GFAL2_PROTOCOL}://${EOSINSTANCE}/${EOS_DIR}/${subdir}/FILE"
+
+  rm -f "${TMP_FILE}${subdir}"
+done
+
+db_begin_transaction
+LEFTOVER=0
+TMP_FILE=$(mktemp)
+for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do
+  LEFTOVER=$(( ${LEFTOVER} + $(eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep '^d[1-9][0-9]*::t1' | wc -l) ))
+
+  eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep '^d[0][0-9]*::t1' | awk '{print $10}' > ${TMP_FILE}
+
+  cat ${TMP_FILE} | xargs -iTEST_FILE_NAME bash -c "db_update 'evicted' ${subdir}/TEST_FILE_NAME ${NEW_EVICT_VAL} '='"
+done
+rm -f ${TMP_FILE}
+db_commit_transaction
+
+EVICTED=$((${TO_EVICT}-${LEFTOVER}))
+echo "$(date +%s): $EVICTED/$TO_EVICT files evicted from EOS 'gfal-evict SURL'"
+
diff --git a/continuousintegration/orchestration/tests/client-gfal2_retrieve.sh
b/continuousintegration/orchestration/tests/client-gfal2_retrieve.sh new file mode 100755 index 0000000000..2b81559257 --- /dev/null +++ b/continuousintegration/orchestration/tests/client-gfal2_retrieve.sh @@ -0,0 +1,116 @@ +#!/bin/bash + +# @project The CERN Tape Archive (CTA) +# @copyright Copyright © 2022 CERN +# @license This program is free software, distributed under the terms of the GNU General Public +# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can +# redistribute it and/or modify it under the terms of the GPL Version 3, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# In applying this licence, CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization or +# submit itself to any jurisdiction. + + +echo "$(date +%s): Trigerring EOS retrieve workflow as poweruser1:powerusers (12001:1200)" + + +#SURLs=$(mktemp) +TMP_FILE=$(mktemp) +db_get_files > ${TMP_FILE} + +# Split the input file into one file per directory to avoid cat and grep +# the entire file for each subdirectory. +seq 0 $(( ${NB_DIRS} - 1 )) | xargs -iSUBDIR bash -c "touch ${TMP_FILE}SUBDIR" +cat ${TMP_FILE} | xargs -iFILE bash -c "subdir=\$(echo FILE | cut -d/ -f1); echo FILE | cut -d/ -f2 >> ${TMP_FILE}\${subdir}" +rm -f ${TMP_FILE} + +# Get initial stage value. +#current_stage_val=$(db_info 'archived') +current_stage_val=0 +NEW_STAGE_VAL=$((${current_stage_val} + 1 )) + +for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do + echo -n "Retrieving files to ${EOS_DIR}/${subdir} using gfal-bringonline and ${NB_PROCS} processes..." 
+
+  gfal_call="XRD_LOGLEVEL=Dump KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 gfal-bringonline ${GFAL2_PROTOCOL}://${EOSINSTANCE}/${EOS_DIR}/${subdir}/TEST_FILE_NAME 2>${ERROR_DIR}/RETRIEVE_TEST_FILE_NAME && rm ${ERROR_DIR}/RETRIEVE_TEST_FILE_NAME"
+
+  gfal_error="echo ERROR with gfal-bringonline prepare stage for file TEST_FILE_NAME, full logs in ${ERROR_DIR}/RETRIEVE_TEST_FILE_NAME"
+
+  command_str="${gfal_call} || ${gfal_error}"
+
+  cat "${TMP_FILE}${subdir}" | xargs --max-procs=${NB_PROCS} -iTEST_FILE_NAME bash -c "${command_str}" | tee ${LOGDIR}/prepare_${subdir}.log | grep ^ERROR
+
+  echo Done.
+
+  # Query the sys.retrieve.req_id extended attribute of every file that was just staged
+  xrdfs_call="XRD_LOGLEVEL=Dump KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOSINSTANCE} query opaquefile ${EOS_DIR}/${subdir}/TEST_FILE_NAME?mgm.pcmd=xattr\&mgm.subcmd=get\&mgm.xattrname=sys.retrieve.req_id 2>${ERROR_DIR}/XATTRGET_TEST_FILE_NAME && rm ${ERROR_DIR}/XATTRGET_TEST_FILE_NAME"
+
+  xrdfs_error=" echo ERROR with xrootd xattr get for file TEST_FILE_NAME, full logs in ${ERROR_DIR}/XATTRGET_TEST_FILE_NAME"
+
+  command_str="${xrdfs_call} || ${xrdfs_error}"
+
+  cat "${TMP_FILE}${subdir}" | xargs --max-procs=${NB_PROCS} -iTEST_FILE_NAME bash -c "$command_str" | tee ${LOGDIR}/prepare_sys.retrieve.req_id_${subdir}.log | grep ^ERROR
+
+  rm -f "${TMP_FILE}${subdir}"
+done
+
+
+if [ "0" != "$(ls ${ERROR_DIR} 2> /dev/null | wc -l)" ]; then
+  # there were some prepare errors
+  echo "Several prepare errors occured during retrieval!"
+  echo "Please check client pod logs in artifacts"
+  mv ${ERROR_DIR}/* ${LOGDIR}/xrd_errors/
+fi
+
+#ARCHIVED=$(cat ${SURLs} | wc -l)
+TO_BE_RETRIEVED=$(( ${ARCHIVED} - $(ls ${ERROR_DIR}/RETRIEVE_* 2>/dev/null | wc -l) ))
+RETRIEVING=${TO_BE_RETRIEVED}
+RETRIEVED=0
+# Wait for the copy to appear on disk
+echo "$(date +%s): Waiting for files to be back on disk:"
+SECONDS_PASSED=0
+WAIT_FOR_RETRIEVED_FILE_TIMEOUT=$((40+${NB_FILES}/5))
+
+status=$(mktemp)
+while test 0 -lt ${RETRIEVING}; do
+  rm -f $status
+  touch $status
+  echo "$(date +%s): Waiting for files to be retrieved from tape: Seconds passed = ${SECONDS_PASSED}"
+
+  sleep 3
+
+  let SECONDS_PASSED=SECONDS_PASSED+1
+
+  if test ${SECONDS_PASSED} == ${WAIT_FOR_RETRIEVED_FILE_TIMEOUT}; then
+    echo "$(date +%s): Timed out after ${WAIT_FOR_RETRIEVED_FILE_TIMEOUT} seconds waiting for file to be retrieved tape"
+    break
+  fi
+
+  RETRIEVED=0
+  for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do
+    # Append this subdir's on-disk files to $status, which accumulates across subdirs
+    eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep '^d[1-9][0-9]*::t1' | awk -v sd="${subdir}/" '{print sd$10}' >> $status
+
+    RETRIEVED=$(cat $status | wc -l)
+
+    sleep 1 # do not hammer eos too hard
+  done
+
+  RETRIEVING=$((${TO_BE_RETRIEVED} - ${RETRIEVED}))
+
+  echo "${RETRIEVED}/${TO_BE_RETRIEVED} retrieved"
+done
+
+db_begin_transaction
+cat $status | xargs -iFILE bash -c "db_update staged FILE 1 '='"
+db_commit_transaction
+rm -f ${status}
+
+echo "###"
+echo "${RETRIEVED}/${TO_BE_RETRIEVED} retrieved files"
+echo "###"
diff --git a/continuousintegration/orchestration/tests/client_abortPrepare.sh b/continuousintegration/orchestration/tests/client_abortPrepare.sh
new file mode 100755
index 0000000000..faba6c498e
--- /dev/null
+++ b/continuousintegration/orchestration/tests/client_abortPrepare.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+
+# @project The CERN Tape Archive (CTA)
+# @copyright Copyright © 2022 CERN
+# @license This program is free software, distributed under the
terms of the GNU General Public +# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can +# redistribute it and/or modify it under the terms of the GPL Version 3, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# In applying this licence, CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization or +# submit itself to any jurisdiction. + + +# Quick function to abort the prepare of files. +# $1: file containing even number of lines, odd lines == req id; even lines == file_name +abortFile(){ + while read -r REQ_ID; do + read -r FILE_PATH + FILE_NAME=$(echo ${FILE_PATH} | cut -d/ -f2) + XRD_LOGLEVEL=Dump KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOSINSTANCE} prepare -a ${REQ_ID} ${EOS_DIR}/${FILE_PATH} 2>${ERROR_DIR}/RETRIEVE_${FILE_NAME} && rm ${ERROR_DIR}/RETRIEVE_${FILE_NAME} || echo ERROR with xrootd prepare stage for file ${FILE_NAME}, full logs in ${ERROR_DIR}/RETRIEVE_${FILE_NAME} | grep ^ERROR + done < $1 +} + +export -f abortFile + +# Build the list of tape only files. +STATUS_FILE=$(mktemp) +for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do + eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep 'd0::t[^0]' | sed -e "s%\s\+% %g;s%.* \([^ ]\+\)$%\1%" >> ${STATUS_FILE} +done + +if [[ $(cat ${STATUS_FILE} | wc -l ) -eq 0 ]]; then + echo "ERROR: Can't run abort prepare test as there are no tape only files." + exit 1 +fi + +# Put drives down. +echo "Sleeping 3 seconds to let previous sessions finish." 
+sleep 3 +admin_kdestroy &>/dev/null +admin_kinit &>/dev/null +INITIAL_DRIVES_STATE=`admin_cta --json dr ls` +echo INITIAL_DRIVES_STATE: +echo ${INITIAL_DRIVES_STATE} | jq -r '.[] | [ .driveName, .driveStatus] | @tsv' | column -t +echo -n "Will put down those drives : " +drivesToSetDown=`echo ${INITIAL_DRIVES_STATE} | jq -r '.[] | select (.driveStatus == "UP") | .driveName'` +echo $drivesToSetDown +for d in `echo $drivesToSetDown`; do + admin_cta drive down $d --reason "PUTTING DRIVE DOWN FOR TESTS" +done + +# Wait for drives to be down. +echo "$(date +%s): Waiting for the drives to be down" +SECONDS_PASSED=0 +WAIT_FOR_DRIVES_DOWN_TIMEOUT=$((10)) +while [[ $SECONDS_PASSED < WAIT_FOR_DRIVES_DOWN_TIMEOUT ]]; do + sleep 1 + oneStatusUpRemaining=0 + for d in `echo $drivesToSetDown`; do + status=`admin_cta --json drive ls | jq -r ". [] | select(.driveName == \"$d\") | .driveStatus"` + if [[ $status == "UP" ]]; then + oneStatusUpRemaining=1 + fi; + done + if [[ $oneStatusUpRemaining -eq 0 ]]; then + echo "Drives : $drivesToSetDown are down" + break; + fi + echo -n "." + SECONDS_PASSED=$SECONDS_PASSED+1 + if [[ $SECONDS_PASSED -gt $WAIT_FOR_DRIVES_DOWN_TIMEOUT ]]; then + die "ERROR: Timeout reach for trying to put all drives down" + fi +done + +# Stage. +for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do + echo -n "Retrieving files to ${EOS_DIR}/${subdir} using ?? process (prepare2)..." + + for((slot=1; slot <= ${NB_PROCS}; slot++)); do + touch "slot${slot}" + done + + # NOTE: Parallel max-procs options are in alpha testing according to + # the docs. At the moment just run parallel without the flag making + # parallel take the decission. 
+ cat ${STATUS_FILE} | parallel bash -c "true && XRD_LOGLEVEL=Dump KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOSINSTANCE} prepare -s ${EOS_DIR}/${subdir}/{} 2>${ERROR_DIR}/RETRIEVE_{} | tee -a slot\"{%}\" && echo ${subdir}/{} >> slot\"{%}\" && rm ${ERROR_DIR}/RETRIEVE_{} || echo ERROR with xrootd prepare stage for file {}, full logs in ${ERROR_DIR}/RETRIEVE_{}" | grep ^ERROR + find /root/ -size 0 | xargs rm -f + echo Done. + done + if [ "0" != "$(ls ${ERROR_DIR} 2> /dev/null | wc -l)" ]; then + # there were some prepare errors + echo "Several prepare errors occured during retrieval!" + echo "Please check client pod logs in artifacts" + mv ${ERROR_DIR}/* ${LOGDIR}/xrd_errors/ + fi + sleep 3 + # Ensure all requests files are queued + requestsTotal=`admin_cta --json sq | jq 'map(select (.mountType == "RETRIEVE") | .queuedFiles | tonumber) | add'` + echo "Retrieve requests count: ${requestsTotal}" + filesCount=`cat ${STATUS_FILE} | wc -l` + if [ ${requestsTotal} -ne ${filesCount} ]; then + echo "ERROR: Retrieve queue(s) size mismatch: ${requestsTotal} requests queued for ${filesCount} files." + fi + sleep 3 + # Cancel Stage + # Abort prepare -s requests + for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do + echo -n "Cancelling prepare for files in ${EOS_DIR}/${subdir} using ?? process (prepare_abort)..." + ls slot* | parallel abortFile {} + echo Done. 
+ done + rm -f slot* + # Put drive(s) back up to clear the queue + echo -n "Will put back up those drives : " + echo ${INITIAL_DRIVES_STATE} | jq -r '.[] | select (.driveStatus == "UP") | .driveName' + for d in `echo ${INITIAL_DRIVES_STATE} | jq -r '.[] | select (.driveStatus == "UP") | .driveName'`; do + admin_cta dr up $d +done + +sleep 10 + +# Check that queues are empty after a while and files did not get retrieved +echo "$(date +%s): Waiting for retrieve queues to be cleared:" +SECONDS_PASSED=0 +WAIT_FOR_RETRIEVE_QUEUES_CLEAR_TIMEOUT=$((60)) +REMAINING_REQUESTS=`admin_cta --json sq | jq -r 'map(select (.mountType == "RETRIEVE") | .queuedFiles | tonumber) | add'` +echo "${REMAINING_REQUESTS} requests remaining." +# Prevent the result from being empty +if [ -z "$REMAINING_REQUESTS" ]; then REMAINING_REQUESTS='0'; fi +while [[ ${REMAINING_REQUESTS} > 0 ]]; do + echo "$(date +%s): Waiting for retrieve queues to be cleared: Seconds passed = ${SECONDS_PASSED}" + sleep 1 + let SECONDS_PASSED=SECONDS_PASSED+1 + + if test ${SECONDS_PASSED} == ${WAIT_FOR_RETRIEVE_QUEUES_CLEAR_TIMEOUT}; then + echo "$(date +%s): Timed out after ${WAIT_FOR_RETRIEVE_QUEUES_CLEAR_TIMEOUT} seconds waiting for retrieve queues to be cleared" + break + fi + + REMAINING_REQUESTS=`admin_cta --json sq | jq -r 'map(select (.mountType == "RETRIEVE") | .queuedFiles | tonumber) | add'`; + # Prevent the result from being empty + if [ -z "$REMAINING_REQUEST" ]; then REMAINING_REQUESTS='0'; fi + echo "${REMAINING_REQUESTS} requests remaining." +done + +# Check that the files were not retrieved +echo "Checking restaged files..." 
+RESTAGEDFILES=0 + +db_begin_transaction +for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do + eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep '^d[1-9][0-9]*::t1' | awk -v sd="${subdir}/" '{print sd$10}' > ${STATUS_FILE} + + RF=$(cat $STATUS_FILE | wc -l) + echo "Restaged files in directory ${subdir}: ${RF}" + (( RESTAGEDFILES += ${RF} )) + rm -f ${STATUS_FILE} + touch ${STATUS_FILE} + eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep '^d0::t1' | awk -v sd="${subdir}/" '{print sd$10}' > ${STATUS_FILE} + cat ${STATUS_FILE} | xargs -iFILE bash -c "db_update aborted FILE 1 '='" +done +echo "Total restaged files found: ${RESTAGEDFILES}" +db_commit_transaction + +if [ "0" != "$(ls ${ERROR_DIR} 2> /dev/null | wc -l)" ]; then + # there were some prepare errors + echo "Several errors occured during prepare cancel test!" + echo "Please check client pod logs in artifacts" + mv ${ERROR_DIR}/* ${LOGDIR}/xrd_errors/ +fi + +if [ ${RESTAGEDFILES} -ne "0" ]; then + #((RC++)) + echo 'ERROR some files were retrieved in spite of retirve cancellation.' >> /tmp/RC + echo "ERROR some files were retrieved in spite of retrieve cancellation." +fi + +rm -f ${STATUS_FILE} diff --git a/continuousintegration/orchestration/tests/client_ar.sh b/continuousintegration/orchestration/tests/client_ar.sh old mode 100644 new mode 100755 index 7341842ec7..849dd22fb7 --- a/continuousintegration/orchestration/tests/client_ar.sh +++ b/continuousintegration/orchestration/tests/client_ar.sh @@ -16,645 +16,14 @@ # submit itself to any jurisdiction. -EOSINSTANCE=ctaeos -EOS_BASEDIR=/eos/ctaeos/cta -TEST_FILE_NAME_BASE=test -DATA_SOURCE=/dev/urandom -ARCHIVEONLY=0 # Only archive files or do the full test? 
-DONOTARCHIVE=0 # files were already archived in a previous run NEED TARGETDIR -TARGETDIR='' -LOGDIR='/var/log' - -COMMENT='' -# id of the test so that we can track it -TESTID="$(date +%y%m%d%H%M)" - -NB_PROCS=1 -NB_FILES=1 -NB_DIRS=1 -FILE_KB_SIZE=1 -VERBOSE=0 -REMOVE=0 -TAILPID='' -TAPEAWAREGC=0 - -NB_BATCH_PROCS=500 # number of parallel batch processes -BATCH_SIZE=20 # number of files per batch process - -SSH_OPTIONS='-o BatchMode=yes -o ConnectTimeout=10' - -die() { - echo "$@" 1>&2 - test -z $TAILPID || kill ${TAILPID} &> /dev/null - exit 1 -} - - -usage() { cat <<EOF 1>&2 -Usage: $0 [-n <nb_files_perdir>] [-N <nb_dir>] [-s <file_kB_size>] [-p <# parallel procs>] [-v] [-d <eos_dest_dir>] [-e <eos_instance>] [-S <data_source_file>] [-r] - -v Verbose mode: displays live logs of rmcd to see tapes being mounted/dismounted in real time - -r Remove files at the end: launches the delete workflow on the files that were deleted. WARNING: THIS CAN BE FATAL TO THE NAMESPACE IF THERE ARE TOO MANY FILES AND XROOTD STARTS TO TIMEOUT. - -a Archiveonly mode: exits after file archival - -g Tape aware GC? -EOF -exit 1 -} - - -# Send annotations to Influxdb -annotate() { - TITLE=$1 - TEXT=$2 - TAGS=$3 - LINE="ctapps_tests title=\"${TITLE}\",text=\"${TEXT}\",tags=\"${TAGS}\" $(date +%s)" - curlcmd="curl --connect-timeout 2 -X POST 'https://ctapps-influx02.cern.ch:8086/write?db=annotations&u=annotations&p=annotations&precision=s' --data-binary '${LINE}'" - eval ${curlcmd} -} - - -# Provide an EOS directory and return the list of tapes containing files under that directory -nsls_tapes() -{ - EOS_DIR=${1:-${EOS_BASEDIR}} - - # 1. Query EOS namespace to get a list of file IDs - # 2. 
Pipe to "tape ls" to get the list of tapes where those files are archived - eos root://${EOSINSTANCE} find --fid ${EOS_DIR} |\ - admin_cta --json tape ls --fxidfile /dev/stdin |\ - jq '.[] | .vid' | sed 's/"//g' -} - -# Provide a list of tapes and list the filenames of the files stored on those tapes -tapefile_ls() -{ - for vid in $* - do - admin_cta --json tapefile ls --lookupnamespace --vid ${vid} |\ - jq '.[] | .df.path' - done -} - - -while getopts "d:e:n:N:s:p:vS:rAPGt:m:" o; do - case "${o}" in - e) - EOSINSTANCE=${OPTARG} - ;; - d) - EOS_BASEDIR=${OPTARG} - ;; - n) - NB_FILES=${OPTARG} - ;; - N) - NB_DIRS=${OPTARG} - ;; - s) - FILE_KB_SIZE=${OPTARG} - ;; - p) - NB_PROCS=${OPTARG} - ;; - v) - VERBOSE=1 - ;; - S) - DATA_SOURCE=${OPTARG} - ;; - r) - REMOVE=1 - ;; - A) - ARCHIVEONLY=1 - ;; - P) - DONOTARCHIVE=1 - ;; - G) - TAPEAWAREGC=1 - ;; - t) - TARGETDIR=${OPTARG} - ;; - m) - COMMENT=${OPTARG} - ;; - *) - usage - ;; - esac -done -shift $((OPTIND-1)) - -if [ ! -z "${error}" ]; then - echo -e "ERROR:\n${error}" - exit 1 -fi - -if [ "x${COMMENT}" = "x" ]; then - echo "No annotation will be pushed to Influxdb" -fi - -if [[ $DONOTARCHIVE == 1 ]]; then - if [[ "x${TARGETDIR}" = "x" ]]; then - echo "You must provide a target directory to run a test and skip archival" - exit 1 - fi - eos root://${EOSINSTANCE} ls -d ${EOS_BASEDIR}/${TARGETDIR} || die "target directory does not exist and there is no archive phase to create it." 
-fi - -if [[ $TAPEAWAREGC == 1 ]]; then - echo "Enabling tape aware garbage collector" - ssh ${SSH_OPTIONS} -l root ${EOSINSTANCE} eos space config default space.filearchivedgc=off || die "Could not disable filearchivedgc" -fi - -EOS_DIR='' -if [[ "x${TARGETDIR}" = "x" ]]; then - EOS_DIR="${EOS_BASEDIR}/$(uuidgen)" -else - EOS_DIR="${EOS_BASEDIR}/${TARGETDIR}" -fi -LOGDIR="${LOGDIR}/$(basename ${EOS_DIR})" -mkdir -p ${LOGDIR} || die "Cannot create directory LOGDIR: ${LOGDIR}" -mkdir -p ${LOGDIR}/xrd_errors || die "Cannot create directory LOGDIR/xrd_errors: ${LOGDIR}/xrd_errors" - -STATUS_FILE=$(mktemp) -echo "$(date +%s): STATUS_FILE=${STATUS_FILE}" -ERROR_FILE=$(mktemp) -echo "$(date +%s): ERROR_FILE=${ERROR_FILE}" -EOS_BATCHFILE=$(mktemp --suffix=.eosh) -echo "$(date +%s): EOS_BATCHFILE=${EOS_BATCHFILE}" - -# As we are skipping n bytes per file we need a bit more than the file size to accomodate dd to read ${FILE_KB_SIZE} skipping the n first bytes -dd if=/dev/urandom of=/tmp/testfile bs=1k count=$((${FILE_KB_SIZE} + ${NB_FILES}*${NB_DIRS}/1024 + 1)) || exit 1 - -if [[ $VERBOSE == 1 ]]; then - tail -v -f /mnt/logs/tpsrv0*/rmcd/cta/cta-rmcd.log & - TAILPID=$! -fi - -# get some common useful helpers for krb5 -. /root/client_helper.sh - -# Get kerberos credentials for user1 -user_kinit -klist -s || die "Cannot get kerberos credentials for user ${USER}" - -# Get kerberos credentials for poweruser1 -eospower_kdestroy -eospower_kinit - -echo "Starting test ${TESTID}: ${COMMENT}" - -#echo "$(date +%s): Dumping objectstore list" -#ssh root@ctappsfrontend cta-objectstore-list - -test -z ${COMMENT} || annotate "test ${TESTID} STARTED" "comment: ${COMMENT}<br/>files: $((${NB_DIRS}*${NB_FILES}))<br/>filesize: ${FILE_KB_SIZE}kB" 'test,start' - - if [[ $DONOTARCHIVE == 0 ]]; then - -echo "$(date +%s): Creating test dir in eos: ${EOS_DIR}" -# uuid should be unique no need to remove dir before... 
-# XrdSecPROTOCOL=sss eos -r 0 0 root://${EOSINSTANCE} rm -Fr ${EOS_DIR} - - -eos root://${EOSINSTANCE} mkdir -p ${EOS_DIR} || die "Cannot create directory ${EOS_DIR} in eos instance ${EOSINSTANCE}." - -echo -echo "Listing the EOS extended attributes of ${EOS_DIR}" -eos root://${EOSINSTANCE} attr ls ${EOS_DIR} -echo - -echo yes | cta-immutable-file-test root://${EOSINSTANCE}/${EOS_DIR}/immutable_file || die "The cta-immutable-file-test failed." - -# Create directory for xrootd error reports -ERROR_DIR="/dev/shm/$(basename ${EOS_DIR})" -mkdir ${ERROR_DIR} -echo "$(date +%s): ERROR_DIR=${ERROR_DIR}" -# not more than 100k files per directory so that we can rm and find as a standard user -for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do - eos root://${EOSINSTANCE} mkdir -p ${EOS_DIR}/${subdir} || die "Cannot create directory ${EOS_DIR}/{subdir} in eos instance ${EOSINSTANCE}." - echo -n "Copying files to ${EOS_DIR}/${subdir} using ${NB_PROCS} processes..." - TEST_FILE_NAME_SUBDIR=${TEST_FILE_NAME_BASE}$(printf %.2d ${subdir}) # this is the target filename of xrdcp processes just need to add the filenumber in each directory - # xargs must iterate on the individual file number no subshell can be spawned even for a simple addition in xargs - for ((i=0;i<${NB_FILES};i++)); do - echo $(printf %.6d $i) -done | xargs --max-procs=${NB_PROCS} -iTEST_FILE_NUM bash -c "dd if=/tmp/testfile bs=1k 2>/dev/null | (dd bs=$((${subdir}*${NB_FILES})) count=1 of=/dev/null 2>/dev/null; dd bs=TEST_FILE_NUM count=1 of=/dev/null 2>/dev/null; dd bs=1k count=${FILE_KB_SIZE} 2>/dev/null) | XRD_LOGLEVEL=Dump xrdcp - root://${EOSINSTANCE}/${EOS_DIR}/${subdir}/${TEST_FILE_NAME_SUBDIR}TEST_FILE_NUM 2>${ERROR_DIR}/${TEST_FILE_NAME_SUBDIR}TEST_FILE_NUM && rm ${ERROR_DIR}/${TEST_FILE_NAME_SUBDIR}TEST_FILE_NUM || echo ERROR with xrootd transfer for file ${TEST_FILE_NAME_SUBDIR}TEST_FILE_NUM, full logs in ${ERROR_DIR}/${TEST_FILE_NAME_SUBDIR}TEST_FILE_NUM" - #done | xargs --max-procs=${NB_PROCS} 
-iTEST_FILE_NAME xrdcp --silent /tmp/testfile root://${EOSINSTANCE}/${EOS_DIR}/${subdir}/TEST_FILE_NAME - # done | xargs -n ${BATCH_SIZE} --max-procs=${NB_BATCH_PROCS} ./batch_xrdcp /tmp/testfile root://${EOSINSTANCE}/${EOS_DIR}/${subdir} - echo Done. -done -if [ "0" != "$(ls ${ERROR_DIR} 2> /dev/null | wc -l)" ]; then - # there were some xrdcp errors - echo "Several xrdcp errors occured during archival!" - echo "Please check client pod logs in artifacts" - mv ${ERROR_DIR}/* ${LOGDIR}/xrd_errors/ + . /root/client_archive.sh fi -COPIED=0 -COPIED_EMPTY=0 -for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do - COPIED=$(( ${COPIED} + $(eos root://${EOSINSTANCE} find -f ${EOS_DIR}/${subdir} | wc -l) )) - COPIED_EMPTY=$(( ${COPIED_EMPTY} + $(eos root://${EOSINSTANCE} find -0 ${EOS_DIR}/${subdir} | wc -l) )) -done - -# Only not empty files are archived by CTA -TO_BE_ARCHIVED=$((${COPIED} - ${COPIED_EMPTY})) - -ARCHIVING=${TO_BE_ARCHIVED} -ARCHIVED=0 -echo "$(date +%s): Waiting for files to be on tape:" -SECONDS_PASSED=0 -WAIT_FOR_ARCHIVED_FILE_TIMEOUT=$((40+${NB_FILES}/5)) -while test 0 != ${ARCHIVING}; do - echo "$(date +%s): Waiting for files to be archived to tape: Seconds passed = ${SECONDS_PASSED}" - sleep 3 - let SECONDS_PASSED=SECONDS_PASSED+1 - - if test ${SECONDS_PASSED} == ${WAIT_FOR_ARCHIVED_FILE_TIMEOUT}; then - echo "$(date +%s): Timed out after ${WAIT_FOR_ARCHIVED_FILE_TIMEOUT} seconds waiting for file to be archived to tape" - break - fi - - ARCHIVED=0 - for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do - ARCHIVED=$(( ${ARCHIVED} + $(eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | grep '^d0::t1' | wc -l) )) - sleep 1 # do not hammer eos too hard - done - - echo "${ARCHIVED}/${TO_BE_ARCHIVED} archived" - - ARCHIVING=$((${TO_BE_ARCHIVED} - ${ARCHIVED})) - NB_TAPE_NOT_FULL=`admin_cta --json ta ls --all | jq "[.[] | select(.full == false)] | length"` - if [[ ${NB_TAPE_NOT_FULL} == 0 ]] - then - echo "$(date +%s): All tapes are full, exiting archiving 
loop" - break - fi -done - - -echo "###" -echo "${ARCHIVED}/${TO_BE_ARCHIVED} archived" -echo "###" - -fi # DONOTARCHIVE - -#echo "$(date +%s): Dumping objectstore list" -#ssh root@ctappsfrontend cta-objectstore-list - if [[ $ARCHIVEONLY == 1 ]]; then echo "Archiveonly mode: exiting" test -z $TAILPID || kill ${TAILPID} &> /dev/null exit 0 fi - -echo "###" -echo "${TAPEONLY}/${ARCHIVED} on tape only" -echo "###" -echo "Sleeping 10 seconds to allow MGM-FST communication to settle after disk copy deletion." -sleep 10 -echo "###" - - -echo "$(date +%s): Trigerring EOS retrieve workflow as poweruser1:powerusers (12001:1200)" - -rm -f ${STATUS_FILE} -touch ${STATUS_FILE} -for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do - eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | grep 'd0::t1' | sed -e "s%\s\+% %g;s%.* \([^ ]\+\)$%${subdir}/\1%" >> ${STATUS_FILE} - # sleep 3 # do not hammer eos too hard -done - -# We need the -s as we are staging the files from tape (see xrootd prepare definition) -# cat ${STATUS_FILE} | KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xargs --max-procs=${NB_PROCS} -iTEST_FILE_NAME xrdfs ${EOSINSTANCE} prepare -s ${EOS_DIR}/TEST_FILE_NAME 2>&1 | tee ${ERROR_FILE} -# CAREFULL HERE: ${STATUS_FILE} contains lines like: 99/test9900001 -for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do - echo -n "Retrieving files to ${EOS_DIR}/${subdir} using ${NB_PROCS} processes..." - cat ${STATUS_FILE} | grep ^${subdir}/ | cut -d/ -f2 | xargs --max-procs=${NB_PROCS} -iTEST_FILE_NAME bash -c "XRD_LOGLEVEL=Dump KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOSINSTANCE} prepare -s ${EOS_DIR}/${subdir}/TEST_FILE_NAME?activity=T0Reprocess 2>${ERROR_DIR}/RETRIEVE_TEST_FILE_NAME && rm ${ERROR_DIR}/RETRIEVE_TEST_FILE_NAME || echo ERROR with xrootd prepare stage for file TEST_FILE_NAME, full logs in ${ERROR_DIR}/RETRIEVE_TEST_FILE_NAME" | tee ${LOGDIR}/prepare_${subdir}.log | grep ^ERROR - echo Done. 
- cat ${STATUS_FILE} | grep ^${subdir}/ | cut -d/ -f2 | xargs --max-procs=${NB_PROCS} -iTEST_FILE_NAME bash -c "XRD_LOGLEVEL=Dump KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOSINSTANCE} query opaquefile ${EOS_DIR}/${subdir}/TEST_FILE_NAME?mgm.pcmd=xattr\&mgm.subcmd=get\&mgm.xattrname=sys.retrieve.req_id 2>${ERROR_DIR}/XATTRGET_TEST_FILE_NAME && rm ${ERROR_DIR}/XATTRGET_TEST_FILE_NAME || echo ERROR with xrootd xattr get for file TEST_FILE_NAME, full logs in ${ERROR_DIR}/XATTRGET_TEST_FILE_NAME" | tee ${LOGDIR}/prepare_sys.retrieve.req_id_${subdir}.log | grep ^ERROR -done -if [ "0" != "$(ls ${ERROR_DIR} 2> /dev/null | wc -l)" ]; then - # there were some prepare errors - echo "Several prepare errors occured during retrieval!" - echo "Please check client pod logs in artifacts" - mv ${ERROR_DIR}/* ${LOGDIR}/xrd_errors/ -fi - -ARCHIVED=$(cat ${STATUS_FILE} | wc -l) -TO_BE_RETRIEVED=$(( ${ARCHIVED} - $(ls ${ERROR_DIR}/RETRIEVE_* 2>/dev/null | wc -l) )) -RETRIEVING=${TO_BE_RETRIEVED} -RETRIEVED=0 -# Wait for the copy to appear on disk -echo "$(date +%s): Waiting for files to be back on disk:" -SECONDS_PASSED=0 -WAIT_FOR_RETRIEVED_FILE_TIMEOUT=$((40+${NB_FILES}/5)) -while test 0 -lt ${RETRIEVING}; do - echo "$(date +%s): Waiting for files to be retrieved from tape: Seconds passed = ${SECONDS_PASSED}" - sleep 3 - let SECONDS_PASSED=SECONDS_PASSED+1 - - if test ${SECONDS_PASSED} == ${WAIT_FOR_RETRIEVED_FILE_TIMEOUT}; then - echo "$(date +%s): Timed out after ${WAIT_FOR_RETRIEVED_FILE_TIMEOUT} seconds waiting for file to be retrieved tape" - break - fi - - RETRIEVED=0 - for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do - RETRIEVED=$(( ${RETRIEVED} + $(eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep '^d[1-9][0-9]*::t1' | wc -l) )) - sleep 1 # do not hammer eos too hard - done - - RETRIEVING=$((${TO_BE_RETRIEVED} - ${RETRIEVED})) - - echo "${RETRIEVED}/${TO_BE_RETRIEVED} retrieved" -done - -echo "###" -echo 
"${RETRIEVED}/${TO_BE_RETRIEVED} retrieved files" -echo "###" - - -#echo "$(date +%s): Dumping objectstore list" -#ssh root@ctappsfrontend cta-objectstore-list - - -# Build the list of files with more than 1 disk copy that have been archived before (ie d>=1::t1) -rm -f ${STATUS_FILE} -touch ${STATUS_FILE} -for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do - eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep 'd[1-9][0-9]*::t1' | sed -e "s%\s\+% %g;s%.* \([^ ]\+\)$%${subdir}/\1%" >> ${STATUS_FILE} -done - -TO_EVICT=$(cat ${STATUS_FILE} | wc -l) - -echo "$(date +%s): $TO_EVICT files to be evicted from EOS using 'xrdfs prepare -e'" -# We need the -e as we are evicting the files from disk cache (see xrootd prepare definition) -cat ${STATUS_FILE} | sed -e "s%^%${EOS_DIR}/%" | XrdSecPROTOCOL=krb5 KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 xargs --max-procs=10 -n 40 xrdfs ${EOSINSTANCE} prepare -e > /dev/null - - -LEFTOVER=0 -for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do - LEFTOVER=$(( ${LEFTOVER} + $(eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep '^d[1-9][0-9]*::t1' | wc -l) )) -done - -EVICTED=$((${TO_EVICT}-${LEFTOVER})) -echo "$(date +%s): $EVICTED/$TO_EVICT files evicted from EOS 'xrdfs prepare -e'" - -LASTCOUNT=${EVICTED} - -# Build the list of tape only files. -rm -f ${STATUS_FILE} -touch ${STATUS_FILE} -for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do - eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep 'd0::t[^0]' | sed -e "s%\s\+% %g;s%.* \([^ ]\+\)$%${subdir}/\1%" >> ${STATUS_FILE} -done - -# Put all tape drives down -echo "Sleeping 3 seconds to let previous sessions finish." 
-sleep 3 -admin_kdestroy &>/dev/null -admin_kinit &>/dev/null -INITIAL_DRIVES_STATE=`admin_cta --json dr ls` -echo INITIAL_DRIVES_STATE: -echo ${INITIAL_DRIVES_STATE} | jq -r '.[] | [ .driveName, .driveStatus] | @tsv' | column -t -echo -n "Will put down those drives : " -drivesToSetDown=`echo ${INITIAL_DRIVES_STATE} | jq -r '.[] | select (.driveStatus == "UP") | .driveName'` -echo $drivesToSetDown -for d in `echo $drivesToSetDown`; do - admin_cta drive down $d --reason "PUTTING DRIVE DOWN FOR TESTS" -done - -echo "$(date +%s): Waiting for the drives to be down" -SECONDS_PASSED=0 -WAIT_FOR_DRIVES_DOWN_TIMEOUT=$((10)) -while [[ $SECONDS_PASSED < WAIT_FOR_DRIVES_DOWN_TIMEOUT ]]; do - sleep 1 - oneStatusUpRemaining=0 - for d in `echo $drivesToSetDown`; do - status=`admin_cta --json drive ls | jq -r ". [] | select(.driveName == \"$d\") | .driveStatus"` - if [[ $status == "UP" ]]; then - oneStatusUpRemaining=1 - fi; - done - if [[ $oneStatusUpRemaining -eq 0 ]]; then - echo "Drives : $drivesToSetDown are down" - break; - fi - echo -n "." - SECONDS_PASSED=$SECONDS_PASSED+1 - if [[ $SECONDS_PASSED -gt $WAIT_FOR_DRIVES_DOWN_TIMEOUT ]]; then - die "ERROR: Timeout reach for trying to put all drives down" - fi -done - -# Prepare-stage the files -#cat ${STATUS_FILE} | perl -p -e "s|^(.*)$|${EOS_DIR}/\$1?activity=T0Reprocess|" | \ -# XRD_LOGLEVEL=Dump KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xargs -n 40 --max-procs=10 \ -# echo bash -c "echo xrdfs ${EOSINSTANCE} prepare -s $@" bash -# | \ -# tee ${LOGDIR}/prepare_${subdir}.log | grep -i error - -for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do - echo -n "Retrieving files to ${EOS_DIR}/${subdir} using ${NB_PROCS} processes (prepare2)..." 
- cat ${STATUS_FILE} | grep ^${subdir}/ | cut -d/ -f2 | xargs --max-procs=${NB_PROCS} -iTEST_FILE_NAME bash -c "XRD_LOGLEVEL=Dump KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOSINSTANCE} prepare -s ${EOS_DIR}/${subdir}/TEST_FILE_NAME?activity=T0Reprocess 2>${ERROR_DIR}/RETRIEVE_TEST_FILE_NAME && rm ${ERROR_DIR}/RETRIEVE_TEST_FILE_NAME || echo ERROR with xrootd prepare stage for file TEST_FILE_NAME, full logs in ${ERROR_DIR}/RETRIEVE_TEST_FILE_NAME" | tee ${LOGDIR}/prepare2_${subdir}.log | grep ^ERROR - echo Done. - echo -n "Checking the presence of the sys.retrieve.req_id extended attributes..." - cat ${STATUS_FILE} | grep ^${subdir}/ | cut -d/ -f2 | xargs --max-procs=${NB_PROCS} -iTEST_FILE_NAME bash -c "XRD_LOGLEVEL=Dump KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOSINSTANCE} query opaquefile ${EOS_DIR}/${subdir}/TEST_FILE_NAME?mgm.pcmd=xattr\&mgm.subcmd=get\&mgm.xattrname=sys.retrieve.req_id 2>${ERROR_DIR}/XATTRGET_TEST_FILE_NAME && rm ${ERROR_DIR}/XATTRGET_TEST_FILE_NAME || echo ERROR with xrootd xattr get for file TEST_FILE_NAME, full logs in ${ERROR_DIR}/XATTRGET_TEST_FILE_NAME" | tee ${LOGDIR}/prepare2_sys.retrieve.req_id_${subdir}.log | grep ^ERROR - echo Done. -done -if [ "0" != "$(ls ${ERROR_DIR} 2> /dev/null | wc -l)" ]; then - # there were some prepare errors - echo "Several prepare errors occured during retrieval!" - echo "Please check client pod logs in artifacts" - mv ${ERROR_DIR}/* ${LOGDIR}/xrd_errors/ -fi - -# Ensure all requests files are queued -requestsTotal=`admin_cta --json sq | jq 'map(select (.mountType == "RETRIEVE") | .queuedFiles | tonumber) | add'` -echo "Retrieve requests count: ${requestsTotal}" -filesCount=`cat ${STATUS_FILE} | wc -l` -if [ ${requestsTotal} -ne ${filesCount} ]; then - echo "ERROR: Retrieve queue(s) size mismatch: ${requestsTotal} requests queued for ${filesCount} files." 
-fi - -# Abort prepare -s requests -for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do - echo -n "Cancelling prepare for files in ${EOS_DIR}/${subdir} using ${NB_PROCS} processes (prepare_abort)..." - cat ${STATUS_FILE} | grep ^${subdir}/ | cut -d/ -f2 \ - | xargs --max-procs=${NB_PROCS} -iTEST_FILE_NAME cta-client-ar-abortPrepare --eos-instance ${EOSINSTANCE} \ - --eos-poweruser ${EOSPOWER_USER} --eos-dir ${EOS_DIR} --subdir ${subdir} --file TEST_FILE_NAME --error-dir ${ERROR_DIR} \ - | tee ${LOGDIR}/prepare_abort_sys.retrieve.req_id_${subdir}.log # | grep ^ERROR - echo Done. -done - -# Put drive(s) back up to clear the queue -echo -n "Will put back up those drives : " -echo ${INITIAL_DRIVES_STATE} | jq -r '.[] | select (.driveStatus == "UP") | .driveName' -for d in `echo ${INITIAL_DRIVES_STATE} | jq -r '.[] | select (.driveStatus == "UP") | .driveName'`; do - admin_cta dr up $d -done - -# Check that queues are empty after a while and files did not get retrieved -echo "$(date +%s): Waiting for retrieve queues to be cleared:" -SECONDS_PASSED=0 -WAIT_FOR_RETRIEVE_QUEUES_CLEAR_TIMEOUT=$((60)) -REMAINING_REQUESTS=`admin_cta --json sq | jq -r 'map(select (.mountType == "RETRIEVE") | .queuedFiles | tonumber) | add'` -echo "${REMAINING_REQUESTS} requests remaining." 
-# Prevent the result from being empty -if [ -z "$REMAINING_REQUESTS" ]; then REMAINING_REQUESTS='0'; fi -while [[ ${REMAINING_REQUESTS} > 0 ]]; do - echo "$(date +%s): Waiting for retrieve queues to be cleared: Seconds passed = ${SECONDS_PASSED}" - sleep 1 - let SECONDS_PASSED=SECONDS_PASSED+1 - - if test ${SECONDS_PASSED} == ${WAIT_FOR_RETRIEVE_QUEUES_CLEAR_TIMEOUT}; then - echo "$(date +%s): Timed out after ${WAIT_FOR_RETRIEVE_QUEUES_CLEAR_TIMEOUT} seconds waiting for retrieve queues to be cleared" - break - fi - - REMAINING_REQUESTS=`admin_cta --json sq | jq -r 'map(select (.mountType == "RETRIEVE") | .queuedFiles | tonumber) | add'`; - # Prevent the result from being empty - if [ -z "$REMAINING_REQUEST" ]; then REMAINING_REQUESTS='0'; fi - echo "${REMAINING_REQUESTS} requests remaining." -done - -# Check that the files were not retrieved -echo "Checking restaged files..." -RESTAGEDFILES=0 -for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do - RF=$(eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep '^d[1-9][0-9]*::t1' | wc -l) - echo "Restaged files in directory ${subdir}: ${RF}" - (( RESTAGEDFILES += ${RF} )) -done -echo "Total restaged files found: ${RESTAGEDFILES}" - -if [ "0" != "$(ls ${ERROR_DIR} 2> /dev/null | wc -l)" ]; then - # there were some prepare errors - echo "Several errors occured during prepare cancel test!" - echo "Please check client pod logs in artifacts" - mv ${ERROR_DIR}/* ${LOGDIR}/xrd_errors/ -fi - -# We can now delete the files -DELETED=0 -if [[ $REMOVE == 1 ]]; then - echo "Waiting for files to be removed from EOS and tapes" - # . /root/client_helper.sh - admin_kdestroy &>/dev/null - admin_kinit &>/dev/null - if $(admin_cta admin ls &>/dev/null); then - echo "Got cta admin privileges, can proceed with the workflow" - else - # displays what failed and fail - admin_cta admin ls - die "Could not launch cta-admin command." - fi - # recount the files on tape as the workflows may have gone further... 
- VIDLIST=$(nsls_tapes ${EOS_DIR}) - INITIALFILESONTAPE=$(tapefile_ls ${VIDLIST} | wc -l) - echo "Before starting deletion there are ${INITIALFILESONTAPE} files on tape." - #XrdSecPROTOCOL=sss eos -r 0 0 root://${EOSINSTANCE} rm -Fr ${EOS_DIR} & - KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 eos root://${EOSINSTANCE} rm -Fr ${EOS_DIR} & - EOSRMPID=$! - # wait a bit in case eos prematurely fails... - sleep 0.1 - if test ! -d /proc/${EOSRMPID}; then - # eos rm process died, get its status - wait ${EOSRMPID} - test $? -ne 0 && die "Could not launch eos rm" - fi - # Now we can start to do something... - # deleted files are the ones that made it on tape minus the ones that are still on tapes... - echo "Waiting for files to be deleted:" - SECONDS_PASSED=0 - WAIT_FOR_DELETED_FILE_TIMEOUT=$((5+${NB_FILES}/9)) - FILESONTAPE=${INITIALFILESONTAPE} - while test 0 != ${FILESONTAPE}; do - echo "Waiting for files to be deleted from tape: Seconds passed = ${SECONDS_PASSED}" - sleep 1 - let SECONDS_PASSED=SECONDS_PASSED+1 - - if test ${SECONDS_PASSED} == ${WAIT_FOR_DELETED_FILE_TIMEOUT}; then - echo "Timed out after ${WAIT_FOR_DELETED_FILE_TIMEOUT} seconds waiting for file to be deleted from tape" - break - fi - FILESONTAPE=$(tapefile_ls ${VIDLIST} > >(wc -l) 2> >(cat > /tmp/ctaerr)) - if [[ $(cat /tmp/ctaerr | wc -l) -gt 0 ]]; then - echo "cta-admin COMMAND FAILED!!" 
- echo "ERROR CTA ERROR MESSAGE:" - cat /tmp/ctaerr - break - fi - DELETED=$((${INITIALFILESONTAPE} - ${FILESONTAPE})) - echo "${DELETED}/${INITIALFILESONTAPE} deleted" - done - - # kill eos rm command that may run in the background - kill ${EOSRMPID} &> /dev/null - - # As we deleted the directory we may have deleted more files than the ones we retrieved - # therefore we need to take the smallest of the 2 values to decide if the system test was - # successful or not - if [[ ${RETRIEVED} -gt ${DELETED} ]]; then - LASTCOUNT=${DELETED} - echo "Some files have not been deleted:" - tapefile_ls ${VIDLIST} - else - echo "All files have been deleted" - LASTCOUNT=${RETRIEVED} - fi -fi - - -echo "###" -echo "$(date +%s): Results:" -echo "REMOVED/EVICTED/RETRIEVED/ARCHIVED/RESTAGEDFILES/NB_FILES" -echo "${DELETED}/${EVICTED}/${RETRIEVED}/${ARCHIVED}/${RESTAGEDFILES}/$((${NB_FILES} * ${NB_DIRS}))" -echo "###" - -test -z ${COMMENT} || annotate "test ${TESTID} FINISHED" "Summary:</br>NB_FILES: $((${NB_FILES} * ${NB_DIRS}))</br>ARCHIVED: ${ARCHIVED}<br/>RETRIEVED: ${RETRIEVED}<br/>STAGERRMED: ${STAGERRMED}</br>DELETED: ${DELETED}" 'test,end' - - -#echo "$(date +%s): Dumping objectstore list" -#ssh root@ctappsfrontend cta-objectstore-list - - -# stop tail -test -z $TAILPID || kill ${TAILPID} &> /dev/null - -RC=0 -if [ ${LASTCOUNT} -ne $((${NB_FILES} * ${NB_DIRS})) ]; then - ((RC++)) - echo "ERROR there were some lost files during the archive/retrieve test with ${NB_FILES} files (first 10):" - grep -v retrieved ${STATUS_FILE} | sed -e "s;^;${EOS_DIR}/;" | head -10 -fi - -if [ $(cat ${LOGDIR}/prepare_sys.retrieve.req_id_*.log | grep -v value= | wc -l) -ne 0 ]; then - # THIS IS NOT YET AN ERROR: UNCOMMENT THE FOLLOWING LINE WHEN https://gitlab.cern.ch/cta/CTA/issues/606 is fixed - # ((RC++)) - echo "ERROR $(cat ${LOGDIR}/prepare_sys.retrieve.req_id_*.log | grep -v value= | wc -l) files out of $(cat ${LOGDIR}/prepare_sys.retrieve.req_id_*.log | wc -l) prepared files have no 
sys.retrieve.req_id extended attribute set" -fi - - -if [ ${RESTAGEDFILES} -ne "0" ]; then - ((RC++)) - echo "ERROR some files were retrieved in spite of retrieve cancellation." -fi - -# This one does not change the return code -# WARNING if everything else was OK -# ERROR otherwise as these xrootd failures could be the reason of the failure -if [ $(ls ${LOGDIR}/xrd_errors | wc -l) -ne 0 ]; then - # ((RC++)) # do not change RC - if [ ${RC} -eq 0 ]; then - echo "WARNING several xrootd failures occured during this run, please check client dumps in ${LOGDIR}/xrd_errors." - else - echo "ERROR several xrootd failures occured during this run, please check client dumps in ${LOGDIR}/xrd_errors." - fi -fi - -exit ${RC} +. /root/client_retrieve.sh diff --git a/continuousintegration/orchestration/tests/client_ar_abortPrepare.py b/continuousintegration/orchestration/tests/client_ar_abortPrepare.py deleted file mode 100755 index 55dc47cc3c..0000000000 --- a/continuousintegration/orchestration/tests/client_ar_abortPrepare.py +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/python3.6 - -# @project The CERN Tape Archive (CTA) -# @copyright Copyright(C) 2022 CERN -# @license This program is free software, distributed under the terms of the GNU General Public -# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can -# redistribute it and/or modify it under the terms of the GPL Version 3, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the GNU General Public License for more details. -# -# In applying this licence, CERN does not waive the privileges and immunities -# granted to it by virtue of its status as an Intergovernmental Organization or -# submit itself to any jurisdiction. 
- - -import argparse -import subprocess -import os -import copy -import traceback -import re - -# Instantiate the parser and parse command line -parser = argparse.ArgumentParser( - description='Utility program to abort a retrieve on an EOS+CTA system.') -parser.add_argument('--eos-instance', required=True) -parser.add_argument('--eos-poweruser', required=True) -parser.add_argument('--eos-dir', required=True) -parser.add_argument('--subdir', required=True) -parser.add_argument('--file', required=True) -parser.add_argument('--error-dir', required=True) -options = parser.parse_args() - -# Construct various parameters. -filepath = options.eos_dir + '/' + options.subdir + '/' + options.file -xattrgeterrorfilepath = options.error_dir + '/' + 'XATTRGET2_' -xattrgeterrorfilepath += options.subdir + '_' + options.file - -aborterrorfilepath = options.error_dir + '/' + 'PREPAREABORT_' -aborterrorfilepath += options.subdir + '_' + options.file - -# Get the xattr of the file -# Prepare the environment -env = copy.deepcopy(os.environ) -env['XRD_LOGLEVEL'] = 'Dump' -env['KRB5CCNAME'] = '/tmp/' + options.eos_poweruser + '/krb5cc_0' -env['XrdSecPROTOCOL'] = 'krb5' -try: - xattrRes = subprocess.run( - ['xrdfs', options.eos_instance, 'query', 'opaquefile', - filepath+'?mgm.pcmd=xattr&mgm.subcmd=get&mgm.xattrname=sys.retrieve.req_id'], - env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - xattrRes.check_returncode() -except subprocess.CalledProcessError as cpe: - print('ERROR with xrdfs query for file ' + options.file + ': '+str(cpe.stderr)+' full logs in ' + - xattrgeterrorfilepath) - traceback.print_exc() - print(cpe.stdout) - errFile = open(xattrgeterrorfilepath, 'w') - errFile.write(str(xattrRes.stderr)) - errFile.close() -except Exception as e: - print('ERROR with xrdfs query for file ' + options.file + ': got exception of type: ' + - str(type(e)) + '['.join(arg + ', ' for arg in e.args) +'] full logs in ' + xattrgeterrorfilepath) - traceback.print_exc() - errFile = 
open(xattrgeterrorfilepath, 'w') - errFile.write(str(xattrRes.stderr)) - errFile.close() -# OK, worked... -requestId=xattrRes.stdout.decode('utf8') -#print('requestId(pre-match)=' + requestId) -reComp=re.compile(r'.*value=(.*)') -reExec=reComp.match(requestId) -requestId = re.compile(r'.*value=(.*)').match(requestId).group(1) -#print('requestId(post-match)=' + requestId) - -# We can now abort the prepare -try: - # print('Will xrdfs ' + str(options.eos_instance).rstrip() + 'prepare -a ' + requestId + ' ' + filepath) - abortRes = subprocess.run( - ['xrdfs', str(options.eos_instance).rstrip(), 'prepare', '-a', requestId, filepath], - env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - abortRes.check_returncode() - # print('abortRes.stdout') - # print(abortRes.stdout.decode('utf8')) - # print('abortRes.stderr') - # print(abortRes.stderr.decode('utf8')) -except subprocess.CalledProcessError as cpe: - print('ERROR with xrdfs prepare -a ' + options.file + '(' + str(cpe.returncode) + ') full logs in ' + - aborterrorfilepath) - traceback.print_exc() - print('cpe.stderr:') - print(cpe.stderr.decode('utf8')) - print('cpe.stdout') - print(cpe.stdout.decode('utf8')) - errFile = open(aborterrorfilepath, 'w') - errFile.write(str(abortRes.stderr)) - errFile.close() -except Exception as e: - print('ERROR with xrdfs prepare -a for file ' + options.file + ': got exception of type: ' + - str(type(e)) + '['.join(arg + ', ' for arg in e.args) +'] full logs in '+ aborterrorfilepath) - traceback.print_exc() - errFile = open(aborterrorfilepath, 'w') - errFile.write(str(e)) - errFile.close() diff --git a/continuousintegration/orchestration/tests/client_archive.sh b/continuousintegration/orchestration/tests/client_archive.sh new file mode 100755 index 0000000000..bc0ad8ba45 --- /dev/null +++ b/continuousintegration/orchestration/tests/client_archive.sh @@ -0,0 +1,139 @@ +#!/bin/bash + +# @project The CERN Tape Archive (CTA) +# @copyright Copyright © 2022 CERN +# @license This program 
is free software, distributed under the terms of the GNU General Public +# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can +# redistribute it and/or modify it under the terms of the GPL Version 3, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# In applying this licence, CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization or +# submit itself to any jurisdiction. + + + +echo "$(date +%s): Creating test dir in eos: ${EOS_DIR}" +# uuid should be unique no need to remove dir before... +# XrdSecPROTOCOL=sss eos -r 0 0 root://${EOSINSTANCE} rm -Fr ${EOS_DIR} + + +eos root://${EOSINSTANCE} mkdir -p ${EOS_DIR} || die "Cannot create directory ${EOS_DIR} in eos instance ${EOSINSTANCE}." + +echo +echo "Listing the EOS extended attributes of ${EOS_DIR}" +eos root://${EOSINSTANCE} attr ls ${EOS_DIR} +echo + + +# As we are skipping n bytes per file we need a bit more than the file size to accomodate dd to read ${FILE_KB_SIZE} skipping the n first bytes +dd if=/dev/urandom of=/tmp/testfile bs=1k count=$((${FILE_KB_SIZE} + ${NB_FILES}*${NB_DIRS}/1024 + 1)) || exit 1 + + +db_begin_transaction + +# not more than 100k files per directory so that we can rm and find as a standard user +for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do + for((slot=1; slot <= ${NB_PROCS}; slot++)); do + touch "slot${slot}" + done + + eos root://${EOSINSTANCE} mkdir -p ${EOS_DIR}/${subdir} || die "Cannot create directory ${EOS_DIR}/{subdir} in eos instance ${EOSINSTANCE}." + + echo -n "Copying files to ${EOS_DIR}/${subdir} using ${NB_PROCS} processes..." 
+ TEST_FILE_NAME_SUBDIR=${TEST_FILE_NAME_BASE}$(printf %.2d ${subdir}) # this is the target filename of xrdcp processes just need to add the filenumber in each directory + + file_creation="dd if=/tmp/testfile bs=1k 2>/dev/null | (dd bs=$((${subdir}*${NB_FILES})) count=1 of=/dev/null 2>/dev/null; dd bs=TEST_FILE_NUM count=1 of=/dev/null 2>/dev/null; dd bs=1k count=${FILE_KB_SIZE} 2>/dev/null) " + + xrdcp_call="XRD_LOGLEVEL=Dump xrdcp - root://${EOSINSTANCE}/${EOS_DIR}/${subdir}/${TEST_FILE_NAME_SUBDIR}TEST_FILE_NUM 2>${ERROR_DIR}/${TEST_FILE_NAME_SUBDIR}TEST_FILE_NUM" + + xrdcp_succes=" rm ${ERROR_DIR}/${TEST_FILE_NAME_SUBDIR}TEST_FILE_NUM && echo ${subdir}/${TEST_FILE_NAME_SUBDIR}TEST_FILE_NUM >> slot\"{%}\"" + + xrdcp_error="echo ERROR with xrootd transfer for file ${TEST_FILE_NAME_SUBDIR}TEST_FILE_NUM, full logs in ${ERROR_DIR}/${TEST_FILE_NAME_SUBDIR}TEST_FILE_NUM" + + command_str="${file_creation} | ${xrdcp_call} && ${xrdcp_succes} || ${xrdcp_error}" + + for ((i=0;i<${NB_FILES};i++)); do + echo $(printf %.6d $i) + done | parallel --max-procs=${NB_PROCS} -iTEST_FILE_NUM bash -c "true && $command_str" + #TODO: Figure out why we need that 'true &&' in parallel and not for xargs. + + # Initialize db + for((i=1; i <= ${NB_PROCS}; i++)); do + cat slot${i} | xargs -iFILE bash -c "db_insert FILE" + rm -f slot${i} + done + + echo Done. +done +db_commit_transaction +db_begin_transaction +if [ "0" != "$(ls ${ERROR_DIR} 2> /dev/null | wc -l)" ]; then + # there were some xrdcp errors + echo "Several xrdcp errors occured during archival!"
+ echo "Please check client pod logs in artifacts" + mv ${ERROR_DIR}/* ${LOGDIR}/xrd_errors/ +fi + +COPIED=0 +COPIED_EMPTY=0 +for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do + COPIED=$(( ${COPIED} + $(eos root://${EOSINSTANCE} find -f ${EOS_DIR}/${subdir} | wc -l) )) + COPIED_EMPTY=$(( ${COPIED_EMPTY} + $(eos root://${EOSINSTANCE} find -0 ${EOS_DIR}/${subdir} | wc -l) )) +done + +# Only not empty files are archived by CTA +TO_BE_ARCHIVED=$((${COPIED} - ${COPIED_EMPTY})) + +ARCHIVED=0 +echo "$(date +%s): Waiting for files to be on tape:" +SECONDS_PASSED=0 +WAIT_FOR_ARCHIVED_FILE_TIMEOUT=$((40+${NB_FILES}/5)) +TMPFILE=$(mktemp) +while test ${TO_BE_ARCHIVED} != ${ARCHIVED}; do + echo "$(date +%s): Waiting for files to be archived to tape: Seconds passed = ${SECONDS_PASSED}" + sleep 3 + let SECONDS_PASSED=SECONDS_PASSED+1 + + if test ${SECONDS_PASSED} == ${WAIT_FOR_ARCHIVED_FILE_TIMEOUT}; then + echo "$(date +%s): Timed out after ${WAIT_FOR_ARCHIVED_FILE_TIMEOUT} seconds waiting for file to be archived to tape" + break + fi + + ARCHIVED=0 + for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do + eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | grep '^d0::t1' | awk '{ print $10 }' | sed "s|^|${subdir}/|" > ${TMPFILE} + db_update_from_file ${TMPFILE} archived 0 + ARCHIVED=$(( ${ARCHIVED} + $(cat ${TMPFILE} | wc -l) )) + sleep 1 # do not hammer eos too hard + done + + echo "${ARCHIVED}/${TO_BE_ARCHIVED} archived" + + NB_TAPE_NOT_FULL=`admin_cta --json ta ls --all | jq "[.[] | select(.full == false)] | length"` + if [[ ${NB_TAPE_NOT_FULL} == 0 ]] + then + echo "$(date +%s): All tapes are full, exiting archiving loop" + break + fi +done +rm -f ${TMPFILE} +db_commit_transaction + + +echo "###" +echo "${ARCHIVED}/${TO_BE_ARCHIVED} archived" +echo "###" + +echo "Archiving done." +echo "###" +echo "${TAPEONLY}/${ARCHIVED} on tape only" +echo "###" +echo "Sleeping 10 seconds to allow MGM-FST communication to settle after disk copy deletion." 
+sleep 10 +echo "###" +db_info "*" 10 diff --git a/continuousintegration/orchestration/tests/client_delete.sh b/continuousintegration/orchestration/tests/client_delete.sh new file mode 100755 index 0000000000..48b2970874 --- /dev/null +++ b/continuousintegration/orchestration/tests/client_delete.sh @@ -0,0 +1,137 @@ +#!/bin/bash + +# @project The CERN Tape Archive (CTA) +# @copyright Copyright © 2022 CERN +# @license This program is free software, distributed under the terms of the GNU General Public +# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can +# redistribute it and/or modify it under the terms of the GPL Version 3, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# In applying this licence, CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization or +# submit itself to any jurisdiction. + + + +# Provide an EOS directory and return the list of tapes containing files under that directory +nsls_tapes() +{ + # EOS_DIR=${1:-${EOS_BASEDIR}} + # |-> Commented to get only the VID of the files the test setup is working with. + # 1. Query EOS namespace to get a list of file IDs + # 2. Pipe to "tape ls" to get the list of tapes where those files are archived + eos root://${EOSINSTANCE} find --fid ${EOS_DIR} |\ + admin_cta --json tape ls --fxidfile /dev/stdin |\ + jq '.[] | .vid' | sed 's/"//g' +} + + +# Provide a list of tapes and list the filenames of the files stored on those tapes +tapefile_ls() +{ + for vid in $* + do + admin_cta --json tapefile ls --lookupnamespace --vid ${vid} |\ + jq '.[] | .df.path' + done +} + +# Get list of files currently on tape. 
+tmp_file=$(mktemp) +initial_files_on_tape=$(mktemp) +for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do + eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep '^d[0-9][0-9]*::t1' | awk '{print $10}' > ${tmp_file} + cat $tmp_file | xargs -iFILE_NAME echo ${subdir}/FILE_NAME >> $initial_files_on_tape +done + + +# We can now delete the files +echo "Waiting for files to be removed from EOS and tapes" +admin_kdestroy &>/dev/null +admin_kinit &>/dev/null +if $(admin_cta admin ls &>/dev/null); then + echo "Got cta admin privileges, can proceed with the workflow" +else + # displays what failed and fail + admin_cta admin ls + die "Could not launch cta-admin command." +fi +# recount the files on tape as the workflows may have gone further... +VIDLIST=$(nsls_tapes ${EOS_DIR}) +INITIALFILESONTAPE=$(tapefile_ls ${VIDLIST} | wc -l) +echo "Before starting deletion there are ${INITIALFILESONTAPE} files on tape." +#XrdSecPROTOCOL=sss eos -r 0 0 root://${EOSINSTANCE} rm -Fr ${EOS_DIR} & +KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 eos root://${EOSINSTANCE} rm -Fr ${EOS_DIR} & +EOSRMPID=$! +# wait a bit in case eos prematurely fails... +sleep 0.1 +if test ! -d /proc/${EOSRMPID}; then + # eos rm process died, get its status + wait ${EOSRMPID} + test $? -ne 0 && die "Could not launch eos rm" +fi + +# Now we can start to do something... +# deleted files are the ones that made it on tape minus the ones that are still on tapes...
+echo "Waiting for files to be deleted:" +SECONDS_PASSED=0 +WAIT_FOR_DELETED_FILE_TIMEOUT=$((5+${NB_FILES}/9)) +FILESONTAPE=${INITIALFILESONTAPE} + +while test 0 != ${FILESONTAPE}; do + echo "Waiting for files to be deleted from tape: Seconds passed = ${SECONDS_PASSED}" + sleep 1 + let SECONDS_PASSED=SECONDS_PASSED+1 + + if test ${SECONDS_PASSED} == ${WAIT_FOR_DELETED_FILE_TIMEOUT}; then + echo "Timed out after ${WAIT_FOR_DELETED_FILE_TIMEOUT} seconds waiting for file to be deleted from tape" + break + fi + + FILESONTAPE=$(tapefile_ls ${VIDLIST} > >(wc -l) 2> >(cat > /tmp/ctaerr)) + + if [[ $(cat /tmp/ctaerr | wc -l) -gt 0 ]]; then + echo "cta-admin COMMAND FAILED!!" + echo "ERROR CTA ERROR MESSAGE:" + cat /tmp/ctaerr + break + fi + + DELETED=$((${INITIALFILESONTAPE} - ${FILESONTAPE})) + + echo "${DELETED}/${INITIALFILESONTAPE} deleted" +done + + +# kill eos rm command that may run in the background +kill ${EOSRMPID} &> /dev/null + +# As we deleted the directory we may have deleted more files than the ones we retrieved +# therefore we need to take the smallest of the 2 values to decide if the system test was +# successful or not +LASTCOUNT=0 +if [[ ${RETRIEVED} -gt ${DELETED} ]]; then + LASTCOUNT=${DELETED} + echo "Some files have not been deleted:" + tapefile_ls ${VIDLIST} + # For some reason the tapefile_ls command return "Bad response from nameserver" + # So, we cant compare against the list of files for the current test. An + # alternative would be to check directly with EOS. + # TODO: Update db delete column with the actual deleted files. 
+else + echo "All files have been deleted" + LASTCOUNT=${RETRIEVED} + db_begin_transaction + db_update_col "deleted" "+" "1" + db_commit_transaction +fi + +if [ ${LASTCOUNT} -ne $((${NB_FILES} * ${NB_DIRS})) ]; then + echo "ERROR there were some lost files during the archive/retrieve test with ${NB_FILES} files" >> /tmp/RC + echo "ERROR there were some lost files during the archive/retrieve test with ${NB_FILES} files (first 10):" + grep -v retrieved ${STATUS_FILE} | sed -e "s;^;${EOS_DIR}/;" | head -10 +fi diff --git a/continuousintegration/orchestration/tests/client_evict.sh b/continuousintegration/orchestration/tests/client_evict.sh new file mode 100755 index 0000000000..5fe6e3f117 --- /dev/null +++ b/continuousintegration/orchestration/tests/client_evict.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# @project The CERN Tape Archive (CTA) +# @copyright Copyright © 2022 CERN +# @license This program is free software, distributed under the terms of the GNU General Public +# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can +# redistribute it and/or modify it under the terms of the GPL Version 3, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# In applying this licence, CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization or +# submit itself to any jurisdiction. 
+ + +echo "$(date +%s): Trigerring EOS evict workflow as poweruser1:powerusers (12001:1200)" + +# Build the list of files with more than 1 disk copy that have been archived before (ie d>=1::t1) +TMP_FILE=$(mktemp) +TO_EVICT=0 +for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do + touch "${TMP_FILE}${subdir}" + eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep 'd[1-9][0-9]*::t1' | sed -e "s%\s\+% %g;s%.* \([^ ]\+\)$%${subdir}/\1%" >> "${TMP_FILE}${subdir}" + TO_EVICT=$(( ${TO_EVICT} + $(cat ${TMP_FILE}${subdir} | wc -l ) )) +done +rm -f ${TMP_FILE} + + +# Get base evict value an new one. +current_evict_val=0 +NEW_EVICT_VAL=$(( ${current_evict_val} + 1 )) + +echo "$(date +%s): $TO_EVICT files to be evicted from EOS using 'xrdfs prepare -e'" +# We need the -e as we are evicting the files from disk cache (see xrootd prepare definition) +for (( subdir=0; subdir < ${NB_DIRS}; subdir++ )); do + cat "${TMP_FILE}${subdir}" | sed -e "s%^%${EOS_DIR}/%" | XrdSecPROTOCOL=krb5 KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 xargs --max-procs=10 -n 40 xrdfs ${EOSINSTANCE} prepare -e > /dev/null +done + + +db_begin_transaction +LEFTOVER=0 +TMP_FILE=$(mktemp) +for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do + LEFTOVER=$(( ${LEFTOVER} + $(eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep '^d[1-9][0-9]*::t1' | wc -l) )) + + eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep '^d[0][0-9]*::t1' | awk '{print $10}' > ${TMP_FILE} + + cat ${TMP_FILE} | xargs -iTEST_FILE_NAME bash -c "db_update 'evicted' ${subdir}/TEST_FILE_NAME ${NEW_EVICT_VAL} '='" +done +rm -f ${TMP_FILE} +db_commit_transaction + +EVICTED=$((${TO_EVICT}-${LEFTOVER})) +echo "$(date +%s): $EVICTED/$TO_EVICT files evicted from EOS 'xrdfs prepare -e'" diff --git a/continuousintegration/orchestration/tests/client_helper.sh b/continuousintegration/orchestration/tests/client_helper.sh old mode 100644 new mode 100755 index d9401ce4cc..f7bee8badf --- 
a/continuousintegration/orchestration/tests/client_helper.sh +++ b/continuousintegration/orchestration/tests/client_helper.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # @project The CERN Tape Archive (CTA) # @copyright Copyright © 2022 CERN # @license This program is free software, distributed under the terms of the GNU General Public @@ -36,6 +38,7 @@ USER="user1" die() { echo "$@" 1>&2 + test -z ${TAILPID} || kill ${TAILPID} &> /dev/null exit 1 } @@ -103,7 +106,7 @@ eosadmin_kdestroy() { ################################################################ # Pass list of files waiting for archival - +# This sciprt fails if there are files stored in the target directory as it just counts the lines. wait_for_archive () { EOS_INSTANCE=$1 @@ -111,12 +114,12 @@ wait_for_archive () { WAIT_FOR_ARCHIVED_FILE_TIMEOUT=90 while test $(($# - 1)) != $(echo "${@:2}" | tr " " "\n" | xargs -iFILE eos root://${EOS_INSTANCE} info FILE | awk '{print $4;}' | grep tape | wc -l); do - echo "Waiting for files to be archived to tape: seconds passed = ${SECONDS_PASSED}" + echo "$(date +%s) Waiting for files to be archived to tape: seconds passed = ${SECONDS_PASSED}" sleep 1 let SECONDS_PASSED=SECONDS_PASSED+1 if test ${SECONDS_PASSED} == ${WAIT_FOR_ARCHIVED_FILE_TIMEOUT}; then - echo "ERROR: Timed out after ${WAIT_FOR_ARCHIVED_FILE_TIMEOUT} seconds waiting for files to be archived to tape" + echo "$(date +%s) ERROR: Timed out after ${WAIT_FOR_ARCHIVED_FILE_TIMEOUT} seconds waiting for files to be archived to tape" exit 1 fi done @@ -131,12 +134,12 @@ wait_for_retrieve () { SECONDS_PASSED=0 WAIT_FOR_RETRIEVED_FILE_TIMEOUT=90 while test $(($# - 1)) != $(echo "${@:2}" | tr " " "\n" | xargs -iFILE eos root://${EOS_INSTANCE} info FILE | awk '{print $4;}' | grep -F "default.0" | wc -l); do - echo "Waiting for files to be retrieved from tape: Seconds passed = ${SECONDS_PASSED}" + echo "$(date +%s) Waiting for files to be retrieved from tape: Seconds passed = ${SECONDS_PASSED}" sleep 1 let 
SECONDS_PASSED=SECONDS_PASSED+1 if test ${SECONDS_PASSED} == ${WAIT_FOR_RETRIEVED_FILE_TIMEOUT}; then - echo "Timed out after ${WAIT_FOR_RETRIEVED_FILE_TIMEOUT} seconds waiting for files to be retrieved from tape" + echo "$(date +%s) ERROR: Timed out after ${WAIT_FOR_RETRIEVED_FILE_TIMEOUT} seconds waiting for files to be retrieved from tape" exit 1 fi done @@ -151,12 +154,12 @@ wait_for_evict () { SECONDS_PASSED=0 WAIT_FOR_EVICTED_FILE_TIMEOUT=90 while test 0 != $(echo "${@:2}" | tr " " "\n" | xargs -iFILE eos root://${EOS_INSTANCE} info FILE | awk '{print $4;}' | grep -F "default.0" | wc -l); do - echo "Waiting for files to be evicted from disk: Seconds passed = ${SECONDS_PASSED}" + echo "$(date +%s) Waiting for files to be evicted from disk: Seconds passed = ${SECONDS_PASSED}" sleep 1 let SECONDS_PASSED=SECONDS_PASSED+1 if test ${SECONDS_PASSED} == ${WAIT_FOR_EVICTED_FILE_TIMEOUT}; then - echo "Timed out after ${WAIT_FOR_EVICTED_FILE_TIMEOUT} seconds waiting for files to be evicted from disk" + echo "$(date +%s) ERROR: Timed out after ${WAIT_FOR_EVICTED_FILE_TIMEOUT} seconds waiting for files to be evicted from disk" exit 1 fi done @@ -169,14 +172,14 @@ wait_for_tape_state() { SECONDS_PASSED=0 WAIT_FOR_EVICTED_FILE_TIMEOUT=90 - echo "Waiting for tape $1 state to change to $2: Seconds passed = ${SECONDS_PASSED}" + echo "$(date +%s) Waiting for tape $1 state to change to $2: Seconds passed = ${SECONDS_PASSED}" while test $2 != $(admin_cta --json tape ls --vid $1 | jq -r '.[] | .state'); do sleep 1 let SECONDS_PASSED=SECONDS_PASSED+1 - echo "Waiting for tape $1 state to change to $2: Seconds passed = ${SECONDS_PASSED}" + echo "$(date +%s) Waiting for tape $1 state to change to $2: Seconds passed = ${SECONDS_PASSED}" if test ${SECONDS_PASSED} == ${WAIT_FOR_EVICTED_FILE_TIMEOUT}; then - echo "Timed out after ${WAIT_FOR_EVICTED_FILE_TIMEOUT} seconds waiting for tape $1 state to change to $2" + echo "$(date +%s) ERROR: Timed out after ${WAIT_FOR_EVICTED_FILE_TIMEOUT} 
seconds waiting for tape $1 state to change to $2" exit 1 fi done @@ -224,13 +227,13 @@ put_all_drives () { fi; done if [[ $oneStatusRemaining -eq 0 ]]; then - echo "Drives : $drivesToModify are $next_state" + echo "$(date +%s) Drives : $drivesToModify are $next_state" break; fi echo -n "." SECONDS_PASSED=$SECONDS_PASSED+1 if [[ $SECONDS_PASSED -gt $WAIT_FOR_DRIVES_TIMEOUT ]]; then - die "ERROR: Timeout reach for trying to put all drives $next_state" + die "$(date +%s) ERROR: Timeout reach for trying to put all drives $next_state" fi done @@ -243,3 +246,79 @@ put_all_drives_up () { put_all_drives_down () { put_all_drives "DOWN" } + +################################################### +# Helper functions to update the tracker DB status. +################################################### + +db_results() { + sqlite3 ${DB_NAME} "SELECT SUM(archived), SUM(staged), SUM(evicted), SUM(aborted), SUM(deleted) FROM ${TEST_TABLE};" +} + +db_begin_transaction() { + rm -f new_transaction + touch new_transaction + echo 'BEGIN TRANSACTION;' > new_transaction +} + +db_commit_transaction() { + echo 'COMMIT;' >> new_transaction + sqlite3 ${DB_NAME} < new_transaction +} + + +db_info() { + ROW_LIMIT="" + + if [[ -n $2 ]]; then + ROW_LIMIT="LIMIT $2" + fi + + sqlite3 -header ${DB_NAME} "SELECT $1 FROM ${TEST_TABLE} ${ROW_LIMIT};" +} + +db_get_files() { + sqlite3 ${DB_NAME} "SELECT filename FROM ${TEST_TABLE};" +} + +db_insert() { + echo "INSERT INTO ${TEST_TABLE} ('filename') VALUES ('$1');" >> new_transaction +} + +db_status_count() { + sqlite3 ${DB_NAME} "SELECT filename FROM ${TEST_TABLE} WHERE $1 == $2;" | wc -l +} + +db_update() { + query="SELECT $1 FROM ${TEST_TABLE} WHERE filename = '$2'" + query_res=$(sqlite3 ${DB_NAME} "${query}") + if [[ $4 != "=" ]]; then + new_val=$(expr $query_res $4 $3) + else + new_val=$3 + fi + echo "UPDATE ${TEST_TABLE} SET $1 = '$new_val' WHERE filename = '$2';" >> new_transaction +} + +# Positional arguments: +# $1: Filename +# $2: Column to update 
+# $3: 'Source column value'. TODO: Rethink this. +db_update_from_file() { + # Get list of archive files. + archived=$(mktemp) + + sqlite3 ${DB_NAME} "SELECT filename FROM ${TEST_TABLE} WHERE $2 != $3" | sort > $archived + comm -2 -3 $1 $archived | xargs --max-procs=1 -iFILE bash -c "db_update $2 FILE 1 '+'" + + rm -f ${archived} +} + +db_update_col() { + echo "UPDATE ${TEST_TABLE} SET $1=$1 $2 $3;" >> new_transaction +} + +db_custom_query() { + echo "${1} ${TEST_TABLE} ${2}" >> new_transaction +} + diff --git a/continuousintegration/orchestration/tests/multiple_retrieve.sh b/continuousintegration/orchestration/tests/client_multiple_retrieve.sh similarity index 100% rename from continuousintegration/orchestration/tests/multiple_retrieve.sh rename to continuousintegration/orchestration/tests/client_multiple_retrieve.sh diff --git a/continuousintegration/orchestration/tests/client_results.sh b/continuousintegration/orchestration/tests/client_results.sh new file mode 100755 index 0000000000..70d1673509 --- /dev/null +++ b/continuousintegration/orchestration/tests/client_results.sh @@ -0,0 +1,69 @@ +#!/bin/bash + +# @project The CERN Tape Archive (CTA) +# @copyright Copyright © 2022 CERN +# @license This program is free software, distributed under the terms of the GNU General Public +# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can +# redistribute it and/or modify it under the terms of the GPL Version 3, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# In applying this licence, CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization or +# submit itself to any jurisdiction. 
+ + + +RC=0 + +TOTAL_FILES=$((${NB_FILES} * ${NB_DIRS})) +IFS='|' read -a results_array <<< $(db_results) + +ARCHIVED=${results_array[0]} +RETRIEVED=${results_array[1]} +EVICTED=${results_array[2]} +ABORTED=${results_array[3]} +DELETED=${results_array[4]} + +names_arr=("ARCHIVED" "RETRIEVED" "EVICTED" "ABORTED" "DELETED") + +echo "###" +echo "$(date +%s): Results:" +echo "NB FILES / ARCHIVED / RETRIEVED / EVICTED / ABORTED / DELETED" +echo "${TOTAL_FILES} / ${ARCHIVED} / ${RETRIEVED} / ${EVICTED} / ${ABORTED} / ${DELETED}" +echo "###" + +test -z ${COMMENT} || annotate "test ${TESTID} FINISHED" "Summary:</br>NB_FILES: $((${NB_FILES} * ${NB_DIRS}))</br>ARCHIVED: ${ARCHIVED}<br/>RETRIEVED: ${RETRIEVED}</br>EVICTED: ${EVICTED}<br/>DELETED: ${DELETED}" 'test,end' + +TOTAL_FILES=$(( ${NB_FILES} * ${NB_DIRS} )) +for i in "${!results_array[@]}"; do + if [[ ${TOTAL_FILES} -ne ${results_array[$i]} ]]; then + echo "ERROR: ${names_arr[${i}]} count value ${results_array[${i}]} does not match the expected value ${TOTAL_FILES}." + fi +done + + +if [ $(cat ${LOGDIR}/prepare_sys.retrieve.req_id_*.log | grep -v value= | wc -l) -ne 0 ]; then + # THIS IS NOT YET AN ERROR: UNCOMMENT THE FOLLOWING LINE WHEN https://gitlab.cern.ch/cta/CTA/issues/606 is fixed + # ((RC++)) + echo "ERROR $(cat ${LOGDIR}/prepare_sys.retrieve.req_id_*.log | grep -v value= | wc -l) files out of $(cat ${LOGDIR}/prepare_sys.retrieve.req_id_*.log | wc -l) prepared files have no sys.retrieve.req_id extended attribute set" +fi + + +# This one does not change the return code +# WARNING if everything else was OK +# ERROR otherwise as these xrootd failures could be the reason of the failure +if [ $(ls ${LOGDIR}/xrd_errors | wc -l) -ne 0 ]; then + # ((RC++)) # do not change RC + #if [ ${RC} -eq 0 ]; then + if [[ $(cat /tmp/RC | wc -l) -eq 0 ]]; then + echo "WARNING several xrootd failures occured during this run, please check client dumps in ${LOGDIR}/xrd_errors."
+ else + echo "ERROR several xrootd failures occured during this run, please check client dumps in ${LOGDIR}/xrd_errors." + fi +fi + +exit $(cat /tmp/RC | wc -l) diff --git a/continuousintegration/orchestration/tests/client_retrieve.sh b/continuousintegration/orchestration/tests/client_retrieve.sh new file mode 100755 index 0000000000..19c41b4416 --- /dev/null +++ b/continuousintegration/orchestration/tests/client_retrieve.sh @@ -0,0 +1,114 @@ +#!/bin/bash + +# @project The CERN Tape Archive (CTA) +# @copyright Copyright © 2022 CERN +# @license This program is free software, distributed under the terms of the GNU General Public +# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can +# redistribute it and/or modify it under the terms of the GPL Version 3, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# In applying this licence, CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization or +# submit itself to any jurisdiction. + + +echo "$(date +%s): Trigerring EOS retrieve workflow as poweruser1:powerusers (12001:1200)" + +# Get list of files from test DB table. +TMP_FILE=$(mktemp) +db_get_files > ${TMP_FILE} +ARCHIVED=$(cat ${TMP_FILE} | wc -l) + +# Split the input file into one file per directory to avoid cat and grep +# the entire file for each subdirectory. +seq 0 $(( ${NB_DIRS} - 1 )) | xargs -iSUBDIR bash -c "touch ${TMP_FILE}SUBDIR" +cat ${TMP_FILE} | xargs -iFILE bash -c "subdir=\$(echo FILE | cut -d/ -f1); echo FILE | cut -d/ -f2 >> ${TMP_FILE}\${subdir}" +rm -f ${TMP_FILE} + +# Get initial stage value. 
+#current_stage_val=$(db_info 'archived') +current_stage_val=0 +NEW_STAGE_VAL=$((${current_stage_val} + 1 )) + +# We need the -s as we are staging the files from tape (see xrootd prepare definition) +for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do + echo -n "Retrieving files to ${EOS_DIR}/${subdir} using ${NB_PROCS} processes..." + + xrdfs_call=" XRD_LOGLEVEL=Dump KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOSINSTANCE} prepare -s ${EOS_DIR}/${subdir}/TEST_FILE_NAME?activity=T0Reprocess 2>${ERROR_DIR}/RETRIEVE_TEST_FILE_NAME && rm ${ERROR_DIR}/RETRIEVE_TEST_FILE_NAME " + + xrdfs_error=" echo ERROR with xrootd prepare stage for file TEST_FILE_NAME, full logs in ${ERROR_DIR}/RETRIEVE_TEST_FILE_NAME " + + command_str="${xrdfs_call} || ${xrdfs_error}" + + cat "${TMP_FILE}${subdir}" | xargs --max-procs=${NB_PROCS} -iTEST_FILE_NAME bash -c "$command_str" | tee ${LOGDIR}/prepare_${subdir}.log | grep ^ERROR + + echo Done. + + xrdfs_call="XRD_LOGLEVEL=Dump KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOSINSTANCE} query opaquefile ${EOS_DIR}/${subdir}/TEST_FILE_NAME?mgm.pcmd=xattr\&mgm.subcmd=get\&mgm.xattrname=sys.retrieve.req_id 2>${ERROR_DIR}/XATTRGET_TEST_FILE_NAME && rm ${ERROR_DIR}/XATTRGET_TEST_FILE_NAME" + + xrdfs_error=" echo ERROR with xrootd xattr get for file TEST_FILE_NAME, full logs in ${ERROR_DIR}/XATTRGET_TEST_FILE_NAME" + + command_str="${xrdfs_call} || ${xrdfs_error}" + + cat "${TMP_FILE}${subdir}" | xargs --max-procs=${NB_PROCS} -iTEST_FILE_NAME bash -c "$command_str" | tee ${LOGDIR}/prepare_sys.retrieve.req_id_${subdir}.log | grep ^ERROR + + rm -f "${TMP_FILE}${subdir}" +done + +if [ "0" != "$(ls ${ERROR_DIR} 2> /dev/null | wc -l)" ]; then + # there were some prepare errors + echo "Several prepare errors occured during retrieval!" 
+ echo "Please check client pod logs in artifacts" + mv ${ERROR_DIR}/* ${LOGDIR}/xrd_errors/ +fi + +#ARCHIVED=$(cat ${TMP_FILE} | wc -l) +TO_BE_RETRIEVED=$(( ${ARCHIVED} - $(ls ${ERROR_DIR}/RETRIEVE_* 2>/dev/null | wc -l) )) +RETRIEVING=${TO_BE_RETRIEVED} +RETRIEVED=0 +# Wait for the copy to appear on disk +echo "$(date +%s): Waiting for files to be back on disk:" +SECONDS_PASSED=0 +WAIT_FOR_RETRIEVED_FILE_TIMEOUT=$((40+${NB_FILES}/5)) + +status=$(mktemp) +while test 0 -lt ${RETRIEVING}; do + rm -f $status + touch $status + echo "$(date +%s): Waiting for files to be retrieved from tape: Seconds passed = ${SECONDS_PASSED}" + + sleep 3 + + let SECONDS_PASSED=SECONDS_PASSED+1 + + if test ${SECONDS_PASSED} == ${WAIT_FOR_RETRIEVED_FILE_TIMEOUT}; then + echo "$(date +%s): Timed out after ${WAIT_FOR_RETRIEVED_FILE_TIMEOUT} seconds waiting for file to be retrieved tape" + break + fi + + RETRIEVED=0 + for ((subdir=0; subdir < ${NB_DIRS}; subdir++)); do + + eos root://${EOSINSTANCE} ls -y ${EOS_DIR}/${subdir} | egrep '^d[1-9][0-9]*::t1' | awk -v sd="${subdir}/" '{print sd$10}' >> $status + + RETRIEVED=$(cat $status | wc -l) + + sleep 1 # do not hammer eos too hard + done + RETRIEVING=$((${TO_BE_RETRIEVED} - ${RETRIEVED})) + + echo "${RETRIEVED}/${TO_BE_RETRIEVED} retrieved" +done + +db_begin_transaction +cat $status | xargs -iFILE bash -c "db_update staged FILE 1 '='" +db_commit_transaction +rm -f ${status} + +echo "###" +echo "${RETRIEVED}/${TO_BE_RETRIEVED} retrieved files" +echo "###" diff --git a/continuousintegration/orchestration/tests/client_setup.sh b/continuousintegration/orchestration/tests/client_setup.sh new file mode 100755 index 0000000000..4494a108d6 --- /dev/null +++ b/continuousintegration/orchestration/tests/client_setup.sh @@ -0,0 +1,232 @@ +#!/bin/bash + +# @project The CERN Tape Archive (CTA) +# @copyright Copyright © 2022 CERN +# @license This program is free software, distributed under the terms of the GNU General Public +#
Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can +# redistribute it and/or modify it under the terms of the GPL Version 3, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# In applying this licence, CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization or +# submit itself to any jurisdiction. + +set -a + +touch /tmp/RC +EOSINSTANCE=ctaeos +EOS_BASEDIR=/eos/ctaeos/cta +TEST_FILE_NAME_BASE=test +DATA_SOURCE=/dev/urandom +ARCHIVEONLY=0 # Only archive files or do the full test? +DONOTARCHIVE=0 # files were already archived in a previous run NEED TARGETDIR +TARGETDIR='' +LOGDIR='/var/log' + +COMMENT='' +# id of the test so that we can track it +TESTID="$(date +%y%m%d%H%M)" + +NB_PROCS=1 +NB_FILES=1 +NB_DIRS=1 +FILE_KB_SIZE=1 +VERBOSE=0 +REMOVE=0 +TAPEAWAREGC=0 + +NB_BATCH_PROCS=500 # number of parallel batch processes +BATCH_SIZE=20 # number of files per batch process + +SSH_OPTIONS='-o BatchMode=yes -o ConnectTimeout=10' + +# Setup sqlite3 DB. +# Table client_tests_<TESTID> +# filename +# archived - number of times a file has been archived +# staged - number of times a file has been staged +# evicted - number of times the evict call has been called for the file. +# deleted - deleted files (the schema also carries an 'aborted' counter).
+cat <<EOF > /opt/run/bin/tracker.schema + +CREATE TABLE client_tests_${TESTID}( + filename TEXT PRIMARY KEY, + archived INTEGER DEFAULT 0, + staged INTEGER DEFAULT 0, + evicted INTEGER DEFAULT 0, + aborted INTEGER DEFAULT 0, + deleted INTEGER DEFAULT 0 +); +EOF + +export DB_NAME="/root/trackerdb.db" +export TEST_TABLE="client_tests_${TESTID}" + +sqlite3 /root/trackerdb.db < /opt/run/bin/tracker.schema + +die() { + echo "$@" 1>&2 + test -z "${TAILPID}" || kill ${TAILPID} &> /dev/null + exit 1 +} + + +usage() { cat <<EOF 1>&2 +Usage: $0 [-n <nb_files_perdir>] [-N <nb_dir>] [-s <file_kB_size>] [-p <# parallel procs>] [-v] [-d <eos_dest_dir>] [-e <eos_instance>] [-S <data_source_file>] [-r] [-A] [-P] [-t <target_dir>] [-G] [-m <comment>] [-Z <https|root>] + -v Verbose mode: displays live logs of rmcd to see tapes being mounted/dismounted in real time + -r Remove files at the end: launches the delete workflow on the files that were deleted. WARNING: THIS CAN BE FATAL TO THE NAMESPACE IF THERE ARE TOO MANY FILES AND XROOTD STARTS TO TIMEOUT. + -A Archiveonly mode: exits after file archival + -G Tape aware GC?
+EOF +exit 1 +} + + +# Send annotations to Influxdb (command kept as an array: no eval, so quotes in the comment cannot break it) +annotate() { + TITLE=$1 + TEXT=$2 + TAGS=$3 + LINE="ctapps_tests title=\"${TITLE}\",text=\"${TEXT}\",tags=\"${TAGS}\" $(date +%s)" + curlcmd=(curl --connect-timeout 2 -X POST 'https://ctapps-influx02.cern.ch:8086/write?db=annotations&u=annotations&p=annotations&precision=s' --data-binary "${LINE}") + "${curlcmd[@]}" +} + +while getopts "Z:d:e:n:N:s:p:vS:rAPGt:m:" o; do + case "${o}" in + e) + EOSINSTANCE=${OPTARG} + ;; + d) + EOS_BASEDIR=${OPTARG} + ;; + n) + NB_FILES=${OPTARG} + ;; + N) + NB_DIRS=${OPTARG} + ;; + s) + FILE_KB_SIZE=${OPTARG} + ;; + p) + NB_PROCS=${OPTARG} + ;; + v) + VERBOSE=1 + ;; + S) + DATA_SOURCE=${OPTARG} + ;; + r) + REMOVE=1 + ;; + A) + ARCHIVEONLY=1 + ;; + P) + DONOTARCHIVE=1 + ;; + G) + TAPEAWAREGC=1 + ;; + t) + TARGETDIR=${OPTARG} + ;; + m) + COMMENT=${OPTARG} + ;; + Z) + GFAL2_PROTOCOL=${OPTARG} + ;; + *) + usage + ;; + esac +done +shift $((OPTIND-1)) + + +if [[ -n ${GFAL2_PROTOCOL} ]]; then + # Test gfal protocol is supported. + if [[ ! "${GFAL2_PROTOCOL}" =~ ^(https|root)$ ]]; then + echo "Invalid gfal2 protocol: ${GFAL2_PROTOCOL}" + echo "Current supported protocols: https, root" + exit 1 + fi +fi + +if [ ! -z "${error}" ]; then + echo -e "ERROR:\n${error}" + exit 1 +fi + +if [ "x${COMMENT}" = "x" ]; then + echo "No annotation will be pushed to Influxdb" +fi + +if [[ $DONOTARCHIVE == 1 ]]; then + if [[ "x${TARGETDIR}" = "x" ]]; then + echo "You must provide a target directory to run a test and skip archival" + exit 1 + fi + eos root://${EOSINSTANCE} ls -d ${EOS_BASEDIR}/${TARGETDIR} || die "target directory does not exist and there is no archive phase to create it."
+fi + +if [[ $TAPEAWAREGC == 1 ]]; then + echo "Enabling tape aware garbage collector" + ssh ${SSH_OPTIONS} -l root ${EOSINSTANCE} eos space config default space.filearchivedgc=off || die "Could not disable filearchivedgc" +fi + +EOS_DIR='' +if [[ "x${TARGETDIR}" = "x" ]]; then + EOS_DIR="${EOS_BASEDIR}/$(uuidgen)" +else + EOS_DIR="${EOS_BASEDIR}/${TARGETDIR}" +fi +LOGDIR="${LOGDIR}/$(basename ${EOS_DIR})" +mkdir -p ${LOGDIR} || die "Cannot create directory LOGDIR: ${LOGDIR}" +mkdir -p ${LOGDIR}/xrd_errors || die "Cannot create directory LOGDIR/xrd_errors: ${LOGDIR}/xrd_errors" + +echo "$(date +%s): TRACKERDB_FILE=${DB_NAME}" +echo "$(date +%s): TRACKERDB_TABLE=${TEST_TABLE}" +ERROR_FILE=$(mktemp) +echo "$(date +%s): ERROR_FILE=${ERROR_FILE}" +EOS_BATCHFILE=$(mktemp --suffix=.eosh) +echo "$(date +%s): EOS_BATCHFILE=${EOS_BATCHFILE}" + +# Create directory for xrootd error reports (-p: it may already exist when a target directory is reused) +ERROR_DIR="/dev/shm/$(basename ${EOS_DIR})" +mkdir -p ${ERROR_DIR} +echo "$(date +%s): ERROR_DIR=${ERROR_DIR}" + +# get some common useful helpers for krb5 +. /root/client_helper.sh + +# Get kerberos credentials for user1 +user_kinit +klist -s || die "Cannot get kerberos credentials for user ${USER}" + +# Get kerberos credentials for poweruser1 +eospower_kdestroy +eospower_kinit + +echo "Starting test ${TESTID}: ${COMMENT}" + +#echo "$(date +%s): Dumping objectstore list" +#ssh root@ctappsfrontend cta-objectstore-list + +test -z "${COMMENT}" || annotate "test ${TESTID} STARTED" "comment: ${COMMENT}<br/>files: $((${NB_DIRS}*${NB_FILES}))<br/>filesize: ${FILE_KB_SIZE}kB" 'test,start' + + +set +a + +# Store the setup environment into a file and +# source it every time we spawn a shell in the +# pod.
+export -p > /root/client_env +export -f -p >> /root/client_env diff --git a/continuousintegration/orchestration/tests/client_simple_ar.sh b/continuousintegration/orchestration/tests/client_simple_ar.sh new file mode 100755 index 0000000000..8633b92961 --- /dev/null +++ b/continuousintegration/orchestration/tests/client_simple_ar.sh @@ -0,0 +1,101 @@ +#!/bin/bash + +# @project The CERN Tape Archive (CTA) +# @copyright Copyright © 2022 CERN +# @license This program is free software, distributed under the terms of the GNU General Public +# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can +# redistribute it and/or modify it under the terms of the GPL Version 3, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# In applying this licence, CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization or +# submit itself to any jurisdiction. 
+ +EOSINSTANCE=ctaeos +TEST_FILE_NAME=$(uuidgen | sed 's/-//g') +TEST_DIR=/eos/ctaeos/cta/ + +# get some common useful helpers for krb5 +eospower_kdestroy +eospower_kinit + +cat <<EOF > /opt/run/bin/tracker_simple.schema + +CREATE TABLE client_simple_tests_${TEST_FILE_NAME}( + filename TEXT PRIMARY KEY, + archived INTEGER DEFAULT 1, + staged INTEGER DEFAULT 0, + deleted INTEGER DEFAULT 0, + evicted INTEGER DEFAULT 0 +); +EOF + +sqlite3 /root/trackerdb_simple.db < /opt/run/bin/tracker_simple.schema +DB_NAME="/root/trackerdb_simple.db" +TEST_TABLE="client_simple_tests_${TEST_FILE_NAME}" + +db_insert ${TEST_FILE_NAME} + +echo "xrdcp /etc/group root://${EOSINSTANCE}/${TEST_DIR}${TEST_FILE_NAME}" +xrdcp /etc/group root://${EOSINSTANCE}/${TEST_DIR}${TEST_FILE_NAME} + +wait_for_archive ${EOSINSTANCE} "${TEST_DIR}${TEST_FILE_NAME}" + +echo +echo "FILE ARCHIVED TO TAPE" +echo +eos root://${EOSINSTANCE} info ${TEST_DIR}${TEST_FILE_NAME} +echo "Updating test DB" +db_update 'archived' ${TEST_FILE_NAME} 1 '=' +db_info '*' + +echo +echo "Information about the testing file:" +echo "********" + eos root://${EOSINSTANCE} attr ls ${TEST_DIR}${TEST_FILE_NAME} + eos root://${EOSINSTANCE} ls -l ${TEST_DIR}${TEST_FILE_NAME} +eos root://${EOSINSTANCE} info ${TEST_DIR}${TEST_FILE_NAME} + +echo +echo "Removing disk replica as poweruser1:powerusers (12001:1200)" +# XrdSecPROTOCOL=sss eos -r 12001 1200 root://${EOSINSTANCE} file drop /eos/ctaeos/cta/${TEST_FILE_NAME} 1 +XrdSecPROTOCOL=sss eos -r 0 0 root://${EOSINSTANCE} file drop "${TEST_DIR}${TEST_FILE_NAME}" 1 + + +echo +echo "Information about the testing file without disk replica" + eos root://${EOSINSTANCE} ls -l ${TEST_DIR}${TEST_FILE_NAME} + eos root://${EOSINSTANCE} info ${TEST_DIR}${TEST_FILE_NAME} + + +echo +echo "Trigerring EOS retrieve workflow as poweruser1:powerusers (12001:1200)" +#echo "XrdSecPROTOCOL=sss xrdfs ${EOSINSTANCE} prepare -s \"/eos/ctaeos/cta/${TEST_FILE_NAME}?eos.ruid=12001&eos.rgid=1200\"" +# XrdSecPROTOCOL=sss 
xrdfs ${EOSINSTANCE} prepare -s "/eos/ctaeos/cta/${TEST_FILE_NAME}?eos.ruid=12001&eos.rgid=1200" + +# We need the -s as we are staging the files from tape (see xrootd prepare definition) +KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOSINSTANCE} prepare -s ${TEST_DIR}${TEST_FILE_NAME} + +# Wait for the copy to appear on disk +wait_for_retrieve ${EOSINSTANCE} "${TEST_DIR}${TEST_FILE_NAME}" +db_update "staged" ${TEST_FILE_NAME} 1 "+" +db_info "*" + +echo +echo "FILE RETRIEVED FROM DISK" +echo +echo "Information about the testing file:" +echo "********" + eos root://${EOSINSTANCE} attr ls ${TEST_DIR}${TEST_FILE_NAME} + eos root://${EOSINSTANCE} ls -l ${TEST_DIR}${TEST_FILE_NAME} + eos root://${EOSINSTANCE} info ${TEST_DIR}${TEST_FILE_NAME} + +# Delete the file so it doesn't interfere with tests in client_ar.sh +echo "eos root://${EOSINSTANCE} rm ${TEST_DIR}${TEST_FILE_NAME}" +eos root://${EOSINSTANCE} rm ${TEST_DIR}${TEST_FILE_NAME} +db_update "deleted" ${TEST_FILE_NAME} 1 "+" +db_info "*" diff --git a/continuousintegration/orchestration/tests/client_zero_length_copy.sh b/continuousintegration/orchestration/tests/client_zero_length_copy.sh new file mode 100755 index 0000000000..4adac69720 --- /dev/null +++ b/continuousintegration/orchestration/tests/client_zero_length_copy.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# @project The CERN Tape Archive (CTA) +# @copyright Copyright © 2022 CERN +# @license This program is free software, distributed under the terms of the GNU General Public +# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can +# redistribute it and/or modify it under the terms of the GPL Version 3, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+# +# In applying this licence, CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization or +# submit itself to any jurisdiction. + + + +TEST_DIR=${EOS_BASEDIR}/ +TEST_FILE_NAME=$(uuidgen) + +# +# Check we can copy zero-length files into the namespace by touch and copy +# +echo "eos root://${EOSINSTANCE} touch ${TEST_DIR}${TEST_FILE_NAME}.touch" +eos root://${EOSINSTANCE} touch ${TEST_DIR}${TEST_FILE_NAME}.touch +echo "eos root://${EOSINSTANCE} cp ${TEST_DIR}${TEST_FILE_NAME}.touch /tmp/${TEST_FILE_NAME}.touch" +eos root://${EOSINSTANCE} cp ${TEST_DIR}${TEST_FILE_NAME}.touch /tmp/${TEST_FILE_NAME}.touch +echo "eos root://${EOSINSTANCE} cp /tmp/${TEST_FILE_NAME}.touch ${TEST_DIR}${TEST_FILE_NAME}.zero" +eos root://${EOSINSTANCE} cp /tmp/${TEST_FILE_NAME}.touch ${TEST_DIR}${TEST_FILE_NAME}.zero +echo "eos root://${EOSINSTANCE} cp ${TEST_DIR}${TEST_FILE_NAME}.zero /tmp/${TEST_FILE_NAME}.zero" +eos root://${EOSINSTANCE} cp ${TEST_DIR}${TEST_FILE_NAME}.zero /tmp/${TEST_FILE_NAME}.zero + +if [ -f /tmp/${TEST_FILE_NAME}.touch -a ! -s /tmp/${TEST_FILE_NAME}.touch -a -f /tmp/${TEST_FILE_NAME}.zero -a !
-s /tmp/${TEST_FILE_NAME}.zero ]; then + echo "Zero-length file copy succeeded" + zeroLengthTests=1 +else + echo "Zero-length file copy failed" + zeroLengthTests=0 +fi + +# Clean up +echo "eos root://${EOSINSTANCE} rm ${TEST_DIR}${TEST_FILE_NAME}.touch" +eos root://${EOSINSTANCE} rm ${TEST_DIR}${TEST_FILE_NAME}.touch +echo "eos root://${EOSINSTANCE} rm ${TEST_DIR}${TEST_FILE_NAME}.zero" +eos root://${EOSINSTANCE} rm ${TEST_DIR}${TEST_FILE_NAME}.zero +rm -f /tmp/${TEST_FILE_NAME}.touch /tmp/${TEST_FILE_NAME}.zero + +# Report results +msgNum=$(grep "\"File suc" /mnt/logs/tpsrv*/taped/cta/cta-taped.log | grep ${TEST_FILE_NAME} | tail -n 4 | wc -l) +if [ "$msgNum" = "4" -a $zeroLengthTests -eq 1 ]; then + echo "OK: all tests passed" + #rc=0 + exit 0 +else + echo "FAIL: tests failed" + #rc=1 + exit 1 +fi diff --git a/continuousintegration/orchestration/tests/idempotent_prepare.sh b/continuousintegration/orchestration/tests/idempotent_prepare.sh index e0ffa7c8ec..bada9c8072 100755 --- a/continuousintegration/orchestration/tests/idempotent_prepare.sh +++ b/continuousintegration/orchestration/tests/idempotent_prepare.sh @@ -591,7 +591,7 @@ if [ $? 
-eq 0 ]; then exit 1 fi -wait_for_evict ${EOS_INSTANCE} {TEMP_FILE_TAPE} +wait_for_evict ${EOS_INSTANCE} ${TEMP_FILE_TAPE} echo "Test completed successfully" diff --git a/continuousintegration/orchestration/tests/prepare_tests.sh b/continuousintegration/orchestration/tests/prepare_tests.sh index 7e87507b08..09e9a76e49 100755 --- a/continuousintegration/orchestration/tests/prepare_tests.sh +++ b/continuousintegration/orchestration/tests/prepare_tests.sh @@ -346,14 +346,13 @@ echo "EOS server version is used:" # Super client capabilities echo "Adding super client capabilities" - -clientIP=`kubectl --namespace ${NAMESPACE} describe pod client | grep IP | sed -E 's/IP:[[:space:]]+//'` kubectl --namespace ${NAMESPACE} exec ctacli -- cta-admin admin add --username ctaadmin2 --comment "ctaadmin2" kubectl --namespace=${NAMESPACE} exec kdc -- cat /root/ctaadmin2.keytab | kubectl --namespace=${NAMESPACE} exec -i client -- bash -c "cat > /root/ctaadmin2.keytab; mkdir -p /tmp/ctaadmin2" kubectl --namespace=${NAMESPACE} exec kdc -- cat /root/poweruser1.keytab | kubectl --namespace=${NAMESPACE} exec -i client -- bash -c "cat > /root/poweruser1.keytab; mkdir -p /tmp/poweruser1" kubectl --namespace=${NAMESPACE} exec kdc -- cat /root/eosadmin1.keytab | kubectl --namespace=${NAMESPACE} exec -i client -- bash -c "cat > /root/eosadmin1.keytab; mkdir -p /tmp/eosadmin1" + ### # Filling services in DNS on all pods ### diff --git a/continuousintegration/orchestration/tests/repack_systemtest_wrapper.sh b/continuousintegration/orchestration/tests/repack_systemtest_wrapper.sh index d612506189..9272c25280 100755 --- a/continuousintegration/orchestration/tests/repack_systemtest_wrapper.sh +++ b/continuousintegration/orchestration/tests/repack_systemtest_wrapper.sh @@ -81,11 +81,13 @@ archiveFiles() { echo "Launching client_ar.sh on client pod" echo " Archiving ${NB_FILES} files of ${FILE_SIZE_KB}kB each" echo " Archiving files: xrdcp as user1" - kubectl -n ${NAMESPACE} exec client -- bash 
/root/client_ar.sh -n ${NB_FILES} -s ${FILE_SIZE_KB} -p 100 -d /eos/ctaeos/preprod -v -A || exit 1 + kubectl -n ${NAMESPACE} exec client -- bash -c "yum -y install parallel && echo 'will cite ' | parallel --bibtex" + kubectl -n ${NAMESPACE} exec client -- bash -c "/root/client_setup.sh -n ${NB_FILES} -s ${FILE_SIZE_KB} -p 100 -d /eos/ctaeos/preprod -v -A" || exit 1 + kubectl -n ${NAMESPACE} exec client -- bash -c "tail -v -f /mnt/logs/tpsrv0*/rmcd/cta/cta-rmcd.log & export TAILPID=\$! && . /root/client_env && /root/client_archive.sh && kill \${TAILPID} &> /dev/null" || exit 1 } echo -kubectl -n ${NAMESPACE} cp client_ar.sh client:/root/client_ar.sh +kubectl -n ${NAMESPACE} cp . client:/root/ REPACK_BUFFER_URL=/eos/ctaeos/repack echo "Creating the repack buffer URL directory (${REPACK_BUFFER_URL})" diff --git a/continuousintegration/orchestration/tests/simple_client_ar.sh b/continuousintegration/orchestration/tests/simple_client_ar.sh deleted file mode 100644 index 24511f2a0b..0000000000 --- a/continuousintegration/orchestration/tests/simple_client_ar.sh +++ /dev/null @@ -1,129 +0,0 @@ -#!/bin/bash - -# @project The CERN Tape Archive (CTA) -# @copyright Copyright © 2022 CERN -# @license This program is free software, distributed under the terms of the GNU General Public -# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can -# redistribute it and/or modify it under the terms of the GPL Version 3, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the GNU General Public License for more details. -# -# In applying this licence, CERN does not waive the privileges and immunities -# granted to it by virtue of its status as an Intergovernmental Organization or -# submit itself to any jurisdiction. 
- -EOSINSTANCE=ctaeos -TEST_FILE_NAME=`uuidgen` - -# get some common useful helpers for krb5 -. /root/client_helper.sh - -eospower_kdestroy -eospower_kinit - -echo "xrdcp /etc/group root://${EOSINSTANCE}//eos/ctaeos/cta/${TEST_FILE_NAME}" -xrdcp /etc/group root://${EOSINSTANCE}//eos/ctaeos/cta/${TEST_FILE_NAME} - -SECONDS_PASSED=0 -WAIT_FOR_ARCHIVED_FILE_TIMEOUT=90 -while test 0 = `eos root://${EOSINSTANCE} info /eos/ctaeos/cta/${TEST_FILE_NAME} | awk '{print $4;}' | grep tape | wc -l`; do - echo "Waiting for file to be archived to tape: Seconds passed = ${SECONDS_PASSED}" - sleep 1 - let SECONDS_PASSED=SECONDS_PASSED+1 - - if test ${SECONDS_PASSED} == ${WAIT_FOR_ARCHIVED_FILE_TIMEOUT}; then - echo "Timed out after ${WAIT_FOR_ARCHIVED_FILE_TIMEOUT} seconds waiting for file to be archived to tape" - exit 1 - fi -done - -echo -echo "FILE ARCHIVED TO TAPE" -echo -eos root://${EOSINSTANCE} info /eos/ctaeos/cta/${TEST_FILE_NAME} -echo -echo "Information about the testing file:" -echo "********" - eos root://${EOSINSTANCE} attr ls /eos/ctaeos/cta/${TEST_FILE_NAME} - eos root://${EOSINSTANCE} ls -l /eos/ctaeos/cta/${TEST_FILE_NAME} - eos root://${EOSINSTANCE} info /eos/ctaeos/cta/${TEST_FILE_NAME} -echo -echo "Removing disk replica as poweruser1:powerusers (12001:1200)" -# XrdSecPROTOCOL=sss eos -r 12001 1200 root://${EOSINSTANCE} file drop /eos/ctaeos/cta/${TEST_FILE_NAME} 1 -XrdSecPROTOCOL=sss eos -r 0 0 root://${EOSINSTANCE} file drop /eos/ctaeos/cta/${TEST_FILE_NAME} 1 -echo -echo "Information about the testing file without disk replica" - eos root://${EOSINSTANCE} ls -l /eos/ctaeos/cta/${TEST_FILE_NAME} - eos root://${EOSINSTANCE} info /eos/ctaeos/cta/${TEST_FILE_NAME} -echo -echo "Trigerring EOS retrieve workflow as poweruser1:powerusers (12001:1200)" -#echo "XrdSecPROTOCOL=sss xrdfs ${EOSINSTANCE} prepare -s \"/eos/ctaeos/cta/${TEST_FILE_NAME}?eos.ruid=12001&eos.rgid=1200\"" -# XrdSecPROTOCOL=sss xrdfs ${EOSINSTANCE} prepare -s 
"/eos/ctaeos/cta/${TEST_FILE_NAME}?eos.ruid=12001&eos.rgid=1200" - -# We need the -s as we are staging the files from tape (see xrootd prepare definition) -KRB5CCNAME=/tmp/${EOSPOWER_USER}/krb5cc_0 XrdSecPROTOCOL=krb5 xrdfs ${EOSINSTANCE} prepare -s /eos/ctaeos/cta/${TEST_FILE_NAME} - -# Wait for the copy to appear on disk -SECONDS_PASSED=0 -WAIT_FOR_RETRIEVED_FILE_TIMEOUT=90 -while test 0 = `eos root://${EOSINSTANCE} info /eos/ctaeos/cta/${TEST_FILE_NAME} | awk '{print $4;}' | grep -F "default.0" | wc -l`; do - echo "Waiting for file to be retrieved from tape: Seconds passed = ${SECONDS_PASSED}" - sleep 1 - let SECONDS_PASSED=SECONDS_PASSED+1 - - if test ${SECONDS_PASSED} == ${WAIT_FOR_RETRIEVED_FILE_TIMEOUT}; then - echo "Timed out after ${WAIT_FOR_RETRIEVED_FILE_TIMEOUT} seconds waiting for file to be retrieved from tape" - exit 1 - fi -done -echo -echo "FILE RETRIEVED FROM DISK" -echo -echo "Information about the testing file:" -echo "********" - eos root://${EOSINSTANCE} attr ls /eos/ctaeos/cta/${TEST_FILE_NAME} - eos root://${EOSINSTANCE} ls -l /eos/ctaeos/cta/${TEST_FILE_NAME} - eos root://${EOSINSTANCE} info /eos/ctaeos/cta/${TEST_FILE_NAME} - -# Delete the file so it doesn't interfere with tests in client_ar.sh -echo "eos root://${EOSINSTANCE} rm /eos/ctaeos/cta/${TEST_FILE_NAME}" -eos root://${EOSINSTANCE} rm /eos/ctaeos/cta/${TEST_FILE_NAME} - -# -# Check we can copy zero-length files into the namespace by touch and copy -# -echo "eos root://${EOSINSTANCE} touch /eos/ctaeos/cta/${TEST_FILE_NAME}.touch" -eos root://${EOSINSTANCE} touch /eos/ctaeos/cta/${TEST_FILE_NAME}.touch -echo "eos root://${EOSINSTANCE} cp /eos/ctaeos/cta/${TEST_FILE_NAME}.touch /tmp/${TEST_FILE_NAME}.touch" -eos root://${EOSINSTANCE} cp /eos/ctaeos/cta/${TEST_FILE_NAME}.touch /tmp/${TEST_FILE_NAME}.touch -echo "eos root://${EOSINSTANCE} cp /tmp/${TEST_FILE_NAME}.touch /eos/ctaeos/cta/${TEST_FILE_NAME}.zero" -eos root://${EOSINSTANCE} cp /tmp/${TEST_FILE_NAME}.touch 
/eos/ctaeos/cta/${TEST_FILE_NAME}.zero -echo "eos root://${EOSINSTANCE} cp /eos/ctaeos/cta/${TEST_FILE_NAME}.zero /tmp/${TEST_FILE_NAME}.zero" -eos root://${EOSINSTANCE} cp /eos/ctaeos/cta/${TEST_FILE_NAME}.zero /tmp/${TEST_FILE_NAME}.zero - -if [ -f /tmp/${TEST_FILE_NAME}.touch -a ! -s /tmp/${TEST_FILE_NAME}.touch -a -f /tmp/${TEST_FILE_NAME}.zero -a ! -s /tmp/${TEST_FILE_NAME}.zero ]; then - echo "Zero-length file copy succeeded" - zeroLengthTests=1 -else - echo "Zero-length file copy failed" - zeroLengthTests=0 -fi -# Clean up -echo "eos root://${EOSINSTANCE} rm /eos/ctaeos/cta/${TEST_FILE_NAME}.touch" -eos root://${EOSINSTANCE} rm /eos/ctaeos/cta/${TEST_FILE_NAME}.touch -echo "eos root://${EOSINSTANCE} rm /eos/ctaeos/cta/${TEST_FILE_NAME}.zero" -eos root://${EOSINSTANCE} rm /eos/ctaeos/cta/${TEST_FILE_NAME}.zero -rm -f /tmp/${TEST_FILE_NAME}.touch /tmp/${TEST_FILE_NAME}.zero - -# Report results -msgNum=$(grep "\"File suc" /mnt/logs/tpsrv*/taped/cta/cta-taped.log | grep ${TEST_FILE_NAME} | tail -n 4 | wc -l) -if [ "$msgNum" = "4" -a $zeroLengthTests -eq 1 ]; then - echo "OK: all tests passed" - rc=0 -else - echo "FAIL: tests failed" - rc=1 -fi diff --git a/continuousintegration/orchestration/tests/test_client-gfal2.sh b/continuousintegration/orchestration/tests/test_client-gfal2.sh new file mode 100755 index 0000000000..a7e4e72740 --- /dev/null +++ b/continuousintegration/orchestration/tests/test_client-gfal2.sh @@ -0,0 +1,115 @@ +#!/bin/bash + +# @project The CERN Tape Archive (CTA) +# @copyright Copyright © 2022 CERN +# @license This program is free software, distributed under the terms of the GNU General Public +# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can +# redistribute it and/or modify it under the terms of the GPL Version 3, or (at your +# option) any later version. 
+# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# In applying this licence, CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization or +# submit itself to any jurisdiction. + + +usage() { cat <<EOF 1>&2 +Usage: $0 -n <namespace> +EOF +exit 1 +} + +while getopts "n:" o; do + case "${o}" in + n) + NAMESPACE=${OPTARG} + ;; + *) + usage + ;; + esac +done +shift $((OPTIND-1)) + +if [ -z "${NAMESPACE}" ]; then + usage +fi + +if [ ! -z "${error}" ]; then + echo -e "ERROR:\n${error}" + exit 1 +fi + +echo "Preparing namespace for the tests" + . prepare_tests.sh -n ${NAMESPACE} +if [ $? -ne 0 ]; then + echo "ERROR: failed to prepare namespace for the tests" + exit 1 +fi + +echo "Installing parallel" +kubectl -n ${NAMESPACE} exec client -- bash -c "yum -y install parallel" || exit 1 +kubectl -n ${NAMESPACE} exec client -- bash -c "echo 'will cite' | parallel --bibtex" || exit 1 + +echo "Installing gfal2 utility" +kubectl -n ${NAMESPACE} exec client -- bash -c "yum -y install gfal2-util" || exit 1 + +echo +echo "Copying test scripts to client pod" +kubectl -n ${NAMESPACE} cp . client:/root/ +kubectl -n ${NAMESPACE} cp grep_xrdlog_mgm_for_error.sh ctaeos:/root/ + +NB_FILES=10000 +FILE_SIZE_KB=15 +NB_PROCS=100 + +TEST_PRERUN=". /root/client_env " +TEST_POSTRUN="" + +VERBOSE=1 +if [[ $VERBOSE == 1 ]]; then + TEST_PRERUN="tail -v -f /mnt/logs/tpsrv0*/rmcd/cta/cta-rmcd.log & export TAILPID=\$! && ${TEST_PRERUN}" + TEST_POSTRUN=" && kill \${TAILPID} &> /dev/null" +fi + +clientgfal2_options="-n ${NB_FILES} -s ${FILE_SIZE_KB} -p ${NB_PROCS} -d /eos/ctaeos/preprod -v -r" + +# Tests +# Check for xrd version as xrd gfal plugin only runs under xrd version 5.
+if [[ ${XROOTD_VERSION} == 5 ]]; then + GFAL2_PROTOCOL='root' + echo "Installing gfal2-plugin-xrootd for gfal-${GFAL2_PROTOCOL} tests." + kubectl -n ${NAMESPACE} exec client -- bash -c "yum -y install gfal2-plugin-xrootd" || exit 1 + + echo "Setting up environment for gfal-${GFAL2_PROTOCOL} test." + kubectl -n ${NAMESPACE} exec client -- bash -c "/root/client_setup.sh ${clientgfal2_options} -Z ${GFAL2_PROTOCOL}" || exit 1 + + echo + echo "Launching client-gfal2_ar.sh on client pod using ${GFAL2_PROTOCOL} protocol" + echo " Archiving files: xrdcp as user1" + echo " Retrieving files with gfal xrootd" + + kubectl -n ${NAMESPACE} exec client -- bash -c "${TEST_PRERUN} && /root/client-gfal2_ar.sh ${TEST_POSTRUN}" || exit 1 + kubectl -n ${NAMESPACE} exec ctaeos -- bash /root/grep_xrdlog_mgm_for_error.sh || exit 1 +fi + +kubectl -n ${NAMESPACE} cp client:/root/trackerdb.db ../../../pod_logs/${NAMESPACE}/trackerdb_gfalxrd.db 2>/dev/null + + +# Test gfal http plugin. +#GFAL2_PROTOCOL='https' +# TODO: Reset DB, etc. +# echo "Setting up environment for gfal-${GFAL2_PROTOCOL} tests +# kubectl -n ${NAMESPACE} exec client -- bash -c "/root/client_setup.sh ${clientgfal2_options}" +# echo "Installing gfal2-plugin-http for http gfal test."
+# kubectl -n ${NAMESPACE} exec client -- bash -c "sudo yum -y install gfal2-plugin-http" || exit 1 +#echo +#echo "Launching client-gfal2_ar.sh on client pod using ${TEST_PROTOCOL} protocol" +#echo " Archiving files: xrdcp as user1" +#echo " Retrieving files with gfal https" +#kubectl -n ${NAMESPACE} exec client -- bash -c "${TEST_PRERUN} && /root/client-gfal2_ar.sh && ${TEST_POSTRUN}" || exit 1 +#kubectl -n ${NAMESPACE} exec ctaeos -- bash /root/grep_xrdlog_mgm_for_error.sh || exit 1 +exit 0 diff --git a/continuousintegration/orchestration/tests/archive_retrieve.sh b/continuousintegration/orchestration/tests/test_client.sh similarity index 52% rename from continuousintegration/orchestration/tests/archive_retrieve.sh rename to continuousintegration/orchestration/tests/test_client.sh index 706073fff4..8bb18a47dd 100755 --- a/continuousintegration/orchestration/tests/archive_retrieve.sh +++ b/continuousintegration/orchestration/tests/test_client.sh @@ -50,37 +50,98 @@ if [ $? -ne 0 ]; then exit 1 fi +echo "Installing parallel" +kubectl -n ${NAMESPACE} exec client -- bash -c "yum -y install parallel" || exit 1 +kubectl -n ${NAMESPACE} exec client -- bash -c "echo 'will cite' | parallel --bibtex" || exit 1 + +echo +echo "Copying test scripts to client pod." +kubectl -n ${NAMESPACE} cp . client:/root/ +kubectl -n ${NAMESPACE} cp grep_xrdlog_mgm_for_error.sh ctaeos:/root/ + +NB_FILES=10000 +FILE_SIZE_KB=15 +NB_PROCS=100 + +echo +echo "Setting up environment for tests." 
+kubectl -n ${NAMESPACE} exec client -- bash -c "/root/client_setup.sh -n ${NB_FILES} -s ${FILE_SIZE_KB} -p ${NB_PROCS} -d /eos/ctaeos/preprod -v -r" || exit 1 + +# Tests are run under the cta user account which doesn't have a login +# option so to be able to export the test setup we need to source the file +# client_env (file generated in client_setup with all env vars and functions) +# +# Also, to show the output of tpsrv0X rmcd to the logs we need to tail the files +# before every related script and kill it at the end. Another way to do this would +# require to change the stdin/out/err of the tail process and set/reset it +# at the beginning and end of each kubectl exec command. +TEST_PRERUN=". /root/client_env " +TEST_POSTRUN="" + +VERBOSE=1 +if [[ $VERBOSE == 1 ]]; then + TEST_PRERUN="tail -v -f /mnt/logs/tpsrv0*/rmcd/cta/cta-rmcd.log & export TAILPID=\$! && ${TEST_PRERUN}" + TEST_POSTRUN=" && kill \${TAILPID} &> /dev/null" +fi + echo -echo "Launching simple_client_ar.sh on client pod" +echo "Launching immutable file test on client pod" +kubectl -n ${NAMESPACE} exec client -- bash -c "${TEST_PRERUN} && echo yes | cta-immutable-file-test root://\${EOSINSTANCE}/\${EOS_DIR}/immutable_file ${TEST_POSTRUN} || die 'The cta-immutable-file-test failed.'" || exit 1 + +echo +echo "Launching client_simple_ar.sh on client pod" echo " Archiving file: xrdcp as user1" echo " Retrieving it as poweruser1" -kubectl -n ${NAMESPACE} cp simple_client_ar.sh client:/root/simple_client_ar.sh -kubectl -n ${NAMESPACE} cp client_helper.sh client:/root/client_helper.sh -kubectl -n ${NAMESPACE} exec client -- bash /root/simple_client_ar.sh || exit 1 - -kubectl -n ${NAMESPACE} cp grep_xrdlog_mgm_for_error.sh ctaeos:/root/grep_xrdlog_mgm_for_error.sh +kubectl -n ${NAMESPACE} exec client -- bash -c "${TEST_PRERUN} && /root/client_simple_ar.sh ${TEST_POSTRUN}" || exit 1 kubectl -n ${NAMESPACE} exec ctaeos -- bash /root/grep_xrdlog_mgm_for_error.sh || exit 1 -NB_FILES=10000 -FILE_SIZE_KB=15
echo echo "Launching client_ar.sh on client pod" echo " Archiving ${NB_FILES} files of ${FILE_SIZE_KB}kB each" echo " Archiving files: xrdcp as user1" echo " Retrieving them as poweruser1" -kubectl -n ${NAMESPACE} cp client_ar.sh client:/root/client_ar.sh -kubectl -n ${NAMESPACE} cp client_ar_abortPrepare.py client:/root/client_abortPrepare.sh -kubectl -n ${NAMESPACE} exec client -- bash /root/client_ar.sh -n ${NB_FILES} -s ${FILE_SIZE_KB} -p 100 -d /eos/ctaeos/preprod -v -r || exit 1 +kubectl -n ${NAMESPACE} exec client -- bash -c "${TEST_PRERUN} && /root/client_ar.sh ${TEST_POSTRUN}" || exit 1 kubectl -n ${NAMESPACE} exec ctaeos -- bash /root/grep_xrdlog_mgm_for_error.sh || exit 1 + +echo +echo "Launching client_evict.sh on client pod" +echo " Evicting files: xrdfs as poweruser1" +kubectl -n ${NAMESPACE} exec client -- bash -c "${TEST_PRERUN} && /root/client_evict.sh ${TEST_POSTRUN}" || exit 1 + +kubectl -n ${NAMESPACE} exec ctaeos -- bash /root/grep_xrdlog_mgm_for_error.sh || exit 1 + + +echo +echo "Launching client_abortPrepare.sh on client pod" +echo " Retrieving files: xrdfs as poweruser1" +echo " Aborting prepare: xrdfs as poweruser1" +kubectl -n ${NAMESPACE} exec client -- bash -c "${TEST_PRERUN} && /root/client_abortPrepare.sh ${TEST_POSTRUN}" || exit 1 + +kubectl -n ${NAMESPACE} exec ctaeos -- bash /root/grep_xrdlog_mgm_for_error.sh || exit 1 + + +echo +echo "Launching client_delete.sh on client pod" +echo " Deleting files:" +kubectl -n ${NAMESPACE} exec client -- bash -c "${TEST_PRERUN} && /root/client_delete.sh ${TEST_POSTRUN}" || exit 1 + +kubectl -n ${NAMESPACE} exec ctaeos -- bash /root/grep_xrdlog_mgm_for_error.sh || exit 1 + + +echo +echo "Results for base client tests." 
+kubectl -n ${NAMESPACE} exec client -- bash -c "${TEST_PRERUN} && /root/client_results.sh ${TEST_POSTRUN}" || exit 1 + +kubectl -n ${NAMESPACE} cp client:/root/trackerdb.db ../../../pod_logs/${NAMESPACE}/trackerdb.db 2>/dev/null + echo -echo "Launching multiple_retrieve.sh on client pod" +echo "Launching client_multiple_retrieve.sh on client pod" echo " Archiving file: xrdcp as user1" echo " Retrieving it as poweruser1" -kubectl -n ${NAMESPACE} cp multiple_retrieve.sh client:/root/multiple_retrieve.sh -kubectl -n ${NAMESPACE} exec client -- bash /root/multiple_retrieve.sh || exit 1 +kubectl -n ${NAMESPACE} exec client -- bash /root/client_multiple_retrieve.sh || exit 1 kubectl -n ${NAMESPACE} exec ctaeos -- bash /root/grep_xrdlog_mgm_for_error.sh || exit 1 @@ -88,7 +149,6 @@ echo echo "Launching idempotent_prepare.sh on client pod" echo " Archiving file: xrdcp as user1" echo " Retrieving it as poweruser1" -kubectl -n ${NAMESPACE} cp idempotent_prepare.sh client:/root/idempotent_prepare.sh kubectl -n ${NAMESPACE} exec client -- bash /root/idempotent_prepare.sh || exit 1 kubectl -n ${NAMESPACE} exec ctaeos -- bash /root/grep_xrdlog_mgm_for_error.sh || exit 1 @@ -97,7 +157,6 @@ echo echo "Launching delete_on_closew_error.sh on client pod" echo " Archiving file: xrdcp as user1" echo " Retrieving it as poweruser1" -kubectl -n ${NAMESPACE} cp delete_on_closew_error.sh client:/root/delete_on_closew_error.sh kubectl -n ${NAMESPACE} exec client -- bash /root/delete_on_closew_error.sh || exit 1 kubectl -n ${NAMESPACE} exec ctaeos -- bash /root/grep_xrdlog_mgm_for_error.sh || exit 1 @@ -106,7 +165,6 @@ echo echo "Launching try_evict_before_archive_completed.sh on client pod" echo " Archiving file: xrdcp as user1" echo " Retrieving it as poweruser1" -kubectl -n ${NAMESPACE} cp try_evict_before_archive_completed.sh client:/root/try_evict_before_archive_completed.sh kubectl -n ${NAMESPACE} exec client -- bash /root/try_evict_before_archive_completed.sh || exit 1 kubectl -n 
${NAMESPACE} exec ctaeos -- bash /root/grep_xrdlog_mgm_for_error.sh || exit 1 @@ -117,7 +175,6 @@ echo echo "Launching retrieve_queue_cleanup.sh on client pod" echo " Archiving file: xrdcp as user1" echo " Retrieving it as poweruser1" -kubectl -n ${NAMESPACE} cp retrieve_queue_cleanup.sh client:/root/retrieve_queue_cleanup.sh kubectl -n ${NAMESPACE} exec client -- bash /root/retrieve_queue_cleanup.sh || exit 1 kubectl -n ${NAMESPACE} exec ctaeos -- bash /root/grep_xrdlog_mgm_for_error.sh || exit 1 diff --git a/cta.spec.in b/cta.spec.in index 9149c20451..718e6ea2de 100644 --- a/cta.spec.in +++ b/cta.spec.in @@ -551,17 +551,6 @@ collects EOS disk copies that have been safely stored to tape. %systemd_postun cta-fst-gcd.service %systemdDaemonReload -%package -n cta-systemtest-helpers -Summary: Collection of utilities deployed in system test client containers. -Group: Application/CTA -Requires: cta-lib = %{version}-%{release} -Requires: xrootd-client-libs >= %{xrootdVersion} -%description -n cta-systemtest-helpers -Collection of utilities deployed in system test client containers. -Currently contains a helper for the client-ar script, which should be installed alongside it. -%files -n cta-systemtest-helpers -%attr(0755,root,root) /usr/bin/cta-client-ar-abortPrepare - %package -n cta-release Summary: Repository configuration for CTA and its dependencies Group: Application/CTA -- GitLab