From 9662d44673ffdd328a72b8c8f4fdbaec7bd2d88e Mon Sep 17 00:00:00 2001 From: Lasse Tjernaes Wardenaer <lasse.tjernaes.wardenaer@cern.ch> Date: Fri, 3 Mar 2023 14:45:56 +0100 Subject: [PATCH] Resolve "Simple safe tool for EOS NS injection" --- ReleaseNotes.md | 3 +- .../EosNamespaceInjection.cpp | 234 ++++++++++++------ .../EosNamespaceInjection.hpp | 61 +++++ common/utils/utils.cpp | 6 + common/utils/utils.hpp | 7 + 5 files changed, 234 insertions(+), 77 deletions(-) diff --git a/ReleaseNotes.md b/ReleaseNotes.md index d3929e636e..bdaa2f7a36 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -1,10 +1,11 @@ # v4.Next ### Features - cta/CTA#213 - Improve error messages for `cta-eos-namespace-inject` +- cta/CTA#213 - Skip valid paths that exist with valid metadata - cta/CTA#308 - Remove catalogue autogenerated files - cta/CTA#294 - Improve error messages for 'Exec' in the gRPC client - cta/CTA#245 - Implements cta-admin functions in cta-frontend-grpc - +- cta/CTA#213 - Skip files with paths that have valid metadata ### Bug Fixes - cta/CTA#305 - AllCatalogueSchema file not created when updating schema diff --git a/cmdline/standalone_cli_tools/eos_namespace_injection/EosNamespaceInjection.cpp b/cmdline/standalone_cli_tools/eos_namespace_injection/EosNamespaceInjection.cpp index 91b468885d..41240f3e7f 100644 --- a/cmdline/standalone_cli_tools/eos_namespace_injection/EosNamespaceInjection.cpp +++ b/cmdline/standalone_cli_tools/eos_namespace_injection/EosNamespaceInjection.cpp @@ -14,6 +14,8 @@ * granted to it by virtue of its status as an Intergovernmental Organization or * submit itself to any jurisdiction. 
*/ + +#include <ctime> #include <sys/stat.h> #include <string> #include <memory> @@ -129,81 +131,33 @@ EosNamespaceInjection::~EosNamespaceInjection() = default; // exceptionThrowingMain //------------------------------------------------------------------------------ int EosNamespaceInjection::exceptionThrowingMain(const int argc, char *const *const argv) { - CmdLineArgs cmdLineArgs(argc, argv, StandaloneCliTool::EOS_NAMESPACE_INJECTION); - auto [serviceProvider, endpointmap] = ConnConfiguration::readAndSetConfiguration(m_log, getUsername(), cmdLineArgs); - m_serviceProviderPtr = std::move(serviceProvider); - m_endpointMapPtr = std::move(endpointmap); - if(cmdLineArgs.m_help) { - cmdLineArgs.printUsage(std::cout); - throw exception::UserError(""); - } + setCmdLineArguments(argc, argv); - if(cmdLineArgs.m_json) { - m_jsonPath = cmdLineArgs.m_json.value(); - } else { - cmdLineArgs.printUsage(std::cout); - throw exception::UserError("The required json file was not provided."); - } - - MetaData metaData(m_jsonPath); - for(const auto &metaDataObject : metaData.m_mdCollection) { - const uint64_t archiveId = cta::utils::toUint64(metaDataObject.archiveId); - if(const bool archiveIdExists = getMetaDataFromCatalogue(archiveId); !archiveIdExists) { - throw exception::UserError("archivefile id " + metaDataObject.archiveId + " does not exist"); - } - compareJsonAndCtaMetaData(metaDataObject, g_metaDataObjectCatalogue); + MetaData userInput(m_jsonPath); + for(const auto &metaDataFromUser : userInput.m_mdCollection) { + const uint64_t archiveId = cta::utils::toUint64(metaDataFromUser.archiveId); + checkArchiveIdExistsInCatalogue(archiveId); + compareJsonAndCtaMetaData(metaDataFromUser, g_metaDataObjectCatalogue); - const auto enclosingPath = cta::utils::getEnclosingPath(metaDataObject.eosPath); - const auto [parentId, uid, gid] = getContainerIdsEos(metaDataObject.diskInstance, enclosingPath); - if(!parentId) { - throw exception::UserError("Could not find: " + enclosingPath + ". 
Check that gRPC authentication is set up correctly, and that the path exists"); - } - - if(const auto fid = getFileIdEos(metaDataObject.diskInstance, metaDataObject.eosPath); fid != 0) { - throw cta::cliTool::EosNameSpaceInjectionError("The file with path " + metaDataObject.eosPath + " already exists for instance " + metaDataObject.diskInstance + ". This tool does not overwrite existing files"); - } + const auto enclosingPath = cta::utils::getEnclosingPath(metaDataFromUser.eosPath); + const auto [parentId, uid, gid] = getContainerIdsEos(metaDataFromUser.diskInstance, enclosingPath); + checkParentContainerExists(parentId, enclosingPath); - const auto newFid = createFileInEos(metaDataObject, parentId, uid, gid); - if (newFid != 0) { - std::list<cta::log::Param> params; - params.push_back(cta::log::Param("diskFileId", newFid)); - m_log(cta::log::INFO, "File was created in the EOS namespace", params); - } else { - std::list<cta::log::Param> params; - params.push_back(cta::log::Param("diskFileId", newFid)); - m_log(cta::log::WARNING, "Could not find file in the EOS namespace. 
Check that gRPC authentication is set up correctly, and that the path exists", params); + if(const auto& fid = getFileIdEos(metaDataFromUser.diskInstance, metaDataFromUser.eosPath); pathExists(fid)) { + checkExistingPathHasInvalidMetadata(archiveId, fid, metaDataFromUser); + continue; } - auto decimalToHexadecimal = [](const std::string &decimalNumber) { - std::stringstream fxIdStream; - fxIdStream << std::hex << decimalNumber; - return fxIdStream.str(); - }; + const auto newFid = createFileInEos(metaDataFromUser, parentId, uid, gid); + checkFileCreated(newFid); - std::string fxId = decimalToHexadecimal(std::to_string(newFid)); - updateFxidAndDiskInstanceInCatalogue(metaDataObject.archiveId, fxId, metaDataObject.diskInstance); + std::string newFxId = cta::utils::decimalToHexadecimal(std::to_string(newFid)); + updateFxidAndDiskInstanceInCatalogue(metaDataFromUser.archiveId, newFxId, metaDataFromUser.diskInstance); - // sanity check - getMetaDataFromCatalogue(archiveId); - const auto [eosArchiveFileId, eosChecksumDecimal] = getArchiveFileIdAndChecksumFromEOS(metaDataObject.diskInstance, fxId); - const std::string eosChecksum = decimalToHexadecimal(eosChecksumDecimal); - const auto& ctaChecksum = g_metaDataObjectCatalogue.checksumValue; - std::list<cta::log::Param> params; - params.push_back(cta::log::Param("archiveFileId", archiveId)); - params.push_back(cta::log::Param("diskFileId in EOS for new file", fxId)); - params.push_back(cta::log::Param("diskFileId in Catalogue", g_metaDataObjectCatalogue.fxId)); - params.push_back(cta::log::Param("diskInstance in Catalogue", g_metaDataObjectCatalogue.diskInstance)); - params.push_back(cta::log::Param("checksum", ctaChecksum)); - if(eosArchiveFileId == archiveId && eosChecksum == ctaChecksum && g_metaDataObjectCatalogue.fxId == fxId) { - m_log(cta::log::INFO, "File metadata in EOS and CTA matches", params); - } else { - params.push_back(cta::log::Param("eosArchiveFileId", eosArchiveFileId)); - 
params.push_back(cta::log::Param("eosChecksum", eosChecksum)); - m_log(cta::log::WARNING, "File metadata in EOS and CTA does not match", params); - throw cta::cliTool::EosNameSpaceInjectionError("Sanity check failed."); - } + checkEosCtaConsistency(archiveId, newFxId, metaDataFromUser); } + createTxtFileWithSkippedMetadata(); return 0; } @@ -362,18 +316,18 @@ uint64_t EosNamespaceInjection::getFileIdEos(const std::string &diskInstance, co //------------------------------------------------------------------------------ -// restoreDeletedFileEos +// createFileInEos //------------------------------------------------------------------------------ -uint64_t EosNamespaceInjection::createFileInEos(const MetaDataObject &metaDataObject, const uint64_t &parentId, const uint64_t uid, const uint64_t gid) const { +uint64_t EosNamespaceInjection::createFileInEos(const MetaDataObject &metaDataFromUser, const uint64_t &parentId, const uint64_t uid, const uint64_t gid) const { ::eos::rpc::FileMdProto file; - const auto& fullPath = metaDataObject.eosPath; + const auto& fullPath = metaDataFromUser.eosPath; file.set_id(0); // Setting a fid as 0 will tell eos to generate a new ID file.set_cont_id(parentId); file.set_uid(uid); file.set_gid(gid); - file.set_size(cta::utils::toUint64(metaDataObject.size)); + file.set_size(cta::utils::toUint64(metaDataFromUser.size)); file.set_layout_id(m_defaultFileLayout); // Filemode: filter out S_ISUID, S_ISGID and S_ISVTX because EOS does not follow POSIX semantics for these bits @@ -393,15 +347,15 @@ uint64_t EosNamespaceInjection::createFileInEos(const MetaDataObject &metaDataOb file.set_name(cta::utils::getEnclosedName(fullPath)); // Checksums - file.mutable_checksum()->set_type(metaDataObject.checksumType);{{{{{}}}}} - const auto byteArray = checksum::ChecksumBlob::HexToByteArray(metaDataObject.checksumValue); + file.mutable_checksum()->set_type(metaDataFromUser.checksumType);{{{{{}}}}} + const auto byteArray = 
checksum::ChecksumBlob::HexToByteArray(metaDataFromUser.checksumValue); file.mutable_checksum()->set_value(std::string(byteArray.rbegin(),byteArray.rend())); // Extended attributes: // // 1. Archive File ID - std::string archiveId(metaDataObject.archiveId); - file.mutable_xattrs()->insert(google::protobuf::MapPair<std::string,std::string>("sys.archive.file_id", metaDataObject.archiveId)); + std::string archiveId(metaDataFromUser.archiveId); + file.mutable_xattrs()->insert(google::protobuf::MapPair<std::string,std::string>("sys.archive.file_id", metaDataFromUser.archiveId)); // 2. Storage Class file.mutable_xattrs()->insert(google::protobuf::MapPair<std::string,std::string>("sys.archive.storage_class", g_metaDataObjectCatalogue.storageClass)); // 3. Birth Time @@ -414,9 +368,9 @@ uint64_t EosNamespaceInjection::createFileInEos(const MetaDataObject &metaDataOb file.mutable_locations()->Add(65535); } - auto reply = m_endpointMapPtr->fileInsert(metaDataObject.diskInstance, file); + auto reply = m_endpointMapPtr->fileInsert(metaDataFromUser.diskInstance, file); - const auto new_fid = getFileIdEos(metaDataObject.diskInstance, metaDataObject.eosPath); + const auto new_fid = getFileIdEos(metaDataFromUser.diskInstance, metaDataFromUser.eosPath); return new_fid; } @@ -457,4 +411,132 @@ std::pair<ArchiveId, Checksum> EosNamespaceInjection::getArchiveFileIdAndChecksu return std::make_pair(archiveFileId,checksumValue); } +//------------------------------------------------------------------------------ +// setCmdLineArguments +//------------------------------------------------------------------------------ +void EosNamespaceInjection::setCmdLineArguments(const int argc, char *const *const argv) { + CmdLineArgs cmdLineArgs(argc, argv, StandaloneCliTool::EOS_NAMESPACE_INJECTION); + auto [serviceProvider, endpointmap] = ConnConfiguration::readAndSetConfiguration(m_log, getUsername(), cmdLineArgs); + m_serviceProviderPtr = std::move(serviceProvider); + m_endpointMapPtr = 
std::move(endpointmap); + + if(cmdLineArgs.m_help) { + cmdLineArgs.printUsage(std::cout); + throw exception::UserError(""); + } + + if(cmdLineArgs.m_json) { + m_jsonPath = cmdLineArgs.m_json.value(); + } else { + cmdLineArgs.printUsage(std::cout); + throw exception::UserError("The required json file was not provided."); + } +} + +//------------------------------------------------------------------------------ +// checkEosCtaConsistency +//------------------------------------------------------------------------------ +bool EosNamespaceInjection::checkEosCtaConsistency(const uint64_t& archiveId, const std::string& newFxIdEos, const MetaDataObject &metaDataFromUser) { + getMetaDataFromCatalogue(archiveId); + const auto [eosArchiveFileId, eosChecksumDecimal] = getArchiveFileIdAndChecksumFromEOS(metaDataFromUser.diskInstance, newFxIdEos); + const std::string eosChecksum = cta::utils::decimalToHexadecimal(eosChecksumDecimal); + const auto& ctaChecksum = g_metaDataObjectCatalogue.checksumValue; + std::list<cta::log::Param> params; + params.push_back(cta::log::Param("archiveFileId", archiveId)); + params.push_back(cta::log::Param("diskFileId in EOS for new file", newFxIdEos)); + params.push_back(cta::log::Param("diskFileId in Catalogue", g_metaDataObjectCatalogue.fxId)); + params.push_back(cta::log::Param("diskInstance in Catalogue", g_metaDataObjectCatalogue.diskInstance)); + params.push_back(cta::log::Param("checksum", ctaChecksum)); + if(eosArchiveFileId == archiveId && eosChecksum == ctaChecksum && g_metaDataObjectCatalogue.fxId == newFxIdEos) { + m_log(cta::log::INFO, "File metadata in EOS and CTA matches", params); + return true; + } else { + params.push_back(cta::log::Param("eosArchiveFileId", eosArchiveFileId)); + params.push_back(cta::log::Param("eosChecksum", eosChecksum)); + m_log(cta::log::WARNING, "File metadata in EOS and CTA does not match", params); + m_inconsistentMetadata.push_back(metaDataFromUser); + return false; + } +} + 
+//------------------------------------------------------------------------------ +// pathExists +//------------------------------------------------------------------------------ +bool EosNamespaceInjection::pathExists(const uint64_t fid) const { + return (fid != 0); +} + +//------------------------------------------------------------------------------ +// checkFileCreated +//------------------------------------------------------------------------------ +void EosNamespaceInjection::checkFileCreated(const uint64_t newFid) { + if (pathExists(newFid)) { + std::list<cta::log::Param> params; + params.push_back(cta::log::Param("diskFileId", newFid)); + m_log(cta::log::INFO, "File was created in the EOS namespace", params); + } else { + std::list<cta::log::Param> params; + params.push_back(cta::log::Param("diskFileId", newFid)); + m_log(cta::log::WARNING, "Could not find file in the EOS namespace. Check that gRPC authentication is set up correctly, and that the path exists", params); + } +} + +//------------------------------------------------------------------------------ +// checkParentContainerExists +//------------------------------------------------------------------------------ +void EosNamespaceInjection::checkParentContainerExists(const uint64_t parentId, const std::string& enclosingPath) const { + if(!pathExists(parentId)) { + throw exception::UserError("Could not find: " + enclosingPath + ". 
Check that gRPC authentication is set up correctly, and that the path exists"); + } +} + +//------------------------------------------------------------------------------ +// checkArchiveIdExistsInCatalogue +//------------------------------------------------------------------------------ +void EosNamespaceInjection::checkArchiveIdExistsInCatalogue(const uint64_t &archiveId) const { + if(!getMetaDataFromCatalogue(archiveId)) { + throw exception::UserError("archive file id " + std::to_string(archiveId) + " does not exist"); + } +} + +//------------------------------------------------------------------------------ +// checkExistingPathHasInvalidMetadata +//------------------------------------------------------------------------------ +void EosNamespaceInjection::checkExistingPathHasInvalidMetadata(const uint64_t &archiveId, const uint64_t& fid, const MetaDataObject& metaDataFromUser) { + const std::string fxId = cta::utils::decimalToHexadecimal(std::to_string(fid)); + if(!checkEosCtaConsistency(archiveId, fxId, metaDataFromUser)) { + throw cta::cliTool::EosNameSpaceInjectionError("The file with path " + metaDataFromUser.eosPath + " already exists for instance " + metaDataFromUser.diskInstance + ". 
This tool does not overwrite existing files"); + } +} + +//------------------------------------------------------------------------------ +// createTxtFileWithSkippedMetadata +//------------------------------------------------------------------------------ +void EosNamespaceInjection::createTxtFileWithSkippedMetadata() const { + auto unix_epoch_time = std::time(0); + const std::string currentTime = std::to_string(unix_epoch_time); + const std::filesystem::path filePath = "/tmp/skippedMetadataEosInjection" + currentTime + ".txt"; + std::ofstream archiveIdFile(filePath); + + if (archiveIdFile.fail()) { + throw std::runtime_error("Unable to open file " + filePath.string()); + } + + if (archiveIdFile.is_open()) { + for (const auto& metadata : m_inconsistentMetadata) { + archiveIdFile << + "{ eosPath: " << metadata.eosPath << + " , diskInstance: " << metadata.diskInstance << + " , archiveId: " << metadata.archiveId << + " , size: " << metadata.size << + " , checksumType: " << metadata.checksumType << + " , checksumValue: " << metadata.checksumValue << + " }" << std::endl; + } + archiveIdFile.close(); + std::cout << m_inconsistentMetadata.size() << " entries finished with inconsistent metadata." 
<< std::endl; + std::cout << "The skipped metadata can be found here: " << filePath << std::endl; + } +} + } // namespace cta::cliTool \ No newline at end of file diff --git a/cmdline/standalone_cli_tools/eos_namespace_injection/EosNamespaceInjection.hpp b/cmdline/standalone_cli_tools/eos_namespace_injection/EosNamespaceInjection.hpp index bec46677b0..1dca6860fa 100644 --- a/cmdline/standalone_cli_tools/eos_namespace_injection/EosNamespaceInjection.hpp +++ b/cmdline/standalone_cli_tools/eos_namespace_injection/EosNamespaceInjection.hpp @@ -18,6 +18,7 @@ #pragma once #include <filesystem> +#include <vector> #include "cmdline/standalone_cli_tools/common/CmdLineTool.hpp" #include "cmdline/standalone_cli_tools/eos_namespace_injection/MetaData.hpp" @@ -36,6 +37,7 @@ namespace eos::client { class EndpointMap; } namespace cta::log { class StdoutLogger; } namespace cta::cliTool { +class CmdLineArgs; class EosNamespaceInjection final: public CmdLineTool { public: @@ -115,6 +117,59 @@ class EosNamespaceInjection final: public CmdLineTool { */ std::pair<ArchiveId, Checksum> getArchiveFileIdAndChecksumFromEOS(const std::string& diskInstance, const std::string& fxId); + /** + * Validates the command line arguments + * @param argc The number of command-line arguments including the program name. + * @param argv The command-line arguments. 
+ */ + void setCmdLineArguments(const int argc, char *const *const argv); + + /** + * Checks if path exists in EOS + * @param fid EOS file id + */ + bool pathExists(const uint64_t fid) const; + + /** + * Checks consistency between EOS and CTA + * @param archiveId CTA archive file id + * @param newFxIdEos The eos file id + * @param metaDataFromUser metaData for the eos file + */ + bool checkEosCtaConsistency(const uint64_t& archiveId, const std::string& newFxIdEos, const MetaDataObject &metaDataFromUser); + + /** + * Checks if file was created in EOS + * @param newFid EOS file id + */ + void checkFileCreated(const uint64_t newFid); + + /** + * Checks if parent container exists in EOS + * @param parentId The id of the parent container in EOS + * @param enclosingPath The full EOS path of the parent container + */ + void checkParentContainerExists(const uint64_t parentId, const std::string& enclosingPath) const; + + /** + * Checks if archive id exists + * @param archiveId CTA archive file id + */ + void checkArchiveIdExistsInCatalogue(const uint64_t &archiveId) const; + + /** + * Throws error if existing path has invalid metadata + * @param archiveId CTA archive file id + * @param fid The eos file id + * @param metaDataFromUser metaData for the eos file + */ + void checkExistingPathHasInvalidMetadata(const uint64_t &archiveId, const uint64_t& fid, const MetaDataObject& metaDataFromUser); + + /** + * Writes the metadata of the skipped files to a txt file + */ + void createTxtFileWithSkippedMetadata() const; + /** * Meta data from CTA catalogue */ @@ -140,6 +195,12 @@ class EosNamespaceInjection final: public CmdLineTool { */ cta::log::StdoutLogger &m_log; + /** + * When a file is skipped due to inconsistent meta data between EOS and CTA, + * its metadata is added to this vector + */ + std::vector<MetaDataObject> m_inconsistentMetadata; + /** * CTA Frontend service provider */ diff --git a/common/utils/utils.cpp b/common/utils/utils.cpp index 
83c7946824..0282354b1e 100644 --- 
a/common/utils/utils.cpp +++ b/common/utils/utils.cpp @@ -1023,5 +1023,11 @@ std::vector<std::string> commaSeparatedStringToVector(const std::string &commaSe return result; } +std::string decimalToHexadecimal(const std::string &decimalNumber) { + std::stringstream fxIdStream; + fxIdStream << std::hex << decimalNumber; + return fxIdStream.str(); +} + } // namespace utils } // namespace cta diff --git a/common/utils/utils.hpp b/common/utils/utils.hpp index 8bcd2119af..be45c81ecd 100644 --- a/common/utils/utils.hpp +++ b/common/utils/utils.hpp @@ -494,6 +494,13 @@ namespace utils { */ std::vector<std::string> commaSeparatedStringToVector(const std::string &commaSeparated); + /** + * Converts a number from a decimal number to a hexadecimal number + * @param decimalNumber The number that will be transformed + * @return the hexadecimal version of the number (NOTE(review): the argument is streamed as a std::string and std::hex only changes integer formatting, so the text appears to be returned unchanged - confirm) + */ + std::string decimalToHexadecimal(const std::string &decimalNumber); + } // namespace utils } // namespace cta -- GitLab