Commit 3c27e67e authored by Steven Murray's avatar Steven Murray
Browse files

[catalogue] Fixed cta/CTA#901 cta-admin tapefile ls too slow

parent 98a0cdf0
......@@ -52,6 +52,7 @@ set (CATALOGUE_LIB_SRC_FILES
RdbmsCatalogueGetArchiveFilesForRepackItor.cpp
RdbmsCatalogueGetArchiveFilesItor.cpp
RdbmsCatalogueGetDeletedArchiveFilesItor.cpp
RdbmsCatalogueTapeContentsItor.cpp
SchemaCreatingSqliteCatalogue.cpp
SqliteCatalogue.cpp
SchemaVersion.cpp
......
......@@ -9462,6 +9462,177 @@ TEST_P(cta_catalogue_CatalogueTest, filesWrittenToTape_many_archive_files) {
}
}
 
// Look at all files on tape 1
{
catalogue::TapeFileSearchCriteria searchCriteria;
searchCriteria.vid = tape1.vid;
auto archiveFileItor = m_catalogue->getArchiveFilesItor(searchCriteria);
std::map<uint64_t, common::dataStructures::ArchiveFile> m = archiveFileItorToMap(archiveFileItor);
ASSERT_EQ(nbArchiveFiles, m.size());
for(uint64_t i = 1; i <= nbArchiveFiles; i++) {
std::ostringstream diskFileId;
diskFileId << (12345677 + i);
std::ostringstream diskFilePath;
diskFilePath << "/public_dir/public_file_" << i;
catalogue::TapeFileWritten fileWritten1;
fileWritten1.archiveFileId = i;
fileWritten1.diskInstance = diskInstance;
fileWritten1.diskFileId = diskFileId.str();
fileWritten1.diskFileOwnerUid = PUBLIC_DISK_USER;
fileWritten1.diskFileGid = PUBLIC_DISK_GROUP;
fileWritten1.size = archiveFileSize;
fileWritten1.checksumBlob.insert(checksum::ADLER32, "1357");
fileWritten1.storageClassName = m_storageClassDualCopy.name;
fileWritten1.vid = tape1.vid;
fileWritten1.fSeq = i;
fileWritten1.blockId = i * 100;
fileWritten1.copyNb = 1;
const auto idAndFile = m.find(i);
ASSERT_FALSE(m.end() == idAndFile);
const common::dataStructures::ArchiveFile archiveFile = idAndFile->second;
ASSERT_EQ(fileWritten1.archiveFileId, archiveFile.archiveFileID);
ASSERT_EQ(fileWritten1.diskInstance, archiveFile.diskInstance);
ASSERT_EQ(fileWritten1.diskFileId, archiveFile.diskFileId);
ASSERT_EQ(fileWritten1.diskFileOwnerUid, archiveFile.diskFileInfo.owner_uid);
ASSERT_EQ(fileWritten1.diskFileGid, archiveFile.diskFileInfo.gid);
ASSERT_EQ(fileWritten1.size, archiveFile.fileSize);
ASSERT_EQ(fileWritten1.checksumBlob, archiveFile.checksumBlob);
ASSERT_EQ(fileWritten1.storageClassName, archiveFile.storageClass);
ASSERT_EQ(1, archiveFile.tapeFiles.size());
// Tape copy 1
{
const auto it = archiveFile.tapeFiles.find(1);
ASSERT_NE(archiveFile.tapeFiles.end(), it);
ASSERT_EQ(fileWritten1.vid, it->vid);
ASSERT_EQ(fileWritten1.fSeq, it->fSeq);
ASSERT_EQ(fileWritten1.blockId, it->blockId);
ASSERT_EQ(fileWritten1.checksumBlob, it->checksumBlob);
ASSERT_EQ(fileWritten1.copyNb, it->copyNb);
}
}
}
// Look at all files on tape 1 with "show superseded" on even though there
// aren't any. This is just a check for syntax errors in the underlying
// SELECT statement.
{
catalogue::TapeFileSearchCriteria searchCriteria;
searchCriteria.vid = tape1.vid;
searchCriteria.showSuperseded = true;
auto archiveFileItor = m_catalogue->getArchiveFilesItor(searchCriteria);
std::map<uint64_t, common::dataStructures::ArchiveFile> m = archiveFileItorToMap(archiveFileItor);
ASSERT_EQ(nbArchiveFiles, m.size());
for(uint64_t i = 1; i <= nbArchiveFiles; i++) {
std::ostringstream diskFileId;
diskFileId << (12345677 + i);
std::ostringstream diskFilePath;
diskFilePath << "/public_dir/public_file_" << i;
catalogue::TapeFileWritten fileWritten1;
fileWritten1.archiveFileId = i;
fileWritten1.diskInstance = diskInstance;
fileWritten1.diskFileId = diskFileId.str();
fileWritten1.diskFileOwnerUid = PUBLIC_DISK_USER;
fileWritten1.diskFileGid = PUBLIC_DISK_GROUP;
fileWritten1.size = archiveFileSize;
fileWritten1.checksumBlob.insert(checksum::ADLER32, "1357");
fileWritten1.storageClassName = m_storageClassDualCopy.name;
fileWritten1.vid = tape1.vid;
fileWritten1.fSeq = i;
fileWritten1.blockId = i * 100;
fileWritten1.copyNb = 1;
const auto idAndFile = m.find(i);
ASSERT_FALSE(m.end() == idAndFile);
const common::dataStructures::ArchiveFile archiveFile = idAndFile->second;
ASSERT_EQ(fileWritten1.archiveFileId, archiveFile.archiveFileID);
ASSERT_EQ(fileWritten1.diskInstance, archiveFile.diskInstance);
ASSERT_EQ(fileWritten1.diskFileId, archiveFile.diskFileId);
ASSERT_EQ(fileWritten1.diskFileOwnerUid, archiveFile.diskFileInfo.owner_uid);
ASSERT_EQ(fileWritten1.diskFileGid, archiveFile.diskFileInfo.gid);
ASSERT_EQ(fileWritten1.size, archiveFile.fileSize);
ASSERT_EQ(fileWritten1.checksumBlob, archiveFile.checksumBlob);
ASSERT_EQ(fileWritten1.storageClassName, archiveFile.storageClass);
ASSERT_EQ(1, archiveFile.tapeFiles.size());
// Tape copy 1
{
const auto it = archiveFile.tapeFiles.find(1);
ASSERT_NE(archiveFile.tapeFiles.end(), it);
ASSERT_EQ(fileWritten1.vid, it->vid);
ASSERT_EQ(fileWritten1.fSeq, it->fSeq);
ASSERT_EQ(fileWritten1.blockId, it->blockId);
ASSERT_EQ(fileWritten1.checksumBlob, it->checksumBlob);
ASSERT_EQ(fileWritten1.copyNb, it->copyNb);
}
}
}
// Look at all files on tape 2
{
catalogue::TapeFileSearchCriteria searchCriteria;
searchCriteria.vid = tape2.vid;
auto archiveFileItor = m_catalogue->getArchiveFilesItor(searchCriteria);
std::map<uint64_t, common::dataStructures::ArchiveFile> m = archiveFileItorToMap(archiveFileItor);
ASSERT_EQ(nbArchiveFiles, m.size());
for(uint64_t i = 1; i <= nbArchiveFiles; i++) {
std::ostringstream diskFileId;
diskFileId << (12345677 + i);
std::ostringstream diskFilePath;
diskFilePath << "/public_dir/public_file_" << i;
catalogue::TapeFileWritten fileWritten2;
fileWritten2.archiveFileId = i;
fileWritten2.diskInstance = diskInstance;
fileWritten2.diskFileId = diskFileId.str();
fileWritten2.diskFileOwnerUid = PUBLIC_DISK_USER;
fileWritten2.diskFileGid = PUBLIC_DISK_GROUP;
fileWritten2.size = archiveFileSize;
fileWritten2.checksumBlob.insert(checksum::ADLER32, "1357");
fileWritten2.storageClassName = m_storageClassDualCopy.name;
fileWritten2.vid = tape2.vid;
fileWritten2.fSeq = i;
fileWritten2.blockId = i * 100;
fileWritten2.copyNb = 2;
const auto idAndFile = m.find(i);
ASSERT_FALSE(m.end() == idAndFile);
const common::dataStructures::ArchiveFile archiveFile = idAndFile->second;
ASSERT_EQ(fileWritten2.archiveFileId, archiveFile.archiveFileID);
ASSERT_EQ(fileWritten2.diskInstance, archiveFile.diskInstance);
ASSERT_EQ(fileWritten2.diskFileId, archiveFile.diskFileId);
ASSERT_EQ(fileWritten2.diskFileOwnerUid, archiveFile.diskFileInfo.owner_uid);
ASSERT_EQ(fileWritten2.diskFileGid, archiveFile.diskFileInfo.gid);
ASSERT_EQ(fileWritten2.size, archiveFile.fileSize);
ASSERT_EQ(fileWritten2.checksumBlob, archiveFile.checksumBlob);
ASSERT_EQ(fileWritten2.storageClassName, archiveFile.storageClass);
ASSERT_EQ(1, archiveFile.tapeFiles.size());
// Tape copy 2
{
const auto it = archiveFile.tapeFiles.find(2);
ASSERT_NE(archiveFile.tapeFiles.end(), it);
ASSERT_EQ(fileWritten2.vid, it->vid);
ASSERT_EQ(fileWritten2.fSeq, it->fSeq);
ASSERT_EQ(fileWritten2.blockId, it->blockId);
ASSERT_EQ(fileWritten2.checksumBlob, it->checksumBlob);
ASSERT_EQ(fileWritten2.copyNb, it->copyNb);
}
}
}
{
const uint64_t startFseq = 1;
auto archiveFileItor = m_catalogue->getArchiveFilesForRepackItor(tape1.vid, startFseq);
......
......@@ -22,6 +22,7 @@
#include "catalogue/RdbmsCatalogueGetArchiveFilesItor.hpp"
#include "catalogue/RdbmsCatalogueGetArchiveFilesForRepackItor.hpp"
#include "catalogue/RdbmsCatalogueGetDeletedArchiveFilesItor.hpp"
#include "catalogue/RdbmsCatalogueTapeContentsItor.hpp"
#include "catalogue/SchemaVersion.hpp"
#include "catalogue/SqliteCatalogueSchema.hpp"
#include "common/dataStructures/TapeFile.hpp"
......@@ -6636,6 +6637,13 @@ Catalogue::ArchiveFileItor RdbmsCatalogue::getArchiveFilesItor(const TapeFileSea
checkTapeFileSearchCriteria(searchCriteria);
// If this is the listing of the contents of a tape
if (!searchCriteria.archiveFileId && !searchCriteria.diskInstance && !searchCriteria.diskFileIds &&
searchCriteria.vid) {
const bool showSuperseded = searchCriteria.showSuperseded ? searchCriteria.showSuperseded.value() : false;
return getTapeContentsItor(searchCriteria.vid.value(), showSuperseded);
}
try {
// Create a connection to populate the temporary table (specialised by database type)
auto conn = m_archiveFileListingConnPool.getConn();
......@@ -6651,6 +6659,23 @@ Catalogue::ArchiveFileItor RdbmsCatalogue::getArchiveFilesItor(const TapeFileSea
}
}
//------------------------------------------------------------------------------
// getTapeContentsItor
//------------------------------------------------------------------------------
Catalogue::ArchiveFileItor RdbmsCatalogue::getTapeContentsItor(const std::string &vid, const bool showSuperseded)
const {
  try {
    // The iterator implementation is given the connection pool and obtains its
    // own connection; its constructor builds and executes the tape-contents
    // query.  The implementation object is then wrapped in the ArchiveFileItor
    // that is returned to the caller.
    return ArchiveFileItor(new RdbmsCatalogueTapeContentsItor(m_log, m_connPool, vid, showSuperseded));
  } catch(exception::UserError &) {
    // User errors are passed on to the caller untouched
    throw;
  } catch(exception::Exception &e) {
    // Prefix the message with the name of this method before re-throwing
    const std::string prefixedMsg = std::string(__FUNCTION__) + ": " + e.getMessage().str();
    e.getMessage().str(prefixedMsg);
    throw;
  }
}
//------------------------------------------------------------------------------
// getDeletedArchiveFilesItor
//------------------------------------------------------------------------------
......
......@@ -1926,6 +1926,16 @@ protected:
*/
void executeGetTapesBy100VidsStmtAndCollectResults(rdbms::Stmt &stmt,
common::dataStructures::VidToTapeMap &vidToTapeMap) const;
/**
* Returns an iterator across the files on the specified tape ordered by
* FSEQ.
*
* @param vid The volume identifier of the tape.
* @param showSuperseded Include superseded files in the output?
* @return The iterator.
*/
ArchiveFileItor getTapeContentsItor(const std::string &vid, const bool showSuperseded) const;
/**
* Cached versions of tape copy to tape tape pool mappings for specific
......
/*
* The CERN Tape Archive (CTA) project
* Copyright (C) 2015 CERN
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "catalogue/RdbmsCatalogueTapeContentsItor.hpp"
#include "common/exception/Exception.hpp"
#include "common/exception/LostDatabaseConnection.hpp"
#include "common/exception/UserError.hpp"
#include "common/log/LogContext.hpp"
namespace cta {
namespace catalogue {
namespace {
/**
 * Builds an ArchiveFile object from the row the specified result set is
 * currently positioned on.
 *
 * The returned archive file carries exactly one tape file, which is built
 * from the TAPE_FILE columns of the same row.
 *
 * @param rset The result set positioned on the row to be converted.
 * @return The ArchiveFile object built from the current row.
 */
static common::dataStructures::ArchiveFile rsetToArchiveFile(const rdbms::Rset &rset) {
  common::dataStructures::ArchiveFile file;

  // Archive file level columns
  file.archiveFileID = rset.columnUint64("ARCHIVE_FILE_ID");
  file.diskInstance = rset.columnString("DISK_INSTANCE_NAME");
  file.diskFileId = rset.columnString("DISK_FILE_ID");
  file.diskFileInfo.owner_uid = rset.columnUint64("DISK_FILE_UID");
  file.diskFileInfo.gid = rset.columnUint64("DISK_FILE_GID");
  file.fileSize = rset.columnUint64("SIZE_IN_BYTES");
  file.checksumBlob.deserializeOrSetAdler32(rset.columnBlob("CHECKSUM_BLOB"), rset.columnUint64("CHECKSUM_ADLER32"));
  file.storageClass = rset.columnString("STORAGE_CLASS_NAME");
  file.creationTime = rset.columnUint64("ARCHIVE_FILE_CREATION_TIME");
  file.reconciliationTime = rset.columnUint64("RECONCILIATION_TIME");

  // Tape file level columns
  common::dataStructures::TapeFile tapeCopy;
  tapeCopy.vid = rset.columnString("VID");
  tapeCopy.fSeq = rset.columnUint64("FSEQ");
  tapeCopy.blockId = rset.columnUint64("BLOCK_ID");
  tapeCopy.fileSize = rset.columnUint64("LOGICAL_SIZE_IN_BYTES");
  tapeCopy.copyNb = rset.columnUint64("COPY_NB");
  tapeCopy.creationTime = rset.columnUint64("TAPE_FILE_CREATION_TIME");

  // The tape file gets its own copy of the archive file's checksum blob
  tapeCopy.checksumBlob = file.checksumBlob;

  // The superseded-by fields are only filled in when both columns are non-null
  const bool supersededByIsSet =
    !(rset.columnIsNull("SUPERSEDED_BY_VID") || rset.columnIsNull("SUPERSEDED_BY_FSEQ"));
  if(supersededByIsSet) {
    tapeCopy.supersededByVid = rset.columnString("SUPERSEDED_BY_VID");
    tapeCopy.supersededByFSeq = rset.columnUint64("SUPERSEDED_BY_FSEQ");
  }

  file.tapeFiles.push_back(tapeCopy);
  return file;
}
} // anonymous namespace
//------------------------------------------------------------------------------
// constructor
//------------------------------------------------------------------------------
RdbmsCatalogueTapeContentsItor::RdbmsCatalogueTapeContentsItor(
  log::Logger &log,
  rdbms::ConnPool &connPool,
  const std::string &vid,
  const bool showSuperseded) :
  m_log(log),
  m_vid(vid),
  m_rsetIsEmpty(true),
  m_hasMoreHasBeenCalled(false)
{
  try {
    // An empty VID is rejected before any database resource is acquired
    if (vid.empty()) {
      throw exception::Exception("vid is an empty string");
    }

    // One row per tape file on the tape, joined with its archive file, storage
    // class, tape and tape pool
    std::string sql =
      "SELECT /*+ INDEX (TAPE_FILE TAPE_FILE_VID_IDX) */\n"
      "ARCHIVE_FILE.ARCHIVE_FILE_ID AS ARCHIVE_FILE_ID,\n"
      "ARCHIVE_FILE.DISK_INSTANCE_NAME AS DISK_INSTANCE_NAME,\n"
      "ARCHIVE_FILE.DISK_FILE_ID AS DISK_FILE_ID,\n"
      "ARCHIVE_FILE.DISK_FILE_UID AS DISK_FILE_UID,\n"
      "ARCHIVE_FILE.DISK_FILE_GID AS DISK_FILE_GID,\n"
      "ARCHIVE_FILE.SIZE_IN_BYTES AS SIZE_IN_BYTES,\n"
      "ARCHIVE_FILE.CHECKSUM_BLOB AS CHECKSUM_BLOB,\n"
      "ARCHIVE_FILE.CHECKSUM_ADLER32 AS CHECKSUM_ADLER32,\n"
      "STORAGE_CLASS.STORAGE_CLASS_NAME AS STORAGE_CLASS_NAME,\n"
      "ARCHIVE_FILE.CREATION_TIME AS ARCHIVE_FILE_CREATION_TIME,\n"
      "ARCHIVE_FILE.RECONCILIATION_TIME AS RECONCILIATION_TIME,\n"
      "TAPE_FILE.VID AS VID,\n"
      "TAPE_FILE.FSEQ AS FSEQ,\n"
      "TAPE_FILE.BLOCK_ID AS BLOCK_ID,\n"
      "TAPE_FILE.LOGICAL_SIZE_IN_BYTES AS LOGICAL_SIZE_IN_BYTES,\n"
      "TAPE_FILE.COPY_NB AS COPY_NB,\n"
      "TAPE_FILE.CREATION_TIME AS TAPE_FILE_CREATION_TIME,\n"
      "TAPE_FILE.SUPERSEDED_BY_VID AS SUPERSEDED_BY_VID,\n"
      "TAPE_FILE.SUPERSEDED_BY_FSEQ AS SUPERSEDED_BY_FSEQ,\n"
      "TAPE_POOL.TAPE_POOL_NAME AS TAPE_POOL_NAME\n"
      "FROM\n"
      "ARCHIVE_FILE\n"
      "INNER JOIN STORAGE_CLASS ON\n"
      "ARCHIVE_FILE.STORAGE_CLASS_ID = STORAGE_CLASS.STORAGE_CLASS_ID\n"
      "INNER JOIN TAPE_FILE ON\n"
      "ARCHIVE_FILE.ARCHIVE_FILE_ID = TAPE_FILE.ARCHIVE_FILE_ID\n"
      "INNER JOIN TAPE ON\n"
      "TAPE_FILE.VID = TAPE.VID\n"
      "INNER JOIN TAPE_POOL ON\n"
      "TAPE.TAPE_POOL_ID = TAPE_POOL.TAPE_POOL_ID\n"
      "WHERE\n"
      "TAPE_FILE.VID = :VID\n";

    // Superseded tape files are filtered out unless explicitly requested
    sql += showSuperseded ? "" : "AND TAPE_FILE.SUPERSEDED_BY_VID IS NULL\n";

    sql += "ORDER BY FSEQ";

    // Run the query and position the result set on its first row (if any)
    m_conn = connPool.getConn();
    m_stmt = m_conn.createStmt(sql);
    m_stmt.bindString(":VID", vid);
    m_rset = m_stmt.executeQuery();

    const bool noRows = !m_rset.next();
    m_rsetIsEmpty = noRows;

    // Nothing to iterate over, so the database resources are given back now
    if (noRows) {
      releaseDbResources();
    }
  } catch(exception::UserError &) {
    // User errors are passed on to the caller untouched
    throw;
  } catch(exception::Exception &e) {
    // Prefix the message with the name of this function before re-throwing
    const std::string prefixedMsg = std::string(__FUNCTION__) + ": " + e.getMessage().str();
    e.getMessage().str(prefixedMsg);
    throw;
  }
}
//------------------------------------------------------------------------------
// destructor
//------------------------------------------------------------------------------
RdbmsCatalogueTapeContentsItor::~RdbmsCatalogueTapeContentsItor() {
  // Reset the result set, statement and connection explicitly; the call is
  // still made when they were already released because the result set ran out
  // of rows.
  this->releaseDbResources();
}
//------------------------------------------------------------------------------
// releaseDbResources
//------------------------------------------------------------------------------
void RdbmsCatalogueTapeContentsItor::releaseDbResources() noexcept {
// Resets the three database members one after the other: first the result
// set, then the statement it was obtained from and finally the connection
// the statement was created on.  Called by the constructor and by next() as
// soon as the result set has no more rows, and by the destructor.
m_rset.reset();
m_stmt.reset();
m_conn.reset();
}
//------------------------------------------------------------------------------
// hasMore
//------------------------------------------------------------------------------
bool RdbmsCatalogueTapeContentsItor::hasMore() {
  // Remember the call so that next() can check it was preceded by hasMore()
  m_hasMoreHasBeenCalled = true;

  // The result set is always positioned one row ahead, so there is another
  // archive file exactly when the result set has not run out of rows
  const bool anotherRowIsAvailable = !m_rsetIsEmpty;
  return anotherRowIsAvailable;
}
//------------------------------------------------------------------------------
// next
//------------------------------------------------------------------------------
common::dataStructures::ArchiveFile RdbmsCatalogueTapeContentsItor::next() {
  try {
    // Every call to next() has to be preceded by its own call to hasMore()
    if(!m_hasMoreHasBeenCalled) {
      throw exception::Exception("hasMore() must be called before next()");
    }
    m_hasMoreHasBeenCalled = false;

    // Refuse to go past the last row of the result set
    if(m_rsetIsEmpty) {
      throw exception::Exception("next() was called with no more rows in the result set");
    }

    // Convert the current row and then step the result set on to the row that
    // will be returned by the following call
    auto currentFile = rsetToArchiveFile(m_rset);
    const bool noMoreRows = !m_rset.next();
    m_rsetIsEmpty = noMoreRows;

    // The database resources are given back as soon as the last row is consumed
    if(noMoreRows) {
      releaseDbResources();
    }

    return currentFile;
  } catch(exception::UserError &) {
    // User errors are passed on to the caller untouched
    throw;
  } catch(exception::Exception &e) {
    // Prefix the message with the name of this method before re-throwing
    const std::string prefixedMsg = std::string(__FUNCTION__) + ": " + e.getMessage().str();
    e.getMessage().str(prefixedMsg);
    throw;
  }
}
} // namespace catalogue
} // namespace cta
/*
* The CERN Tape Archive (CTA) project
* Copyright (C) 2015 CERN
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "catalogue/ArchiveFileBuilder.hpp"
#include "catalogue/Catalogue.hpp"
#include "catalogue/TapeFileSearchCriteria.hpp"
#include "common/dataStructures/ArchiveFile.hpp"
#include "common/log/Logger.hpp"
#include "rdbms/ConnPool.hpp"
#include "rdbms/Stmt.hpp"
#include "rdbms/Rset.hpp"
namespace cta {
namespace catalogue {
/**
* Iterates across the tape files that make up the contents of a given tape.
*/
class RdbmsCatalogueTapeContentsItor: public Catalogue::ArchiveFileItor::Impl {
public:
/**
* Constructor.
*
* Takes a connection from the specified pool, executes the query that lists
* the files on the specified tape ordered by FSEQ and positions the result
* set on its first row.  If there are no rows then the database resources
* are released straight away.
*
* @param log Object representing the API to the CTA logging system.
* @param connPool The database connection pool.
* @param vid The volume identifier of the tape.  An exception is thrown if
* this is an empty string.
* @param showSuperseded Include superseded files in the output?  If false
* then only tape files whose SUPERSEDED_BY_VID column is NULL are listed.
*/
RdbmsCatalogueTapeContentsItor(
log::Logger &log,
rdbms::ConnPool &connPool,
const std::string &vid,
const bool showSuperseded);
/**
* Destructor.
*
* Releases the database resources.
*/
~RdbmsCatalogueTapeContentsItor() override;
/**
* Returns true if a call to next would return another archive file.
*
* This method must be called before each call to next().
*/
bool hasMore() override;
/**
* Returns the next archive file or throws an exception if there isn't one.
*
* An exception is also thrown if hasMore() has not been called since the
* previous call to next().  Each returned archive file contains exactly one
* tape file.
*/
common::dataStructures::ArchiveFile next() override;
private:
/**
* Object representing the API to the CTA logging system.
*/
log::Logger &m_log;
/**
* The volume identifier of the tape.
*/
std::string m_vid;
/**
* True if the result set has no more rows to be returned by next().
*
* This is the case when the query returned no rows at all or when the last
* row has already been returned by next().
*/
bool m_rsetIsEmpty;
/**
* True if hasMore() has been called and the corresponding call to next() has
* not.
*
* This member-variable is used to prevent next() being called before
* hasMore().
*/
bool m_hasMoreHasBeenCalled;
/**
* The database connection.
*
* Declared before m_stmt and m_rset.  C++ destroys members in reverse order
* of declaration, so the connection is destroyed after the statement and
* the result set.
*/
rdbms::Conn m_conn;
/**
* The database statement.
*
* Created on m_conn by the constructor.
*/
rdbms::Stmt m_stmt;
/**
* The result set of archive files that is to be iterated over.
*
* Obtained by executing m_stmt in the constructor.
*/
rdbms::Rset m_rset;
/**
* Releases the database resources.
*
* Resets the result set, the statement and the connection, in that order.
*
* This method is idempotent.
*/
void releaseDbResources() noexcept;
}; // class RdbmsCatalogueTapeContentsItor
} // namespace catalogue
} // namespace cta
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment