Commit 24f8436a authored by Cedric Caffy's avatar Cedric Caffy
Browse files

[cta-taped] Drive is put down if the CleanerSession fails to eject the tape from the drive

parent 686bc9c9
......@@ -38,7 +38,8 @@ castor::tape::tapeserver::daemon::CleanerSession::CleanerSession(
const bool waitMediaInDrive,
const uint32_t waitMediaInDriveTimeout,
const std::string & externalEncryptionKeyScript,
cta::catalogue::Catalogue & catalogue):
cta::catalogue::Catalogue & catalogue,
cta::Scheduler & scheduler):
m_capUtils(capUtils),
m_mc(mc),
m_log(log),
......@@ -48,7 +49,8 @@ castor::tape::tapeserver::daemon::CleanerSession::CleanerSession(
m_waitMediaInDrive(waitMediaInDrive),
m_waitMediaInDriveTimeout(waitMediaInDriveTimeout),
m_encryptionControl(externalEncryptionKeyScript),
m_catalogue(catalogue)
m_catalogue(catalogue),
m_scheduler(scheduler)
{}
//------------------------------------------------------------------------------
......@@ -68,15 +70,49 @@ castor::tape::tapeserver::daemon::Session::EndOfSessionAction
errorMessage = "Caught an unknown exception";
}
// Reaching this point means the cleaner failed and an exception was thrown
//Reaching this point means the cleaner failed and an exception was thrown
std::list<cta::log::Param> params = {
cta::log::Param("tapeVid", m_vid),
cta::log::Param("tapeDrive", m_driveConfig.unitName),
cta::log::Param("message", errorMessage)};
m_log(cta::log::ERR, "Cleaner failed", params);
m_log(cta::log::ERR, "Cleaner failed. Putting the drive down.", params);
//Putting the drive down
try {
setDriveDownAfterCleanerFailed(std::string("Cleaner failed. ") + errorMessage);
} catch(const cta::exception::Exception &ex) {
std::list<cta::log::Param> params = {
cta::log::Param("tapeVid", m_vid),
cta::log::Param("tapeDrive", m_driveConfig.unitName),
cta::log::Param("message", ex.getMessageValue())};
m_log(cta::log::ERR, "Cleaner failed. Failed to put the drive down", params);
}
return MARK_DRIVE_AS_DOWN;
}
void castor::tape::tapeserver::daemon::CleanerSession::setDriveDownAfterCleanerFailed(const std::string & errorMsg) {
std::string logicalLibrary = m_driveConfig.logicalLibrary;
std::string hostname=cta::utils::getShortHostname();
std::string driveName = m_driveConfig.unitName;
cta::common::dataStructures::DriveInfo driveInfo;
driveInfo.driveName = driveName;
driveInfo.logicalLibrary = logicalLibrary;
driveInfo.host = hostname;
cta::log::LogContext lc(m_log);
m_scheduler.reportDriveStatus(driveInfo, cta::common::dataStructures::MountType::NoMount, cta::common::dataStructures::DriveStatus::Down, lc);
cta::common::dataStructures::SecurityIdentity cliId;
cta::common::dataStructures::DesiredDriveState driveState;
driveState.up = false;
driveState.forceDown = false;
driveState.setReasonFromLogMsg(cta::log::ERR,errorMsg);
m_scheduler.setDesiredDriveState(cliId, m_driveConfig.unitName, driveState, lc);
}
//------------------------------------------------------------------------------
// exceptionThrowingExecute
//------------------------------------------------------------------------------
......
......@@ -34,6 +34,7 @@
#include "tapeserver/castor/tape/tapeserver/SCSI/Device.hpp"
#include "tapeserver/castor/tape/tapeserver/daemon/EncryptionControl.hpp"
#include "catalogue/Catalogue.hpp"
#include "scheduler/Scheduler.hpp"
#include <memory>
......@@ -75,7 +76,8 @@ namespace daemon {
const bool waitMediaInDrive,
const uint32_t waitMediaInDriveTimeout,
const std::string & externalEncryptionKeyScript,
cta::catalogue::Catalogue & catalogue);
cta::catalogue::Catalogue & catalogue,
cta::Scheduler & scheduler);
/**
* Execute the session and return the type of action to be performed
......@@ -141,6 +143,11 @@ namespace daemon {
*/
cta::catalogue::Catalogue & m_catalogue;
/**
* CTA scheduler
*/
cta::Scheduler & m_scheduler;
/**
* Variable used to log UPDATE_USER_NAME in the DB
*/
......@@ -232,6 +239,11 @@ namespace daemon {
*/
void dismountTape(const std::string &vid);
/**
* Put the drive down in case the Cleaner has failed
*/
void setDriveDownAfterCleanerFailed(const std::string & errorMsg);
}; // class CleanerSession
} // namespace daemon
......
......@@ -147,7 +147,7 @@ schedule:
cta::common::dataStructures::DesiredDriveState driveState;
driveState.up = false;
driveState.forceDown = false;
std::string errorMsg = "A tape was detected in the drive. Putting the drive back down.";
std::string errorMsg = "A tape was detected in the drive. Putting the drive down.";
int logLevel = cta::log::ERR;
driveState.setReasonFromLogMsg(logLevel,errorMsg);
m_scheduler.setDesiredDriveState(securityIdentity, m_driveConfig.unitName, driveState, lc);
......
......@@ -56,6 +56,7 @@
#include "scheduler/testingMocks/MockArchiveMount.hpp"
#include "tests/TempFile.hpp"
#include "objectstore/BackendRadosTestSwitch.hpp"
#include "CleanerSession.hpp"
#include <dirent.h>
#include <fcntl.h>
......@@ -2589,6 +2590,119 @@ TEST_P(DataTransferSessionTest, DataTransferSessionTapeFullOnFlushMigration) {
"mountTotalReadRetries=\"25\" mountTotalWriteRetries=\"25\" mountWriteTransients=\"10\""));
}
/**
 * Checks that a CleanerSession which fails puts the drive down: the drive is
 * first registered and set up, the cleaner session is executed, and the test
 * then expects the desired drive state to be "not up" and the session to
 * return MARK_DRIVE_AS_DOWN.
 */
TEST_P(DataTransferSessionTest, CleanerSessionFailsShouldPutTheDriveDown) {
  // 0) Prepare the logger for everyone
  cta::log::StringLogger logger("dummy","tapeServerUnitTest",cta::log::DEBUG);
  cta::log::LogContext logContext(logger);

  setupDefaultCatalogue();

  // 1) Prepare the necessary environment (system wrapper)
  castor::tape::System::mockWrapper mockSys;
  mockSys.delegateToFake();
  mockSys.disableGMockCallsCounting();
  mockSys.fake.setupForVirtualDriveSLC6();

  // 2) Get the catalogue and the scheduler
  auto & catalogue = getCatalogue();
  auto & scheduler = getScheduler();

  // Always use the same requester
  const cta::common::dataStructures::SecurityIdentity requester("user", "group");

  // 3) Create the environment (library + tape)
  const std::string libraryComment = "Library comment";
  const bool libraryIsDisabled = false;
  catalogue.createLogicalLibrary(s_adminOnAdminHost, s_libraryName,
    libraryIsDisabled, libraryComment);
  {
    auto libraries = catalogue.getLogicalLibraries();
    ASSERT_EQ(1, libraries.size());
    ASSERT_EQ(s_libraryName, libraries.front().name);
    ASSERT_EQ(libraryComment, libraries.front().comment);
  }
  const std::string tapeComment = "Tape comment";
  bool notDisabled = false;
  bool notFull = false;
  bool notReadOnly = false;

  {
    cta::catalogue::CreateTapeAttributes tape;
    tape.vid = s_vid;
    tape.mediaType = s_mediaType;
    tape.vendor = s_vendor;
    tape.logicalLibraryName = s_libraryName;
    tape.tapePoolName = s_tapePoolName;
    tape.full = notFull;
    tape.disabled = notDisabled;
    tape.readOnly = notReadOnly;
    tape.comment = tapeComment;
    catalogue.createTape(s_adminOnAdminHost, tape);
  }

  // Create the mount criteria
  catalogue.createMountPolicy(requester, "immediateMount", 1000, 0, 1000, 0, 1, "Policy comment");
  catalogue.createRequesterMountRule(requester, "immediateMount", s_diskInstance, requester.username, "Rule comment");

  //delete is unnecessary
  //pointer with ownership will be passed to the application,
  //which will do the delete
  const uint64_t tapeSize = 5000;
  mockSys.fake.m_pathToDrive["/dev/nst0"] = new castor::tape::tapeserver::drive::FakeDrive(tapeSize,
    castor::tape::tapeserver::drive::FakeDrive::OnFlush);

  // 4) Report the drive's existence and put it up in the drive register.
  cta::tape::daemon::TpconfigLine driveConfig("T10D6116", "TestLogicalLibrary", "/dev/tape_T10D6116", "manual");
  cta::common::dataStructures::DriveInfo driveInfo;
  driveInfo.driveName=driveConfig.unitName;
  driveInfo.logicalLibrary=driveConfig.logicalLibrary;
  // Assignment, not comparison: the original "==" left the host unset.
  driveInfo.host="host";
  // We need to create the drive in the registry before being able to put it up.
  scheduler.reportDriveStatus(driveInfo, cta::common::dataStructures::MountType::NoMount, cta::common::dataStructures::DriveStatus::Down, logContext);
  cta::common::dataStructures::DesiredDriveState driveState;
  driveState.up = true;
  driveState.forceDown = false;
  scheduler.setDesiredDriveState(s_adminOnAdminHost, driveConfig.unitName, driveState, logContext);

  // 5) Create and run the cleaner session
  cta::log::DummyLogger dummyLog("dummy", "dummy");
  cta::mediachanger::MediaChangerFacade mc(dummyLog);
  cta::server::ProcessCapDummy capUtils;
  CleanerSession cleanerSession(
    capUtils,
    mc,
    logger,
    driveConfig,
    mockSys,
    s_vid,
    false,
    0,
    "",
    catalogue,
    scheduler
    );
  auto endOfSessionAction = cleanerSession.execute();

  //the tape has not been labeled so the cleanerSession should have failed and put the drive down.
  cta::common::dataStructures::DesiredDriveState newDriveState = scheduler.getDesiredDriveState(driveConfig.unitName,logContext);
  ASSERT_FALSE(newDriveState.up);
  ASSERT_EQ(castor::tape::tapeserver::daemon::Session::MARK_DRIVE_AS_DOWN,endOfSessionAction);
}
#undef TEST_MOCK_DB
#ifdef TEST_MOCK_DB
static cta::MockSchedulerDatabaseFactory mockDbFactory;
......
......@@ -1035,6 +1035,21 @@ int DriveHandler::runChild() {
// sleep(1);
// return castor::tape::tapeserver::daemon::Session::MARK_DRIVE_AS_DOWN;
// }
try {
scheduler.ping(lc);
} catch (const cta::catalogue::WrongSchemaVersionException &ex) {
log::ScopedParamContainer param (lc);
param.add("errorMessage", ex.getMessageValue());
lc.log(log::CRIT, "In DriveHandler::runChild() before cleanerSession: catalogue MAJOR version mismatch. Reporting fatal error.");
driveHandlerProxy.reportState(tape::session::SessionState::Fatal, tape::session::SessionType::Undetermined, "");
return castor::tape::tapeserver::daemon::Session::MARK_DRIVE_AS_DOWN;
} catch (cta::exception::Exception &ex) {
log::ScopedParamContainer param (lc);
param.add("errorMessage", ex.getMessageValue());
lc.log(log::CRIT, "In DriveHandler::runChild() before cleanerSession: failed to ping central storage before session. Reporting fatal error.");
driveHandlerProxy.reportState(tape::session::SessionState::Fatal, tape::session::SessionType::Undetermined, "");
return castor::tape::tapeserver::daemon::Session::MARK_DRIVE_AS_DOWN;
}
castor::tape::tapeserver::daemon::CleanerSession cleanerSession(
capUtils,
......@@ -1046,7 +1061,8 @@ int DriveHandler::runChild() {
true,
60,
"",
*m_catalogue);
*m_catalogue,
scheduler);
return cleanerSession.execute();
} else {
// The next session will be a normal session (no crash with a mounted tape before).
......@@ -1157,29 +1173,68 @@ int DriveHandler::runChild() {
//------------------------------------------------------------------------------
SubprocessHandler::ProcessingStatus DriveHandler::shutdown() {
// TODO: improve in the future (preempt the child process)
log::ScopedParamContainer params(m_processManager.logContext());
auto &lc = m_processManager.logContext();
log::ScopedParamContainer params(lc);
params.add("tapeDrive", m_configLine.unitName);
m_processManager.logContext().log(log::INFO, "In DriveHandler::shutdown(): simply killing the process.");
lc.log(log::INFO, "In DriveHandler::shutdown(): simply killing the process.");
kill();
std::set<SessionState> statesRequiringCleaner = { SessionState::Mounting,
SessionState::Running, SessionState::Unmounting };
if ( statesRequiringCleaner.count(m_sessionState)) {
if (!m_sessionVid.size()) {
m_processManager.logContext().log(log::ERR, "In DriveHandler::shutdown(): Should run cleaner but VID is missing. Do not nothing.");
lc.log(log::ERR, "In DriveHandler::shutdown(): Should run cleaner but VID is missing. Do nothing.");
} else {
log::ScopedParamContainer params(m_processManager.logContext());
params.add("tapeVid", m_sessionVid)
.add("tapeDrive", m_configLine.unitName)
.add("sessionState", session::toString(m_sessionState))
.add("sessionType", session::toString(m_sessionType));
m_processManager.logContext().log(log::INFO, "In DriveHandler::shutdown(): starting cleaner.");
lc.log(log::INFO, "In DriveHandler::shutdown(): starting cleaner.");
// Capabilities management.
cta::server::ProcessCap capUtils;
// Mounting management.
if(!m_catalogue)
m_catalogue = createCatalogue("DriveHandler::shutdown()");
//Create the scheduler
//Create the backend
std::unique_ptr<cta::objectstore::Backend> backend;
try {
backend.reset(cta::objectstore::BackendFactory::createBackend(m_tapedConfig.backendPath.value(), lc.logger()).release());
} catch (cta::exception::Exception &ex) {
log::ScopedParamContainer param (lc);
param.add("errorMessage", ex.getMessageValue());
lc.log(log::CRIT, "In DriveHandler::shutdown(): failed to connect to objectstore.");
goto exitShutdown;
}
// If the backend is a VFS, make sure we don't delete it on exit.
// If not, nevermind.
try {
dynamic_cast<cta::objectstore::BackendVFS &>(*backend).noDeleteOnExit();
} catch (std::bad_cast &){}
// Create the agent entry in the object store. This could fail (even before ping, so
// handle failure like a ping failure).
std::unique_ptr<cta::objectstore::BackendPopulator> backendPopulator;
std::unique_ptr<cta::OStoreDBWithAgent> osdb;
try {
std::string processName="DriveHandlerShutdown-";
processName+=m_configLine.unitName;
log::ScopedParamContainer params(lc);
params.add("processName", processName);
lc.log(log::DEBUG, "In DriveHandler::shutdown(): will create agent entry. Enabling leaving non-empty agent behind.");
backendPopulator.reset(new cta::objectstore::BackendPopulator(*backend, processName, lc));
} catch(cta::exception::Exception &ex) {
log::ScopedParamContainer param(lc);
param.add("errorMessage", ex.getMessageValue());
lc.log(log::CRIT, "In DriveHandler::shutdown(): failed to instantiate agent entry. Reporting fatal error.");
goto exitShutdown;
}
osdb.reset(new cta::OStoreDBWithAgent(*backend, backendPopulator->getAgentReference(), *m_catalogue, lc.logger()));
lc.log(log::DEBUG, "In DriveHandler::shutdown(): will create scheduler.");
std::unique_ptr<cta::Scheduler> scheduler(new Scheduler(*m_catalogue, *osdb, 0,0));
cta::mediachanger::MediaChangerFacade mediaChangerFacade(m_processManager.logContext().logger());
castor::tape::System::realWrapper sWrapper;
castor::tape::tapeserver::daemon::CleanerSession cleanerSession(
......@@ -1192,18 +1247,21 @@ SubprocessHandler::ProcessingStatus DriveHandler::shutdown() {
true,
60,
"",
*m_catalogue);
*m_catalogue,
*scheduler
);
cleanerSession.execute();
}
}
m_sessionState = SessionState::Shutdown;
m_processingStatus.nextTimeout=m_processingStatus.nextTimeout.max();
m_processingStatus.forkRequested = false;
m_processingStatus.killRequested = false;
m_processingStatus.shutdownComplete = true;
m_processingStatus.sigChild = false;
return m_processingStatus;
exitShutdown:
m_sessionState = SessionState::Shutdown;
m_processingStatus.nextTimeout=m_processingStatus.nextTimeout.max();
m_processingStatus.forkRequested = false;
m_processingStatus.killRequested = false;
m_processingStatus.shutdownComplete = true;
m_processingStatus.sigChild = false;
return m_processingStatus;
}
std::unique_ptr<cta::catalogue::Catalogue> DriveHandler::createCatalogue(const std::string & methodCaller){
......
......@@ -26,6 +26,7 @@
#include "tapeserver/session/SessionState.hpp"
#include "tapeserver/session/SessionType.hpp"
#include "catalogue/Catalogue.hpp"
#include "scheduler/Scheduler.hpp"
#include <memory>
namespace cta { namespace tape { namespace daemon {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment