Commit 82f9fcbc authored by Eric Cano's avatar Eric Cano
Browse files

CASTOR-4801: Failed recall mount of tapeserverd is not propagated to disk IO threads

Created a new unit test to try to reproduce the problem seen here. It did not reproduce it.
The best explanation we have is a stuck file client. When we totally fail to read data
from the tape, it makes no sense to open a file for which we have no data. So we deferred
the file opening until the first memory block arrives from the tape thread. The output of the
unit test showed that the file opening was successfully deferred.
parent 2f97f2af
......@@ -341,6 +341,96 @@ TEST(tapeServer, DataTransferSessionNoSuchDrive) {
ASSERT_EQ(SEINTERNAL, sim.m_sessionErrorCode);
}
TEST(tapeServer, DataTransferSessionFailtoMount) {
  // Recall session run against a fake drive constructed with failOnMount=true,
  // i.e. a drive whose waitUntilReady() throws. All the files queued for
  // recall also carry fSeqs beyond the end of the tape. The session is
  // expected to finish with per-file failures only (no session-level error),
  // and no disk file is expected to be opened, as the disk write tasks only
  // open their file after receiving a first valid memory block.
  castor::log::StringLogger logger("tapeServerUnitTest");

  // 1) Prepare the client and run it in another thread
  const uint32_t volReq = 0xBEEF;
  const std::string vid = "V12345";
  const std::string density = "8000GC";
  client::ClientSimulator sim(volReq, vid, density,
    castor::tape::tapegateway::TAPE_GATEWAY,
    castor::tape::tapegateway::READ);
  client::ClientSimulator::ipPort clientAddr = sim.getCallbackAddress();
  clientRunner simRun(sim);
  simRun.start();

  // 2) Prepare the VDQM request
  castor::legacymsg::RtcpJobRqstMsgBody VDQMjob;
  snprintf(VDQMjob.clientHost, CA_MAXHOSTNAMELEN+1, "%d.%d.%d.%d",
    clientAddr.a, clientAddr.b, clientAddr.c, clientAddr.d);
  snprintf(VDQMjob.driveUnit, CA_MAXUNMLEN+1, "T10D6116");
  snprintf(VDQMjob.dgn, CA_MAXDGNLEN+1, "LIBXX");
  VDQMjob.clientPort = clientAddr.port;
  VDQMjob.volReqId = volReq;

  // 3) Prepare the necessary environment (logger, plus system wrapper),
  // construct and run the session.
  castor::tape::System::mockWrapper mockSys;
  mockSys.delegateToFake();
  mockSys.disableGMockCallsCounting();
  mockSys.fake.setupForVirtualDriveSLC6();
  // No delete for the drive allocated below: ownership of the pointer is
  // passed to the application, which will do the delete.
  const bool failOnMount = true;
  mockSys.fake.m_pathToDrive["/dev/nst0"] =
    new castor::tape::tapeserver::drive::FakeDrive(failOnMount);
  {
    // Label the tape and rewind it. Nothing else is written to the tape:
    // this test never gets to read any data from it.
    castor::tape::tapeFile::LabelSession ls(*mockSys.fake.m_pathToDrive["/dev/nst0"],
      "V12345", true);
    mockSys.fake.m_pathToDrive["/dev/nst0"]->rewind();
    // Prepare a non-empty files to recall list to pass the empty session
    // detection
    for (int fseq = 1; fseq <= 10; ++fseq) {
      castor::tape::tapegateway::FileToRecallStruct ftr;
      ftr.setFileid(1000 + fseq);
      // Set the recall destination (/dev/null)
      ftr.setPath("/dev/null");
      // Record the file for recall, with an out of tape fSeq
      ftr.setFseq(fseq + 1000);
      sim.addFileToRecall(ftr, 1000);
    }
  }

  castor::tape::utils::DriveConfig driveConfig;
  driveConfig.unitName = "T10D6116";
  driveConfig.dgn = "T10KD6";
  driveConfig.devFilename = "/dev/tape_T10D6116";
  driveConfig.librarySlot = castor::mediachanger::ConfigLibrarySlot("manual");

  DataTransferConfig castorConf;
  castorConf.bufsz = 1024*1024; // 1 MB memory buffers
  castorConf.nbBufs = 10;
  castorConf.bulkRequestRecallMaxBytes = UINT64_C(100)*1000*1000*1000;
  castorConf.bulkRequestRecallMaxFiles = 1000;
  castorConf.nbDiskThreads = 3;

  castor::messages::AcsProxyDummy acs;
  castor::mediachanger::MmcProxyDummy mmc;
  castor::legacymsg::RmcProxyDummy rmc;
  castor::mediachanger::MediaChangerFacade mc(acs, mmc, rmc);
  castor::server::ProcessCap capUtils;
  castor::messages::TapeserverProxyDummy initialProcess;
  DataTransferSession sess("tapeHost", VDQMjob, logger, mockSys,
    driveConfig, mc, initialProcess, capUtils, castorConf);
  sess.execute();
  simRun.wait();

  const std::string sessionLog = logger.getLog();
  ASSERT_EQ("V12345", sess.getVid());
  // Currently, failures are reported by files and recall sessions do not fail.
  ASSERT_EQ(0, sim.m_sessionErrorCode);
  // The disk file opening is deferred until a first valid memory block is
  // received. As the mount failed, no disk file should have been opened.
  ASSERT_EQ(std::string::npos, sessionLog.find("Opened disk file for write"));
}
class tempFile {
public:
tempFile(size_t size): m_size(size) {
......
......@@ -49,13 +49,11 @@ bool DiskWriteTask::execute(RecallReportPacker& reporter,log::LogContext& lc,
using log::Param;
castor::utils::Timer localTime;
try{
std::auto_ptr<tape::diskFile::WriteFile> writeFile(
fileFactory.createWriteFile(m_recallingFile->path()));
// Placeholder for the disk file. We will open it only
// after getting a first correct memory block.
std::auto_ptr<tape::diskFile::WriteFile> writeFile;
log::ScopedParamContainer URLcontext(lc);
URLcontext.add("actualURL", writeFile->URL())
.add("path", m_recallingFile->path());
lc.log(LOG_INFO, "Opened disk file for write");
m_stats.openingTime+=localTime.secs(castor::utils::Timer::resetCounter);
URLcontext.add("path", m_recallingFile->path());
int blockId = 0;
unsigned long checksum = Payload::zeroAdler32();
......@@ -73,9 +71,17 @@ bool DiskWriteTask::execute(RecallReportPacker& reporter,log::LogContext& lc,
//will throw (thus exiting the loop) if something is wrong
checkErrors(mb,blockId,lc);
m_stats.checkingErrorTime += localTime.secs(castor::utils::Timer::resetCounter);
// If we got that far on the first pass, it's now good enough to open
// the disk file for writing...
if (!writeFile.get()) {
writeFile.reset(fileFactory.createWriteFile(m_recallingFile->path()));
URLcontext.add("actualURL", writeFile->URL());
lc.log(LOG_INFO, "Opened disk file for write");
m_stats.openingTime+=localTime.secs(castor::utils::Timer::resetCounter);
}
// Write the data.
m_stats.dataVolume+=mb->m_payload.size();
mb->m_payload.write(*writeFile);
m_stats.transferTime+=localTime.secs(castor::utils::Timer::resetCounter);
......
......@@ -29,12 +29,20 @@ namespace {
}
// General FakeDrive constructor.
// Records the tape capacity, the failure moment and the "fail to mount" flag
// in the corresponding members, starts with the current position and the
// compression statistics start index at 0 and the overflow flag cleared, then
// reserves max_fake_drive_record_length of capacity in m_tape.
// NOTE(review): this listing carries two versions of the signature line and
// of the last initializer line (without and with failToMount) one after the
// other; it looks like both sides of a diff -- confirm against the real file.
castor::tape::tapeserver::drive::FakeDrive::FakeDrive(uint64_t capacity,
FailureMoment failureMoment) throw():
FailureMoment failureMoment, bool failToMount) throw():
m_currentPosition(0), m_tapeCapacity(capacity), m_beginOfCompressStats(0),
m_failureMoment(failureMoment), m_tapeOverflow(false)
m_failureMoment(failureMoment), m_tapeOverflow(false), m_failToMount(failToMount)
{
m_tape.reserve(max_fake_drive_record_length);
}
// Convenience constructor taking only the "fail to mount" flag.
// The tape capacity is set to the maximum uint64_t value and the failure
// moment to OnWrite; the position and the compression statistics start index
// begin at 0 and the overflow flag is cleared. The body reserves
// max_fake_drive_record_length of capacity in m_tape.
castor::tape::tapeserver::drive::FakeDrive::FakeDrive(bool failToMount) throw():
  m_currentPosition(0),
  m_tapeCapacity(std::numeric_limits<uint64_t>::max()),
  m_beginOfCompressStats(0),
  m_failureMoment(OnWrite),
  m_tapeOverflow(false),
  m_failToMount(failToMount)
{
  m_tape.reserve(max_fake_drive_record_length);
}
castor::tape::tapeserver::drive::compressionStats castor::tape::tapeserver::drive::FakeDrive::getCompression() {
castor::tape::tapeserver::drive::compressionStats stats;
for(unsigned int i=m_beginOfCompressStats;i<m_tape.size();++i){
......@@ -209,6 +217,8 @@ void castor::tape::tapeserver::drive::FakeDrive::readFileMark(std::string contex
m_currentPosition++;
}
// Fake implementation of the wait for the drive to become ready.
// Returns true immediately, unless the drive was constructed with the
// "fail to mount" flag set, in which case a castor::exception::Exception is
// thrown. The timeoutSecond argument is not used.
bool castor::tape::tapeserver::drive::FakeDrive::waitUntilReady(int timeoutSecond) {
  if (!m_failToMount) {
    return true;
  }
  throw castor::exception::Exception("In FakeDrive::waitUntilReady: Failed to mount the tape");
}
bool castor::tape::tapeserver::drive::FakeDrive::isWriteProtected() {
......
......@@ -48,11 +48,14 @@ namespace drive {
private:
const enum FailureMoment m_failureMoment;
bool m_tapeOverflow;
bool m_failToMount;
public:
std::string contentToString() throw();
FakeDrive(uint64_t capacity=std::numeric_limits<uint64_t>::max(),
enum FailureMoment failureMoment=OnWrite) throw();
enum FailureMoment failureMoment=OnWrite,
bool failOnMount = false) throw();
FakeDrive(bool failOnMount) throw ();
virtual ~FakeDrive() throw(){}
virtual compressionStats getCompression() ;
virtual void clearCompressionStats() ;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment