Commit 9694a7e4 authored by Cedric CAFFY's avatar Cedric CAFFY
Browse files

Corrected race condition when the disk threads finished after the tape is unmounted.

If the disk threads are not finished after the TapeReadSingleThread finished, the status of the drive will be "DrainingToDisk"
This removed a RecallReportPacker exception when trying to release disk space reservation for backpressure
parent f53e2a9e
......@@ -2978,6 +2978,7 @@ void OStoreDB::updateDriveStatus(const common::dataStructures::DriveInfo& driveI
log::ScopedParamContainer params(lc);
params.add("diskSystem", dr.first)
.add("bytes", dr.second)
.add("previousStatus",toString(previousStatus))
.add("newStatus", toString(driveState.driveStatus));
lc.log(log::WARNING, "In OStoreDB::updateDriveStatus(): will clear non-empty disk space reservation on status change.");
}
......@@ -3901,7 +3902,6 @@ void OStoreDB::RetrieveMount::reserveDiskSpace(const DiskSpaceReservationRequest
.add("objectName", ds.getAddressIfSet());
lc.log(log::DEBUG, "In RetrieveMount::reserveDiskSpace(): state after reservation.");
}
//TODO BUG HERE, see mountId=\"10\" in downloads/tpsrv0*
ds.commit();
}
......
......@@ -86,6 +86,10 @@ namespace unitTests{
cta::threading::MutexLocker ml(m_mutex);
endSessionsWithError++;
}
void setDiskDone() override {}
void setTapeDone() override {}
MockRecallReportPacker(cta::RetrieveMount *rm,cta::log::LogContext lc):
RecallReportPacker(rm,lc), completeJobs(0), failedJobs(0),
endSessions(0), endSessionsWithError(0) {}
......
......@@ -178,8 +178,6 @@ void DiskWriteThreadPool::DiskWriteWorkerThread::run() {
logWithStat(cta::log::INFO, "Finishing DiskWriteWorkerThread");
m_parentThreadPool.addThreadStats(m_threadStat);
if(0 == --m_parentThreadPool.m_nbActiveThread){
m_parentThreadPool.m_reporter.setDiskDone();
m_parentThreadPool.m_reporter.setTapeDone();
//Im the last Thread alive, report end of session
if(m_parentThreadPool.m_failedWriteCount==0){
......
......@@ -131,6 +131,7 @@ void RecallReportPacker::ReportSuccessful::execute(RecallReportPacker& parent){
//ReportEndofSession::execute
//------------------------------------------------------------------------------
void RecallReportPacker::ReportEndofSession::execute(RecallReportPacker& parent){
parent.setDiskDone();
if(!parent.errorHappened()){
parent.m_lc.log(cta::log::INFO,"Nominal RecallReportPacker::EndofSession has been reported");
if (parent.m_watchdog) {
......@@ -167,7 +168,6 @@ bool RecallReportPacker::ReportEndofSession::goingToEnd() {
void RecallReportPacker::ReportDriveStatus::execute(RecallReportPacker& parent){
parent.m_retrieveMount->setDriveStatus(m_status);
if(m_status==cta::common::dataStructures::DriveStatus::Unmounting) {
parent.m_retrieveMount->diskComplete();
parent.m_retrieveMount->tapeComplete();
}
}
......@@ -183,6 +183,7 @@ bool RecallReportPacker::ReportDriveStatus::goingToEnd() {
//ReportEndofSessionWithErrors::execute
//------------------------------------------------------------------------------
void RecallReportPacker::ReportEndofSessionWithErrors::execute(RecallReportPacker& parent){
parent.setDiskDone();
if(parent.m_errorHappened) {
LogContext::ScopedParam(parent.m_lc,Param("errorCode",m_error_code));
parent.m_lc.log(cta::log::ERR,m_message);
......@@ -320,7 +321,13 @@ void RecallReportPacker::WorkerThread::run(){
// Make sure the last batch of reports got cleaned up.
try {
m_parent.fullCheckAndFinishAsyncExecute();
m_parent.fullCheckAndFinishAsyncExecute();
if(m_parent.m_diskThreadComplete){
//The m_parent.m_diskThreadComplete is set to true when a ReportEndOfSession or a ReportAndOfSessionWithError
//has been put. It is only after the fullCheckandFinishAsyncExecute is finished that we can say to the mount that the disk thread is complete.
m_parent.m_lc.log(cta::log::DEBUG, "In RecallReportPacker::WorkerThread::run(): all disk threads are finished, telling the mount that Disk threads are complete");
m_parent.m_retrieveMount->diskComplete();
}
} catch(const cta::exception::Exception& e){
cta::log::ScopedParamContainer params(m_parent.m_lc);
params.add("exceptionWhat", e.getMessageValue())
......@@ -389,6 +396,7 @@ void RecallReportPacker::fullCheckAndFinishAsyncExecute() {
//------------------------------------------------------------------------------
void RecallReportPacker::setTapeDone() {
m_tapeThreadComplete = true;
m_retrieveMount->tapeComplete();
}
//------------------------------------------------------------------------------
......
......@@ -95,9 +95,6 @@ TEST_F(castor_tape_tapeserver_daemon_RecallReportPackerTest, RecallReportPackerN
rrp.reportCompletedJob(std::move(job1));
rrp.reportCompletedJob(std::move(job2));
rrp.setDiskDone();
rrp.setTapeDone();
rrp.reportDriveStatus(cta::common::dataStructures::DriveStatus::Unmounting);
rrp.reportEndOfSession();
......@@ -151,9 +148,6 @@ TEST_F(castor_tape_tapeserver_daemon_RecallReportPackerTest, RecallReportPackerB
const cta::exception::Exception ex(error_msg);
rrp.reportFailedJob(std::move(job3), ex);
rrp.setDiskDone();
rrp.setTapeDone();
rrp.reportDriveStatus(cta::common::dataStructures::DriveStatus::Unmounting);
rrp.reportEndOfSession();
......
......@@ -111,11 +111,10 @@ castor::tape::tapeserver::daemon::TapeReadSingleThread::TapeCleaning::~TapeClean
// And return the tape to the library
// In case of manual mode, this will be filtered by the rmc daemon
// (which will do nothing)
currentErrorToCount = "Error_tapeDismount";
currentErrorToCount = "Error_tapeDismount";
m_this.m_rrp.reportDriveStatus(cta::common::dataStructures::DriveStatus::Unmounting);
m_this.m_mc.dismountTape(m_this.m_volInfo.vid, m_this.m_drive.config.librarySlot());
m_this.m_drive.disableLogicalBlockProtection();
m_this.m_rrp.reportDriveStatus(cta::common::dataStructures::DriveStatus::Up);
m_this.m_stats.unmountTime += m_timer.secs(cta::utils::Timer::resetCounter);
m_this.m_logContext.log(cta::log::INFO, cta::mediachanger::TAPE_LIBRARY_TYPE_MANUAL != m_this.m_drive.config.librarySlot().getLibraryType() ?
"TapeReadSingleThread : tape unmounted":"TapeReadSingleThread : tape NOT unmounted (manual mode)");
......@@ -356,6 +355,7 @@ void castor::tape::tapeserver::daemon::TapeReadSingleThread::run() {
cta::log::ScopedParamContainer params(m_logContext);
params.add("status", "success");
m_stats.totalTime = totalTimer.secs();
m_rrp.setTapeDone();
logWithStat(cta::log::INFO, "Tape thread complete",
params);
// Report one last time the stats, after unloading/unmounting.
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment