Commit d773b3d0 authored by mvelosob's avatar mvelosob
Browse files

Remove tape unmount during archive if file has a wrong checksum/size (#1096)

Wrong reported size or checksum now simply causes the error to be reported and the file to be
skipped during a tape mount, instead of unmounting the tape, making queue consumption faster
in such cases.
parent 2575b715
......@@ -191,6 +191,20 @@ namespace daemon {
m_taskStats.totalTime = localTime.secs();
// Log the successful transfer
logWithStats(cta::log::INFO, "Left placeholder on tape after skipping unreadable file.", lc);
} catch(const RecoverableMigrationErrorException &e) {
//The disk reading failed due to a size mismatch or wrong checksum
//just want to report a failed job and proceed with the mount
if(currentErrorToCount.size()) {
watchdog.addToErrorCount(currentErrorToCount);
}
//log and circulate blocks
LogContext::ScopedParam sp(lc, Param("exceptionCode", cta::log::ERR));
LogContext::ScopedParam sp1(lc, Param("exceptionMessage", e.getMessageValue()));
lc.log( cta::log::ERR,"An error occurred for this file, but migration will proceed as error is recoverable");
circulateMemBlocks();
reportPacker.reportFailedJob(std::move(m_archiveJob),e, lc);
return;
} catch(const cta::exception::Exception& e){
//we can end up there because
//we failed to open the WriteFile
......@@ -276,7 +290,14 @@ namespace daemon {
tape::utils::suppresUnusedVariable(sp);
std::string errorMsg;
if(mb->isFailed()){
//blocks are marked as failed by the DiskReadTask due to a size mismatch
//or wrong checksums
//both errors should just result in skipping the migration of the file
//so we use a different exception to distinguish this case
errorMsg=mb->errorMsg();
m_errorFlag.set();
lc.log(cta::log::ERR,errorMsg);
throw RecoverableMigrationErrorException(errorMsg);
} else if (mb->isCanceled()) {
errorMsg="Received a block marked as cancelled";
} else{
......@@ -329,17 +350,8 @@ namespace daemon {
// watchdog.notify();
}
}
//------------------------------------------------------------------------------
// hasAnotherTaskTailed
//------------------------------------------------------------------------------
void TapeWriteTask::hasAnotherTaskTailed() const {
  // Nothing to do as long as the error flag has not been raised.
  if (!m_errorFlag) {
    return;
  }
  // A task has signaled an error: stop our own job by throwing ErrorFlag.
  throw castor::tape::tapeserver::daemon::ErrorFlag();
}
void TapeWriteTask::logWithStats(int level, const std::string& msg,
void TapeWriteTask::logWithStats(int level, const std::string& msg,
cta::log::LogContext& lc) const{
cta::log::ScopedParamContainer params(lc);
params.add("readWriteTime", m_taskStats.readWriteTime)
......
......@@ -35,9 +35,16 @@ namespace tape {
namespace tapeserver {
namespace daemon {
class MigrationReportPacker;
class Memblock;
class TapeSessionStats;
/**
* @brief Used when an error happens during a migration that should not result in unmounting the tape,
* but rather just in skipping the file migration
*
*/
CTA_GENERATE_EXCEPTION_CLASS(RecoverableMigrationErrorException);
class MigrationReportPacker;
class Memblock;
class TapeSessionStats;
/**
* The TapeWriteFileTask is responsible to write a single file onto tape as part of a migration
* session. Being a consumer of memory blocks, it inherits from the DataConsumer class. It also
......@@ -104,10 +111,11 @@ public:
void circulateMemBlocks();
/**
* Return the tasl stats. Should only be called after execute
* Return the task stats. Should only be called after execute
* @return
*/
const TapeSessionStats getTaskStats() const ;
private:
/**
* Log all localStats' stats + m_fileToMigrate's parameters
......@@ -115,12 +123,7 @@ private:
*/
void logWithStats(int level, const std::string& msg,
cta::log::LogContext& lc) const;
/**
*Throw an exception if m_errorFlag is set
*/
void hasAnotherTaskTailed() const ;
/**
* This function will check the consistency of the mem block and
* throw an exception if something goes wrong
......@@ -195,7 +198,6 @@ private:
* The remote file information
*/
std::string m_srcURL;
};
}}}}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment