Added missing catch of CTA exceptions in TapeWriteSingleThread.

Added protection against creation of checksums of the wrong size and added casts where needed.

Added missing catch of CTA exceptions in TapeWriteSingleThread.
00b6d862 · Eric Cano · 7bf5c804 · 00b6d862 · 00b6d862 · 00b6d862
Commit 00b6d862 authored 9 years ago by Eric Cano
--- a/common/checksum/Checksum.hpp
+++ b/common/checksum/Checksum.hpp
@@ -60,7 +60,21 @@ public:
   * @param val A numeric value to store in the byte array.
   */
  template <typename t>
-  Checksum(const ChecksumType &type, t val): m_type(type) { setNumeric(val); }
+  Checksum(const ChecksumType &type, t val): m_type(type) { 
+    switch (m_type) {
+      case CHECKSUMTYPE_ADLER32:
+        if (sizeof(t) != 4) {
+          std::stringstream err;
+          err << "In Checksum::Checksum(type,value): unexpected value size="
+                  << sizeof(t) << " expected=4";
+          throw cta::exception::Exception(err.str());
+        }
+        break;
+      default:
+        throw cta::exception::Exception("In Checksum::Checksum(type,value): unsupported type for any value");
+    }
+    setNumeric(val);
+  }
  
  /**
   * String based constructor.

--- a/tapeserver/castor/tape/tapeserver/daemon/DiskWriteTask.cpp
+++ b/tapeserver/castor/tape/tapeserver/daemon/DiskWriteTask.cpp
@@ -121,7 +121,7 @@ bool DiskWriteTask::execute(RecallReportPacker& reporter,log::LogContext& lc,
    } //end of while(1)
    logWithStat(LOG_INFO, "File successfully transfered to disk",lc);
    m_retrieveJob->transferredSize = m_stats.dataVolume;
-    m_retrieveJob->transferredChecksum = cta::Checksum(cta::Checksum::CHECKSUMTYPE_ADLER32, checksum);
+    m_retrieveJob->transferredChecksum = cta::Checksum(cta::Checksum::CHECKSUMTYPE_ADLER32, (uint32_t)checksum);
    reporter.reportCompletedJob(std::move(m_retrieveJob));
    m_stats.waitReportingTime+=localTime.secs(castor::utils::Timer::resetCounter);
    m_stats.transferTime = transferTime.secs();

--- a/tapeserver/castor/tape/tapeserver/daemon/TapeWriteSingleThread.cpp
+++ b/tapeserver/castor/tape/tapeserver/daemon/TapeWriteSingleThread.cpp
@@ -23,6 +23,7 @@

 #include "castor/tape/tapeserver/daemon/TapeWriteSingleThread.hpp"
 #include "castor/tape/tapeserver/daemon/MigrationTaskInjector.hpp"
+#include "shift/serrno.h"
 //------------------------------------------------------------------------------
 //constructor
 //------------------------------------------------------------------------------
@@ -310,7 +311,73 @@ void castor::tape::tapeserver::daemon::TapeWriteSingleThread::run() {
    logWithStats(LOG_INFO, "Tape thread complete",
            params);
    m_reportPacker.reportEndOfSessionWithErrors(errorMessage,errorCode);
-  }    
+  }
+  catch(const cta::exception::Exception& e){
+    //we end there because write session could not be opened 
+    //or because a task failed or because flush failed
+    
+    // First off, indicate the problem to the task injector so it does not inject
+    // more work in the pipeline
+    // If the problem did not originate here, we just re-flag the error, and
+    // this has no effect, but if we had a problem with a non-file operation
+    // like mounting the tape, then we have to signal the problem to the disk
+    // side and the task injector, which will trigger the end of session.
+    m_injector->setErrorFlag();
+    // We can still update the session stats one last time (unmount timings
+    // should have been updated by the RAII cleaner/unmounter).
+    m_watchdog.updateStats(m_stats);
+    
+    // If we reached the end of tape, this is not an error (ENOSPC)
+    try {
+      // If it's not the error we're looking for, we will go about our business
+      // in the catch section. dynamic_cast will throw if e is not an Errnum, and
+      // we throw ourselves if the error code is not the one we want.
+      const castor::exception::Errnum & en = 
+        dynamic_cast<const castor::exception::Errnum &>(e);
+      if(en.errorNumber()!= ENOSPC) {
+        throw 0;
+      }
+      // This is indeed the end of the tape. Not an error.
+      m_watchdog.setErrorCount("Info_tapeFilledUp",1);
+    } catch (...) {
+      // The error is not an ENOSPC, so it is, indeed, an error.
+      // If we got here with a new error, currentErrorToCount will be non-empty,
+      // and we will pass the error name to the watchdog.
+      if(currentErrorToCount.size()) {
+        m_watchdog.addToErrorCount(currentErrorToCount);
+      }
+    }
+    
+    //first empty all the tasks and circulate mem blocks
+    while(1) {
+      std::unique_ptr<TapeWriteTask>  task(m_tasks.pop());
+      if(task.get()==NULL) {
+        break;
+      }
+      task->circulateMemBlocks();
+    }
+    // Prepare the standard error codes for the session
+    std::string errorMessage(e.getMessageValue());
+    int errorCode(SEINTERNAL);
+    // Override if we got an ENOSPC error (end of tape)
+    // This is reported with its own error code and message.
+    try {
+      const castor::exception::Errnum & errnum = 
+          dynamic_cast<const castor::exception::Errnum &> (e);
+      if (ENOSPC == errnum.errorNumber()) {
+        errorCode = ENOSPC;
+        errorMessage = "End of migration due to tape full";
+      }
+    } catch (...) {}
+    // then log the end of write thread
+    log::ScopedParamContainer params(m_logContext);
+    params.add("status", "error")
+          .add("ErrorMesage", errorMessage);
+    m_stats.totalTime = totalTimer.secs();
+    logWithStats(LOG_INFO, "Tape thread complete",
+            params);
+    m_reportPacker.reportEndOfSessionWithErrors(errorMessage,errorCode);
+  }      
 }

 //------------------------------------------------------------------------------

--- a/tapeserver/castor/tape/tapeserver/daemon/TapeWriteTask.cpp
+++ b/tapeserver/castor/tape/tapeserver/daemon/TapeWriteTask.cpp
@@ -133,7 +133,7 @@ namespace daemon {
      // Record the fSeq in the tape session
      session.reportWrittenFSeq(m_archiveJob->nameServerTapeFile.tapeFileLocation.fSeq);
      m_archiveJob->nameServerTapeFile.checksum = 
-          cta::Checksum(cta::Checksum::CHECKSUMTYPE_ADLER32, ckSum);
+          cta::Checksum(cta::Checksum::CHECKSUMTYPE_ADLER32, (uint32_t)ckSum);
      m_archiveJob->nameServerTapeFile.compressedSize = m_taskStats.dataVolume;
      m_archiveJob->nameServerTapeFile.tapeFileLocation.blockId = output->getBlockId();
      reportPacker.reportCompletedJob(std::move(m_archiveJob));