From e1764d11d7b8fef4dd3bde5832fe78af063f24f9 Mon Sep 17 00:00:00 2001
From: Eric Wayne Vaandering <ewv@fnal.gov>
Date: Tue, 9 Aug 2022 13:04:41 +0200
Subject: [PATCH] Add code for EnstoreFileReader and EnstoreReadSession.

---
 common/dataStructures/LabelFormat.hpp         |   4 +-
 .../tape/tapeserver/file/CMakeLists.txt       |   2 +
 .../file/CpioFileHeaderStructure.cpp          |   2 +-
 .../tapeserver/file/EnstoreFileReader.cpp     | 145 ++++++++++++++++++
 .../tapeserver/file/EnstoreFileReader.hpp     |  62 ++++++++
 .../tapeserver/file/EnstoreReadSession.cpp    |  51 ++++++
 .../tapeserver/file/EnstoreReadSession.hpp    |  54 +++++++
 .../tape/tapeserver/file/FileReader.hpp       |   2 +-
 .../tapeserver/file/FileReaderFactory.cpp     |   5 +
 .../tapeserver/file/ReadSessionFactory.cpp    |   3 +
 10 files changed, 327 insertions(+), 3 deletions(-)
 create mode 100644 tapeserver/castor/tape/tapeserver/file/EnstoreFileReader.cpp
 create mode 100644 tapeserver/castor/tape/tapeserver/file/EnstoreFileReader.hpp
 create mode 100644 tapeserver/castor/tape/tapeserver/file/EnstoreReadSession.cpp
 create mode 100644 tapeserver/castor/tape/tapeserver/file/EnstoreReadSession.hpp

diff --git a/common/dataStructures/LabelFormat.hpp b/common/dataStructures/LabelFormat.hpp
index cf6e7d8b13..10ad63ca4e 100644
--- a/common/dataStructures/LabelFormat.hpp
+++ b/common/dataStructures/LabelFormat.hpp
@@ -31,7 +31,8 @@ namespace dataStructures {
 struct Label {
   enum class Format : std::uint8_t {
     CTA = 0x00,
-    OSM = 0x01
+    OSM = 0x01,
+    Enstore = 0x02
   };
 
   static Format validateFormat(const std::optional<std::uint8_t>& ouiFormat, const std::string& strContext) {
@@ -43,6 +44,7 @@ struct Label {
     switch (format) {
       case Format::CTA:
       case Format::OSM:
+      case Format::Enstore:
         return format;
       default:
       {
diff --git a/tapeserver/castor/tape/tapeserver/file/CMakeLists.txt b/tapeserver/castor/tape/tapeserver/file/CMakeLists.txt
index 618ae492f0..3e42441adf 100644
--- a/tapeserver/castor/tape/tapeserver/file/CMakeLists.txt
+++ b/tapeserver/castor/tape/tapeserver/file/CMakeLists.txt
@@ -30,6 +30,8 @@ set(TAPESERVER_FILE_LIBRARY_SRCS
   LabelSession.cpp
   OsmFileReader.cpp
   OsmReadSession.cpp
+  EnstoreFileReader.cpp
+  EnstoreReadSession.cpp
   OsmFileStructure.cpp
   CpioFileHeaderStructure.cpp
   ReadSession.cpp
diff --git a/tapeserver/castor/tape/tapeserver/file/CpioFileHeaderStructure.cpp b/tapeserver/castor/tape/tapeserver/file/CpioFileHeaderStructure.cpp
index 2b04c0f288..c4b2e0106d 100644
--- a/tapeserver/castor/tape/tapeserver/file/CpioFileHeaderStructure.cpp
+++ b/tapeserver/castor/tape/tapeserver/file/CpioFileHeaderStructure.cpp
@@ -48,7 +48,7 @@ size_t castor::tape::tapeFile::CPIO::decode(const uint8_t* puiData, const size_t
       &m_uiNlink, &m_uiRdev, reinterpret_cast<uint64_t*>(&m_ulMtime),
       &m_uiNameSize, &m_ui64FileSize, &m_strFid[0]);
   } else {
-    strFormat << "%06c%06o%06o%06o%06o%06o%06o%06o%011lo%06oH%010lX%" <<  CPIO::PATHLEN - 1 << "s";
+    strFormat << "%06c%06o%06o%06o%06o%06o%06o%06o%011lo%06o%011lo%" <<  CPIO::PATHLEN - 1 << "s";
     sscanf(reinterpret_cast<const char*>(puiData), strFormat.str().c_str(),
       &m_strMagic[0], &m_uiDev, &m_uiIno, &m_uiMode, &m_uiUid, &m_uiGid,
       &m_uiNlink, &m_uiRdev, reinterpret_cast<uint64_t*>(&m_ulMtime),
diff --git a/tapeserver/castor/tape/tapeserver/file/EnstoreFileReader.cpp b/tapeserver/castor/tape/tapeserver/file/EnstoreFileReader.cpp
new file mode 100644
index 0000000000..278b82139d
--- /dev/null
+++ b/tapeserver/castor/tape/tapeserver/file/EnstoreFileReader.cpp
@@ -0,0 +1,145 @@
+/*
+ * @project      The CERN Tape Archive (CTA)
+ * @copyright    Copyright © 2022 CERN
+ * @license      This program is free software, distributed under the terms of the GNU General Public
+ *               Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can
+ *               redistribute it and/or modify it under the terms of the GPL Version 3, or (at your
+ *               option) any later version.
+ *
+ *               This program is distributed in the hope that it will be useful, but WITHOUT ANY
+ *               WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ *               PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ *               In applying this licence, CERN does not waive the privileges and immunities
+ *               granted to it by virtue of its status as an Intergovernmental Organization or
+ *               submit itself to any jurisdiction.
+ */
+
+#include <limits>
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include "castor/tape/tapeserver/drive/DriveInterface.hpp"
+#include "castor/tape/tapeserver/file/CtaReadSession.hpp"
+#include "castor/tape/tapeserver/file/EnstoreFileReader.hpp"
+#include "castor/tape/tapeserver/file/HeaderChecker.hpp"
+#include "castor/tape/tapeserver/file/Structures.hpp"
+#include "scheduler/RetrieveJob.hpp"
+
+namespace castor {
+namespace tape {
+namespace tapeFile {
+
+EnstoreFileReader::EnstoreFileReader(const std::unique_ptr<ReadSession> &rs, const cta::RetrieveJob &fileToRecall)
+  : FileReader(rs, fileToRecall) {  // both arguments are forwarded unchanged to the base class
+  setPositioningMethod(cta::PositioningMethod::ByFSeq);  // Enstore did not store block IDs, so always position by fSeq
+}
+
+void EnstoreFileReader::setPositioningMethod(const cta::PositioningMethod &newMethod) {
+  m_positionCommandCode = newMethod;  // protected member inherited from FileReader
+}
+
+void EnstoreFileReader::positionByFseq(const cta::RetrieveJob &fileToRecall) {
+  /* Positions the drive at the start of the requested file by spacing over file marks. This is a bit tricky
+     since CTA starts with fSeq=1 before reading the label, whereas Enstore stores fSeq=1 as the first file
+     AFTER the label. Throws cta::exception::InvalidArgument when the requested fSeq is smaller than 1. */
+
+  const auto fSeq = fileToRecall.selectedTapeFile().fSeq;
+  if (fSeq < 1) {
+    std::stringstream err;
+    err << "Unexpected fileId in EnstoreFileReader::positionByFseq (fSeq expected >=1, got: "
+        << fSeq << ")";
+    throw cta::exception::InvalidArgument(err.str());
+  }
+
+  const int64_t fSeq_delta = static_cast<int64_t>(fSeq) - static_cast<int64_t>(m_session->getCurrentFseq());
+
+  if (fSeq == 1) {
+    m_session->m_drive.rewind();
+    m_session->m_drive.spaceFileMarksForward(1);  // move past the file mark that follows the label
+  } else if (fSeq_delta == -1) {
+      // do nothing we are in the correct place
+  } else if (fSeq_delta >= 0) {
+    m_session->m_drive.spaceFileMarksForward(static_cast<uint32_t>(fSeq_delta+1));
+  } else {  // fSeq_delta < -1: negate directly, an unqualified abs() may pick the int overload and truncate
+    m_session->m_drive.spaceFileMarksBackwards(static_cast<uint32_t>(-fSeq_delta));
+    m_session->m_drive.readFileMark("[EnstoreFileReader::position] Reading file mark right before the header of the file we want to read");
+  }
+  m_session->setCurrentFseq(fSeq);
+  setBlockSize(1024*1024);  // Enstore used 1M size blocks for T10K, M8, and LTO-8 tapes
+}
+
+void EnstoreFileReader::positionByBlockID(const cta::RetrieveJob &fileToRecall) {  // always throws; fileToRecall is not used
+  throw NotImplemented("EnstoreFileReader::positionByBlockID() Cannot be implemented. Enstore did not store block IDs");
+}
+
+// Sets the block size that readNextDataBlock() will insist on; throws TapeFormatError for a size of zero.
+void EnstoreFileReader::setBlockSize(size_t uiBlockSize) {
+  if (uiBlockSize < 1) {
+    // Validate before assigning so that a rejected value never overwrites the current block size.
+    throw TapeFormatError(std::string("[EnstoreFileReader::setBlockSize] - Invalid block size detected"));
+  }
+  m_currentBlockSize = uiBlockSize;
+}
+
+/**
+ * Reads the next tape block of the current file and strips the CPIO framing from it.
+ * @param data destination buffer; up to `size` bytes are written to it
+ * @param size length of `data`; must equal the block size set through setBlockSize()
+ * @return number of payload bytes stored in `data`
+ * @throws WrongBlockSize or TapeFormatError on bad input, EndOfFile once no payload is left
+ */
+size_t EnstoreFileReader::readNextDataBlock(void *data, const size_t size) {
+  if (size != m_currentBlockSize) {
+    throw WrongBlockSize();
+  }
+  if (size < CPIO::MAXHEADERSIZE) {
+    std::ostringstream ex_str;
+    ex_str << "Invalid block size: " << size << " - " << "the block size is smaller then max size of a CPIO header: " << CPIO::MAXHEADERSIZE;
+    throw TapeFormatError(ex_str.str());
+  }
+  size_t bytes_read = 0;
+  if (!m_cpioHeader.valid()) {
+    // First block of the file: it begins with the CPIO header, which must not reach the caller.
+    // The scratch block is owned by a unique_ptr so it is freed even if readBlock() or decode() throws.
+    const std::unique_ptr<uint8_t[]> tmpBlock(new uint8_t[size]);
+    bytes_read = m_session->m_drive.readBlock(tmpBlock.get(), size);
+    const size_t uiHeaderSize = m_cpioHeader.decode(tmpBlock.get(), size);
+    if (uiHeaderSize > bytes_read) {
+      // Otherwise the subtraction below wraps around and memcpy() reads past the scratch block.
+      std::ostringstream ex_str;
+      ex_str << "[EnstoreFileReader::readNextDataBlock] - Block of " << bytes_read
+             << " bytes is shorter than its CPIO header of " << uiHeaderSize << " bytes";
+      throw TapeFormatError(ex_str.str());
+    }
+    const size_t uiResiduesSize = bytes_read - uiHeaderSize;
+    // Hand over what follows the header, capped at the file size announced by the header.
+    bytes_read = uiResiduesSize >= m_cpioHeader.m_ui64FileSize ? m_cpioHeader.m_ui64FileSize : uiResiduesSize;
+    m_ui64CPIODataSize = bytes_read;
+    memcpy(data, tmpBlock.get() + uiHeaderSize, bytes_read);
+  } else {
+    bytes_read = m_session->m_drive.readBlock(data, size);
+    m_ui64CPIODataSize += bytes_read;
+    if (m_ui64CPIODataSize > m_cpioHeader.m_ui64FileSize && bytes_read > 0) {
+      // File is ready: drop the bytes read past the announced file size. Clamp at zero so that
+      // surplus data spanning more than one block cannot wrap the unsigned subtraction.
+      const uint64_t excess = m_ui64CPIODataSize - m_cpioHeader.m_ui64FileSize;
+      bytes_read = excess >= bytes_read ? 0 : bytes_read - excess;
+    }
+  }
+
+  // end of file reached!
+  if (!bytes_read) {
+    m_session->setCurrentFseq(m_session->getCurrentFseq() + 2); // +1 for after the current file, +1 for label being file #1
+    m_session->setCurrentFilePart(PartOfFile::Header);
+    // the following is a normal day exception: end of files exceptions are thrown at the end of each file being read
+    throw EndOfFile();
+  }
+
+  return bytes_read;
+}
+
+}  // namespace tapeFile
+}  // namespace tape
+}  // namespace castor
diff --git a/tapeserver/castor/tape/tapeserver/file/EnstoreFileReader.hpp b/tapeserver/castor/tape/tapeserver/file/EnstoreFileReader.hpp
new file mode 100644
index 0000000000..13810b2c2c
--- /dev/null
+++ b/tapeserver/castor/tape/tapeserver/file/EnstoreFileReader.hpp
@@ -0,0 +1,62 @@
+/*
+ * @project      The CERN Tape Archive (CTA)
+ * @copyright    Copyright © 2022 CERN
+ * @license      This program is free software, distributed under the terms of the GNU General Public
+ *               Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can
+ *               redistribute it and/or modify it under the terms of the GPL Version 3, or (at your
+ *               option) any later version.
+ *
+ *               This program is distributed in the hope that it will be useful, but WITHOUT ANY
+ *               WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ *               PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ *               In applying this licence, CERN does not waive the privileges and immunities
+ *               granted to it by virtue of its status as an Intergovernmental Organization or
+ *               submit itself to any jurisdiction.
+ */
+
+#pragma once
+
+#include <fstream>
+#include <memory>
+
+#include "castor/tape/tapeserver/file/CpioFileHeaderStructure.hpp"
+#include "castor/tape/tapeserver/file/FileReader.hpp"
+
+namespace castor {
+namespace tape {
+namespace tapeFile {
+
+class EnstoreFileReader : public FileReader {
+public:
+  CTA_GENERATE_EXCEPTION_CLASS(NotImplemented);  // exception type thrown by positionByBlockID()
+  /**
+    * Constructor of the EnstoreFileReader. It forwards both arguments to the FileReader
+    * constructor and selects positioning by fSeq, since Enstore did not store block IDs.
+    * @param rs: session to be bound to
+    * @param fileToRecall: the file which will be recalled
+    */
+  EnstoreFileReader(const std::unique_ptr<ReadSession> &rs, const cta::RetrieveJob &fileToRecall);
+
+  /**
+    * Destructor of the EnstoreFileReader (defaulted); the class declares no destructor logic of its own.
+    */
+  ~EnstoreFileReader() override = default;
+  // Reads one tape block into data and returns the number of payload bytes left after CPIO filtering.
+  size_t readNextDataBlock(void *data, const size_t size) override;
+
+private:
+
+  // State of the CPIO wrapper of the file currently being read
+  CPIO m_cpioHeader;
+  uint64_t m_ui64CPIODataSize = 0;  // running count of bytes read for the current file after the CPIO header
+
+  void setPositioningMethod(const cta::PositioningMethod &newMethod);  // stores the method in the FileReader base
+  void positionByFseq(const cta::RetrieveJob &fileToRecall) override;  // positions by spacing over file marks
+  void positionByBlockID(const cta::RetrieveJob &fileToRecall) override;  // always throws NotImplemented
+  void setBlockSize(const size_t uiBlockSize);  // throws TapeFormatError when the size is zero
+};
+
+}  // namespace tapeFile
+}  // namespace tape
+}  // namespace castor
diff --git a/tapeserver/castor/tape/tapeserver/file/EnstoreReadSession.cpp b/tapeserver/castor/tape/tapeserver/file/EnstoreReadSession.cpp
new file mode 100644
index 0000000000..bfb2a006f2
--- /dev/null
+++ b/tapeserver/castor/tape/tapeserver/file/EnstoreReadSession.cpp
@@ -0,0 +1,51 @@
+/*
+ * @project      The CERN Tape Archive (CTA)
+ * @copyright    Copyright © 2022 CERN
+ * @license      This program is free software, distributed under the terms of the GNU General Public
+ *               Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can
+ *               redistribute it and/or modify it under the terms of the GPL Version 3, or (at your
+ *               option) any later version.
+ *
+ *               This program is distributed in the hope that it will be useful, but WITHOUT ANY
+ *               WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ *               PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ *               In applying this licence, CERN does not waive the privileges and immunities
+ *               granted to it by virtue of its status as an Intergovernmental Organization or
+ *               submit itself to any jurisdiction.
+ */
+
+#include <memory>
+#include <string>
+
+#include "castor/tape/tapeserver/file/Exceptions.hpp"
+#include "castor/tape/tapeserver/file/HeaderChecker.hpp"
+#include "castor/tape/tapeserver/file/EnstoreReadSession.hpp"
+#include "castor/tape/tapeserver/file/Structures.hpp"
+
+namespace castor {
+namespace tape {
+namespace tapeFile {
+
+// Rewinds the tape, turns LBP off, then reads the VOL1 label and validates it against volInfo.vid.
+EnstoreReadSession::EnstoreReadSession(tapeserver::drive::DriveInterface &drive,
+  const tapeserver::daemon::VolumeInfo &volInfo, const bool useLbp)
+  : ReadSession(drive, volInfo, useLbp) {
+  m_drive.rewind();
+  m_drive.disableLogicalBlockProtection();  // done regardless of useLbp
+  m_detectedLbp = false;
+
+  VOL1 vol1;
+  m_drive.readExactBlock(reinterpret_cast<void *>(&vol1), sizeof(vol1), "[EnstoreReadSession::EnstoreReadSession()] - Reading VOL1");
+  try {
+    vol1.verify("0");  // NOTE(review): "0" is presumably the label version written by Enstore - confirm
+  } catch (const std::exception &e) {  // report any verification failure as a tape format problem
+    throw TapeFormatError(e.what());
+  }
+  HeaderChecker::checkVOL1(vol1, volInfo.vid);
+  // after which we are at the end of VOL1 header (e.g. beginning of first file)
+}
+
+}  // namespace tapeFile
+}  // namespace tape
+}  // namespace castor
diff --git a/tapeserver/castor/tape/tapeserver/file/EnstoreReadSession.hpp b/tapeserver/castor/tape/tapeserver/file/EnstoreReadSession.hpp
new file mode 100644
index 0000000000..cd88daeace
--- /dev/null
+++ b/tapeserver/castor/tape/tapeserver/file/EnstoreReadSession.hpp
@@ -0,0 +1,54 @@
+/*
+ * @project      The CERN Tape Archive (CTA)
+ * @copyright    Copyright © 2022 CERN
+ * @license      This program is free software, distributed under the terms of the GNU General Public
+ *               Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". You can
+ *               redistribute it and/or modify it under the terms of the GPL Version 3, or (at your
+ *               option) any later version.
+ *
+ *               This program is distributed in the hope that it will be useful, but WITHOUT ANY
+ *               WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ *               PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ *               In applying this licence, CERN does not waive the privileges and immunities
+ *               granted to it by virtue of its status as an Intergovernmental Organization or
+ *               submit itself to any jurisdiction.
+ */
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "castor/tape/tapeserver/daemon/VolumeInfo.hpp"
+#include "castor/tape/tapeserver/file/ReadSession.hpp"
+
+namespace castor {
+namespace tape {
+namespace tapeFile {
+
+/**
+  * Class keeping track of a whole tape read session over an Enstore formatted
+  * tape. The session will keep track of the overall coherency of the session
+  * and check for everything to be coherent. The tape should be mounted in
+  * the drive before the EnstoreReadSession is started (i.e. constructed).
+  * Likewise, tape unmount is the business of the user.
+  */
+class EnstoreReadSession : public ReadSession {
+public:
+  /**
+    * Constructor of the EnstoreReadSession. It rewinds the tape, disables logical block protection,
+    * reads the VOL1 label and checks it against the expected VID. Throws an exception in case of mismatch.
+    * @param drive: drive object to which we bind the session
+    * @param volInfo: volume information of the tape we would like to read from; its vid is checked
+    * @param useLbp: LBP option, forwarded to ReadSession; this constructor itself always disables LBP
+    */
+  EnstoreReadSession(tapeserver::drive::DriveInterface &drive, const tapeserver::daemon::VolumeInfo &volInfo,
+    const bool useLbp);
+
+  ~EnstoreReadSession() override = default;  // this class declares no members of its own to clean up
+};
+
+}  // namespace tapeFile
+}  // namespace tape
+}  // namespace castor
diff --git a/tapeserver/castor/tape/tapeserver/file/FileReader.hpp b/tapeserver/castor/tape/tapeserver/file/FileReader.hpp
index 330ad8624f..2c99267e23 100644
--- a/tapeserver/castor/tape/tapeserver/file/FileReader.hpp
+++ b/tapeserver/castor/tape/tapeserver/file/FileReader.hpp
@@ -94,12 +94,12 @@ protected:
     */
   const std::unique_ptr<ReadSession> &m_session;
 
-private:
   /**
     * What kind of command we use to position ourself on the tape (fseq or blockid)
     */
   cta::PositioningMethod m_positionCommandCode;
 
+private:
   /**
     * Description of the LBP mode with which the files is read.
     */
diff --git a/tapeserver/castor/tape/tapeserver/file/FileReaderFactory.cpp b/tapeserver/castor/tape/tapeserver/file/FileReaderFactory.cpp
index 53e54c8db4..f6c5f0bc43 100644
--- a/tapeserver/castor/tape/tapeserver/file/FileReaderFactory.cpp
+++ b/tapeserver/castor/tape/tapeserver/file/FileReaderFactory.cpp
@@ -19,6 +19,7 @@
 #include <sstream>
 
 #include "castor/tape/tapeserver/file/CtaFileReader.hpp"
+#include "castor/tape/tapeserver/file/EnstoreFileReader.hpp"
 #include "castor/tape/tapeserver/file/FileReaderFactory.hpp"
 #include "castor/tape/tapeserver/file/OsmFileReader.hpp"
 #include "castor/tape/tapeserver/file/ReadSession.hpp"
@@ -42,6 +43,10 @@ std::unique_ptr<FileReader> FileReaderFactory::create(const std::unique_ptr<Read
       reader = std::make_unique<OsmFileReader>(readSession, fileToRecall);
       break;
     }
+    case LabelFormat::Enstore: {
+      reader = std::make_unique<EnstoreFileReader>(readSession, fileToRecall);
+      break;
+    }
     default: {
       std::ostringstream ossLabelFormat;
       ossLabelFormat << std::showbase << std::internal << std::setfill('0') << std::hex << std::setw(4)
diff --git a/tapeserver/castor/tape/tapeserver/file/ReadSessionFactory.cpp b/tapeserver/castor/tape/tapeserver/file/ReadSessionFactory.cpp
index 171e04ed7f..5138d5f9ec 100644
--- a/tapeserver/castor/tape/tapeserver/file/ReadSessionFactory.cpp
+++ b/tapeserver/castor/tape/tapeserver/file/ReadSessionFactory.cpp
@@ -19,6 +19,7 @@
 #include <sstream>
 
 #include "castor/tape/tapeserver/file/CtaReadSession.hpp"
+#include "castor/tape/tapeserver/file/EnstoreReadSession.hpp"
 #include "castor/tape/tapeserver/file/OsmReadSession.hpp"
 #include "castor/tape/tapeserver/file/ReadSessionFactory.hpp"
 #include "common/dataStructures/LabelFormat.hpp"
@@ -36,6 +37,8 @@ std::unique_ptr<ReadSession> ReadSessionFactory::create(tapeserver::drive::Drive
       return std::make_unique<CtaReadSession>(drive, volInfo, useLbp);
     case LabelFormat::OSM:
       return std::make_unique<OsmReadSession>(drive, volInfo, useLbp);
+    case LabelFormat::Enstore:
+      return std::make_unique<EnstoreReadSession>(drive, volInfo, useLbp);
     default: {
       std::ostringstream ossLabelFormat;
       ossLabelFormat << std::showbase << std::internal << std::setfill('0') << std::hex << std::setw(4)
-- 
GitLab