diff --git a/castor/tape/tapeserver/daemon/CatalogueTransferSession.cpp b/castor/tape/tapeserver/daemon/CatalogueTransferSession.cpp
index 996040a69924376475977f7a04fd9a09dd32547c..a4a0371f26478379b8325b948031f99ea9bb7479 100644
--- a/castor/tape/tapeserver/daemon/CatalogueTransferSession.cpp
+++ b/castor/tape/tapeserver/daemon/CatalogueTransferSession.cpp
@@ -142,18 +142,37 @@ bool castor::tape::tapeserver::daemon::CatalogueTransferSession::
       "Killing data-transfer session because transfer job is too late",
       params);
 
-    if(kill(m_pid, SIGKILL)) {
-      const std::string errnoStr = castor::utils::errnoToString(errno);
-      params.push_back(log::Param("message", errnoStr));
-      m_log(LOG_ERR, "Failed to kill data-transfer session", params);
-    } else {
+    try {
+      idempotentKill(m_pid, SIGKILL);
       m_state = WAIT_TIMEOUT_KILL;
+    } catch(castor::exception::Exception &ex) {
+      params.push_back(log::Param("message", ex.getMessage()));
+      m_log(LOG_ERR, "Failed to kill data-transfer session", params);
     }
   }
 
   return true; // Continue the main event loop
 }
 
+//------------------------------------------------------------------------------
+// idempotentKill
+//------------------------------------------------------------------------------
+void castor::tape::tapeserver::daemon::CatalogueTransferSession::idempotentKill(
+  const pid_t pid, const int signal) {
+  // Try to kill the process identified by the pid parameter
+  const int killRc = kill(pid, signal);
+
+  // If the kill failed for a reason other than the fact the process was already
+  // dead
+  if(killRc && ESRCH != errno) {
+    const std::string errnoStr = castor::utils::errnoToString(errno);
+    castor::exception::Exception ex;
+    ex.getMessage() << "Failed to kill process"
+      ": pid=" << pid << " signal=" << signal << ": " << errnoStr;
+    throw ex;
+  }
+}
+
 //------------------------------------------------------------------------------
 // handleTickWhilstWaitMounted
 //------------------------------------------------------------------------------
@@ -173,12 +192,12 @@ bool castor::tape::tapeserver::daemon::CatalogueTransferSession::
       "Killing data-transfer session because tape mount is taking too long",
       params);
 
-    if(kill(m_pid, SIGKILL)) {
-      const std::string errnoStr = castor::utils::errnoToString(errno);
-      params.push_back(log::Param("message", errnoStr));
-      m_log(LOG_ERR, "Failed to kill data-transfer session", params);
-    } else {
+    try {
+      idempotentKill(m_pid, SIGKILL);
       m_state = WAIT_TIMEOUT_KILL;
+    } catch(castor::exception::Exception &ex) {
+      params.push_back(log::Param("message", ex.getMessage()));
+      m_log(LOG_ERR, "Failed to kill data-transfer session", params);
     }
   }
 
@@ -204,12 +223,12 @@ bool castor::tape::tapeserver::daemon::CatalogueTransferSession::
       "Killing data-transfer session because data blocks are not being moved",
       params);
 
-    if(kill(m_pid, SIGKILL)) {
-      const std::string errnoStr = castor::utils::errnoToString(errno);
-      params.push_back(log::Param("message", errnoStr));
-      m_log(LOG_ERR, "Failed to kill data-transfer session", params);
-    } else {
+    try {
+      idempotentKill(m_pid, SIGKILL);
       m_state = WAIT_TIMEOUT_KILL;
+    } catch(castor::exception::Exception &ex) {
+      params.push_back(log::Param("message", ex.getMessage()));
+      m_log(LOG_ERR, "Failed to kill data-transfer session", params);
     }
   }
 
diff --git a/castor/tape/tapeserver/daemon/CatalogueTransferSession.hpp b/castor/tape/tapeserver/daemon/CatalogueTransferSession.hpp
index c3c4ab859307e89b757bcec625d5a0498e900bde..f87f36142a0209ea02a100e4dceb1e5b48330208 100644
--- a/castor/tape/tapeserver/daemon/CatalogueTransferSession.hpp
+++ b/castor/tape/tapeserver/daemon/CatalogueTransferSession.hpp
@@ -381,6 +381,20 @@ private:
    */
   bool handleTickWhilstWaitJob();
 
+  /**
+   * Tries to kill the specified process using the specified signal. This
+   * method calls the system call kill() but differs in its overall
+   * functionality in that it is idempotent. If the underlying kill() fails
+   * because the process is already dead (errno = ESRCH), then this method
+   * returns successfully.
+   *
+   * @param pid The process ID of the process to be killed.
+   * @param signal The signal to be sent to the process.
+   * @throw castor::exception::Exception if kill() fails for any reason other
+   * than ESRCH.
+   */
+  void idempotentKill(const pid_t pid, const int signal);
+
   /**
    * Handles a tick in time whilst in the TRANSFERSTATE_WAIT_MOUNTED state. Time
    * driven actions such as alarms should be implemented here.