TapeDaemon.cpp 31.7 KB
Newer Older
Steven Murray's avatar
Steven Murray committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/******************************************************************************
 *
 * This file is part of the Castor project.
 * See http://castor.web.cern.ch/castor
 *
 * Copyright (C) 2003  CERN
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 *
 *
 * @author Castor Dev team, castor-dev@cern.ch
 *****************************************************************************/
 
24
#include "castor/common/CastorConfiguration.hpp"
Steven Murray's avatar
Steven Murray committed
25
#include "castor/exception/Errnum.hpp"
26
#include "common/exception/BadAlloc.hpp"
27
#include "castor/io/io.hpp"
Daniele Kruse's avatar
Daniele Kruse committed
28
29
#include "castor/legacymsg/CommonMarshal.hpp"
#include "castor/legacymsg/TapeMarshal.hpp"
30
#include "castor/tape/tapeserver/daemon/CleanerSession.hpp"
31
#include "castor/tape/tapeserver/daemon/Constants.hpp"
32
#include "castor/tape/tapeserver/daemon/DataTransferSession.hpp"
33
#include "castor/tape/tapeserver/daemon/LabelSession.hpp"
34
#include "castor/tape/tapeserver/daemon/ProcessForker.hpp"
35
#include "castor/tape/tapeserver/daemon/ProcessForkerConnectionHandler.hpp"
36
#include "castor/tape/tapeserver/daemon/ProcessForkerProxySocket.hpp"
Steven Murray's avatar
Steven Murray committed
37
#include "castor/tape/tapeserver/daemon/TapeDaemon.hpp"
Steven Murray's avatar
Steven Murray committed
38
#include "castor/tape/tapeserver/daemon/TapeDaemonConfig.hpp"
39
#include "castor/tape/tapeserver/daemon/TapeMessageHandler.hpp"
Daniele Kruse's avatar
Daniele Kruse committed
40
#include "castor/tape/tapeserver/file/File.hpp"
41
#include "castor/tape/tapeserver/TapeBridgeConstants.hpp"
42
#include "castor/utils/SmartFd.hpp"
43
#include "castor/utils/utils.hpp"
44
#include "rmc_constants.h"
45

Steven Murray's avatar
Steven Murray committed
46
#include <algorithm>
47
#include <errno.h>
48
#include <limits.h>
Steven Murray's avatar
Steven Murray committed
49
50
#include <memory>
#include <signal.h>
51
52
#include <sys/prctl.h>
#include <sys/socket.h>
53
54
55
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
Steven Murray's avatar
Steven Murray committed
56
57
58
59

//------------------------------------------------------------------------------
// constructor
//------------------------------------------------------------------------------
60
// Initialises the daemon from the supplied arguments.  No process, socket or
// ZMQ context is created here: the corresponding members start out as
// NULL / 0 and are filled in later by exceptionThrowingMain().
//
// Note that m_hostName is initialised by calling getHostName(), which throws
// a cta::exception::Exception if the host name cannot be obtained.
castor::tape::tapeserver::daemon::TapeDaemon::TapeDaemon(
  const int argc,
  char **const argv,
  std::ostream &stdOut,
  std::ostream &stdErr,
  log::Logger &log,
  const int netTimeout,
  const DriveConfigMap &driveConfigs,
  reactor::ZMQReactor &reactor,
  castor::server::ProcessCap &capUtils,
  const TapeDaemonConfig &tapeDaemonConfig):
  // Base class
  castor::server::Daemon(stdOut, stdErr, log),

  // Shutdown state machine
  m_state(TAPEDAEMON_STATE_RUNNING),
  m_startOfShutdown(0),

  // Values taken directly from the constructor arguments
  m_argc(argc),
  m_argv(argv),
  m_netTimeout(netTimeout),
  m_driveConfigs(driveConfigs),
  m_reactor(reactor),
  m_capUtils(capUtils),
  m_tapeDaemonConfig(tapeDaemonConfig),

  // Identity of this daemon
  m_programName("tapeserverd"),
  m_hostName(getHostName()),

  // Resources that are created later on
  m_processForker(NULL),
  m_processForkerPid(0),
  m_catalogue(NULL),
  m_zmqContext(NULL) {
}

89
90
91
92
93
94
95
96
97
98
99
100
//------------------------------------------------------------------------------
// stateToStr
//------------------------------------------------------------------------------
// Returns the human readable name of the specified daemon state, or "UNKNOWN"
// if the value is not one of the states handled below.  Never throws.
const char *castor::tape::tapeserver::daemon::TapeDaemon::stateToStr(
  const State state) throw () {
  if(TAPEDAEMON_STATE_RUNNING == state) {
    return "RUNNING";
  }

  if(TAPEDAEMON_STATE_SHUTTINGDOWN == state) {
    return "SHUTTINGDOWN";
  }

  return "UNKNOWN";
}

101
102
103
//------------------------------------------------------------------------------
// getHostName
//------------------------------------------------------------------------------
104
std::string castor::tape::tapeserver::daemon::TapeDaemon::getHostName() const {
105
106
  char nameBuf[81];
  if(gethostname(nameBuf, sizeof(nameBuf))) {
107
    const std::string message = castor::utils::errnoToString(errno);
108
    cta::exception::Exception ex;
109
    ex.getMessage() << "Failed to get host name: " << message;
110
111
112
113
    throw ex;
  }

  return nameBuf;
Steven Murray's avatar
Steven Murray committed
114
115
116
117
118
119
}

//------------------------------------------------------------------------------
// destructor
//------------------------------------------------------------------------------
// Tears the daemon down: asks the ProcessForker (if any) to stop, kills any
// sessions still known to the catalogue (if any), frees both objects and
// finally shuts down the protobuf library.
castor::tape::tapeserver::daemon::TapeDaemon::~TapeDaemon() throw() {
  // Stop and free the proxy to the ProcessForker, if one was created
  if(NULL != m_processForker) {
    m_processForker->stopProcessForker("TapeDaemon destructor called");
    delete m_processForker;
  }

  // Kill the remaining sessions and free the catalogue, if one was created.
  // Deleting inside the guard is equivalent to an unconditional delete,
  // because deleting a NULL pointer has no effect.
  if(NULL != m_catalogue) {
    m_log(LOG_WARNING,
      "Tape-server parent-process killing any running tape-sessions");
    m_catalogue->killSessions();
    delete m_catalogue;
  }

  // NOTE(review): the ZMQ context is not destroyed here; the call below was
  // already disabled in the original code and the reason is not visible in
  // this file.
  //destroyZmqContext();

  google::protobuf::ShutdownProtobufLibrary();
}
133

Steven Murray's avatar
Steven Murray committed
134
135
136
137
//------------------------------------------------------------------------------
// destroyZmqContext
//------------------------------------------------------------------------------
void castor::tape::tapeserver::daemon::TapeDaemon::destroyZmqContext() throw() {
138
139
  if(NULL != m_zmqContext) {
    if(zmq_term(m_zmqContext)) {
140
      const std::string message = castor::utils::errnoToString(errno);
141
      std::list<castor::log::Param> params = {castor::log::Param("message", message)};
142
      m_log(LOG_ERR, "Failed to destroy ZMQ context", params);
Steven Murray's avatar
Steven Murray committed
143
144
145
    } else {
      m_zmqContext = NULL;
      m_log(LOG_INFO, "Successfully destroyed ZMQ context");
146
147
    }
  }
Steven Murray's avatar
Steven Murray committed
148
149
150
151
152
}

//------------------------------------------------------------------------------
// main
//------------------------------------------------------------------------------
153
int castor::tape::tapeserver::daemon::TapeDaemon::main() throw() {
Steven Murray's avatar
Steven Murray committed
154
  try {
Steven Murray's avatar
Steven Murray committed
155
    exceptionThrowingMain(m_argc, m_argv);
156
  } catch (cta::exception::Exception &ex) {
157
    // Log the error
158
    std::list<log::Param> params = {
159
      log::Param("Message", ex.getMessage().str())};
160
    m_log(LOG_ERR, "Aborting", params);
Steven Murray's avatar
Steven Murray committed
161

162
    return EXIT_FAILURE;
Steven Murray's avatar
Steven Murray committed
163
  }
164
  return EXIT_SUCCESS;
Steven Murray's avatar
Steven Murray committed
165
166
167
168
169
}

//------------------------------------------------------------------------------
// exceptionThrowingMain
//------------------------------------------------------------------------------
170
void  castor::tape::tapeserver::daemon::TapeDaemon::exceptionThrowingMain(
171
  const int argc, char **const argv)  {
Steven Murray's avatar
Steven Murray committed
172
  parseCommandLine(argc, argv);
173

174
  if(m_driveConfigs.empty()) {
175
    cta::exception::Exception ex;
176
177
178
179
    ex.getMessage() << "/etc/castor/TPCONFIG is empty";
    throw ex;
  }

180
181
  // Process must be able to change user now and should be permitted to perform
  // raw IO in the future
182
  setProcessCapabilities("cap_setgid,cap_setuid+ep cap_sys_rawio+p");
183

184
  const bool runAsStagerSuperuser = true;
185
  daemonizeIfNotRunInForeground(runAsStagerSuperuser);
186
  setDumpable();
187
188
189
190
191
192
193
194

  // Create two socket pairs for ProcessForker communications
  const ForkerCmdPair cmdPair = createForkerCmdPair();
  const ForkerReaperPair reaperPair = createForkerReaperPair();

  m_processForkerPid = forkProcessForker(cmdPair, reaperPair);

  m_processForker = new ProcessForkerProxySocket(m_log, cmdPair.tapeDaemon);
195
  castor::tape::System::realWrapper sysWrapper;
Steven Murray's avatar
Steven Murray committed
196
197
198
199
200
  m_catalogue = new Catalogue(
    m_netTimeout,
    m_log,
    *m_processForker,
    m_hostName,
201
202
203
    m_tapeDaemonConfig.catalogueConfig,
    sysWrapper
  );
204

205
  m_catalogue->populate(m_driveConfigs);
206

207
  // There is no longer any need for the process to be able to change user,
208
209
  // however the process should still be permitted to perform raw IO in the
  // future
210
211
  setProcessCapabilities("cap_sys_rawio+p");

Steven Murray's avatar
Steven Murray committed
212
  blockSignals();
213
  initZmqContext();
214
  setUpReactor(reaperPair.tapeDaemon);
215
  mainEventLoop();
Steven Murray's avatar
Steven Murray committed
216
217
}

218
219
220
221
222
223
//------------------------------------------------------------------------------
// setDumpable
//------------------------------------------------------------------------------
void castor::tape::tapeserver::daemon::TapeDaemon::setDumpable() {
  castor::utils::setDumpableProcessAttribute(true);
  const bool dumpable = castor::utils::getDumpableProcessAttribute();
224
  std::list<log::Param> params = {
225
226
227
    log::Param("dumpable", dumpable ? "true" : "false")};
  m_log(LOG_INFO, "Got dumpable attribute of process", params);
  if(!dumpable) {
228
    cta::exception::Exception ex;
229
230
231
232
233
234
235
236
237
238
239
240
    ex.getMessage() << "Failed to set dumpable attribute of process to true";
    throw ex;
  }
}

//------------------------------------------------------------------------------
// setProcessCapabilities
//------------------------------------------------------------------------------
void castor::tape::tapeserver::daemon::TapeDaemon::setProcessCapabilities(
  const std::string &text) {
  try {
    m_capUtils.setProcText(text);
241
    std::list<log::Param> params =
242
243
      {log::Param("capabilities", m_capUtils.getProcText())};
    m_log(LOG_INFO, "Set process capabilities", params);
244
245
  } catch(cta::exception::Exception &ne) {
    cta::exception::Exception ex;
246
247
248
249
250
251
252
253
254
    ex.getMessage() << "Failed to set process capabilities to '" << text <<
      "': " << ne.getMessage().str();
    throw ex;
  }
}

//------------------------------------------------------------------------------
// forkProcessForker
//------------------------------------------------------------------------------
255
256
pid_t castor::tape::tapeserver::daemon::TapeDaemon::forkProcessForker(
  const ForkerCmdPair &cmdPair, const ForkerReaperPair &reaperPair) {
257
258
259
260
261
262
  m_log.prepareForFork();

  const pid_t forkRc = fork();

  // If fork failed
  if(0 > forkRc) {
263
    const std::string message = castor::utils::errnoToString(errno);
264
265
266
267

    closeForkerCmdPair(cmdPair);
    closeForkerReaperPair(reaperPair);

268
    cta::exception::Exception ex;
269
270
271
272
273
274
    ex.getMessage() << "Failed to fork ProcessForker: " << message;
    throw ex;

  // Else if this is the parent process
  } else if(0 < forkRc) {
    {
275
      std::list<log::Param> params = {
276
        log::Param("processForkerPid", forkRc)};
277
278
279
      m_log(LOG_INFO, "Successfully forked the ProcessForker", params);
    }

280
281
    closeProcessForkerSideOfCmdPair(cmdPair);
    closeProcessForkerSideOfReaperPair(reaperPair);
282

283
    return forkRc;
284
285
286

  // Else this is the child process
  } else {
287
288
    closeTapeDaemonSideOfCmdPair(cmdPair);
    closeTapeDaemonSideOfReaperPair(reaperPair);
289

290
291
    castor::utils::setProcessNameAndCmdLine(m_argv[0], "tpforker");

292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
    exit(runProcessForker(cmdPair.processForker, reaperPair.processForker));
  }
}

//------------------------------------------------------------------------------
// createForkerCmdPair
//------------------------------------------------------------------------------
// Creates the socket pair used to control the ProcessForker and logs the two
// file descriptors.  The first socket of the pair becomes the TapeDaemon side
// and the second becomes the ProcessForker side.
//
// Throws cta::exception::Exception if the socket pair cannot be created.
castor::tape::tapeserver::daemon::TapeDaemon::ForkerCmdPair
  castor::tape::tapeserver::daemon::TapeDaemon::createForkerCmdPair() {
  ForkerCmdPair result;

  try {
    const std::pair<int, int> sockets = createSocketPair();
    result.tapeDaemon = sockets.first;
    result.processForker = sockets.second;
  } catch(cta::exception::Exception &cause) {
    cta::exception::Exception ex;
    ex.getMessage() << "Failed to create socket pair to control the"
      " ProcessForker: " << cause.getMessage().str();
    throw ex;
  }

  std::list<log::Param> logParams = {
    log::Param("cmdPair.tapeDaemon", result.tapeDaemon),
    log::Param("cmdPair.processForker", result.processForker)};
  m_log(LOG_INFO, "TapeDaemon parent process succesfully created socket"
    " pair to control the ProcessForker", logParams);

  return result;
}

//------------------------------------------------------------------------------
// createForkerReaperPair
//------------------------------------------------------------------------------
// Creates the socket pair on which the ProcessForker reports terminated
// processes and logs the two file descriptors.  The first socket of the pair
// becomes the TapeDaemon side and the second becomes the ProcessForker side.
//
// Throws cta::exception::Exception if the socket pair cannot be created.
castor::tape::tapeserver::daemon::TapeDaemon::ForkerReaperPair
  castor::tape::tapeserver::daemon::TapeDaemon::createForkerReaperPair() {
  ForkerReaperPair result;

  try {
    const std::pair<int, int> sockets = createSocketPair();
    result.tapeDaemon = sockets.first;
    result.processForker = sockets.second;
  } catch(cta::exception::Exception &cause) {
    cta::exception::Exception ex;
    ex.getMessage() << "Failed to create socket pair for the ProcessForker"
      " to report terminated processes: " << cause.getMessage().str();
    throw ex;
  }

  std::list<log::Param> logParams = {
    log::Param("reaperPair.tapeDaemon", result.tapeDaemon),
    log::Param("reaperPair.processForker", result.processForker)};
  m_log(LOG_INFO, "TapeDaemon parent process succesfully created socket"
    " pair for ProcessForker to report terminated processes", logParams);

  return result;
}

//------------------------------------------------------------------------------
// createSocketPair
//------------------------------------------------------------------------------
std::pair<int, int>
  castor::tape::tapeserver::daemon::TapeDaemon::createSocketPair() {
  int sv[2] = {-1, -1};
  if(socketpair(AF_LOCAL, SOCK_STREAM, 0, sv)) {
    char message[100];
    strerror_r(errno, message, sizeof(message));
363
    cta::exception::Exception ex;
364
365
366
367
368
369
370
371
372
373
374
    ex.getMessage() << "Failed to create socket pair: " << message;
    throw ex;
  }

  return std::pair<int, int> (sv[0], sv[1]);
}

//------------------------------------------------------------------------------
// closeForkerCmdPair
//------------------------------------------------------------------------------
void castor::tape::tapeserver::daemon::TapeDaemon::closeForkerCmdPair(
375
  const ForkerCmdPair &cmdPair) const {
376
  if(close(cmdPair.tapeDaemon)) {
377
    const std::string message = castor::utils::errnoToString(errno);
378
    cta::exception::Exception ex;
379
380
381
382
383
384
    ex.getMessage() << "Failed to close TapeDaemon side of cmdPair"
      ": cmdPair.tapeDaemon=" << cmdPair.tapeDaemon << ": " << message;
    throw ex;
  }

  if(close(cmdPair.processForker)) {
385
    const std::string message = castor::utils::errnoToString(errno);
386
    cta::exception::Exception ex;
387
388
389
390
391
392
393
394
395
396
    ex.getMessage() << "Failed to close ProcessForker side of cmdPair"
      ": cmdPair.processForker=" << cmdPair.processForker << ": " << message;
    throw ex;
  }
}

//------------------------------------------------------------------------------
// closeForkerReaperPair
//------------------------------------------------------------------------------
void castor::tape::tapeserver::daemon::TapeDaemon::closeForkerReaperPair(
397
  const ForkerReaperPair &reaperPair) const {
398
  if(close(reaperPair.tapeDaemon)) {
399
    const std::string message = castor::utils::errnoToString(errno);
400
    cta::exception::Exception ex;
401
402
403
404
405
406
    ex.getMessage() << "Failed to close TapeDaemon side of reaperPair"
      ": reaperPair.tapeDaemon=" << reaperPair.tapeDaemon << ": " << message;
    throw ex;
  }

  if(close(reaperPair.processForker)) {
407
    const std::string message = castor::utils::errnoToString(errno);
408
    cta::exception::Exception ex;
409
410
411
412
413
414
415
416
417
418
419
    ex.getMessage() << "Failed to close ProcessForker side of reaperPair"
      ": reaperPair.processForker=" << reaperPair.processForker << ": " <<
      message;
    throw ex;
  }
}

//------------------------------------------------------------------------------
// closeProcessForkerSideOfCmdPair
//------------------------------------------------------------------------------
void castor::tape::tapeserver::daemon::TapeDaemon::
420
  closeProcessForkerSideOfCmdPair(const ForkerCmdPair &cmdPair) const {
421
  if(close(cmdPair.processForker)) {
422
    const std::string message = castor::utils::errnoToString(errno);
423
    cta::exception::Exception ex;
424
425
426
427
428
429
430
431
432
433
434
    ex.getMessage() << "TapeDaemon parent process failed to close"
      " ProcessForker side of cmdPair: cmdPair.processForker=" <<
      cmdPair.processForker << ": " << message;
    throw ex;
  }
}

//------------------------------------------------------------------------------
// closeProcessForkerSideOfReaperPair
//------------------------------------------------------------------------------
void castor::tape::tapeserver::daemon::TapeDaemon::
435
436
  closeProcessForkerSideOfReaperPair(const ForkerReaperPair &reaperPair)
  const {
437
  if(close(reaperPair.processForker)) {
438
    const std::string message = castor::utils::errnoToString(errno);
439
    cta::exception::Exception ex;
440
441
442
443
444
445
    ex.getMessage() << "TapeDaemon parent process failed to close"
      " ProcessForker side of reaperPair: reaperPair.processForker=" <<
      reaperPair.processForker << ": " << message;
    throw ex;
  }
}
446

447
448
449
450
//------------------------------------------------------------------------------
// closeTapeDaemonSideOfCmdPair
//------------------------------------------------------------------------------
void castor::tape::tapeserver::daemon::TapeDaemon::
451
  closeTapeDaemonSideOfCmdPair(const ForkerCmdPair &cmdPair) const {
452
  if(close(cmdPair.tapeDaemon)) {
453
    const std::string message = castor::utils::errnoToString(errno);
454
    cta::exception::Exception ex;
455
456
457
458
459
460
461
462
463
464
465
    ex.getMessage() << "ProcessForker process failed to close"
      " TapeDaemon side of cmdPair: cmdPair.tapeDaemon=" << cmdPair.tapeDaemon
      << ": " << message;
    throw ex;
  }
}

//------------------------------------------------------------------------------
// closeTapeDaemonSideOfReaperPair
//------------------------------------------------------------------------------
void castor::tape::tapeserver::daemon::TapeDaemon::
466
  closeTapeDaemonSideOfReaperPair(const ForkerReaperPair &reaperPair) const {
467
  if(close(reaperPair.tapeDaemon)) {
468
    const std::string message = castor::utils::errnoToString(errno);
469
    cta::exception::Exception ex;
470
471
472
473
    ex.getMessage() << "ProcessForker process failed to close"
      " TapeDaemon side of reaperPair: reaperPair.tapeDaemon=" <<
      reaperPair.tapeDaemon << ": " << message;
    throw ex;
474
475
476
477
478
479
  }
}

//------------------------------------------------------------------------------
// runProcessForker
//------------------------------------------------------------------------------
480
int castor::tape::tapeserver::daemon::TapeDaemon::runProcessForker(
481
  const int cmdReceiverSocket, const int reaperSenderSocket) throw() {
482
  try {
483
    ProcessForker processForker(m_log, cmdReceiverSocket, reaperSenderSocket,
484
      m_hostName, m_argv[0], m_tapeDaemonConfig);
485
    processForker.execute();
486
    return 0;
487
  } catch(cta::exception::Exception &ex) {
488
    std::list<log::Param> params = {log::Param("message", ex.getMessage().str())};
489
490
    m_log(LOG_ERR, "ProcessForker threw an unexpected exception", params);
  } catch(std::exception &se) {
491
    std::list<log::Param> params = {log::Param("message", se.what())};
492
493
494
    m_log(LOG_ERR, "ProcessForker threw an unexpected exception", params);
  } catch(...) {
    m_log(LOG_ERR, "ProcessForker threw an unknown and unexpected exception");
495
  }
496
  return 1;
497
498
}

Steven Murray's avatar
Steven Murray committed
499
500
501
//------------------------------------------------------------------------------
// blockSignals
//------------------------------------------------------------------------------
502
// Adds the signals that should not asynchronously disturb the daemon to the
// set of blocked signals of the calling process.  handlePendingSignals()
// later collects pending signals synchronously with sigtimedwait().
//
// The result of sigprocmask() is handed to Errnum::throwOnNonZero().
void castor::tape::tapeserver::daemon::TapeDaemon::blockSignals() const {
  // The signals that should not asynchronously disturb the daemon
  const int signalsToBlock[] = {
    SIGHUP,
    SIGINT,
    SIGQUIT,
    SIGPIPE,
    SIGTERM,
    SIGUSR1,
    SIGUSR2,
    SIGCHLD,
    SIGTSTP,
    SIGTTIN,
    SIGTTOU,
    SIGPOLL,
    SIGURG,
    SIGVTALRM};

  sigset_t blockedSet;
  sigemptyset(&blockedSet);
  for(const int signalNumber : signalsToBlock) {
    sigaddset(&blockedSet, signalNumber);
  }

  castor::exception::Errnum::throwOnNonZero(
    sigprocmask(SIG_BLOCK, &blockedSet, NULL),
    "Failed to block signals: sigprocmask() failed");
}

525
526
527
528
529
530
531
//------------------------------------------------------------------------------
// initZmqContext
//------------------------------------------------------------------------------
void castor::tape::tapeserver::daemon::TapeDaemon::initZmqContext() {
  const int sizeOfIOThreadPoolForZMQ = 1;
  m_zmqContext = zmq_init(sizeOfIOThreadPoolForZMQ);
  if(NULL == m_zmqContext) {
532
    const std::string message = castor::utils::errnoToString(errno);
533
    cta::exception::Exception ex;
534
535
536
537
538
    ex.getMessage() << "Failed to instantiate ZMQ context: " << message;
    throw ex;
  }
}

539
//------------------------------------------------------------------------------
540
// setUpReactor
541
//------------------------------------------------------------------------------
542
543
544
void castor::tape::tapeserver::daemon::TapeDaemon::setUpReactor(
  const int reaperSocket) {
  createAndRegisterProcessForkerConnectionHandler(reaperSocket);
545
  createAndRegisterTapeMessageHandler();
546
547
548
}

//------------------------------------------------------------------------------
549
// createAndRegisterProcessForkerConnectionHandler
550
//------------------------------------------------------------------------------
551
void castor::tape::tapeserver::daemon::TapeDaemon::
552
  createAndRegisterProcessForkerConnectionHandler(const int reaperSocket)  {
553
  try {
554
    std::unique_ptr<ProcessForkerConnectionHandler> handler;
555
556
    try {
      handler.reset(new ProcessForkerConnectionHandler(reaperSocket, m_reactor,
557
        m_log, *m_catalogue));
558
    } catch(std::bad_alloc &ba) {
559
      cta::exception::BadAlloc ex;
560
      ex.getMessage() <<
561
562
        "Failed to create event handler for communicating with the"
        " ProcessForker: " << ba.what();
563
564
565
566
      throw ex;
    }
    m_reactor.registerHandler(handler.get());
    handler.release();
567
568
  } catch(cta::exception::Exception &ne) {
    cta::exception::Exception ex;
569
    ex.getMessage() <<
570
571
      "Failed to create and register ProcessForkerConnectionHandler: " <<
      ne.getMessage().str();
572
    throw ex;
573
  }
574
575
}

576
//------------------------------------------------------------------------------
577
// createAndRegisterTapeMessageHandler
578
579
//------------------------------------------------------------------------------
void castor::tape::tapeserver::daemon::TapeDaemon::
580
  createAndRegisterTapeMessageHandler()  {
581
  try {
582
    std::unique_ptr<TapeMessageHandler> handler;
583
    try {
584
      handler.reset(new TapeMessageHandler(m_tapeDaemonConfig.internalPort,
585
        m_reactor, m_log, *m_catalogue, m_hostName, m_zmqContext));
586
    } catch(std::bad_alloc &ba) {
587
      cta::exception::BadAlloc ex;
588
589
590
591
592
593
594
      ex.getMessage() <<
        "Failed to create event handler for communicating with forked sessions"
        ": " << ba.what();
      throw ex;
    }
    m_reactor.registerHandler(handler.get());
    handler.release();
595
596
  } catch(cta::exception::Exception &ne) {
    cta::exception::Exception ex;
597
    ex.getMessage() <<
598
599
      "Failed to create and register TapeMessageHandler: " <<
      ne.getMessage().str();
600
601
    throw ex;
  }
602
603
}

604
//------------------------------------------------------------------------------
605
// mainEventLoop
606
//------------------------------------------------------------------------------
607
void castor::tape::tapeserver::daemon::TapeDaemon::mainEventLoop() {
608
  while (handleIOEvents() && handleTick() && handlePendingSignals()) {
609
610
611
  }
}

612
//------------------------------------------------------------------------------
613
// handleIOEvents
614
//------------------------------------------------------------------------------
615
bool castor::tape::tapeserver::daemon::TapeDaemon::handleIOEvents() throw() {
616
617
618
  try {
    const int timeout = 100; // 100 milliseconds
    m_reactor.handleEvents(timeout);
619
  } catch(cta::exception::Exception &ex) {
620
    // Log exception and continue
621
    std::list<log::Param> params = {
622
623
624
      log::Param("message", ex.getMessage().str()),
      log::Param("backtrace", ex.backtrace())
    };
625
626
    m_log(LOG_ERR, "Unexpected castor exception thrown when handling an I/O"
      " event", params);
627
628
  } catch(std::exception &se) {
    // Log exception and continue
629
    std::list<log::Param> params = {log::Param("message", se.what())};
630
631
632
633
634
635
636
637
    m_log(LOG_ERR, "Unexpected exception thrown when handling an I/O event",
      params);
  } catch(...) {
    // Log exception and continue
    m_log(LOG_ERR,
      "Unexpected and unknown exception thrown when handling an I/O event");
  }

638
  return true; // Continue the main event loop
639
640
641
642
643
644
}

//------------------------------------------------------------------------------
// handleTick
//------------------------------------------------------------------------------
bool castor::tape::tapeserver::daemon::TapeDaemon::handleTick() throw() {
645
646
647
648
649
650
651
652
653
654
655
  if(m_catalogue->allDrivesAreShutdown()) {
    m_log(LOG_WARNING, "Tape-server parent-process ending main loop because"
      " all tape drives are shutdown");
    return false; // Do not continue the main event loop
  }

  if(TAPEDAEMON_STATE_SHUTTINGDOWN == m_state) {
    const time_t now = time(NULL);
    const time_t timeSpentShuttingDown = now - m_startOfShutdown;
    const time_t shutdownTimeout = 9*60; // 9 minutes
    if(shutdownTimeout <= timeSpentShuttingDown) {
656
      std::list<log::Param> params = {log::Param("shutdownTimeout", shutdownTimeout)};
657
658
659
660
661
662
      m_log(LOG_WARNING, "Tape-server parent-process ending main loop because"
        " shutdown timeout has been reached", params);
      return false; // Do not continue the main event loop
    }
  }

663
664
  try {
    return m_catalogue->handleTick();
665
  } catch(cta::exception::Exception &ex) {
666
    // Log exception and continue
667
    std::list<log::Param> params = {
668
669
670
671
672
673
674
      log::Param("message", ex.getMessage().str()),
      log::Param("backtrace", ex.backtrace())
    };
    m_log(LOG_ERR, "Unexpected castor exception thrown when handling a tick"
      " in time", params);
  } catch(std::exception &se) {
    // Log exception and continue
675
    std::list<log::Param> params = {log::Param("message", se.what())};
676
677
678
679
680
681
682
    m_log(LOG_ERR, "Unexpected exception thrown when handling a tick in time",
      params);
  } catch(...) {
    // Log exception and continue
    m_log(LOG_ERR,
      "Unexpected and unknown exception thrown when handling a tick in time");
  }
683

684
  return true; // Continue the main event loop
685
686
687
688
689
690
691
}

//------------------------------------------------------------------------------
// handlePendingSignals
//------------------------------------------------------------------------------
bool castor::tape::tapeserver::daemon::TapeDaemon::handlePendingSignals()
  throw() {
692
693
694
695
696
697
698
699
700
701
702
703
704
705
  try {
    int sig = 0;
    sigset_t allSignals;
    siginfo_t sigInfo;
    sigfillset(&allSignals);
    const struct timespec immediateTimeout = {0, 0};

    // While there is a pending signal to be handled
    while (0 < (sig = sigtimedwait(&allSignals, &sigInfo, &immediateTimeout))) {
      const bool continueMainEventLoop = handleSignal(sig, sigInfo);

      if(!continueMainEventLoop) {
        return false;
      }
706
    }
707
  } catch(cta::exception::Exception &ex) {
708
    // Log exception and continue
709
    std::list<log::Param> params = {
710
711
712
713
714
715
716
      log::Param("message", ex.getMessage().str()),
      log::Param("backtrace", ex.backtrace())
    };
    m_log(LOG_ERR, "Unexpected castor exception thrown when handling a"
      " pending signal", params);
  } catch(std::exception &se) {
    // Log exception and continue
717
    std::list<log::Param> params = {log::Param("message", se.what())};
718
719
720
721
722
723
    m_log(LOG_ERR, "Unexpected exception thrown when handling a pending signal",
      params);
  } catch(...) {
    // Log exception and continue
    m_log(LOG_ERR,
      "Unexpected and unknown exception thrown when handling a pending signal");
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
  }

  return true; // Continue the main event loop
}

//------------------------------------------------------------------------------
// handleSignal
//------------------------------------------------------------------------------
bool castor::tape::tapeserver::daemon::TapeDaemon::handleSignal(const int sig,
  const siginfo_t &sigInfo) {
  switch(sig) {
  case SIGINT : return handleSIGINT(sigInfo);
  case SIGTERM: return handleSIGTERM(sigInfo);
  case SIGCHLD: return handleSIGCHLD(sigInfo);
  default:
    {
740
      std::list<log::Param> params = {log::Param("signal", sig)};
741
      m_log(LOG_INFO, "Ignoring signal", params);
742
      return true; // Continue the main event loop
743
744
    }
  }
745
746
747
748
749
750
751
}

//------------------------------------------------------------------------------
// handleSIGINT
//------------------------------------------------------------------------------
bool castor::tape::tapeserver::daemon::TapeDaemon::handleSIGINT(
  const siginfo_t &sigInfo) {
752
753
754
755
756
757
758
759
760
761
762
763
764
  if(TAPEDAEMON_STATE_RUNNING == m_state) {
    m_log(LOG_WARNING, "Tape-server parent-process starting shutdown sequence"
      " because SIGINT was received");

    m_state = TAPEDAEMON_STATE_SHUTTINGDOWN;
    m_startOfShutdown = time(NULL);
    m_catalogue->shutdown();
  } else {
    m_log(LOG_WARNING, "Tape-server parent-process ignoring SIGINT because the"
      " shutdown sequence has already been started");
  }

  return true; // Continue the main event loop
765
}

//------------------------------------------------------------------------------
// handleSIGTERM
//------------------------------------------------------------------------------
bool castor::tape::tapeserver::daemon::TapeDaemon::handleSIGTERM(
  const siginfo_t &sigInfo) {
772
773
774
775
776
777
778
779
780
781
782
783
784
  if(TAPEDAEMON_STATE_RUNNING == m_state) {
    m_log(LOG_WARNING, "Tape-server parent-process starting shutdown sequence"
      " because SIGTERM was received");

    m_state = TAPEDAEMON_STATE_SHUTTINGDOWN;
    m_startOfShutdown = time(NULL);
    m_catalogue->shutdown();
  } else {
    m_log(LOG_WARNING, "Tape-server parent-process ignoring SIGTERM because the"
      " shutdown sequence has already been started");
  }

  return true; // Continue the main event loop
785
786
787
}

//------------------------------------------------------------------------------
// handleSIGCHLD
//------------------------------------------------------------------------------
bool castor::tape::tapeserver::daemon::TapeDaemon::handleSIGCHLD(
  const siginfo_t &sigInfo) {
  // Reap zombie processes
793
  pid_t pid = 0;
794
  int waitpidStat = 0;
795

796
  while (0 < (pid = waitpid(-1, &waitpidStat, WNOHANG))) {
797
798
799
800
    const bool continueMainEventLoop = handleReapedProcess(pid, waitpidStat);
    if(!continueMainEventLoop) {
      return false;
    }
801
  }
802
803

  return true; // Continue the main event loop
804
805
806
}

//------------------------------------------------------------------------------
// handleReapedProcess
//------------------------------------------------------------------------------
bool castor::tape::tapeserver::daemon::TapeDaemon::handleReapedProcess(
810
811
  const pid_t pid, const int waitpidStat) throw() {
  logChildProcessTerminated(pid, waitpidStat);
812

813
  if(pid == m_processForkerPid) {
814
    return handleReapedProcessForker(pid, waitpidStat);
815
  } else {
816
    std::list<log::Param> params = {log::Param("pid", pid)};
817
    m_log(LOG_ERR, "Reaped process was unknown", params);
818
    return true; // Continue the main event loop
819
820
  }
}

//------------------------------------------------------------------------------
// handleReapedProcessForker
//------------------------------------------------------------------------------
bool castor::tape::tapeserver::daemon::TapeDaemon::handleReapedProcessForker(
826
  const pid_t pid, const int waitpidStat) throw() {
827
  std::list<log::Param> params = {
828
    log::Param("processForkerPid", pid)};
829
830
831
  m_log(LOG_WARNING, "Tape-server parent-process stopping gracefully because"
    " ProcessForker has terminated", params);
  return false; // Do not continue the main event loop
832
}

//------------------------------------------------------------------------------
// logChildProcessTerminated
//------------------------------------------------------------------------------
void castor::tape::tapeserver::daemon::TapeDaemon::logChildProcessTerminated(
  const pid_t pid, const int waitpidStat) throw() {
839
  std::list<log::Param> params;
840
  params.push_back(log::Param("terminatedPid", pid));
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865

  if(WIFEXITED(waitpidStat)) {
    params.push_back(log::Param("WEXITSTATUS", WEXITSTATUS(waitpidStat)));
  }

  if(WIFSIGNALED(waitpidStat)) {
    params.push_back(log::Param("WTERMSIG", WTERMSIG(waitpidStat)));
  }

  if(WCOREDUMP(waitpidStat)) {
    params.push_back(log::Param("WCOREDUMP", "true"));
  } else {
    params.push_back(log::Param("WCOREDUMP", "false"));
  }

  if(WIFSTOPPED(waitpidStat)) {
    params.push_back(log::Param("WSTOPSIG", WSTOPSIG(waitpidStat)));
  }

  if(WIFCONTINUED(waitpidStat)) {
    params.push_back(log::Param("WIFCONTINUED", "true"));
  } else {
    params.push_back(log::Param("WIFCONTINUED", "false"));
  }

866
  m_log(LOG_INFO, "Child process terminated", params);
867
}