RecallReportPacker.cpp 12.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/******************************************************************************
 *
 * This file is part of the Castor project.
 * See http://castor.web.cern.ch/castor
 *
 * Copyright (C) 2003  CERN
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * 
 *
 * @author Castor Dev team, castor-dev@cern.ch
 *****************************************************************************/

#include "castor/tape/tapeserver/daemon/RecallReportPacker.hpp"
25
#include "castor/tape/tapeserver/daemon/TaskWatchDog.hpp"
26
#include "castor/log/Logger.hpp"
27
28
#include "log.h"
#include "serrno.h"
29

30
#include <signal.h>
31
#include <iostream>
32

33
namespace{
34
  struct failedReportRecallResult : public castor::exception::Exception{
35
36
37
38
    failedReportRecallResult(const std::string& s): Exception(s){}
  };
}

David COME's avatar
David COME committed
39
40
41
using castor::log::LogContext;
using castor::log::Param;

42
43
44
45
namespace castor {
namespace tape {
namespace tapeserver {
namespace daemon {
46
47
48
//------------------------------------------------------------------------------
//Constructor
//------------------------------------------------------------------------------
49
/**
 * Builds a packer bound to the given retrieve mount.
 * @param retrieveMount mount used later for drive status and completion calls;
 *        only the pointer is stored here.
 * @param lc log context handed to the ReportPackerInterface base.
 * Starts with no error recorded and neither the tape nor the disk thread
 * marked as complete.
 */
RecallReportPacker::RecallReportPacker(cta::RetrieveMount *retrieveMount, log::LogContext lc)
  : ReportPackerInterface<detail::Recall>(lc),
    m_workerThread(*this),
    m_errorHappened(false),
    m_retrieveMount(retrieveMount),
    m_tapeThreadComplete(false),
    m_diskThreadComplete(false)
{
}
56
57
58
//------------------------------------------------------------------------------
//Destructor
//------------------------------------------------------------------------------
59
/**
 * Acquires the producer mutex for the duration of the destructor body.
 * NOTE(review): presumably this waits for a producer still inside one of the
 * report*() methods before members are destroyed - confirm.
 */
RecallReportPacker::~RecallReportPacker()
{
  castor::server::MutexLocker producerLock(&m_producterProtection);
}
62
63
64
//------------------------------------------------------------------------------
//reportCompletedJob
//------------------------------------------------------------------------------
65
66
/**
 * Queues a success report for the given retrieve job.
 * @param successfulRetrieveJob the job; ownership moves into the queued report.
 */
void RecallReportPacker::reportCompletedJob(std::unique_ptr<cta::RetrieveJob> successfulRetrieveJob)
{
  // Build the report before taking the lock so the critical section only
  // covers the push itself.
  std::unique_ptr<Report> successReport(
    new ReportSuccessful(std::move(successfulRetrieveJob)));

  castor::server::MutexLocker producerLock(&m_producterProtection);
  // The fifo receives the raw pointer; the worker thread re-wraps it on pop.
  m_fifo.push(successReport.release());
}
70
71
72
//------------------------------------------------------------------------------
//reportFailedJob
//------------------------------------------------------------------------------  
73
74
/**
 * Queues an error report for the given retrieve job.
 * @param failedRetrieveJob the job; ownership moves into the queued report.
 */
void RecallReportPacker::reportFailedJob(std::unique_ptr<cta::RetrieveJob> failedRetrieveJob)
{
  // Build the report before taking the lock so the critical section only
  // covers the push itself.
  std::unique_ptr<Report> errorReport(
    new ReportError(std::move(failedRetrieveJob)));

  castor::server::MutexLocker producerLock(&m_producterProtection);
  // The fifo receives the raw pointer; the worker thread re-wraps it on pop.
  m_fifo.push(errorReport.release());
}
78
79
80
//------------------------------------------------------------------------------
//reportEndOfSession
//------------------------------------------------------------------------------
81
void RecallReportPacker::reportEndOfSession(){
82
  castor::server::MutexLocker ml(&m_producterProtection);
83
84
  m_fifo.push(new ReportEndofSession());
}
85
86
87
88
89
90
91
92

//------------------------------------------------------------------------------
//setDriveStatus
//------------------------------------------------------------------------------
/**
 * Forwards the drive status to the retrieve mount.
 * @param status the status to pass on.
 */
void RecallReportPacker::setDriveStatus(cta::DriveStatus status)
{
  cta::RetrieveMount& mount = *m_retrieveMount;
  mount.setDriveStatus(status);
}

93
  
94
95
96
//------------------------------------------------------------------------------
//reportEndOfSessionWithErrors
//------------------------------------------------------------------------------
97
void RecallReportPacker::reportEndOfSessionWithErrors(const std::string msg,int error_code){
98
  castor::server::MutexLocker ml(&m_producterProtection);
99
100
  m_fifo.push(new ReportEndofSessionWithErrors(msg,error_code));
}
101

102
103
//------------------------------------------------------------------------------
//ReportSuccessful::execute
104
//------------------------------------------------------------------------------
105
/**
 * Calls complete() on the wrapped retrieve job.
 * @param parent the owning packer; not used by this report type.
 */
void RecallReportPacker::ReportSuccessful::execute(RecallReportPacker& parent)
{
  (void)parent;
  m_successfulRetrieveJob->complete();
}
108

109
110
111
//------------------------------------------------------------------------------
//ReportEndofSession::execute
//------------------------------------------------------------------------------
112
/**
 * Handles an end-of-session report: calls diskComplete() on the mount, logs
 * the outcome and, when a watchdog is attached, records a "status" parameter.
 * @param parent the owning packer, queried for its error state.
 */
void RecallReportPacker::ReportEndofSession::execute(RecallReportPacker& parent)
{
  const bool sessionFailed = parent.errorHappened();

  if (sessionFailed) {
    // Failure path: log first, then signal completion to the mount.
    const std::string failureText("RecallReportPacker::EndofSession has been reported  but an error happened somewhere in the process");
    parent.m_lc.log(LOG_ERR, failureText);
    parent.m_retrieveMount->diskComplete();
  } else {
    // Nominal path: signal completion to the mount, then log.
    parent.m_retrieveMount->diskComplete();
    parent.m_lc.log(LOG_INFO,"Nominal RecallReportPacker::EndofSession has been reported");
  }

  if (!parent.m_watchdog) {
    return;
  }

  if (sessionFailed) {
    parent.m_watchdog->addParameter(log::Param("status","failure"));
  } else {
    parent.m_watchdog->addParameter(log::Param("status","success"));
  }
  // There is a race between the initial process handling this parameter and
  // the end-of-session log that is printed when our process ends. Sleeping
  // half a second here delays the latter.
  usleep(500*1000);
}

//------------------------------------------------------------------------------
//ReportEndofSession::goingToEnd
//------------------------------------------------------------------------------
/**
 * Tells whether this report terminates the worker loop.
 * @param packer the owning packer.
 * @return the value of packer.allThreadsDone().
 */
bool RecallReportPacker::ReportEndofSession::goingToEnd(RecallReportPacker& packer)
{
  const bool everyThreadDone = packer.allThreadsDone();
  return everyThreadDone;
}
144

145
146
147
//------------------------------------------------------------------------------
//ReportEndofSessionWithErrors::execute
//------------------------------------------------------------------------------
148
149
/**
 * Handles an end-of-session-with-errors report: calls diskComplete() on the
 * mount, logs at error level and, when a watchdog is attached, records
 * status=failure.
 * @param parent the owning packer, whose m_errorHappened flag selects the
 *        message that is logged.
 */
void RecallReportPacker::ReportEndofSessionWithErrors::execute(RecallReportPacker& parent){
  if(parent.m_errorHappened) {
    parent.m_retrieveMount->diskComplete();
    // The guard must be a named object: an unnamed temporary would be
    // destroyed at the end of its own statement, i.e. before the log call
    // below, and "errorCode" would never appear in the log line.
    LogContext::ScopedParam errorCodeParam(parent.m_lc,Param("errorCode",m_error_code));
    parent.m_lc.log(LOG_ERR,m_message);
  }
  else{
    // An error end-of-session was requested although no job error was recorded.
    const std::string msg("RecallReportPacker::EndofSessionWithErrors has been reported  but NO error was detected during the process");
    parent.m_lc.log(LOG_ERR,msg);
    parent.m_retrieveMount->diskComplete();
  }
  if (parent.m_watchdog) {
    parent.m_watchdog->addParameter(log::Param("status","failure"));
    // There is a race between the initial process handling this parameter and
    // the end-of-session log that is printed when our process ends. Sleeping
    // half a second here delays the latter.
    usleep(500*1000);
  }
}
167
168
169
170
171
172
173
174

//------------------------------------------------------------------------------
//ReportEndofSessionWithErrors::goingToEnd
//------------------------------------------------------------------------------
/**
 * Tells whether this report terminates the worker loop.
 * @param packer the owning packer.
 * @return the value of packer.allThreadsDone().
 */
bool RecallReportPacker::ReportEndofSessionWithErrors::goingToEnd(RecallReportPacker& packer)
{
  const bool everyThreadDone = packer.allThreadsDone();
  return everyThreadDone;
}

175
176
177
//------------------------------------------------------------------------------
//ReportError::execute
//------------------------------------------------------------------------------
178
179
/**
 * Handles a failed job: flags the packer as having seen an error, logs the
 * job's failure message at error level, then calls failed() on the job.
 * @param parent the owning packer whose error flag and log context are used.
 */
void RecallReportPacker::ReportError::execute(RecallReportPacker& parent)
{
  // Record the error first so it is visible even if a later call throws.
  parent.m_errorHappened = true;

  const auto& failureText = m_failedRetrieveJob->failureMessage;
  parent.m_lc.log(LOG_ERR, failureText);

  m_failedRetrieveJob->failed();
}
183

184
//------------------------------------------------------------------------------
185
186
//WorkerThread::WorkerThread
//------------------------------------------------------------------------------
187
188
189
/**
 * Binds the worker thread to the packer whose fifo it will consume.
 * @param parent the owning packer; stored in m_parent.
 */
RecallReportPacker::WorkerThread::WorkerThread(RecallReportPacker& parent)
  : m_parent(parent)
{
}
190
191
192
//------------------------------------------------------------------------------
//WorkerThread::run
//------------------------------------------------------------------------------
193
void RecallReportPacker::WorkerThread::run(){
194
195
  m_parent.m_lc.pushOrReplace(Param("thread", "RecallReportPacker"));
  m_parent.m_lc.log(LOG_DEBUG, "Starting RecallReportPacker thread");
196
  bool endFound = false;
197
  try{
198
199
200
201
    while(1) {    
      std::unique_ptr<Report> rep(m_parent.m_fifo.pop());    
      rep->execute(m_parent);

202
      if(rep->goingToEnd(m_parent)) {
203
        endFound = true;
204
205
        break;
      }
206
    }
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
  } catch(const castor::exception::Exception& e){
    //we get there because to tried to close the connection and it failed
    //either from the catch a few lines above or directly from rep->execute
    std::stringstream ssEx;
    ssEx << "Tried to report and got a castor exception, cant do much more. The exception is the following: " << e.getMessageValue();
    m_parent.m_lc.log(LOG_ERR, ssEx.str());
    if (m_parent.m_watchdog) {
      m_parent.m_watchdog->addToErrorCount("Error_clientCommunication");
      m_parent.m_watchdog->addParameter(log::Param("status","failure"));
    }
  } catch(const cta::exception::Exception& e){
    //we get there because to tried to close the connection and it failed
    //either from the catch a few lines above or directly from rep->execute
    std::stringstream ssEx;
    ssEx << "Tried to report and got a CTA exception, cant do much more. The exception is the following: " << e.getMessageValue();
    m_parent.m_lc.log(LOG_ERR, ssEx.str());
    if (m_parent.m_watchdog) {
      m_parent.m_watchdog->addToErrorCount("Error_clientCommunication");
      m_parent.m_watchdog->addParameter(log::Param("status","failure"));
    }
  } catch(const std::exception& e){
    //we get there because to tried to close the connection and it failed
    //either from the catch a few lines above or directly from rep->execute
    std::stringstream ssEx;
    ssEx << "Tried to report and got a standard exception, cant do much more. The exception is the following: " << e.what();
    m_parent.m_lc.log(LOG_ERR, ssEx.str());
    if (m_parent.m_watchdog) {
      m_parent.m_watchdog->addToErrorCount("Error_clientCommunication");
      m_parent.m_watchdog->addParameter(log::Param("status","failure"));
    }
  } catch(...){
238
239
    //we get there because to tried to close the connection and it failed
    //either from the catch a few lines above or directly from rep->execute
240
241
242
    std::stringstream ssEx;
    ssEx << "Tried to report and got an unknown exception, cant do much more.";
    m_parent.m_lc.log(LOG_ERR, ssEx.str());
243
244
245
246
    if (m_parent.m_watchdog) {
      m_parent.m_watchdog->addToErrorCount("Error_clientCommunication");
      m_parent.m_watchdog->addParameter(log::Param("status","failure"));
    }
247
  }
248
249
250
251
  // Drain the fifo in case we got an exception
  if (!endFound) {
    while (1) {
      std::unique_ptr<Report> report(m_parent.m_fifo.pop());
252
      if (report->goingToEnd(m_parent))
253
254
255
        break;
    }
  }
256
  m_parent.m_lc.log(LOG_DEBUG, "Finishing RecallReportPacker thread");
257
}
258
259
260
261
262
263
264
265

//------------------------------------------------------------------------------
//errorHappened()
//------------------------------------------------------------------------------
/**
 * Tells whether an error was recorded for this session.
 * @return true if this packer's own error flag is set, or if a watchdog is
 *         attached and its errorHappened() returns true.
 */
bool RecallReportPacker::errorHappened()
{
  if (m_errorHappened) {
    return true;
  }
  if (!m_watchdog) {
    return false;
  }
  return m_watchdog->errorHappened();
}

266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
//------------------------------------------------------------------------------
//setTapeDone()
//------------------------------------------------------------------------------
void RecallReportPacker::setTapeDone() {
  m_tapeThreadComplete = true;
}

//------------------------------------------------------------------------------
//setDiskDone()
//------------------------------------------------------------------------------
void RecallReportPacker::setDiskDone() {
  m_diskThreadComplete = true;
}

//------------------------------------------------------------------------------
//allThreadsDone()
//------------------------------------------------------------------------------
/**
 * @return true only when both the tape thread and the disk thread have been
 *         marked complete through setTapeDone() and setDiskDone().
 */
bool RecallReportPacker::allThreadsDone()
{
  if (!m_tapeThreadComplete) {
    return false;
  }
  return m_diskThreadComplete;
}

287
}}}}