RecallReportPacker.cpp 12.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/******************************************************************************
 *
 * This file is part of the Castor project.
 * See http://castor.web.cern.ch/castor
 *
 * Copyright (C) 2003  CERN
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * 
 *
 * @author Castor Dev team, castor-dev@cern.ch
 *****************************************************************************/

#include "castor/tape/tapeserver/daemon/RecallReportPacker.hpp"
25
#include "castor/tape/tapeserver/daemon/TaskWatchDog.hpp"
26
#include "castor/log/Logger.hpp"
27
28
#include "log.h"
#include "serrno.h"
29

30
#include <signal.h>
31

32
namespace{
33
  struct failedReportRecallResult : public castor::exception::Exception{
34
35
36
37
    failedReportRecallResult(const std::string& s): Exception(s){}
  };
}

David COME's avatar
David COME committed
38
39
40
using castor::log::LogContext;
using castor::log::Param;

41
42
43
44
namespace castor {
namespace tape {
namespace tapeserver {
namespace daemon {
45
46
47
//------------------------------------------------------------------------------
//Constructor
//------------------------------------------------------------------------------
48
/**
 * Builds a packer bound to the given retrieve mount.
 * The worker thread object is constructed with a reference to this packer,
 * and the error and thread-completion flags all start cleared.
 * @param retrieveMount mount that later receives diskComplete()/setDriveStatus()
 * @param lc log context handed to the ReportPackerInterface base
 */
RecallReportPacker::RecallReportPacker(cta::RetrieveMount *retrieveMount, log::LogContext lc):
  ReportPackerInterface<detail::Recall>(lc),
  m_workerThread(*this),
  m_errorHappened(false),
  m_retrieveMount(retrieveMount),
  m_tapeThreadComplete(false),
  m_diskThreadComplete(false)
{
}
55
56
57
//------------------------------------------------------------------------------
//Destructor
//------------------------------------------------------------------------------
58
/**
 * Takes the producer mutex once and releases it on scope exit.
 * NOTE(review): presumably this makes destruction wait for a producer that is
 * still inside one of the report*() calls - confirm against the callers.
 */
RecallReportPacker::~RecallReportPacker() {
  castor::server::MutexLocker producerGuard(&m_producterProtection);
}
61
62
63
//------------------------------------------------------------------------------
//reportCompletedJob
//------------------------------------------------------------------------------
64
65
/**
 * Queues a success report for the given job.
 * The report is built before the producer mutex is taken, so only the push
 * itself happens under the lock.
 * @param successfulRetrieveJob job whose ownership moves into the report
 */
void RecallReportPacker::reportCompletedJob(std::unique_ptr<cta::RetrieveJob> successfulRetrieveJob){
  std::unique_ptr<Report> successReport(
    new ReportSuccessful(std::move(successfulRetrieveJob)));
  castor::server::MutexLocker producerGuard(&m_producterProtection);
  // The queue stores raw pointers; the worker thread re-wraps them on pop.
  m_fifo.push(successReport.release());
}
69
70
71
//------------------------------------------------------------------------------
//reportFailedJob
//------------------------------------------------------------------------------  
72
73
/**
 * Queues an error report for the given job.
 * The report is built before the producer mutex is taken, so only the push
 * itself happens under the lock.
 * @param failedRetrieveJob job whose ownership moves into the report
 */
void RecallReportPacker::reportFailedJob(std::unique_ptr<cta::RetrieveJob> failedRetrieveJob){
  std::unique_ptr<Report> errorReport(
    new ReportError(std::move(failedRetrieveJob)));
  castor::server::MutexLocker producerGuard(&m_producterProtection);
  // The queue stores raw pointers; the worker thread re-wraps them on pop.
  m_fifo.push(errorReport.release());
}
77
78
79
//------------------------------------------------------------------------------
//reportEndOfSession
//------------------------------------------------------------------------------
80
void RecallReportPacker::reportEndOfSession(){
81
  castor::server::MutexLocker ml(&m_producterProtection);
82
83
  m_fifo.push(new ReportEndofSession());
}
84
85
86
87
88
89
90
91

//------------------------------------------------------------------------------
//setDriveStatus
//------------------------------------------------------------------------------
/**
 * Forwards the drive status straight to the retrieve mount.
 * @param status value passed unchanged to m_retrieveMount->setDriveStatus()
 */
void RecallReportPacker::setDriveStatus(cta::DriveStatus status)
{
  m_retrieveMount->setDriveStatus(status);
}

92
  
93
94
95
//------------------------------------------------------------------------------
//reportEndOfSessionWithErrors
//------------------------------------------------------------------------------
96
void RecallReportPacker::reportEndOfSessionWithErrors(const std::string msg,int error_code){
97
  castor::server::MutexLocker ml(&m_producterProtection);
98
99
  m_fifo.push(new ReportEndofSessionWithErrors(msg,error_code));
}
100

101
102
//------------------------------------------------------------------------------
//ReportSuccessful::execute
103
//------------------------------------------------------------------------------
104
/**
 * Marks the wrapped retrieve job as complete.
 * @param parent packer running the report (not used by this report type)
 */
void RecallReportPacker::ReportSuccessful::execute(RecallReportPacker& parent)
{
  m_successfulRetrieveJob->complete();
}
107

108
109
110
//------------------------------------------------------------------------------
//ReportEndofSession::execute
//------------------------------------------------------------------------------
111
/**
 * Closes the disk side of the session and records the final status.
 *
 * parent.errorHappened() is sampled once. Both outcomes call diskComplete()
 * on the retrieve mount; they differ in the log line, in the order of the
 * log and diskComplete() calls, and in the "status" value handed to the
 * watchdog.
 * @param parent packer whose mount, log context and watchdog are used
 */
void RecallReportPacker::ReportEndofSession::execute(RecallReportPacker& parent){
  const bool sessionFailed = parent.errorHappened();

  if (sessionFailed) {
    const std::string msg ="RecallReportPacker::EndofSession has been reported  but an error happened somewhere in the process";
    parent.m_lc.log(LOG_ERR,msg);
    parent.m_retrieveMount->diskComplete();
  } else {
    parent.m_retrieveMount->diskComplete();
    parent.m_lc.log(LOG_INFO,"Nominal RecallReportPacker::EndofSession has been reported");
  }

  // Without a watchdog there is nothing more to report.
  if (!parent.m_watchdog) {
    return;
  }

  if (sessionFailed) {
    parent.m_watchdog->addParameter(log::Param("status","failure"));
  } else {
    parent.m_watchdog->addParameter(log::Param("status","success"));
  }
  // There is a race between the initial process handling this message and the
  // end-of-session log that is printed when our process ends. Sleeping half a
  // second here delays the latter.
  usleep(500*1000);
}

//------------------------------------------------------------------------------
//ReportEndofSession::goingToEnd
//------------------------------------------------------------------------------
/**
 * Flags the disk side as finished and reports whether the packer may stop.
 * @param packer packer whose completion flags are updated and queried
 * @return true when both the tape and the disk flags are set
 */
bool RecallReportPacker::ReportEndofSession::goingToEnd(RecallReportPacker& packer) {
  packer.setDiskDone();
  const bool everyThreadDone = packer.allThreadsDone();
  return everyThreadDone;
}
144

145
146
147
//------------------------------------------------------------------------------
//ReportEndofSessionWithErrors::execute
//------------------------------------------------------------------------------
148
149
/**
 * Closes the disk side of a session that the producer reported as failed.
 *
 * If the packer recorded an error, the stored message is logged at LOG_ERR
 * with the stored error code attached as the "errorCode" parameter. If it
 * did not, the mismatch itself is logged at LOG_ERR. Both paths call
 * diskComplete() on the retrieve mount, and the watchdog (when present) is
 * always given status "failure".
 *
 * NOTE(review): this tests parent.m_errorHappened directly, whereas
 * ReportEndofSession::execute uses parent.errorHappened(), which also asks
 * the watchdog - confirm the difference is intended.
 * @param parent packer whose mount, log context and watchdog are used
 */
void RecallReportPacker::ReportEndofSessionWithErrors::execute(RecallReportPacker& parent){
  if(parent.m_errorHappened) {
    parent.m_retrieveMount->diskComplete();
    // The guard must be a named object: an unnamed temporary is destroyed at
    // the end of its own statement, which would remove "errorCode" from the
    // log context before the log call below is reached.
    LogContext::ScopedParam errorCodeParam(parent.m_lc,Param("errorCode",m_error_code));
    parent.m_lc.log(LOG_ERR,m_message);
  }
  else{
    const std::string msg ="RecallReportPacker::EndofSessionWithErrors has been reported  but NO error was detected during the process";
    parent.m_lc.log(LOG_ERR,msg);
    parent.m_retrieveMount->diskComplete();
  }
  if (parent.m_watchdog) {
    parent.m_watchdog->addParameter(log::Param("status","failure"));
    // There is a race between the initial process handling this message and
    // the end-of-session log that is printed when our process ends. Sleeping
    // half a second here delays the latter.
    usleep(500*1000);
  }
}
167
168
169
170
171
172
173
174
175

//------------------------------------------------------------------------------
//ReportEndofSessionWithErrors::goingToEnd
//------------------------------------------------------------------------------
/**
 * Flags the disk side as finished and reports whether the packer may stop.
 * @param packer packer whose completion flags are updated and queried
 * @return true when both the tape and the disk flags are set
 */
bool RecallReportPacker::ReportEndofSessionWithErrors::goingToEnd(RecallReportPacker& packer) {
  packer.setDiskDone();
  const bool everyThreadDone = packer.allThreadsDone();
  return everyThreadDone;
}

176
177
178
//------------------------------------------------------------------------------
//ReportError::execute
//------------------------------------------------------------------------------
179
180
/**
 * Records a failed job: raises the packer's error flag, logs the job's
 * failure message at LOG_ERR, then marks the job as failed.
 * @param parent packer whose error flag and log context are used
 */
void RecallReportPacker::ReportError::execute(RecallReportPacker& parent)
{
  // Raise the flag first so it is set even if a later call throws.
  parent.m_errorHappened = true;
  parent.m_lc.log(LOG_ERR, m_failedRetrieveJob->failureMessage);
  m_failedRetrieveJob->failed();
}
184

185
//------------------------------------------------------------------------------
186
187
//WorkerThread::WorkerThread
//------------------------------------------------------------------------------
188
189
190
/**
 * Binds the worker to the packer whose queue it will consume.
 * @param parent packer kept by reference for the lifetime of the worker
 */
RecallReportPacker::WorkerThread::WorkerThread(RecallReportPacker& parent)
  : m_parent(parent)
{
}
191
192
193
//------------------------------------------------------------------------------
//WorkerThread::run
//------------------------------------------------------------------------------
194
void RecallReportPacker::WorkerThread::run(){
195
196
  m_parent.m_lc.pushOrReplace(Param("thread", "RecallReportPacker"));
  m_parent.m_lc.log(LOG_DEBUG, "Starting RecallReportPacker thread");
197
  bool endFound = false;
198
  try{
199
200
201
202
    while(1) {    
      std::unique_ptr<Report> rep(m_parent.m_fifo.pop());    
      rep->execute(m_parent);

203
      if(rep->goingToEnd(m_parent)) {
204
        endFound = true;
205
206
        break;
      }
207
    }
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
  } catch(const castor::exception::Exception& e){
    //we get there because to tried to close the connection and it failed
    //either from the catch a few lines above or directly from rep->execute
    std::stringstream ssEx;
    ssEx << "Tried to report and got a castor exception, cant do much more. The exception is the following: " << e.getMessageValue();
    m_parent.m_lc.log(LOG_ERR, ssEx.str());
    if (m_parent.m_watchdog) {
      m_parent.m_watchdog->addToErrorCount("Error_clientCommunication");
      m_parent.m_watchdog->addParameter(log::Param("status","failure"));
    }
  } catch(const cta::exception::Exception& e){
    //we get there because to tried to close the connection and it failed
    //either from the catch a few lines above or directly from rep->execute
    std::stringstream ssEx;
    ssEx << "Tried to report and got a CTA exception, cant do much more. The exception is the following: " << e.getMessageValue();
    m_parent.m_lc.log(LOG_ERR, ssEx.str());
    if (m_parent.m_watchdog) {
      m_parent.m_watchdog->addToErrorCount("Error_clientCommunication");
      m_parent.m_watchdog->addParameter(log::Param("status","failure"));
    }
  } catch(const std::exception& e){
    //we get there because to tried to close the connection and it failed
    //either from the catch a few lines above or directly from rep->execute
    std::stringstream ssEx;
    ssEx << "Tried to report and got a standard exception, cant do much more. The exception is the following: " << e.what();
    m_parent.m_lc.log(LOG_ERR, ssEx.str());
    if (m_parent.m_watchdog) {
      m_parent.m_watchdog->addToErrorCount("Error_clientCommunication");
      m_parent.m_watchdog->addParameter(log::Param("status","failure"));
    }
  } catch(...){
239
240
    //we get there because to tried to close the connection and it failed
    //either from the catch a few lines above or directly from rep->execute
241
242
243
    std::stringstream ssEx;
    ssEx << "Tried to report and got an unknown exception, cant do much more.";
    m_parent.m_lc.log(LOG_ERR, ssEx.str());
244
245
246
247
    if (m_parent.m_watchdog) {
      m_parent.m_watchdog->addToErrorCount("Error_clientCommunication");
      m_parent.m_watchdog->addParameter(log::Param("status","failure"));
    }
248
  }
249
250
251
252
  // Drain the fifo in case we got an exception
  if (!endFound) {
    while (1) {
      std::unique_ptr<Report> report(m_parent.m_fifo.pop());
253
      if (report->goingToEnd(m_parent))
254
255
256
        break;
    }
  }
257
  m_parent.m_lc.log(LOG_DEBUG, "Finishing RecallReportPacker thread");
258
}
259
260
261
262
263
264
265
266

//------------------------------------------------------------------------------
//errorHappened()
//------------------------------------------------------------------------------
/**
 * Tells whether an error was recorded for this session.
 * @return true if the packer's own error flag is set, or if a watchdog is
 *         attached and its errorHappened() returns true
 */
bool RecallReportPacker::errorHappened() {
  if (m_errorHappened) {
    return true;
  }
  // The watchdog is only consulted when the local flag is clear.
  return m_watchdog && m_watchdog->errorHappened();
}

267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
//------------------------------------------------------------------------------
//setTapeDone()
//------------------------------------------------------------------------------
/** Sets the flag recording that the tape thread has completed. */
void RecallReportPacker::setTapeDone()
{
  m_tapeThreadComplete = true;
}

//------------------------------------------------------------------------------
//setDiskDone()
//------------------------------------------------------------------------------
/** Sets the flag recording that the disk thread has completed. */
void RecallReportPacker::setDiskDone()
{
  m_diskThreadComplete = true;
}

//------------------------------------------------------------------------------
//allThreadsDone()
//------------------------------------------------------------------------------
/**
 * Tells whether both completion flags are set.
 * @return true only when the tape and the disk thread flags are both set
 */
bool RecallReportPacker::allThreadsDone() {
  if (!m_tapeThreadComplete) {
    return false;
  }
  return m_diskThreadComplete;
}

288
}}}}