RecallReportPacker.cpp 13.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/******************************************************************************
 *
 * This file is part of the Castor project.
 * See http://castor.web.cern.ch/castor
 *
 * Copyright (C) 2003  CERN
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * 
 *
 * @author Castor Dev team, castor-dev@cern.ch
 *****************************************************************************/

#include "castor/tape/tapeserver/daemon/RecallReportPacker.hpp"
25
#include "castor/tape/tapeserver/daemon/TaskWatchDog.hpp"
26
27
#include "castor/tape/tapegateway/FileRecalledNotificationStruct.hpp"
#include "castor/tape/tapegateway/FileRecalledNotificationStruct.hpp"
28
#include "castor/log/Logger.hpp"
29
#include "log.h"
30
#include <signal.h>
31

32
namespace{
33
  struct failedReportRecallResult : public castor::exception::Exception{
34
35
36
37
    failedReportRecallResult(const std::string& s): Exception(s){}
  };
}

David COME's avatar
David COME committed
38
39
40
using castor::log::LogContext;
using castor::log::Param;

41
42
43
44
namespace castor {
namespace tape {
namespace tapeserver {
namespace daemon {
45
46
47
//------------------------------------------------------------------------------
//Constructor
//------------------------------------------------------------------------------
48
49
/**
 * Builds the packer.
 * @param tg               client interface, forwarded to ReportPackerInterface
 * @param reportFilePeriod number of pending reports that triggers a flush in
 *                         the worker thread
 * @param lc               log context, forwarded to ReportPackerInterface
 * The worker thread object is bound to this packer and the error flag starts
 * cleared.
 */
RecallReportPacker::RecallReportPacker(client::ClientInterface & tg,
    unsigned int reportFilePeriod, log::LogContext lc):
  ReportPackerInterface<detail::Recall>(tg, lc),
  m_workerThread(*this),
  m_reportFilePeriod(reportFilePeriod),
  m_errorHappened(false)
{
}
54
55
56
//------------------------------------------------------------------------------
//Destructor
//------------------------------------------------------------------------------
57
RecallReportPacker::~RecallReportPacker() {
  // Hold the producers' mutex for the duration of the destructor body.
  // NOTE(review): presumably this makes destruction wait for a producer that
  // is still inside one of the report*() methods - confirm.
  castor::server::MutexLocker producerLock(&m_producterProtection);
}
60
61
62
//------------------------------------------------------------------------------
//reportCompletedJob
//------------------------------------------------------------------------------
63
64
65
void RecallReportPacker::reportCompletedJob(const FileStruct& recalledFile,
  u_int32_t checksum, u_int64_t size){
  std::auto_ptr<Report> rep(new ReportSuccessful(recalledFile,checksum,size));
66
  castor::server::MutexLocker ml(&m_producterProtection);
67
68
  m_fifo.push(rep.release());
}
69
70
71
//------------------------------------------------------------------------------
//reportFailedJob
//------------------------------------------------------------------------------  
72
void RecallReportPacker::reportFailedJob(const FileStruct & recalledFile
73
74
,const std::string& msg,int error_code){
  std::auto_ptr<Report> rep(new ReportError(recalledFile,msg,error_code));
75
  castor::server::MutexLocker ml(&m_producterProtection);
76
77
  m_fifo.push(rep.release());
}
78
79
80
//------------------------------------------------------------------------------
//reportEndOfSession
//------------------------------------------------------------------------------
81
void RecallReportPacker::reportEndOfSession(){
82
  castor::server::MutexLocker ml(&m_producterProtection);
83
84
85
  m_fifo.push(new ReportEndofSession());
}
  
86
87
88
//------------------------------------------------------------------------------
//reportEndOfSessionWithErrors
//------------------------------------------------------------------------------
89
void RecallReportPacker::reportEndOfSessionWithErrors(const std::string msg,int error_code){
90
  castor::server::MutexLocker ml(&m_producterProtection);
91
92
  m_fifo.push(new ReportEndofSessionWithErrors(msg,error_code));
}
93

94
95
//------------------------------------------------------------------------------
//ReportSuccessful::execute
96
//------------------------------------------------------------------------------
97
/**
 * Converts this success report into a FileSuccessStruct and appends it to the
 * parent's list of pending reports. Nothing is sent to the client here.
 * @param parent packer whose m_listReports receives the new entry
 */
void RecallReportPacker::ReportSuccessful::execute(RecallReportPacker& parent) {
  std::auto_ptr<FileSuccessStruct> notification(new FileSuccessStruct);

  // Identification of the file, copied from the original request
  notification->setFseq(m_recalledFile.fseq());
  notification->setFileTransactionId(m_recalledFile.fileTransactionId());
  notification->setId(m_recalledFile.id());
  notification->setNshost(m_recalledFile.nshost());
  notification->setFileid(m_recalledFile.fileid());
  notification->setPath(m_recalledFile.path());

  // Result of the recall itself
  notification->setFileSize(m_size);
  // WARNING: the checksum algorithm name is hard-coded
  notification->setChecksumName("adler32");
  notification->setChecksum(m_checksum);

  // The report list takes over the raw pointer
  parent.m_listReports->addSuccessfulRecalls(notification.release());
}
114
115
116
//------------------------------------------------------------------------------
//flush
//------------------------------------------------------------------------------
117
void RecallReportPacker::flush(){
118
119
120
121
122
123
  //we dont want to send empty reports
  unsigned int totalSize = m_listReports->failedRecalls().size() +
                           m_listReports->successfulRecalls().size();
  if(totalSize==0) {
    return;
  }
124
 
125
126
127
128
129
130
131
132
133
134
135
136
  client::ClientInterface::RequestReport chrono;
  try{
    m_client.reportRecallResults(*m_listReports,chrono);
    {
      log::ScopedParamContainer params(m_lc);
      params.add("successCount", m_listReports->successfulRecalls().size())
            .add("failureCount", m_listReports->failedRecalls().size());
      logRequestReport(chrono,"RecallReportList successfully transmitted to client (contents follow)");
    }
    logReport(m_listReports->failedRecalls(),"Reported failed recall to client");
    logReport(m_listReports->successfulRecalls(),"Reported successful recall to client");
  } catch(const castor::exception::Exception& e){
137
    LogContext::ScopedParam s(m_lc, Param("exceptionCode",e.code()));
138
139
    LogContext::ScopedParam ss(m_lc, Param("exceptionMessageValue", e.getMessageValue()));
    LogContext::ScopedParam sss(m_lc, Param("exceptionWhat",e.what()));
140
141
142
    const std::string msg_error="An exception was caught trying to call reportRecallResults";
    m_lc.log(LOG_ERR,msg_error);
    throw failedReportRecallResult(msg_error);
143
144
145
146
  }
  //delete the old pointer and replace it with the new one provided
  //that way, all the reports that have been send are deleted (by FileReportList's destructor)
  m_listReports.reset(new FileReportList);
147
}
148
149
150
//------------------------------------------------------------------------------
//ReportEndofSession::execute
//------------------------------------------------------------------------------
151
/**
 * Tells the client that the session is over.
 * If a failed job was recorded earlier (parent.m_errorHappened), the session
 * is closed with an error (SEINTERNAL); otherwise it is closed normally. When
 * a watchdog is attached, the final status is pushed to it.
 * @param parent packer giving access to the client, log context and watchdog
 */
void RecallReportPacker::ReportEndofSession::execute(RecallReportPacker& parent) {
  client::ClientInterface::RequestReport chrono;

  if (parent.m_errorHappened) {
    // An error was recorded earlier: close the session with an error
    const std::string msg("RecallReportPacker::EndofSession has been reported  but an error happened somewhere in the process");
    parent.m_lc.log(LOG_ERR, msg);
    parent.m_client.reportEndOfSessionWithError(msg, SEINTERNAL, chrono);
    parent.logRequestReport(chrono,"reporting EndOfSessionWithError done",LOG_ERR);
    if (parent.m_watchdog) {
      parent.m_watchdog->addParameter(log::Param("status","failure"));
      // Race between the parent process handling this message and the
      // end-of-session log printed when our process ends: sleep half a
      // second to delay the latter.
      usleep(500*1000);
    }
    return;
  }

  // Nominal end of session
  parent.m_client.reportEndOfSession(chrono);
  parent.logRequestReport(chrono,"Nominal RecallReportPacker::EndofSession has been reported",LOG_INFO);
  if (parent.m_watchdog) {
    parent.m_watchdog->addParameter(log::Param("status","success"));
    // Same race as in the error path above: sleep half a second.
    usleep(500*1000);
  }
}
178
179
180
//------------------------------------------------------------------------------
//ReportEndofSessionWithErrors::execute
//------------------------------------------------------------------------------
181
/**
 * Tells the client that the session ended with an error.
 * If a failed job was recorded earlier (parent.m_errorHappened), the message
 * and error code carried by this report are sent. Otherwise the situation is
 * inconsistent and a generic SEINTERNAL error is sent instead. In both cases
 * the watchdog, when attached, is given a "failure" status.
 * @param parent packer giving access to the client, log context and watchdog
 */
void RecallReportPacker::ReportEndofSessionWithErrors::execute(RecallReportPacker& parent){
  client::ClientInterface::RequestReport chrono;
  if(parent.m_errorHappened) {
    parent.m_client.reportEndOfSessionWithError(m_message,m_error_code,chrono);
    // The scoped parameter must be a named object: an unnamed temporary is
    // destroyed at the end of its own statement, so "errorCode" would never
    // be attached to the log line below.
    LogContext::ScopedParam errorCodeParam(parent.m_lc,Param("errorCode",m_error_code));
    parent.m_lc.log(LOG_ERR,m_message);
  }
  else{
    const std::string msg("RecallReportPacker::EndofSessionWithErrors has been reported  but NO error was detected during the process");
    parent.m_lc.log(LOG_ERR,msg);
    parent.m_client.reportEndOfSessionWithError(msg,SEINTERNAL,chrono);
  }
  if (parent.m_watchdog) {
    parent.m_watchdog->addParameter(log::Param("status","failure"));
    // We have a race condition here between the processing of this message by
    // the initial process and the printing of the end-of-session log, triggered
    // by the end of our process. To delay the latter, we sleep half a second here.
    usleep(500*1000);
  }
}
201
202
203
//------------------------------------------------------------------------------
//ReportError::execute
//------------------------------------------------------------------------------
204
/**
 * Converts this failure report into a FileErrorStruct, appends it to the
 * parent's list of pending reports and raises the parent's error flag.
 * Nothing is sent to the client here.
 * @param parent packer whose m_listReports and m_errorHappened are updated
 */
void RecallReportPacker::ReportError::execute(RecallReportPacker& parent) {
  std::auto_ptr<FileErrorStruct> failureEntry(new FileErrorStruct);

  // What went wrong
  failureEntry->setErrorCode(m_error_code);
  failureEntry->setErrorMessage(m_error_msg);

  // Which file it concerns
  failureEntry->setFseq(m_recalledFile.fseq());
  failureEntry->setFileTransactionId(m_recalledFile.fileTransactionId());
  failureEntry->setId(m_recalledFile.id());
  failureEntry->setNshost(m_recalledFile.nshost());

  // The report list takes over the raw pointer
  parent.m_listReports->addFailedRecalls(failureEntry.release());
  // Remember the failure so that the end of session is reported as an error
  parent.m_errorHappened = true;
}
//------------------------------------------------------------------------------
219
220
//WorkerThread::WorkerThread
//------------------------------------------------------------------------------
221
222
223
/**
 * Binds the worker thread to the packer whose fifo it will consume.
 * @param parent owning packer, kept by reference
 */
RecallReportPacker::WorkerThread::WorkerThread(RecallReportPacker& parent)
  : m_parent(parent)
{
}
224
225
226
//------------------------------------------------------------------------------
//WorkerThread::run
//------------------------------------------------------------------------------
227
void RecallReportPacker::WorkerThread::run(){
228
229
  m_parent.m_lc.pushOrReplace(Param("thread", "RecallReportPacker"));
  m_parent.m_lc.log(LOG_DEBUG, "Starting RecallReportPacker thread");
230
231
232
233
234
  client::ClientInterface::RequestReport chrono;
  try{
      while(1) {    
        std::auto_ptr<Report> rep (m_parent.m_fifo.pop());    
        
235
236
237
238
239
240
241
242
243
244
245
246
247
248
        /*
         * this boolean is only true if it is the last turn of the loop
         * == rep is ReportEndOFSession or ReportEndOFSessionWithError
         */
        bool isItTheEnd = rep->goingToEnd();
        
        /*
         * if it is not the last turn, we want to execute the report 
         * (= insert the file into thr right list of results) BEFORE (trying to) 
         * flush
         */
        if(!isItTheEnd){
          rep->execute(m_parent);
        }
249
        //how mane files we have globally treated 
250
251
        unsigned int totalSize = m_parent.m_listReports->failedRecalls().size() +
                                 m_parent.m_listReports->successfulRecalls().size();
252
        
253
        //If we have enough reports or we are going to end the loop
254
255
256
257
        // or it is the end (== unconditional flush ) 
        // or we bypass the queuing system if the client is readtp
        // then we flush        
        if(totalSize >= m_parent.m_reportFilePeriod || isItTheEnd ||
258
           detail::ReportByFile == m_parent.m_reportBatching)
259
260
261
        {
        
          try{
262
            m_parent.flush();
263
264
          }
          catch(const failedReportRecallResult& e){
265
266
267
            //got there because we failed to report the recall results
            //we have to try to close the connection. 
            //reportEndOfSessionWithError might throw 
268
269
            m_parent.m_client.reportEndOfSessionWithError(e.getMessageValue(),SEINTERNAL,chrono);
            m_parent.logRequestReport(chrono,"Successfully closed client's session after the failed report RecallResult");
270
271
272
273
274
275
            
            // We need to wait until the end of session is signaled from upsteam
            while (!isItTheEnd) {
              std::auto_ptr<Report> r(m_parent.m_fifo.pop());
              isItTheEnd = r->goingToEnd();
            }
276
            break;
277
278
          }
        }
279
        
280
281
282
283
284
285
286
287
         /* 
          * It is the last turn of loop, we are going to send 
          * an EndOfSession (WithError) to the client. We need to have flushed 
          * all leftover BEFORE. Because as soon as we report the end, we can not 
          * report any the longer the success or failure of any job
         */
        if(isItTheEnd) {
          rep->execute(m_parent);
288
289
          break;
        }
290
    }
291
  }
292
  catch(const castor::exception::Exception& e){
293
294
295
    //we get there because to tried to close the connection and it failed
    //either from the catch a few lines above or directly from rep->execute
    m_parent.logRequestReport(chrono,"tried to report endOfSession(WithError) and got an exception, cant do much more",LOG_ERR);
296
297
    m_parent.m_watchdog->addToErrorCount("Error_clientCommunication");
    m_parent.m_watchdog->addParameter(log::Param("status","failure"));
298
  }
299
  m_parent.m_lc.log(LOG_DEBUG, "Finishing RecallReportPacker thread");
300
301
  
  //When we end up there, we might have still 
302
303
}
}}}}