RecallReportPacker.cpp 13.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/******************************************************************************
 *
 * This file is part of the Castor project.
 * See http://castor.web.cern.ch/castor
 *
 * Copyright (C) 2003  CERN
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * 
 *
 * @author Castor Dev team, castor-dev@cern.ch
 *****************************************************************************/

#include "castor/tape/tapeserver/daemon/RecallReportPacker.hpp"
25
#include "castor/tape/tapeserver/daemon/TaskWatchDog.hpp"
26
27
#include "castor/tape/tapegateway/FileRecalledNotificationStruct.hpp"
#include "castor/tape/tapegateway/FileRecalledNotificationStruct.hpp"
28
#include "castor/log/Logger.hpp"
29
30
#include "log.h"
#include "serrno.h"
31

32
#include <signal.h>
33

34
namespace{
35
  struct failedReportRecallResult : public castor::exception::Exception{
36
37
38
39
    failedReportRecallResult(const std::string& s): Exception(s){}
  };
}

David COME's avatar
David COME committed
40
41
42
using castor::log::LogContext;
using castor::log::Param;

43
44
45
46
namespace castor {
namespace tape {
namespace tapeserver {
namespace daemon {
47
48
49
//------------------------------------------------------------------------------
//Constructor
//------------------------------------------------------------------------------
50
51
// Forwards the client interface and the log context to the
// ReportPackerInterface base, binds the worker thread to this packer, stores
// the report period and starts with no error recorded.
RecallReportPacker::RecallReportPacker(client::ClientInterface & tg,
                                       unsigned int reportFilePeriod,
                                       log::LogContext lc)
  : ReportPackerInterface<detail::Recall>(tg, lc),
    m_workerThread(*this),
    m_reportFilePeriod(reportFilePeriod),
    m_errorHappened(false)
{
}
56
57
58
//------------------------------------------------------------------------------
//Destructor
//------------------------------------------------------------------------------
59
// Holds m_producterProtection for the duration of the (otherwise empty)
// destructor body.
// NOTE(review): presumably this waits for a producer that is still inside one
// of the report*() methods before the members get destroyed -- confirm.
RecallReportPacker::~RecallReportPacker()
{
  castor::server::MutexLocker producerGuard(&m_producterProtection);
}
62
63
64
//------------------------------------------------------------------------------
//reportCompletedJob
//------------------------------------------------------------------------------
65
66
67
void RecallReportPacker::reportCompletedJob(const FileStruct& recalledFile,
  u_int32_t checksum, u_int64_t size){
  std::auto_ptr<Report> rep(new ReportSuccessful(recalledFile,checksum,size));
68
  castor::server::MutexLocker ml(&m_producterProtection);
69
70
  m_fifo.push(rep.release());
}
71
72
73
//------------------------------------------------------------------------------
//reportFailedJob
//------------------------------------------------------------------------------  
74
void RecallReportPacker::reportFailedJob(const FileStruct & recalledFile
75
76
,const std::string& msg,int error_code){
  std::auto_ptr<Report> rep(new ReportError(recalledFile,msg,error_code));
77
  castor::server::MutexLocker ml(&m_producterProtection);
78
79
  m_fifo.push(rep.release());
}
80
81
82
//------------------------------------------------------------------------------
//reportEndOfSession
//------------------------------------------------------------------------------
83
void RecallReportPacker::reportEndOfSession(){
84
  castor::server::MutexLocker ml(&m_producterProtection);
85
86
87
  m_fifo.push(new ReportEndofSession());
}
  
88
89
90
//------------------------------------------------------------------------------
//reportEndOfSessionWithErrors
//------------------------------------------------------------------------------
91
void RecallReportPacker::reportEndOfSessionWithErrors(const std::string msg,int error_code){
92
  castor::server::MutexLocker ml(&m_producterProtection);
93
94
  m_fifo.push(new ReportEndofSessionWithErrors(msg,error_code));
}
95

96
97
//------------------------------------------------------------------------------
//ReportSuccessful::execute
98
//------------------------------------------------------------------------------
99
// Turns this successful-recall report into a FileSuccessStruct and appends it
// to the parent's pending report list.
void RecallReportPacker::ReportSuccessful::execute(RecallReportPacker& parent)
{
  std::auto_ptr<FileSuccessStruct> entry(new FileSuccessStruct);
  FileSuccessStruct & fields = *entry;

  // Identification of the file, copied from the recall job
  fields.setFseq(m_recalledFile.fseq());
  fields.setFileTransactionId(m_recalledFile.fileTransactionId());
  fields.setId(m_recalledFile.id());
  fields.setNshost(m_recalledFile.nshost());
  fields.setFileid(m_recalledFile.fileid());
  fields.setPath(m_recalledFile.path());

  // What was actually recalled
  fields.setFileSize(m_size);
  //WARNING : ad hoc name of checksum algorithm
  fields.setChecksumName("adler32");
  fields.setChecksum(m_checksum);

  // The raw pointer is handed over to the report list.
  parent.m_listReports->addSuccessfulRecalls(entry.release());
}
116
117
118
//------------------------------------------------------------------------------
//flush
//------------------------------------------------------------------------------
119
void RecallReportPacker::flush(){
120
121
122
123
124
125
  //we dont want to send empty reports
  unsigned int totalSize = m_listReports->failedRecalls().size() +
                           m_listReports->successfulRecalls().size();
  if(totalSize==0) {
    return;
  }
126
 
127
128
129
130
131
132
133
134
135
136
137
138
  client::ClientInterface::RequestReport chrono;
  try{
    m_client.reportRecallResults(*m_listReports,chrono);
    {
      log::ScopedParamContainer params(m_lc);
      params.add("successCount", m_listReports->successfulRecalls().size())
            .add("failureCount", m_listReports->failedRecalls().size());
      logRequestReport(chrono,"RecallReportList successfully transmitted to client (contents follow)");
    }
    logReport(m_listReports->failedRecalls(),"Reported failed recall to client");
    logReport(m_listReports->successfulRecalls(),"Reported successful recall to client");
  } catch(const castor::exception::Exception& e){
139
    LogContext::ScopedParam s(m_lc, Param("exceptionCode",e.code()));
140
141
    LogContext::ScopedParam ss(m_lc, Param("exceptionMessageValue", e.getMessageValue()));
    LogContext::ScopedParam sss(m_lc, Param("exceptionWhat",e.what()));
142
143
144
    const std::string msg_error="An exception was caught trying to call reportRecallResults";
    m_lc.log(LOG_ERR,msg_error);
    throw failedReportRecallResult(msg_error);
145
146
147
148
  }
  //delete the old pointer and replace it with the new one provided
  //that way, all the reports that have been send are deleted (by FileReportList's destructor)
  m_listReports.reset(new FileReportList);
149
}
150
151
152
//------------------------------------------------------------------------------
//ReportEndofSession::execute
//------------------------------------------------------------------------------
153
// Reports the end of the session to the client. If an error was recorded
// during the session (parent.m_errorHappened), the end of session is reported
// as an error with SEINTERNAL instead. When a watchdog is attached, the final
// status is passed to it.
void RecallReportPacker::ReportEndofSession::execute(RecallReportPacker& parent)
{
  client::ClientInterface::RequestReport chrono;

  if(parent.m_errorHappened){
    // A nominal end was requested but something failed along the way
    const std::string errorText("RecallReportPacker::EndofSession has been reported  but an error happened somewhere in the process");
    parent.m_lc.log(LOG_ERR,errorText);
    parent.m_client.reportEndOfSessionWithError(errorText,SEINTERNAL,chrono);
    parent.logRequestReport(chrono,"reporting EndOfSessionWithError done",LOG_ERR);
    if (parent.m_watchdog) {
      parent.m_watchdog->addParameter(log::Param("status","failure"));
      // There is a race between the processing of this message by the initial
      // process and the printing of the end-of-session log, triggered by the
      // end of our process. Sleeping half a second delays the latter.
      usleep(500*1000);
    }
    return;
  }

  // Nominal case
  parent.m_client.reportEndOfSession(chrono);
  parent.logRequestReport(chrono,"Nominal RecallReportPacker::EndofSession has been reported",LOG_INFO);
  if (parent.m_watchdog) {
    parent.m_watchdog->addParameter(log::Param("status","success"));
    // Same race as in the error case above: sleep half a second so that the
    // status is processed before the end-of-session log is printed.
    usleep(500*1000);
  }
}
180
181
182
//------------------------------------------------------------------------------
//ReportEndofSessionWithErrors::execute
//------------------------------------------------------------------------------
183
// Reports an end of session with error to the client.
// If an error was recorded during the session (parent.m_errorHappened), the
// message and error code stored in this report are sent and logged. Otherwise
// the situation is inconsistent (an end with errors was requested but no error
// was seen): a dedicated message is logged and sent with SEINTERNAL.
// In both cases a "failure" status is passed to the watchdog, if there is one.
void RecallReportPacker::ReportEndofSessionWithErrors::execute(RecallReportPacker& parent){
  client::ClientInterface::RequestReport chrono;
  if(parent.m_errorHappened) {
    parent.m_client.reportEndOfSessionWithError(m_message,m_error_code,chrono);
    // The scoped parameter must be a named object: an unnamed temporary would
    // be destroyed at the end of its own statement, and the error code would
    // be missing from the log line below.
    LogContext::ScopedParam errorCodeParam(parent.m_lc,Param("errorCode",m_error_code));
    parent.m_lc.log(LOG_ERR,m_message);
  }
  else{
    const std::string& msg ="RecallReportPacker::EndofSessionWithErrors has been reported  but NO error was detected during the process";
    parent.m_lc.log(LOG_ERR,msg);
    parent.m_client.reportEndOfSessionWithError(msg,SEINTERNAL,chrono);
  }
  if (parent.m_watchdog) {
    parent.m_watchdog->addParameter(log::Param("status","failure"));
    // We have a race condition here between the processing of this message by
    // the initial process and the printing of the end-of-session log, triggered
    // by the end of our process. To delay the latter, we sleep half a second here.
    usleep(500*1000);
  }
}
203
204
205
//------------------------------------------------------------------------------
//ReportError::execute
//------------------------------------------------------------------------------
206
// Turns this failed-recall report into a FileErrorStruct, appends it to the
// parent's pending report list and records on the parent that an error
// happened during the session.
void RecallReportPacker::ReportError::execute(RecallReportPacker& parent)
{
  std::auto_ptr<FileErrorStruct> entry(new FileErrorStruct);
  FileErrorStruct & fields = *entry;

  // What went wrong
  fields.setErrorCode(m_error_code);
  fields.setErrorMessage(m_error_msg);

  // Identification of the file, copied from the recall job
  fields.setFseq(m_recalledFile.fseq());
  fields.setFileTransactionId(m_recalledFile.fileTransactionId());
  fields.setId(m_recalledFile.id());
  fields.setNshost(m_recalledFile.nshost());

  // The raw pointer is handed over to the report list.
  parent.m_listReports->addFailedRecalls(entry.release());
  parent.m_errorHappened=true;
}
//------------------------------------------------------------------------------
221
222
//WorkerThread::WorkerThread
//------------------------------------------------------------------------------
223
224
225
// Keeps a reference to the packer whose fifo this thread will consume.
RecallReportPacker::WorkerThread::WorkerThread(RecallReportPacker& parent)
  : m_parent(parent)
{
}
226
227
228
//------------------------------------------------------------------------------
//WorkerThread::run
//------------------------------------------------------------------------------
229
void RecallReportPacker::WorkerThread::run(){
230
231
  m_parent.m_lc.pushOrReplace(Param("thread", "RecallReportPacker"));
  m_parent.m_lc.log(LOG_DEBUG, "Starting RecallReportPacker thread");
232
233
234
235
236
  client::ClientInterface::RequestReport chrono;
  try{
      while(1) {    
        std::auto_ptr<Report> rep (m_parent.m_fifo.pop());    
        
237
238
239
240
241
242
243
244
245
246
247
248
249
250
        /*
         * this boolean is only true if it is the last turn of the loop
         * == rep is ReportEndOFSession or ReportEndOFSessionWithError
         */
        bool isItTheEnd = rep->goingToEnd();
        
        /*
         * if it is not the last turn, we want to execute the report 
         * (= insert the file into thr right list of results) BEFORE (trying to) 
         * flush
         */
        if(!isItTheEnd){
          rep->execute(m_parent);
        }
251
        //how mane files we have globally treated 
252
253
        unsigned int totalSize = m_parent.m_listReports->failedRecalls().size() +
                                 m_parent.m_listReports->successfulRecalls().size();
254
        
255
        //If we have enough reports or we are going to end the loop
256
257
258
259
        // or it is the end (== unconditional flush ) 
        // or we bypass the queuing system if the client is readtp
        // then we flush        
        if(totalSize >= m_parent.m_reportFilePeriod || isItTheEnd ||
260
           detail::ReportByFile == m_parent.m_reportBatching)
261
262
263
        {
        
          try{
264
            m_parent.flush();
265
266
          }
          catch(const failedReportRecallResult& e){
267
268
269
            //got there because we failed to report the recall results
            //we have to try to close the connection. 
            //reportEndOfSessionWithError might throw 
270
271
            m_parent.m_client.reportEndOfSessionWithError(e.getMessageValue(),SEINTERNAL,chrono);
            m_parent.logRequestReport(chrono,"Successfully closed client's session after the failed report RecallResult");
272
273
            m_parent.m_watchdog->addToErrorCount("Error_clientCommunication");
            m_parent.m_watchdog->addParameter(log::Param("status","failure"));
274
275
276
277
278
            // We need to wait until the end of session is signaled from upsteam
            while (!isItTheEnd) {
              std::auto_ptr<Report> r(m_parent.m_fifo.pop());
              isItTheEnd = r->goingToEnd();
            }
279
            break;
280
281
          }
        }
282
        
283
284
285
286
287
288
289
290
         /* 
          * It is the last turn of loop, we are going to send 
          * an EndOfSession (WithError) to the client. We need to have flushed 
          * all leftover BEFORE. Because as soon as we report the end, we can not 
          * report any the longer the success or failure of any job
         */
        if(isItTheEnd) {
          rep->execute(m_parent);
291
292
          break;
        }
293
    }
294
  }
295
  catch(const castor::exception::Exception& e){
296
297
298
    //we get there because to tried to close the connection and it failed
    //either from the catch a few lines above or directly from rep->execute
    m_parent.logRequestReport(chrono,"tried to report endOfSession(WithError) and got an exception, cant do much more",LOG_ERR);
299
300
    m_parent.m_watchdog->addToErrorCount("Error_clientCommunication");
    m_parent.m_watchdog->addParameter(log::Param("status","failure"));
301
  }
302
  m_parent.m_lc.log(LOG_DEBUG, "Finishing RecallReportPacker thread");
303
304
  
  //When we end up there, we might have still 
305
306
}
}}}}