oracleTapeGateway.sql 85.1 KB
Newer Older
1
2
3
4
5
6
7
/*******************************************************************
 *
 * PL/SQL code for the tape gateway daemon
 *
 * @author Castor Dev team, castor-dev@cern.ch
 *******************************************************************/

8
9
/* PL/SQL declaration for the castorTape package */
CREATE OR REPLACE PACKAGE castorTape AS 
10
  TYPE TapeGatewayRequest IS RECORD (
11
12
13
14
15
16
17
18
    accessMode INTEGER,
    mountTransactionId NUMBER, 
    vid VARCHAR2(2048));
  TYPE TapeGatewayRequest_Cur IS REF CURSOR RETURN TapeGatewayRequest;
  TYPE VIDPriorityRec IS RECORD (vid VARCHAR2(2048), vdqmPriority INTEGER);
  TYPE VIDPriority_Cur IS REF CURSOR RETURN VIDPriorityRec;
  TYPE FileToMigrateCore IS RECORD (
   fileId NUMBER,
19
   nsHost VARCHAR2(2048),
20
21
22
   lastKnownFileName VARCHAR2(2048),
   filePath VARCHAR2(2048),
   fileTransactionId NUMBER,
23
   fseq INTEGER,
24
   fileSize NUMBER);
25
26
  TYPE FileToMigrateCore_Cur IS REF CURSOR RETURN  FileToMigrateCore;  
END castorTape;
27
/
28

29
/* attach drive request to a recallMount or a migrationMount */
Dennis Waldron's avatar
Dennis Waldron committed
30
CREATE OR REPLACE
31
32
33
34
35
PROCEDURE tg_attachDriveReq(inVID IN VARCHAR2,
                            inVdqmId IN INTEGER,
                            inMode IN INTEGER,
                            inLabel IN VARCHAR2,
                            inDensity IN VARCHAR2) AS
36
BEGIN
37
38
39
40
41
42
43
44
  IF inMode = tconst.WRITE_DISABLE THEN
    UPDATE RecallMount
       SET lastvdqmpingtime   = gettime(),
           mountTransactionId = inVdqmId,
           status             = tconst.RECALLMOUNT_WAITDRIVE,
           label              = inLabel,
           density            = inDensity
     WHERE VID = inVID;
45
  ELSE
46
    UPDATE MigrationMount
47
48
49
50
51
       SET lastvdqmpingtime   = gettime(),
           mountTransactionId = inVdqmId,
           status             = tconst.MIGRATIONMOUNT_WAITDRIVE,
           label              = inLabel,
           density            = inDensity
52
     WHERE VID = inVID;
53
54
55
  END IF;
END;
/
56

57
/* attach the tapes to the migration mounts  */
Dennis Waldron's avatar
Dennis Waldron committed
58
CREATE OR REPLACE
59
PROCEDURE tg_attachTapesToMigMounts (
60
  inStartFseqs IN castor."cnumList",
61
  inMountIds   IN castor."cnumList",
62
63
64
  inTapeVids   IN castor."strList") AS
BEGIN
  -- Sanity check
65
  IF (inStartFseqs.COUNT != inTapeVids.COUNT) THEN
66
67
68
69
    RAISE_APPLICATION_ERROR (-20119,
       'Size mismatch for arrays: inStartFseqs.COUNT='||inStartFseqs.COUNT
       ||' inTapeVids.COUNT='||inTapeVids.COUNT);
  END IF;
70
71
72
73
74
75
76
  FORALL i IN inMountIds.FIRST .. inMountIds.LAST
    UPDATE MigrationMount
       SET VID = inTapeVids(i),
           lastFseq = inStartFseqs(i),
           startTime = getTime(),
           status = tconst.MIGRATIONMOUNT_SEND_TO_VDQM
     WHERE id = inMountIds(i);
77
  COMMIT;
78
79
80
END;
/

81
/* update the db when a tape session is ended */
82
83
CREATE OR REPLACE PROCEDURE tg_endTapeSession(inMountTransactionId IN NUMBER,
                                              inErrorCode IN INTEGER) AS
84
  varMjIds "numList";    -- recall/migration job Ids
85
  varMountId INTEGER;
Steven Murray's avatar
Steven Murray committed
86
BEGIN
87
88
89
90
91
  -- Let's assume this is a migration mount
  SELECT id INTO varMountId
    FROM MigrationMount
   WHERE mountTransactionId = inMountTransactionId
   FOR UPDATE;
92
  -- yes, it's a migration mount: delete it and detach all selected jobs
93
94
95
96
  UPDATE MigrationJob
     SET status = tconst.MIGRATIONJOB_PENDING,
         VID = NULL,
         mountTransactionId = NULL
97
   WHERE mountTransactionId = inMountTransactionId
98
99
100
101
102
103
104
105
106
107
108
     AND status = tconst.MIGRATIONJOB_SELECTED;
  DELETE FROM MigrationMount
   WHERE id = varMountId;
EXCEPTION WHEN NO_DATA_FOUND THEN
  -- was not a migration session, let's try a recall one
  DECLARE
    varVID VARCHAR2(2048);
    varRjIds "numList";
  BEGIN
    SELECT vid INTO varVID
      FROM RecallMount
109
     WHERE mountTransactionId = inMountTransactionId
110
111
112
113
     FOR UPDATE;
    -- it was a recall mount
    -- find and reset the all RecallJobs of files for this VID
    UPDATE RecallJob
114
       SET status = tconst.RECALLJOB_PENDING
115
116
     WHERE castorFile IN (SELECT castorFile
                            FROM RecallJob
117
                           WHERE VID = varVID
118
119
                             AND (status = tconst.RECALLJOB_SELECTED
                               OR status = tconst.RECALLJOB_RETRYMOUNT));
120
121
122
123
    DELETE FROM RecallMount WHERE vid = varVID;
  EXCEPTION WHEN NO_DATA_FOUND THEN
    -- Small infusion of paranoia ;-) We should never reach that point...
    ROLLBACK;
124
    RAISE_APPLICATION_ERROR (-20119, 'endTapeSession: no recall or migration mount found');
125
  END;
126
127
128
END;
/

129
130
131
132
133
134
135
136
137
138
139
/* update the db when a tape session is ended. This autonomous transaction wrapper
 * allow cleanup of leftover sessions when creating new sessions */
CREATE OR REPLACE PROCEDURE tg_endTapeSessionAT(inMountTransactionId IN NUMBER,
                                                inErrorCode IN INTEGER) AS
  PRAGMA AUTONOMOUS_TRANSACTION;
BEGIN
  tg_endTapeSession(inMountTransactionId, inErrorCode);
  COMMIT;
END;
/

140
/* find all migration mounts involving a set of tapes */
141
142
CREATE OR REPLACE PROCEDURE tg_getMigMountReqsForVids(inVids IN strListTable,
                                                      outBlockingSessions OUT SYS_REFCURSOR) AS
143
144
145
146
147
148
149
BEGIN
    OPEN  outBlockingSessions FOR
      SELECT vid TPVID, mountTransactionId VDQMREQID
        FROM MigrationMount
       WHERE vid IN (SELECT * FROM TABLE (inVids));
END;
/
150

151
/* PL/SQL method implementing bestFileSystemForRecall */
152
CREATE OR REPLACE PROCEDURE bestFileSystemForRecall(inCfId IN INTEGER, outFilePath OUT VARCHAR2) AS
153
  varCfId INTEGER;
154
  varFileSystemId NUMBER := 0;
155
156
157
  nb NUMBER;
BEGIN
  -- try and select a good FileSystem for this recall
158
  FOR f IN (SELECT /*+ INDEX_RS_ASC(RecallJob I_RecallJob_Castorfile_VID) */
159
                   DiskServer.name ||':'|| FileSystem.mountPoint AS remotePath, FileSystem.id,
160
                   FileSystem.diskserver, CastorFile.fileSize, CastorFile.fileId, CastorFile.nsHost
161
              FROM DiskServer, FileSystem, DiskPool2SvcClass, CastorFile, RecallJob
162
             WHERE CastorFile.id = inCfId
163
164
               AND RecallJob.castorFile = inCfId
               AND RecallJob.svcclass = DiskPool2SvcClass.child
165
166
167
168
169
170
171
172
173
174
175
               AND FileSystem.diskpool = DiskPool2SvcClass.parent
               -- a priori, we want to have enough free space. However, if we don't, we accept to start writing
               -- if we have a minimum of 30GB free and count on gerbage collection to liberate space while writing
               -- We still check that the file fit on the disk, and actually keep a 30% margin so that very recent
               -- files can be kept
               AND (FileSystem.free - FileSystem.minAllowedFreeSpace * FileSystem.totalSize > CastorFile.fileSize
                 OR (FileSystem.free - FileSystem.minAllowedFreeSpace * FileSystem.totalSize > 30000000000
                 AND FileSystem.totalSize * 0.7 > CastorFile.fileSize))
               AND FileSystem.status = dconst.FILESYSTEM_PRODUCTION
               AND DiskServer.id = FileSystem.diskServer
               AND DiskServer.status = dconst.DISKSERVER_PRODUCTION
176
               AND DiskServer.hwOnline = 1
177
178
179
180
          ORDER BY -- use randomness to scatter recalls everywhere in the pool. This works unless the pool starts to be overloaded:
                   -- once a hot spot develops, recalls start to take longer and longer and thus tend to accumulate. However,
                   -- until we have a faster feedback system to rank filesystems, the fileSystemRate order has not proven to be better.
                   DBMS_Random.value)
181
  LOOP
182
183
184
    varFileSystemId := f.id;
    buildPathFromFileId(f.fileId, f.nsHost, ids_seq.nextval, outFilePath);
    outFilePath := f.remotePath || outFilePath;
185
186
187
188
189
190
    -- Check that we don't already have a copy of this file on this filesystem.
    -- This will never happen in normal operations but may be the case if a filesystem
    -- was disabled and did come back while the tape recall was waiting.
    -- Even if we optimize by cancelling remaining unneeded tape recalls when a
    -- fileSystem comes back, the ones running at the time of the come back will have
    -- the problem.
191
    SELECT /*+ INDEX_RS_ASC(DiskCopy I_DiskCopy_CastorFile) */ count(*) INTO nb
192
      FROM DiskCopy
193
     WHERE fileSystem = f.id
194
       AND castorfile = inCfid
195
       AND status = dconst.DISKCOPY_VALID;
196
    IF nb != 0 THEN
197
      raise_application_error(-20115, 'Recaller could not find a FileSystem in production in the requested SvcClass and without copies of this file');
198
199
200
    END IF;
    RETURN;
  END LOOP;
201
  IF varFileSystemId = 0 THEN
202
203
204
205
206
    raise_application_error(-20115, 'No suitable filesystem found for this recalled file');
  END IF;
END;
/

207

208
/* get the migration mounts without any tape associated */
Dennis Waldron's avatar
Dennis Waldron committed
209
CREATE OR REPLACE
210
PROCEDURE tg_getMigMountsWithoutTapes(outStrList OUT SYS_REFCURSOR) AS
211
BEGIN
212
  -- get migration mounts in state WAITTAPE
213
  OPEN outStrList FOR
214
215
    SELECT M.id, TP.name
      FROM MigrationMount M, Tapepool TP
216
     WHERE M.status = tconst.MIGRATIONMOUNT_WAITTAPE
217
       AND M.tapepool = TP.id 
218
       FOR UPDATE OF M.id SKIP LOCKED;   
219
220
221
END;
/

222
/* get tape with a pending request in VDQM */
Dennis Waldron's avatar
Dennis Waldron committed
223
CREATE OR REPLACE
224
225
PROCEDURE tg_getTapesWithDriveReqs(
  inTimeLimit     IN  NUMBER,
226
  outTapeRequest  OUT castorTape.tapegatewayrequest_Cur) AS
227
  varTgrId        "numList";
228
229
  varRecMountIds  "numList";
  varMigMountIds  "numList";
230
  varNow          NUMBER;
231
BEGIN 
232
233
  -- we only look for the Recall/MigrationMounts which have a VDQM ping
  -- time older than inTimeLimit
234
235
236
  -- No need to query the clock all the time
  varNow := gettime();
  
237
238
239
240
241
  -- Find all the recall mounts and lock
  SELECT id BULK COLLECT INTO varRecMountIds
    FROM RecallMount
   WHERE status IN (tconst.RECALLMOUNT_WAITDRIVE, tconst.RECALLMOUNT_RECALLING)
     AND varNow - lastVdqmPingTime > inTimeLimit
242
243
     FOR UPDATE SKIP LOCKED;
     
244
245
246
  -- Find all the migration mounts and lock
  SELECT id BULK COLLECT INTO varMigMountIds
    FROM MigrationMount
247
   WHERE status IN (tconst.MIGRATIONMOUNT_WAITDRIVE, tconst.MIGRATIONMOUNT_MIGRATING)
248
     AND varNow - lastVdqmPingTime > inTimeLimit
249
250
251
     FOR UPDATE SKIP LOCKED;
     
  -- Update the last VDQM ping time for all of them.
252
253
254
255
  UPDATE RecallMount
     SET lastVdqmPingTime = varNow
   WHERE id IN (SELECT /*+ CARDINALITY(trTable 5) */ * 
                  FROM TABLE (varRecMountIds));
256
257
  UPDATE MigrationMount
     SET lastVdqmPingTime = varNow
258
259
   WHERE id IN (SELECT /*+ CARDINALITY(trTable 5) */ *
                  FROM TABLE (varMigMountIds));
260
                   
261
  -- Return them
262
263
  OPEN outTapeRequest FOR
    -- Read case
264
265
266
267
    SELECT tconst.WRITE_DISABLE, mountTransactionId, VID
      FROM RecallMount
     WHERE id IN (SELECT /*+ CARDINALITY(trTable 5) */ *
                    FROM TABLE(varRecMountIds))
268
269
     UNION ALL
    -- Write case
270
    SELECT tconst.WRITE_ENABLE, mountTransactionId, VID
271
272
273
      FROM MigrationMount
     WHERE id IN (SELECT /*+ CARDINALITY(trTable 5) */ *
                    FROM TABLE(varMigMountIds));
274
275
276
END;
/

277
/* get a the list of tapes to be sent to VDQM */
Dennis Waldron's avatar
Dennis Waldron committed
278
CREATE OR REPLACE
279
280
281
PROCEDURE tg_getTapeWithoutDriveReq(outVID OUT VARCHAR2,
                                    outVdqmPriority OUT INTEGER,
                                    outMode OUT INTEGER) AS
282
BEGIN
283
284
285
286
287
288
289
290
291
292
293
294
  -- try to find a migration mount
  SELECT VID, 0, 1  -- harcoded priority to 0, mode 1 == WRITE_ENABLE
    INTO outVID, outVdqmPriority, outMode
    FROM MigrationMount
   WHERE status = tconst.MIGRATIONMOUNT_SEND_TO_VDQM
     AND ROWNUM < 2
     FOR UPDATE SKIP LOCKED;
EXCEPTION WHEN NO_DATA_FOUND THEN
  -- no migration mount to process, try to find a recall mount
  BEGIN
    SELECT RecallMount.VID, RecallGroup.vdqmPriority, 0  -- mode 0 == WRITE_DISABLE
      INTO outVID, outVdqmPriority, outMode
295
296
297
      FROM RecallMount, RecallGroup
     WHERE RecallMount.status = tconst.RECALLMOUNT_NEW
       AND RecallMount.recallGroup = RecallGroup.id
298
       AND ROWNUM < 2
299
       FOR UPDATE OF RecallMount.id SKIP LOCKED;
300
301
302
303
304
305
  EXCEPTION WHEN NO_DATA_FOUND THEN
    -- no recall mount to process either
    outVID := '';
    outVdqmPriority := 0;
    outMode := 0;
  END;
306
307
308
END;
/

309
/* get tape to release in VMGR */
Dennis Waldron's avatar
Dennis Waldron committed
310
CREATE OR REPLACE
311
PROCEDURE tg_getTapeToRelease(
312
  inMountTransactionId IN  INTEGER, 
313
  outVID      OUT NOCOPY VARCHAR2, 
314
  outMode     OUT INTEGER,
315
  outFull     OUT INTEGER) AS
316
BEGIN
317
318
319
320
321
  -- suppose it's a recall case
  SELECT vid INTO outVID 
    FROM RecallMount
   WHERE mountTransactionId = inMountTransactionId;
  outMode := tconst.WRITE_DISABLE;
Giulia Taurelli's avatar
Giulia Taurelli committed
322
EXCEPTION WHEN NO_DATA_FOUND THEN
323
324
325
326
327
328
329
330
331
332
333
  -- no a recall, then let's suppose it's a migration
  BEGIN
    SELECT vid, full
    INTO outVID, outFull
      FROM MigrationMount
     WHERE mountTransactionId = inMountTransactionId;
    outMode := tconst.WRITE_ENABLE;
  EXCEPTION WHEN NO_DATA_FOUND THEN
    -- must have been already cleaned by the checker
    NULL;
  END;
334
335
336
END;
/

337
/* restart taperequest which had problems */
Dennis Waldron's avatar
Dennis Waldron committed
338
CREATE OR REPLACE
339
PROCEDURE tg_restartLostReqs(inMountTransactionIds IN castor."cnumList") AS
340
BEGIN
341
 FOR i IN inMountTransactionIds.FIRST .. inMountTransactionIds.LAST LOOP   
342
   tg_endTapeSession(inMountTransactionIds(i), 0);
343
344
345
346
347
 END LOOP;
 COMMIT;
END;
/

348
349
350
351
352
353
354
355
356
357
358
359
/* resets a CastorFile, its diskcopies and recall/migrationJobs when it
 * was overwritten in the namespace. This includes :
 *    - updating the CastorFile with the new NS data
 *    - mark current DiskCopies for GC
 *    - restart any pending recalls
 *    - discard any pending migrations
 * XXXX This is a preliminary version of this function that is used only
 * XXXX in the context of overwritten files during recalls. It has to be
 * XXXX completed and tested before any other usage. In particular, is
 * XXXX does not handle the Disk2DiskCopy case
 */
CREATE OR REPLACE PROCEDURE resetOverwrittenCastorFile(inCfId INTEGER,
360
                                                       inNewOpenTime NUMBER,
361
362
363
364
                                                       inNewSize INTEGER) AS
BEGIN
  -- update the Castorfile
  UPDATE CastorFile
365
     SET nsOpenTime = inNewOpenTime,
366
367
368
369
370
371
372
373
         fileSize = inNewSize,
         lastAccessTime = getTime()
   WHERE id = inCfId;
  -- cancel ongoing recalls, if any
  deleteRecallJobs(inCfId);
  -- cancel ongoing migrations, if any
  deleteMigrationJobs(inCfId);
  -- invalidate existing DiskCopies, if any
374
375
376
  UPDATE DiskCopy
     SET status = dconst.DISKCOPY_INVALID,
         gcType = dconst.GCTYPE_OVERWRITTEN
377
   WHERE castorFile = inCfId
378
     AND status = dconst.DISKCOPY_VALID;
379
380
381
382
383
384
385
386
387
  -- restart ongoing requests
  -- Note that we reset the "answered" flag of the subrequest. This will potentially lead to
  -- a wrong attempt to answer again the client (but won't harm as the client is gone in that case)
  -- but is needed as the current implementation of the stager also uses this flag to know
  -- whether to archive the subrequest. If we leave it to 1, the subrequests are wrongly
  -- archived when retried, leading e.g. to failing recalls
  UPDATE SubRequest
     SET status = dconst.SUBREQUEST_RESTART, answered = 0
   WHERE castorFile = inCfId
Giuseppe Lo Presti's avatar
Giuseppe Lo Presti committed
388
     AND status = dconst.SUBREQUEST_WAITTAPERECALL;
389
390
391
END;
/

392
393
394
395
396
397
398
399
400
/* Checks whether repack requests are ongoing for a file and archives them depending on the
 * provided error code.
 * Can be called because of Nameserver errors after recalls or after migrations
 * (cf. failFileMigration and checkRecallInNS).
 */
CREATE OR REPLACE PROCEDURE archiveOrFailRepackSubreq(inCfId INTEGER, inErrorCode INTEGER) AS
  varSrIds "numList";
BEGIN
  -- find and archive any repack subrequest(s)
401
  SELECT /*+ INDEX_RS_ASC(Subrequest I_Subrequest_Castorfile) */
402
403
404
405
         SubRequest.id BULK COLLECT INTO varSrIds
    FROM SubRequest
   WHERE SubRequest.castorfile = inCfId
     AND subrequest.reqType = 119;  -- OBJ_StageRepackRequest
406
  FOR i IN 1 .. varSrIds.COUNT LOOP
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
    -- archive: ENOENT and ENSFILECHG are considered as non-errors in a Repack context (#97529)
    archiveSubReq(varSrIds(i), CASE WHEN inErrorCode IN (serrno.ENOENT, serrno.ENSFILECHG)
      THEN dconst.SUBREQUEST_FINISHED ELSE dconst.SUBREQUEST_FAILED_FINISHED END);
    -- for error reporting
    UPDATE SubRequest
       SET errorCode = inErrorCode,
           errorMessage = CASE
             WHEN inErrorCode IN (serrno.ENOENT, serrno.ENSFILECHG) THEN
               ''
             WHEN inErrorCode = serrno.ENSNOSEG THEN
               'Segment was dropped during repack, skipping'
             WHEN inErrorCode = serrno.ENSTOOMANYSEGS THEN
               'File has too many segments on tape, skipping'
             ELSE
               'Migration failed, reached maximum number of retries'
             END
     WHERE id = varSrIds(i);
  END LOOP;
END;
/

428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
/* Checks whether a recall that was reported successful is ok from the namespace
 * point of view. This includes :
 *   - checking that the file still exists
 *   - checking that the file was not overwritten
 *   - checking the checksum, and setting it if there was none
 * In case one of the check fails, appropriate cleaning actions are taken.
 * Returns whether the checks were all ok. If not, the caller should
 * return immediately as all corrective actions were already taken.
 */
CREATE OR REPLACE FUNCTION checkRecallInNS(inCfId IN INTEGER,
                                           inMountTransactionId IN INTEGER,
                                           inVID IN VARCHAR2,
                                           inCopyNb IN INTEGER,
                                           inFseq IN INTEGER,
                                           inFileId IN INTEGER,
                                           inNsHost IN VARCHAR2,
                                           inCksumName IN VARCHAR2,
                                           inCksumValue IN INTEGER,
446
                                           inLastOpenTime IN NUMBER,
447
448
                                           inReqId IN VARCHAR2,
                                           inLogContext IN VARCHAR2) RETURN BOOLEAN AS
449
  varNSOpenTime NUMBER;
450
  varNSSize INTEGER;
451
452
  varNSCsumtype VARCHAR2(2048);
  varNSCsumvalue VARCHAR2(2048);
453
  varOpenMode CHAR(1);
454
BEGIN
455
456
457
  -- retrieve data from the namespace: note that if stagerTime is (still) NULL,
  -- we're still in compatibility mode and we resolve to using mtime.
  -- To be dropped in 2.1.15 where stagerTime is NOT NULL by design.
458
459
460
461
  -- Note the truncation of stagerTime to 5 digits. This is needed for consistency with
  -- the stager code that uses the OCCI api and thus loses precision when recuperating
  -- 64 bits integers into doubles (lack of support for 64 bits numbers in OCCI)
  SELECT NVL(TRUNC(stagertime,5), mtime), csumtype, csumvalue, filesize
462
463
    INTO varNSOpenTime, varNSCsumtype, varNSCsumvalue, varNSSize
    FROM Cns_File_Metadata@RemoteNS
464
   WHERE fileid = inFileId;
465
466
467
468
469
470
  -- check open mode: in compatibility mode we still have only seconds precision,
  -- hence the NS open time has to be truncated prior to comparing it with our time.
  varOpenMode := getConfigOption@RemoteNS('stager', 'openmode', NULL);
  IF varOpenMode = 'C' THEN
    varNSOpenTime := TRUNC(varNSOpenTime);
  END IF;
471
  -- was the file overwritten in the meantime ?
472
  IF varNSOpenTime > inLastOpenTime THEN
473
    -- yes ! reset it and thus restart the recall from scratch
474
    resetOverwrittenCastorFile(inCfId, varNSOpenTime, varNSSize);
475
476
477
478
    -- in case of repack, just stop and archive the corresponding request(s) as we're not interested
    -- any longer (the original segment disappeared). This potentially stops the entire recall process.
    archiveOrFailRepackSubreq(inCfId, serrno.ENSFILECHG);
    -- log "setFileRecalled : file was overwritten during recall, restarting from scratch or skipping repack"
479
    logToDLF(inReqId, dlf.LVL_NOTICE, dlf.RECALL_FILE_OVERWRITTEN, inFileId, inNsHost, 'tapegatewayd',
480
             'mountTransactionId=' || TO_CHAR(inMountTransactionId) || ' TPVID=' || inVID ||
481
482
             ' fseq=' || TO_CHAR(inFseq) || ' NSOpenTime=' || TRUNC(varNSOpenTime, 6) ||
             ' NsOpenTimeAtStager=' || TRUNC(inLastOpenTime, 6) ||' '|| inLogContext);
483
484
485
486
487
488
489
    RETURN FALSE;
  END IF;

  -- is the checksum set in the namespace ?
  IF varNSCsumtype IS NULL THEN
    -- no -> let's set it (note that the function called commits in the remote DB)
    setSegChecksumWhenNull@remoteNS(inFileId, inCopyNb, inCksumName, inCksumValue);
490
    -- log 'checkRecallInNS : created missing checksum in the namespace'
491
    logToDLF(inReqId, dlf.LVL_SYSTEM, dlf.RECALL_CREATED_CHECKSUM, inFileId, inNsHost, 'nsd',
492
493
             'mountTransactionId=' || TO_CHAR(inMountTransactionId) || ' copyNb=' || TO_CHAR(inCopyNb) ||
             ' TPVID=' || inVID || ' fseq=' || TO_CHAR(inFseq) || ' checksumType='  || inCksumName ||
494
495
496
497
498
             ' checksumValue=' || TO_CHAR(inCksumValue));
  ELSE
    -- is the checksum matching ?
    -- note that this is probably useless as it was already checked at transfer time
    IF inCksumName = varNSCsumtype AND TO_CHAR(inCksumValue, 'XXXXXXXX') != varNSCsumvalue THEN
499
      -- not matching ! log "checkRecallInNS : bad checksum detected, will retry if allowed"
500
      logToDLF(inReqId, dlf.LVL_ERROR, dlf.RECALL_BAD_CHECKSUM, inFileId, inNsHost, 'tapegatewayd',
501
               'mountTransactionId=' || TO_CHAR(inMountTransactionId) || ' TPVID=' || inVID ||
502
               ' fseq=' || TO_CHAR(inFseq) || ' copyNb=' || TO_CHAR(inCopyNb) || ' checksumType=' || inCksumName ||
503
               ' expectedChecksumValue=' || varNSCsumvalue ||
504
               ' checksumValue=' || TO_CHAR(inCksumValue, 'XXXXXXXX') ||' '|| inLogContext);
505
      retryOrFailRecall(inCfId, inVID, inReqId, inLogContext);
506
507
508
509
510
      RETURN FALSE;
    END IF;
  END IF;
  RETURN TRUE;
EXCEPTION WHEN NO_DATA_FOUND THEN
511
  -- file got dropped from the namespace, recall should be cancelled
512
  deleteRecallJobs(inCfId);
513
514
515
  -- potentially terminate repack requests
  archiveOrFailRepackSubreq(inCfId, serrno.ENOENT);
  -- and fail remaining requests
516
517
  UPDATE SubRequest
       SET status = dconst.SUBREQUEST_FAILED,
518
           errorCode = serrno.ENOENT,
519
           errorMessage = 'File was removed during recall'
520
521
     WHERE castorFile = inCfId
       AND status = dconst.SUBREQUEST_WAITTAPERECALL;
522
  -- log "checkRecallInNS : file was dropped from namespace during recall, giving up"
523
  logToDLF(inReqId, dlf.LVL_NOTICE, dlf.RECALL_FILE_DROPPED, inFileId, inNsHost, 'tapegatewayd',
524
           'mountTransactionId=' || TO_CHAR(inMountTransactionId) || ' TPVID=' || inVID ||
525
           ' fseq=' || TO_CHAR(inFseq) || ' CFLastOpenTime=' || TO_CHAR(inLastOpenTime) || ' ' || inLogContext);
526
527
528
529
  RETURN FALSE;
END;
/

530
/* update the db after a successful recall */
531
532
533
534
CREATE OR REPLACE PROCEDURE tg_setFileRecalled(inMountTransactionId IN INTEGER,
                                               inFseq IN INTEGER,
                                               inFilePath IN VARCHAR2,
                                               inCksumName IN VARCHAR2,
535
536
537
                                               inCksumValue IN INTEGER,
                                               inReqId IN VARCHAR2,
                                               inLogContext IN VARCHAR2) AS
538
539
540
541
542
543
544
  varFileId         INTEGER;
  varNsHost         VARCHAR2(2048);
  varVID            VARCHAR2(2048);
  varCopyNb         INTEGER;
  varSvcClassId     INTEGER;
  varEuid           INTEGER;
  varEgid           INTEGER;
545
  varLastOpenTime   NUMBER;
546
547
548
549
550
  varCfId           INTEGER;
  varFSId           INTEGER;
  varDCPath         VARCHAR2(2048);
  varDcId           INTEGER;
  varFileSize       INTEGER;
551
  varFileClassId    INTEGER;
552
  varNbMigrationsStarted INTEGER;
553
554
  varGcWeight       NUMBER;
  varGcWeightProc   VARCHAR2(2048);
555
  varRecallStartTime NUMBER;
556
BEGIN
557
558
559
560
561
562
563
564
565
566
  -- get diskserver, filesystem and path from full path in input
  BEGIN
    parsePath(inFilePath, varFSId, varDCPath, varDCId, varFileId, varNsHost);
  EXCEPTION WHEN NO_DATA_FOUND THEN
    -- log "setFileRecalled : unable to parse input path. giving up"
    logToDLF(inReqId, dlf.LVL_ERROR, dlf.RECALL_INVALID_PATH, 0, '', 'tapegatewayd',
             'mountTransactionId=' || TO_CHAR(inMountTransactionId) || ' TPVID=' || varVID ||
             ' fseq=' || TO_CHAR(inFseq) || ' filePath=' || inFilePath || ' ' || inLogContext);
    RETURN;
  END;
567

568
  -- first lock Castorfile, check NS and parse path
569
  -- Get RecallJob and lock Castorfile
570
  BEGIN
571
    SELECT CastorFile.id, CastorFile.fileId, CastorFile.nsHost, CastorFile.nsOpenTime,
572
           CastorFile.fileSize, CastorFile.fileClass, RecallMount.VID, RecallJob.copyNb,
573
           RecallJob.euid, RecallJob.egid
574
      INTO varCfId, varFileId, varNsHost, varLastOpenTime, varFileSize, varFileClassId, varVID,
575
           varCopyNb, varEuid, varEgid
576
577
578
579
      FROM RecallMount, RecallJob, CastorFile
     WHERE RecallMount.mountTransactionId = inMountTransactionId
       AND RecallJob.vid = RecallMount.vid
       AND RecallJob.fseq = inFseq
580
581
       AND (RecallJob.status = tconst.RECALLJOB_SELECTED
         OR RecallJob.status = tconst.tconst.RECALLJOB_SELECTED2NDCOPY)
582
       AND RecallJob.castorFile = CastorFile.id
583
       AND ROWNUM < 2
584
       FOR UPDATE OF CastorFile.id;
585
586
587
588
589
590
591
592
    -- the ROWNUM < 2 clause is worth a comment here :
    -- this select will select a single CastorFile and RecallMount, but may select
    -- several RecallJobs "linked" to them. All these recall jobs have the same copyNb
    -- but different uid/gid. They exist because these different uid/gid are attached
    -- to different recallGroups.
    -- In case of several recallJobs present, they are all equally responsible for the
    -- recall, thus we pick the first one as "the" responsible. The only consequence is
    -- that it's uid/gid will be used for the DiskCopy creation
593
594
  EXCEPTION WHEN NO_DATA_FOUND THEN
    -- log "setFileRecalled : unable to identify Recall. giving up"
595
    logToDLF(inReqId, dlf.LVL_ERROR, dlf.RECALL_NOT_FOUND, varFileId, varNsHost, 'tapegatewayd',
596
             'mountTransactionId=' || TO_CHAR(inMountTransactionId) ||
597
             ' fseq=' || TO_CHAR(inFseq) || ' filePath=' || inFilePath || ' ' || inLogContext);
598
599
600
    RETURN;
  END;

601
602
  -- Deal with the DiskCopy: it is created now as the recall is effectively over. The subsequent
  -- check in the NS may make it INVALID, which is fine as opposed to forget about it and generating dark data.
603
604

  -- compute GC weight of the recalled diskcopy
605
606
607
608
609
610
611
612
613
614
615
  -- first get the svcClass
  SELECT Diskpool2SvcClass.child INTO varSvcClassId
    FROM Diskpool2SvcClass, FileSystem
   WHERE FileSystem.id = varFSId
     AND Diskpool2SvcClass.parent = FileSystem.diskPool
     AND ROWNUM < 2;
  -- Again, the ROWNUM < 2 is worth a comment : the diskpool may be attached
  -- to several svcClasses. However, we do not support that these different
  -- SvcClasses have different GC policies (actually the GC policy should be
  -- moved to the DiskPool table in the future). Thus it is safe to take any
  -- SvcClass from the list
616
617
618
  varGcWeightProc := castorGC.getRecallWeight(varSvcClassId);
  EXECUTE IMMEDIATE 'BEGIN :newGcw := ' || varGcWeightProc || '(:size); END;'
    USING OUT varGcWeight, IN varFileSize;
619
620
621
622
623
624
625
626
627
628
629
  -- create the DiskCopy, after getting how many copies on tape we have, for the importance number
  DECLARE
    varNbCopiesOnTape INTEGER;
  BEGIN
    SELECT nbCopies INTO varNbCopiesOnTape FROM FileClass WHERE id = varFileClassId;
    INSERT INTO DiskCopy (path, gcWeight, creationTime, lastAccessTime, diskCopySize, nbCopyAccesses,
                          ownerUid, ownerGid, id, gcType, fileSystem, castorFile, status, importance)
    VALUES (varDCPath, varGcWeight, getTime(), getTime(), varFileSize, 0,
            varEuid, varEgid, varDCId, NULL, varFSId, varCfId, dconst.DISKCOPY_VALID,
            -1-varNbCopiesOnTape*100);
  END;
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652

  -- Check that the file is still there in the namespace (and did not get overwritten)
  -- Note that error handling and logging is done inside the function
  IF NOT checkRecallInNS(varCfId, inMountTransactionId, varVID, varCopyNb, inFseq, varFileId, varNsHost,
                         inCksumName, inCksumValue, varLastOpenTime, inReqId, inLogContext) THEN
    RETURN;
  END IF;

  -- Then deal with recalljobs and potential migrationJobs
  -- Find out starting time of oldest recall for logging purposes
  SELECT MIN(creationTime) INTO varRecallStartTime FROM RecallJob WHERE castorFile = varCfId;
  -- Delete recall jobs
  DELETE FROM RecallJob WHERE castorFile = varCfId;
  -- trigger waiting migrations if any
  -- Note that we reset the creation time as if the MigrationJob was created right now
  -- this is because "creationTime" is actually the time of entering the "PENDING" state
  -- in the cases where the migrationJob went through a WAITINGONRECALL state
  UPDATE /*+ INDEX_RS_ASC (MigrationJob I_MigrationJob_CFVID) */ MigrationJob
     SET status = tconst.MIGRATIONJOB_PENDING,
         creationTime = getTime()
   WHERE status = tconst.MIGRATIONJOB_WAITINGONRECALL
     AND castorFile = varCfId;
  varNbMigrationsStarted := SQL%ROWCOUNT;
653
654
655
656
657
658
659
660
  -- in case there are migrations, update CastorFile's tapeStatus to NOTONTAPE, otherwise it is ONTAPE
  UPDATE CastorFile
     SET tapeStatus = CASE varNbMigrationsStarted
                        WHEN 0
                        THEN dconst.CASTORFILE_ONTAPE
                        ELSE dconst.CASTORFILE_NOTONTAPE
                      END
   WHERE id = varCfId;
661
662
663

  -- Finally deal with user requests
  UPDATE SubRequest
664
665
666
667
     SET status = decode(reqType,
                         119, dconst.SUBREQUEST_REPACK, -- repack case
                         dconst.SUBREQUEST_RESTART),    -- standard case
         getNextStatus = dconst.GETNEXTSTATUS_FILESTAGED,
668
669
670
671
         lastModificationTime = getTime()
   WHERE castorFile = varCfId
     AND status = dconst.SUBREQUEST_WAITTAPERECALL;

672
  -- trigger the creation of additional copies of the file, if necessary.
673
  replicateOnClose(varCfId, varEuid, varEgid);
674
675

  -- log success
676
  logToDLF(inReqId, dlf.LVL_SYSTEM, dlf.RECALL_COMPLETED_DB, varFileId, varNsHost, 'tapegatewayd',
677
           'mountTransactionId=' || TO_CHAR(inMountTransactionId) || ' TPVID=' || varVID ||
678
679
           ' fseq=' || TO_CHAR(inFseq) || ' filePath=' || inFilePath || ' recallTime=' ||
           to_char(trunc(getTime() - varRecallStartTime, 0)) || ' ' || inLogContext);
680
681
682
END;
/

683
/* Attempt to retry a recall. Fail it in case it should not be retried anymore */
684
685
686
687
CREATE OR REPLACE PROCEDURE retryOrFailRecall(inCfId IN NUMBER, inVID IN VARCHAR2,
                                              inReqId IN VARCHAR2, inLogContext IN VARCHAR2) AS
  varFileId INTEGER;
  varNsHost VARCHAR2(2048);
688
  varRecallStillAlive INTEGER;
689
BEGIN
690
  -- lock castorFile
691
692
  SELECT fileId, nsHost INTO varFileId, varNsHost
    FROM CastorFile WHERE id = inCfId FOR UPDATE;
693
694
695
  -- increase retry counters within mount and set recallJob status to NEW
  UPDATE RecallJob
     SET nbRetriesWithinMount = nbRetriesWithinMount + 1,
696
         status = tconst.RECALLJOB_PENDING
697
698
699
700
701
702
703
704
705
706
707
   WHERE castorFile = inCfId
     AND VID = inVID;
  -- detect the RecallJobs with too many retries within this mount
  -- mark them for a retry on next mount
  UPDATE RecallJob
     SET nbRetriesWithinMount = 0,
         nbMounts = nbMounts + 1,
         status = tconst.RECALLJOB_RETRYMOUNT
   WHERE castorFile = inCfId
     AND VID = inVID
     AND nbRetriesWithinMount >= TO_NUMBER(getConfigOption('Recall', 'MaxNbRetriesWithinMount', 2));
708
  -- stop here if no recallJob was concerned
709
710
711
712
713
714
715
  IF SQL%ROWCOUNT = 0 THEN RETURN; END IF;
  -- detect RecallJobs with too many mounts
  DELETE RecallJob
   WHERE castorFile = inCfId
     AND VID = inVID
     AND nbMounts >= TO_NUMBER(getConfigOption('Recall', 'MaxNbMounts', 3));
  -- check whether other RecallJobs are still around for this file (other copies on tape)
716
717
  SELECT /*+ INDEX_RS_ASC(RecallJob I_RecallJob_CastorFile_VID) */
         count(*) INTO varRecallStillAlive
718
719
720
721
722
    FROM RecallJob
   WHERE castorFile = inCfId
     AND ROWNUM < 2;
  -- if no remaining recallJobs, the subrequests are failed
  IF varRecallStillAlive = 0 THEN
723
    UPDATE /*+ INDEX_RS_ASC(Subrequest I_Subrequest_Castorfile) */ SubRequest 
724
725
       SET status = dconst.SUBREQUEST_FAILED,
           lastModificationTime = getTime(),
726
           errorCode = serrno.SEINTERNAL,
727
728
729
           errorMessage = 'File recall from tape has failed, please try again later'
     WHERE castorFile = inCfId 
       AND status = dconst.SUBREQUEST_WAITTAPERECALL;
730
731
732
     -- log 'File recall has permanently failed'
    logToDLF(inReqId, dlf.LVL_ERROR, dlf.RECALL_PERMANENTLY_FAILED, varFileId, varNsHost,
      'tapegatewayd', ' TPVID=' || inVID ||' '|| inLogContext);
733
734
735
736
  END IF;
END;
/

737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
/* Attempt to retry a migration. Fail it in case it should not be retried anymore */
CREATE OR REPLACE PROCEDURE retryOrFailMigration(inMountTrId IN NUMBER, inFileId IN VARCHAR2, inNsHost IN VARCHAR2,
                                                 inErrorCode IN NUMBER, inReqId IN VARCHAR2) AS
  varFileTrId NUMBER;
BEGIN
  -- For the time being, we ignore the error code and apply the same policy to any
  -- tape-side error. Note that NS errors like ENOENT are caught at a second stage and never retried.
  -- Check if a new retry is allowed
  UPDATE (
    SELECT nbRetries, status, vid, mountTransactionId, fileTransactionId
      FROM MigrationJob MJ, CastorFile CF
     WHERE mountTransactionId = inMountTrId
       AND MJ.castorFile = CF.id
       AND CF.fileId = inFileId
       AND CF.nsHost = inNsHost
752
       AND nbRetries <= TO_NUMBER(getConfigOption('Migration', 'MaxNbMounts', 7)))
753
754
755
756
757
758
    SET nbRetries = nbRetries + 1,
        status = tconst.MIGRATIONJOB_PENDING,
        vid = NULL,
        mountTransactionId = NULL
    RETURNING fileTransactionId INTO varFileTrId;
  IF SQL%ROWCOUNT = 0 THEN
759
    -- Nb of retries exceeded or migration job not found, fail migration
760
    failFileMigration(inMountTrId, inFileId, inErrorCode, inReqId);
761
  -- ELSE we have one more retry, which has been logged upstream
762
763
764
765
  END IF;
END;
/

766
767

/* update the db when a tape session is started */
Dennis Waldron's avatar
Dennis Waldron committed
768
CREATE OR REPLACE
769
770
771
772
773
774
PROCEDURE tg_startTapeSession(inMountTransactionId IN NUMBER,
                              outVid        OUT NOCOPY VARCHAR2,
                              outAccessMode OUT INTEGER,
                              outRet        OUT INTEGER,
                              outDensity    OUT NOCOPY VARCHAR2,
                              outLabel      OUT NOCOPY VARCHAR2) AS
775
  varUnused   NUMBER;
776
  varTapePool INTEGER;
777
BEGIN
778
779
780
781
782
783
784
785
786
787
  outRet := 0;
  -- try to deal with a read case
  UPDATE RecallMount
     SET status = tconst.RECALLMOUNT_RECALLING
   WHERE mountTransactionId = inMountTransactionId
  RETURNING VID, tconst.WRITE_DISABLE, 0, density, label
    INTO outVid, outAccessMode, outRet, outDensity, outLabel;
  IF SQL%ROWCOUNT > 0 THEN
    -- it is a read case
    -- check whether there is something to do
788
    BEGIN
789
      SELECT id INTO varUnused FROM RecallJob WHERE VID=outVID AND ROWNUM < 2;
790
    EXCEPTION WHEN NO_DATA_FOUND THEN
791
792
793
794
795
      -- no more file to recall. Force the cleanup and return -1
      UPDATE RecallMount
         SET lastvdqmpingtime = 0
       WHERE mountTransactionId = inMountTransactionId;
      outRet:=-1;
796
    END;
797
798
  ELSE
    -- not a read, so it should be a write
799
800
    UPDATE MigrationMount
       SET status = tconst.MIGRATIONMOUNT_MIGRATING
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
     WHERE mountTransactionId = inMountTransactionId
    RETURNING VID, tconst.WRITE_ENABLE, 0, density, label, tapePool
    INTO outVid, outAccessMode, outRet, outDensity, outLabel, varTapePool;
    IF SQL%ROWCOUNT > 0 THEN
      -- it is a write case
      -- check whether there is something to do
      BEGIN
        SELECT id INTO varUnused FROM MigrationJob WHERE tapePool=varTapePool AND ROWNUM < 2;
      EXCEPTION WHEN NO_DATA_FOUND THEN
        -- no more file to migrate. Force the cleanup and return -1
        UPDATE MigrationMount
           SET lastvdqmpingtime = 0
         WHERE mountTransactionId = inMountTransactionId;
        outRet:=-1;
      END;
    ELSE
      -- it was neither a read nor a write -> not found error.
      outRet:=-2; -- UNKNOWN request
    END IF;
820
821
822
823
  END IF;
END;
/

824
/* delete MigrationMount */
825
CREATE OR REPLACE PROCEDURE tg_deleteMigrationMount(inMountId IN NUMBER) AS
Steven Murray's avatar
Steven Murray committed
826
BEGIN
827
  DELETE FROM MigrationMount WHERE id=inMountId;
Steven Murray's avatar
Steven Murray committed
828
END;
Giulia Taurelli's avatar
Giulia Taurelli committed
829
/
Steven Murray's avatar
Steven Murray committed
830

831

832
833
834
835
836
837
838
839
840
/* fail recall of a given CastorFile for a non existing tape */
CREATE OR REPLACE PROCEDURE cancelRecallForCFAndVID(inCfId IN INTEGER,
                                                    inVID IN VARCHAR2,
                                                    inErrorCode IN INTEGER,
                                                    inErrorMsg IN VARCHAR2) AS
  PRAGMA AUTONOMOUS_TRANSACTION;
  varNbRecalls INTEGER;
  varFileId INTEGER;
  varNsHost VARCHAR2(2048);
Giulia Taurelli's avatar
Giulia Taurelli committed
841
BEGIN
842
843
844
845
846
847
848
849
850
851
852
  -- lock castorFile, skip if it's missing
  -- (it may have disappeared in the mean time as we held no lock)
  BEGIN
    SELECT fileid, nsHost INTO varFileId, varNsHost
      FROM CastorFile
     WHERE id = inCfId
       FOR UPDATE;
  EXCEPTION
    WHEN NO_DATA_FOUND THEN RETURN;
  END;
  -- log "Canceling RecallJobs for given VID"
853
  logToDLF(NULL, dlf.LVL_SYSTEM, dlf.RECALL_CANCEL_RECALLJOB_VID, varFileId, varNsHost, 'tapegatewayd',
854
           'errorCode=' || TO_CHAR(inErrorCode) ||
855
856
           ' errorMessage="' || inErrorMsg ||
           '" TPVID=' || inVID);
857
858
859
  -- remove recallJobs that need the non existing tape
  DELETE FROM RecallJob WHERE castorfile = inCfId AND VID=inVID;
  -- check if other recallJobs remain (typically dual copy tapes)
860
861
862
  SELECT /*+ INDEX_RS_ASC(RecallJob I_RecallJob_CastorFile_VID) */
         count(*) INTO varNbRecalls
    FROM RecallJob WHERE castorfile = inCfId;
863
864
865
866
867
  -- if no remaining recalls, fail requests and cleanup
  IF varNbRecalls = 0 THEN
    -- log "Failing Recall(s)"
    logToDLF(NULL, dlf.LVL_ERROR, dlf.RECALL_FAILING, varFileId, varNsHost, 'tapegatewayd',
             'errorCode=' || TO_CHAR(inErrorCode) ||
868
869
             ' errorMessage="' || inErrorMsg ||
             '" TPVID=' || inVID);
870
871
872
    -- delete potential migration jobs waiting on recalls
    deleteMigrationJobsForRecall(inCfId);
    -- Fail the associated subrequest(s)
873
    UPDATE /*+ INDEX_RS_ASC(SR I_Subrequest_Castorfile)*/ SubRequest SR
874
875
876
       SET SR.status = dconst.SUBREQUEST_FAILED,
           SR.getNextStatus = dconst.GETNEXTSTATUS_FILESTAGED, --  (not strictly correct but the request is over anyway)
           SR.lastModificationTime = getTime(),
877
           SR.errorCode = serrno.SEINTERNAL,
878
           SR.errorMessage = 'File recall from tape has failed (tape not available), please try again later'
879
880
881
882
883
884
885
886
     WHERE SR.castorFile = inCfId
       AND SR.status IN (dconst.SUBREQUEST_WAITTAPERECALL, dconst.SUBREQUEST_WAITSUBREQ);
  END IF;
  -- commit
  COMMIT;
END;
/

887
888
889
890
/* Cancel a tape session before startup e.g. in case of a VMGR errors when checking the tape.
 * Not to be called on a running session as the procedure assumes no running migration/recall job
 * is attached to the session being deleted.
 */
891
892
893
894
895
896
CREATE OR REPLACE PROCEDURE cancelMigrationOrRecall(inMode IN INTEGER,
                                                    inVID IN VARCHAR2,
                                                    inErrorCode IN INTEGER,
                                                    inErrorMsg IN VARCHAR2) AS
BEGIN
  IF inMode = tconst.WRITE_ENABLE THEN
897
    -- cancel the migration. No job has been attached yet
898
    DELETE FROM MigrationMount WHERE VID = inVID;
899
  ELSE
900
901
902
903
904
905
906
    -- cancel the recall
    DELETE FROM RecallMount WHERE VID = inVID;
    -- fail the recalls of all files that waited for this tape
    FOR file IN (SELECT castorFile FROM RecallJob WHERE VID = inVID) LOOP
      -- note that this call commits
      cancelRecallForCFAndVID(file.castorFile, inVID, inErrorCode, inErrorMsg);
    END LOOP;
Giulia Taurelli's avatar
Giulia Taurelli committed
907
908
909
910
  END IF;
END;
/

911
/* flag tape as full for a given session */
912
CREATE OR REPLACE PROCEDURE tg_flagTapeFull (inMountTransactionId IN NUMBER) AS
913
BEGIN
914
  UPDATE MigrationMount SET full = 1 WHERE mountTransactionId = inMountTransactionId;
915
916
END;
/
917

918
/* Find the VID of the tape used in a tape session */
919
CREATE OR REPLACE PROCEDURE tg_getMigrationMountVid (
920
    inMountTransactionId IN NUMBER,
921
922
923
924
925
    outVid          OUT NOCOPY VARCHAR2,
    outTapePool     OUT NOCOPY VARCHAR2) AS
    varMMId         NUMBER;
    varUnused       NUMBER;
BEGIN
926
927
928
929
930
  SELECT MigrationMount.vid, TapePool.name
    INTO outVid, outTapePool
    FROM MigrationMount, TapePool
   WHERE TapePool.id = MigrationMount.tapePool
     AND MigrationMount.mountTransactionId = inMountTransactionId;
931
932
END;
/
933

934

935
936
/* insert new Migration Mount */
CREATE OR REPLACE PROCEDURE insertMigrationMount(inTapePoolId IN NUMBER,
937
                                                 minimumAge IN INTEGER,
938
                                                 outMountId OUT INTEGER) AS
939
940
  varMigJobId INTEGER;
BEGIN
941
942
943
  -- Check that the mount would be honoured by running a dry-run file selection:
  -- note that in case the mount was triggered because of age, we check that
  -- we have a valid candidate that is at least minimumAge seconds old.
944
945
946
947
948
949
950
951
952
953
954
  -- This is almost a duplicate of the query in tg_getFilesToMigrate.
  SELECT /*+ FIRST_ROWS_1
             LEADING(MigrationJob CastorFile DiskCopy FileSystem DiskServer)
             USE_NL(MMigrationJob CastorFile DiskCopy FileSystem DiskServer)
             INDEX(CastorFile PK_CastorFile_Id)
             INDEX_RS_ASC(DiskCopy I_DiskCopy_CastorFile)
             INDEX_RS_ASC(MigrationJob I_MigrationJob_TPStatusId) */
         MigrationJob.id mjId INTO varMigJobId
    FROM MigrationJob, DiskCopy, FileSystem, DiskServer, CastorFile
   WHERE MigrationJob.tapePool = inTapePoolId
     AND MigrationJob.status = tconst.MIGRATIONJOB_PENDING
955
     AND (minimumAge = 0 OR MigrationJob.creationTime < getTime() - minimumAge)
956
957
     AND CastorFile.id = MigrationJob.castorFile
     AND CastorFile.id = DiskCopy.castorFile
958
959
     AND CastorFile.tapeStatus = dconst.CASTORFILE_NOTONTAPE
     AND DiskCopy.status = dconst.DISKCOPY_VALID
960
     AND FileSystem.id = DiskCopy.fileSystem
961
     AND FileSystem.status IN (dconst.FILESYSTEM_PRODUCTION, dconst.FILESYSTEM_DRAINING, dconst.FILESYSTEM_READONLY)
962
     AND DiskServer.id = FileSystem.diskServer
963
964
     AND DiskServer.status IN (dconst.DISKSERVER_PRODUCTION, dconst.DISKSERVER_DRAINING, dconst.DISKSERVER_READONLY)
     AND DiskServer.hwOnline = 1
965
     AND ROWNUM < 2;
966
  -- The select worked out, create a mount for this tape pool
967
  INSERT INTO MigrationMount
968
              (mountTransactionId, id, startTime, VID, label, density,
969
               lastFseq, lastVDQMPingTime, tapePool, status)
970
971
972
973
974
975
976
    VALUES (NULL, ids_seq.nextval, gettime(), NULL, NULL, NULL,
            NULL, 0, inTapePoolId, tconst.MIGRATIONMOUNT_WAITTAPE)
    RETURNING id INTO outMountId;
EXCEPTION WHEN NO_DATA_FOUND THEN
  -- No valid candidate found: this could happen e.g. when candidates exist
  -- but reside on non-available hardware. In this case we drop the mount and log
  outMountId := 0;
977
978
979
END;
/

980
981

/* DB job to start new migration mounts */
982
CREATE OR REPLACE PROCEDURE startMigrationMounts AS
983
984
  varNbPreExistingMounts INTEGER;
  varTotalNbMounts INTEGER := 0;
985
986
987
  varDataAmount INTEGER;
  varNbFiles INTEGER;
  varOldestCreationTime NUMBER;
988
  varMountId INTEGER;
989
990
BEGIN
  -- loop through tapepools
991
  FOR t IN (SELECT id, name, nbDrives, minAmountDataForMount,
992
993
                   minNbFilesForMount, maxFileAgeBeforeMount
              FROM TapePool) LOOP
994
    -- get number of mounts already running for this tapepool
995
    SELECT nvl(count(*), 0) INTO varNbPreExistingMounts
996
997
      FROM MigrationMount
     WHERE tapePool = t.id;
998
    varTotalNbMounts := varNbPreExistingMounts;
999
    -- get the amount of data and number of files to migrate, plus the age of the oldest file
1000
    SELECT nvl(SUM(fileSize), 0), COUNT(*), nvl(MIN(creationTime), 0)
For faster browsing, not all history is shown. View entire blame