Commit f7cd34eb authored by Sebastien Ponce's avatar Sebastien Ponce
Browse files

Reviewed draining schema and tools, and use the new disk to disk copy infrastructure to improve it

parent 65956f6b
......@@ -736,68 +736,6 @@ ALTER TABLE MigrationRouting ADD CONSTRAINT FK_MigrationRouting_FileClass
/* MigrationRouting.tapePool must reference an existing TapePool row */
ALTER TABLE MigrationRouting
  ADD CONSTRAINT FK_MigrationRouting_TapePool
  FOREIGN KEY (tapePool)
  REFERENCES TapePool (id);
/* Disk2DiskCopyJob table : each row is one disk to disk copy job to process
 *   id              : unique DB identifier of the job
 *   transferId      : unique identifier of the transfer associated to the job
 *   creationTime    : creation time of the row, makes waiting times easy to compute
 *   status          : status of the job (PENDING, SCHEDULED, RUNNING)
 *   retryCounter    : number of times the copy was attempted
 *   ouid, ogid      : user id and group id of the originator
 *   castorFile      : the concerned file
 *   nsOpenTime      : nsOpenTime of the castorFile when the job was created;
 *                     allows to detect that the file was overwritten during replication
 *   destSvcClass    : the destination service class
 *   replicationType : type of replication involved (user, internal or draining)
 *   replacedDcId    : in case of draining, the replaced diskCopy to be dropped
 *   destDcId        : the destination diskCopy
 */
CREATE TABLE Disk2DiskCopyJob (
  id              NUMBER         CONSTRAINT PK_Disk2DiskCopyJob_Id PRIMARY KEY
                                 CONSTRAINT NN_Disk2DiskCopyJob_Id NOT NULL,
  transferId      VARCHAR2(2048) CONSTRAINT NN_Disk2DiskCopyJob_TId NOT NULL,
  creationTime    INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_CTime NOT NULL,
  status          INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_Status NOT NULL,
  retryCounter    INTEGER        DEFAULT 0 CONSTRAINT NN_Disk2DiskCopyJob_retryCnt NOT NULL,
  ouid            INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_ouid NOT NULL,
  ogid            INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_ogid NOT NULL,
  castorFile      INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_CastorFile NOT NULL,
  nsOpenTime      INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_NSOpenTime NOT NULL,
  destSvcClass    INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_dstSC NOT NULL,
  replicationType INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_Type NOT NULL,
  replacedDcId    INTEGER,
  destDcId        INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_DCId NOT NULL
)
INITRANS 50 PCTFREE 50 ENABLE ROW MOVEMENT;

/* Lookup indexes : by transfer, by file, and by (creationTime, id) */
CREATE INDEX I_Disk2DiskCopyJob_Tid   ON Disk2DiskCopyJob (transferId);
CREATE INDEX I_Disk2DiskCopyJob_CfId  ON Disk2DiskCopyJob (castorFile);
CREATE INDEX I_Disk2DiskCopyJob_CT_Id ON Disk2DiskCopyJob (creationTime, id);
/* Register the readable names of the Disk2DiskCopyJob status and replicationType values */
DECLARE
  cObject CONSTANT VARCHAR2(30) := 'Disk2DiskCopyJob';
BEGIN
  -- status values
  -- PENDING : the job has just been created and can be scheduled right away
  setObjStatusName(cObject, 'status', dconst.DISK2DISKCOPYJOB_PENDING, 'DISK2DISKCOPYJOB_PENDING');
  -- SCHEDULED : the job has been scheduled but has not started yet
  setObjStatusName(cObject, 'status', dconst.DISK2DISKCOPYJOB_SCHEDULED, 'DISK2DISKCOPYJOB_SCHEDULED');
  -- RUNNING : the disk to disk copy is ongoing
  setObjStatusName(cObject, 'status', dconst.DISK2DISKCOPYJOB_RUNNING, 'DISK2DISKCOPYJOB_RUNNING');

  -- replicationType values
  -- USER : replication triggered by the user
  setObjStatusName(cObject, 'replicationType', dconst.REPLICATIONTYPE_USER, 'REPLICATIONTYPE_USER');
  -- INTERNAL : replication triggered internally (e.g. dual copy disk pools)
  setObjStatusName(cObject, 'replicationType', dconst.REPLICATIONTYPE_INTERNAL, 'REPLICATIONTYPE_INTERNAL');
  -- DRAINING : replication triggered by a drain operation
  setObjStatusName(cObject, 'replicationType', dconst.REPLICATIONTYPE_DRAINING, 'REPLICATIONTYPE_DRAINING');
END;
/
/* Referential integrity : the file and the destination service class must exist */
ALTER TABLE Disk2DiskCopyJob
  ADD CONSTRAINT FK_Disk2DiskCopyJob_CastorFile
  FOREIGN KEY (castorFile)
  REFERENCES CastorFile (id);

ALTER TABLE Disk2DiskCopyJob
  ADD CONSTRAINT FK_Disk2DiskCopyJob_SvcClass
  FOREIGN KEY (destSvcClass)
  REFERENCES SvcClass (id);

/* Allowed values : three status values and three replication types */
ALTER TABLE Disk2DiskCopyJob
  ADD CONSTRAINT CK_Disk2DiskCopyJob_Status
  CHECK (status IN (0, 1, 2));

ALTER TABLE Disk2DiskCopyJob
  ADD CONSTRAINT CK_Disk2DiskCopyJob_type
  CHECK (replicationType IN (0, 1, 2));
/* Temporary table used to bulk select next candidates for recall and migration */
CREATE GLOBAL TEMPORARY TABLE FilesToRecallHelper
(fileId NUMBER, nsHost VARCHAR2(100), fileTransactionId NUMBER,
......@@ -1193,8 +1131,20 @@ CREATE GLOBAL TEMPORARY TABLE DeleteDiskCopyHelper
/* Repack */
/**********/
/* SQL statements for type StageRepackRequest */
CREATE TABLE StageRepackRequest (flags INTEGER, userName VARCHAR2(2048), euid NUMBER, egid NUMBER, mask NUMBER, pid NUMBER, machine VARCHAR2(2048), svcClassName VARCHAR2(2048), userTag VARCHAR2(2048), reqId VARCHAR2(2048), creationTime INTEGER, lastModificationTime INTEGER,
/* SQL statements for type StageRepackRequest (not autogenerated any more) */
CREATE TABLE StageRepackRequest
(flags INTEGER,
userName VARCHAR2(2048),
euid NUMBER,
egid NUMBER,
mask NUMBER,
pid NUMBER,
machine VARCHAR2(2048),
svcClassName VARCHAR2(2048),
userTag VARCHAR2(2048),
reqId VARCHAR2(2048),
creationTime INTEGER,
lastModificationTime INTEGER,
repackVid VARCHAR2(2048) CONSTRAINT NN_StageRepackReq_repackVid NOT NULL,
id INTEGER CONSTRAINT PK_StageRepackRequest_Id PRIMARY KEY,
svcClass INTEGER,
......@@ -1202,7 +1152,7 @@ CREATE TABLE StageRepackRequest (flags INTEGER, userName VARCHAR2(2048), euid NU
status INTEGER CONSTRAINT NN_StageRepackReq_status NOT NULL,
fileCount INTEGER CONSTRAINT NN_StageRepackReq_fileCount NOT NULL,
totalSize INTEGER CONSTRAINT NN_StageRepackReq_totalSize NOT NULL)
INITRANS 50 PCTFREE 50 ENABLE ROW MOVEMENT;
INITRANS 50 PCTFREE 50 ENABLE ROW MOVEMENT;
BEGIN
setObjStatusName('StageRepackRequest', 'status', tconst.REPACK_STARTING, 'REPACK_STARTING');
......@@ -1227,6 +1177,172 @@ CREATE GLOBAL TEMPORARY TABLE RepackTapeSegments
copyNb NUMBER, fileClass NUMBER, allSegments VARCHAR2(2048))
ON COMMIT PRESERVE ROWS;
/**********************************/
/* Draining and disk to disk copy */
/**********************************/
/* Creation of the DrainingJob table
 *  - id : unique identifier of the DrainingJob
 *  - userName, euid, egid : identification of the originator of the job
 *  - pid : process id of the originator of the job
 *  - machine : machine where the originator of the job was running
 *  - creationTime : time when the job was created
 *  - lastModificationTime : last time the job was updated
 *  - fileSystem : id of the concerned filesystem. The UNIQUE constraint
 *                 allows at most one job per filesystem
 *  - status : current status of the job. One of SUBMITTED, STARTING,
 *             RUNNING, FAILED, FINISHED
 *  - svcClass : the target service class for the draining
 *  - autoDelete : whether source files should be invalidated after
 *                 their replication. One of 0 (no) and 1 (yes)
 *  - fileMask : indicates which files are concerned by the draining.
 *               One of NOTONTAPE, ALL
 *  - totalFiles, totalBytes : indication of the work to be done. These
 *                numbers are partial and increasing while starting
 *                and then stable while running
 *  - nbFailedBytes/Files, nbSuccessBytes/Files : indication of the
 *                work already done. These counters are updated while running
 *  - userComment : a user comment
 */
CREATE TABLE DrainingJob
  (id             INTEGER CONSTRAINT PK_DrainingJob_Id PRIMARY KEY,
   userName       VARCHAR2(2048) CONSTRAINT NN_DrainingJob_UserName NOT NULL,
   euid           INTEGER CONSTRAINT NN_DrainingJob_Euid NOT NULL,
   egid           INTEGER CONSTRAINT NN_DrainingJob_Egid NOT NULL,
   pid            INTEGER CONSTRAINT NN_DrainingJob_Pid NOT NULL,
   machine        VARCHAR2(2048) CONSTRAINT NN_DrainingJob_Machine NOT NULL,
   creationTime   INTEGER CONSTRAINT NN_DrainingJob_CT NOT NULL,
   lastModificationTime INTEGER CONSTRAINT NN_DrainingJob_LMT NOT NULL,
   status         INTEGER CONSTRAINT NN_DrainingJob_Status NOT NULL,
   fileSystem     INTEGER CONSTRAINT NN_DrainingJob_FileSystem NOT NULL
                          CONSTRAINT UN_DrainingJob_FileSystem UNIQUE USING INDEX,
   svcClass       INTEGER CONSTRAINT NN_DrainingJob_SvcClass NOT NULL,
   autoDelete     INTEGER CONSTRAINT NN_DrainingJob_AutoDelete NOT NULL,
   fileMask       INTEGER CONSTRAINT NN_DrainingJob_FileMask NOT NULL,
   totalFiles     INTEGER CONSTRAINT NN_DrainingJob_TotFiles NOT NULL,
   totalBytes     INTEGER CONSTRAINT NN_DrainingJob_TotBytes NOT NULL,
   -- constraint names now match the column they protect, so that a NOT NULL
   -- violation reports the right counter
   nbFailedBytes  INTEGER CONSTRAINT NN_DrainingJob_FailedBytes NOT NULL,
   nbFailedFiles  INTEGER CONSTRAINT NN_DrainingJob_FailedFiles NOT NULL,
   nbSuccessBytes INTEGER CONSTRAINT NN_DrainingJob_SuccessBytes NOT NULL,
   nbSuccessFiles INTEGER CONSTRAINT NN_DrainingJob_SuccessFiles NOT NULL,
   userComment    VARCHAR2(2048))
ENABLE ROW MOVEMENT;
/* Register the readable names of the DrainingJob status values.
 * The values come from the dconst.DRAININGJOB_* constants (0, 1, 2, 4, 5) rather
 * than from literals, as done for Disk2DiskCopyJob, so that the names cannot
 * drift away from the constants used by the PL/SQL code.
 */
BEGIN
  setObjStatusName('DrainingJob', 'status', dconst.DRAININGJOB_SUBMITTED, 'SUBMITTED');
  setObjStatusName('DrainingJob', 'status', dconst.DRAININGJOB_STARTING, 'STARTING');
  setObjStatusName('DrainingJob', 'status', dconst.DRAININGJOB_RUNNING, 'RUNNING');
  setObjStatusName('DrainingJob', 'status', dconst.DRAININGJOB_FAILED, 'FAILED');
  setObjStatusName('DrainingJob', 'status', dconst.DRAININGJOB_FINISHED, 'FINISHED');
END;
/
/* Referential integrity : the drained filesystem and the target service class must exist */
ALTER TABLE DrainingJob ADD CONSTRAINT FK_DrainingJob_FileSystem
  FOREIGN KEY (fileSystem) REFERENCES FileSystem (id);

ALTER TABLE DrainingJob ADD CONSTRAINT FK_DrainingJob_SvcClass
  FOREIGN KEY (svcClass) REFERENCES SvcClass (id);

/* Allowed values. Note that 3 is not a valid status */
ALTER TABLE DrainingJob ADD CONSTRAINT CK_DrainingJob_Status
  CHECK (status IN (0, 1, 2, 4, 5));

ALTER TABLE DrainingJob ADD CONSTRAINT CK_DrainingJob_AutoDelete
  CHECK (autoDelete IN (0, 1));

ALTER TABLE DrainingJob ADD CONSTRAINT CK_DrainingJob_FileMask
  CHECK (fileMask IN (0, 1));
/* DrainingErrors table : one row per error recorded for a draining job
 *   drainingJob    : identifier of the concerned DrainingJob
 *   errorMsg       : the error that occurred
 *   fileId, nsHost : the concerned file
 */
CREATE TABLE DrainingErrors (
  drainingJob INTEGER        CONSTRAINT NN_DrainingErrors_DJ NOT NULL,
  errorMsg    VARCHAR2(2048) CONSTRAINT NN_DrainingErrors_ErrorMsg NOT NULL,
  fileId      INTEGER        CONSTRAINT NN_DrainingErrors_FileId NOT NULL,
  nsHost      VARCHAR2(2048) CONSTRAINT NN_DrainingErrors_NsHost NOT NULL
)
ENABLE ROW MOVEMENT;

/* Errors are looked up by draining job, and must belong to an existing one */
CREATE INDEX I_DrainingErrors_DJ
  ON DrainingErrors (drainingJob);

ALTER TABLE DrainingErrors ADD CONSTRAINT FK_DrainingErrors_DJ
  FOREIGN KEY (drainingJob) REFERENCES DrainingJob (id);
/* Disk2DiskCopyJob table : each row is one disk to disk copy job to process
 *   id              : unique DB identifier of the job
 *   transferId      : unique identifier of the transfer associated to the job
 *   creationTime    : creation time of the row, makes waiting times easy to compute
 *   status          : status of the job (PENDING, SCHEDULED, RUNNING)
 *   retryCounter    : number of times the copy was attempted
 *   ouid, ogid      : user id and group id of the originator
 *   castorFile      : the concerned file
 *   nsOpenTime      : nsOpenTime of the castorFile when the job was created;
 *                     allows to detect that the file was overwritten during replication
 *   destSvcClass    : the destination service class
 *   replicationType : type of replication involved (user, internal or draining)
 *   replacedDcId    : in case of draining, the replaced diskCopy to be dropped
 *   destDcId        : the destination diskCopy
 *   drainingJob     : the draining job behind this job.
 *                     Not NULL only if replicationType is DRAINING
 */
CREATE TABLE Disk2DiskCopyJob (
  id              NUMBER         CONSTRAINT PK_Disk2DiskCopyJob_Id PRIMARY KEY
                                 CONSTRAINT NN_Disk2DiskCopyJob_Id NOT NULL,
  transferId      VARCHAR2(2048) CONSTRAINT NN_Disk2DiskCopyJob_TId NOT NULL,
  creationTime    INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_CTime NOT NULL,
  status          INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_Status NOT NULL,
  retryCounter    INTEGER        DEFAULT 0 CONSTRAINT NN_Disk2DiskCopyJob_retryCnt NOT NULL,
  ouid            INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_ouid NOT NULL,
  ogid            INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_ogid NOT NULL,
  castorFile      INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_CastorFile NOT NULL,
  nsOpenTime      INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_NSOpenTime NOT NULL,
  destSvcClass    INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_dstSC NOT NULL,
  replicationType INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_Type NOT NULL,
  replacedDcId    INTEGER,
  destDcId        INTEGER        CONSTRAINT NN_Disk2DiskCopyJob_DCId NOT NULL,
  drainingJob     INTEGER
)
INITRANS 50 PCTFREE 50 ENABLE ROW MOVEMENT;

/* Lookup indexes : by transfer, by file, by (creationTime, id) and by draining job */
CREATE INDEX I_Disk2DiskCopyJob_Tid      ON Disk2DiskCopyJob (transferId);
CREATE INDEX I_Disk2DiskCopyJob_CfId     ON Disk2DiskCopyJob (castorFile);
CREATE INDEX I_Disk2DiskCopyJob_CT_Id    ON Disk2DiskCopyJob (creationTime, id);
CREATE INDEX I_Disk2DiskCopyJob_drainJob ON Disk2DiskCopyJob (drainingJob);
/* Register the readable names of the Disk2DiskCopyJob status and replicationType values */
DECLARE
  cObject CONSTANT VARCHAR2(30) := 'Disk2DiskCopyJob';
BEGIN
  -- status values
  -- PENDING : the job has just been created and can be scheduled right away
  setObjStatusName(cObject, 'status', dconst.DISK2DISKCOPYJOB_PENDING, 'DISK2DISKCOPYJOB_PENDING');
  -- SCHEDULED : the job has been scheduled but has not started yet
  setObjStatusName(cObject, 'status', dconst.DISK2DISKCOPYJOB_SCHEDULED, 'DISK2DISKCOPYJOB_SCHEDULED');
  -- RUNNING : the disk to disk copy is ongoing
  setObjStatusName(cObject, 'status', dconst.DISK2DISKCOPYJOB_RUNNING, 'DISK2DISKCOPYJOB_RUNNING');

  -- replicationType values
  -- USER : replication triggered by the user
  setObjStatusName(cObject, 'replicationType', dconst.REPLICATIONTYPE_USER, 'REPLICATIONTYPE_USER');
  -- INTERNAL : replication triggered internally (e.g. dual copy disk pools)
  setObjStatusName(cObject, 'replicationType', dconst.REPLICATIONTYPE_INTERNAL, 'REPLICATIONTYPE_INTERNAL');
  -- DRAINING : replication triggered by a drain operation
  setObjStatusName(cObject, 'replicationType', dconst.REPLICATIONTYPE_DRAINING, 'REPLICATIONTYPE_DRAINING');
END;
/
/* Referential integrity : the file, the destination service class and,
 * when set, the draining job must exist
 */
ALTER TABLE Disk2DiskCopyJob
  ADD CONSTRAINT FK_Disk2DiskCopyJob_CastorFile
  FOREIGN KEY (castorFile)
  REFERENCES CastorFile (id);

ALTER TABLE Disk2DiskCopyJob
  ADD CONSTRAINT FK_Disk2DiskCopyJob_SvcClass
  FOREIGN KEY (destSvcClass)
  REFERENCES SvcClass (id);

ALTER TABLE Disk2DiskCopyJob
  ADD CONSTRAINT FK_Disk2DiskCopyJob_DrainingJob
  FOREIGN KEY (drainingJob)
  REFERENCES DrainingJob (id);

/* Allowed values : three status values and three replication types */
ALTER TABLE Disk2DiskCopyJob
  ADD CONSTRAINT CK_Disk2DiskCopyJob_Status
  CHECK (status IN (0, 1, 2));

ALTER TABLE Disk2DiskCopyJob
  ADD CONSTRAINT CK_Disk2DiskCopyJob_type
  CHECK (replicationType IN (0, 1, 2));
/*****************/
/* logon trigger */
......
......@@ -135,20 +135,15 @@ AS
FILESYSTEM_DISABLED CONSTANT PLS_INTEGER := 2;
FILESYSTEM_READONLY CONSTANT PLS_INTEGER := 3;
DRAININGFS_CREATED CONSTANT PLS_INTEGER := 0;
DRAININGFS_INITIALIZING CONSTANT PLS_INTEGER := 1;
DRAININGFS_RUNNING CONSTANT PLS_INTEGER := 2;
DRAININGFS_INTERRUPTED CONSTANT PLS_INTEGER := 3;
DRAININGFS_FAILED CONSTANT PLS_INTEGER := 4;
DRAININGFS_COMPLETED CONSTANT PLS_INTEGER := 5;
DRAININGFS_DELETING CONSTANT PLS_INTEGER := 6;
DRAININGFS_RESTART CONSTANT PLS_INTEGER := 7;
DRAININGJOB_SUBMITTED CONSTANT PLS_INTEGER := 0;
DRAININGJOB_STARTING CONSTANT PLS_INTEGER := 1;
DRAININGJOB_RUNNING CONSTANT PLS_INTEGER := 2;
DRAININGJOB_FAILED CONSTANT PLS_INTEGER := 4;
DRAININGJOB_FINISHED CONSTANT PLS_INTEGER := 5;
DRAIN_FILEMASK_NOTONTAPE CONSTANT PLS_INTEGER := 0;
DRAIN_FILEMASK_ALL CONSTANT PLS_INTEGER := 1;
DRAININGDC_CREATED CONSTANT PLS_INTEGER := 0;
DRAININGDC_PROCESSING CONSTANT PLS_INTEGER := 2;
DRAININGDC_WAITD2D CONSTANT PLS_INTEGER := 3;
DRAININGDC_FAILED CONSTANT PLS_INTEGER := 4;
SUBREQUEST_START CONSTANT PLS_INTEGER := 0;
SUBREQUEST_RESTART CONSTANT PLS_INTEGER := 1;
SUBREQUEST_RETRY CONSTANT PLS_INTEGER := 2;
......@@ -282,6 +277,10 @@ AS
REPACK_JOB_STATS CONSTANT VARCHAR2(2048) := 'repackManager: Repack processes statistics';
REPACK_UNEXPECTED_EXCEPTION CONSTANT VARCHAR2(2048) := 'handleRepackRequest: unexpected exception caught';
DRAINING_JOB_ONGOING CONSTANT VARCHAR2(2048) := 'drainingManager: Draining jobs still starting, no new ones will be started for this round';
DRAINING_STARTED CONSTANT VARCHAR2(2048) := 'drainingManager: Draining process started';
DRAINING_JOB_STATS CONSTANT VARCHAR2(2048) := 'drainingManager: Draining processes statistics';
DELETEDISKCOPY_RECALL CONSTANT VARCHAR2(2048) := 'deleteDiskCopy: diskCopy was lost, about to recall from tape';
DELETEDISKCOPY_REPLICATION CONSTANT VARCHAR2(2048) := 'deleteDiskCopy: diskCopy was lost, about to replicate from another pool';
DELETEDISKCOPY_LOST CONSTANT VARCHAR2(2048) := 'deleteDiskCopy: file was LOST and is being dropped from the system';
......
/*******************************************************************
* Schema creation code for Draining FileSystems Logic
*
* @author Castor Dev team, castor-dev@cern.ch
*******************************************************************/
/* DrainingFileSystem table : one row per filesystem being drained
 *   userName, machine : user name and machine recorded for the draining
 *   creationTime, startTime, lastUpdateTime : timestamps, defaulting to 0
 *   fileSystem   : the filesystem being drained
 *   status       : status of the draining, defaults to 0
 *   svcClass     : the service class associated to the draining
 *   autoDelete   : flag telling whether files should be invalidated, so that the
 *                  garbage collection process can remove them, once they have
 *                  been replicated to another diskserver
 *   fileMask     : which files should be replicated. Valid values are
 *                    0 -- ONTAPE AND DISKONLY
 *                    1 -- NOTONTAPE
 *                    2 -- ALL
 *   maxTransfers : maximum number of concurrent transfers (job slots) available
 *                  for draining the filesystem
 *   totalFiles, totalBytes : counters, defaulting to 0
 *   comments     : free text, 'N/A' by default
 * Row movement is enabled in order to allow shrink operations.
 */
CREATE TABLE DrainingFileSystem (
  userName       VARCHAR2(30)  CONSTRAINT NN_DrainingFs_UserName NOT NULL,
  machine        VARCHAR2(500) CONSTRAINT NN_DrainingFs_Machine NOT NULL,
  creationTime   NUMBER        DEFAULT 0,
  startTime      NUMBER        DEFAULT 0,
  lastUpdateTime NUMBER        DEFAULT 0,
  fileSystem     NUMBER        CONSTRAINT NN_DrainingFs_FileSystem NOT NULL,
  status         NUMBER        DEFAULT 0,
  svcClass       NUMBER        CONSTRAINT NN_DrainingFs_SvcClass NOT NULL,
  autoDelete     NUMBER        DEFAULT 0,
  fileMask       NUMBER        DEFAULT 1,
  maxTransfers   NUMBER        DEFAULT 50,
  totalFiles     NUMBER        DEFAULT 0,
  totalBytes     NUMBER        DEFAULT 0,
  comments       VARCHAR2(50)  DEFAULT 'N/A' CONSTRAINT NN_DrainingFs_Comments NOT NULL
)
ENABLE ROW MOVEMENT;
/* Register the readable names of the eight DrainingFileSystem status values */
DECLARE
  cObject CONSTANT VARCHAR2(30) := 'DrainingFileSystem';
  cField  CONSTANT VARCHAR2(30) := 'status';
BEGIN
  setObjStatusName(cObject, cField, 0, 'CREATED');
  setObjStatusName(cObject, cField, 1, 'INITIALIZING');
  setObjStatusName(cObject, cField, 2, 'RUNNING');
  setObjStatusName(cObject, cField, 3, 'INTERRUPTED');
  setObjStatusName(cObject, cField, 4, 'FAILED');
  setObjStatusName(cObject, cField, 5, 'COMPLETED');
  setObjStatusName(cObject, cField, 6, 'DELETING');
  setObjStatusName(cObject, cField, 7, 'RESTART');
END;
/
/* Primary key : a filesystem appears at most once in DrainingFileSystem */
ALTER TABLE DrainingFileSystem ADD CONSTRAINT PK_DrainingFs_FileSystem
  PRIMARY KEY (fileSystem);

/* Check constraints : allowed values of status, fileMask, autoDelete and maxTransfers */
ALTER TABLE DrainingFileSystem ADD CONSTRAINT CK_DrainingFs_Status
  CHECK (status IN (0, 1, 2, 3, 4, 5, 6, 7));

ALTER TABLE DrainingFileSystem ADD CONSTRAINT CK_DrainingFs_FileMask
  CHECK (fileMask IN (0, 1, 2));

ALTER TABLE DrainingFileSystem ADD CONSTRAINT CK_DrainingFs_AutoDelete
  CHECK (autoDelete IN (0, 1));

ALTER TABLE DrainingFileSystem ADD CONSTRAINT CK_DrainingFs_MaxTransfers
  CHECK (maxTransfers > 0);

/* Foreign keys : the service class and the filesystem must exist */
ALTER TABLE DrainingFileSystem ADD CONSTRAINT FK_DrainingFs_SvcClass
  FOREIGN KEY (svcClass) REFERENCES SvcClass (id);

ALTER TABLE DrainingFileSystem ADD CONSTRAINT FK_DrainingFs_FileSystem
  FOREIGN KEY (fileSystem) REFERENCES FileSystem (id);

/* Index on the service class column */
CREATE INDEX I_DrainingFileSystem_SvcClass ON DrainingFileSystem (svcClass);
/* DrainingDiskCopy table
 *
 * Draining a filesystem essentially means building the list of all the files
 * that need to be replicated to other diskservers and processing that list
 * until every file has been replicated.
 *
 * Such a list/queue could have been implemented with Oracle Advanced Queuing (AQ).
 * However, given the complexity of setting it up and the lack of prior experience
 * of the CASTOR developers and CERN DBA's, a simple queue built on a standard
 * table is used instead.
 *
 *   fileSystem   : the filesystem being drained
 *   status       : status of the diskcopy to be replicated. This is not the
 *                  status of the diskcopy itself (i.e. VALID, STAGEOUT) but an
 *                  internal status assigned to each diskcopy (file) to track how
 *                  far it is in the lifecycle of draining a filesystem.
 *                  PROCESSING is a transient state
 *   diskCopy     : a link to the diskcopy. This is deliberately NOT enforced
 *                  with a foreign key constraint
 *   parent, creationTime, priority, fileSize : numeric attributes defaulting to 0
 *   comments     : free text, NULL by default
 * Row movement is enabled in order to allow shrink operations.
 */
CREATE TABLE DrainingDiskCopy (
  fileSystem   NUMBER         CONSTRAINT NN_DrainingDCs_FileSystem NOT NULL,
  status       NUMBER         DEFAULT 0 CONSTRAINT NN_DrainingDCs_Status NOT NULL,
  diskCopy     NUMBER         CONSTRAINT NN_DrainingDCs_DiskCopy NOT NULL,
  parent       NUMBER         DEFAULT 0 CONSTRAINT NN_DrainingDCs_Parent NOT NULL,
  creationTime NUMBER         DEFAULT 0,
  priority     NUMBER         DEFAULT 0,
  fileSize     NUMBER         DEFAULT 0 CONSTRAINT NN_DrainingDCs_FileSize NOT NULL,
  comments     VARCHAR2(2048) DEFAULT NULL
)
ENABLE ROW MOVEMENT;
/* Register the readable names of the DrainingDiskCopy status values (1 is not used) */
DECLARE
  cObject CONSTANT VARCHAR2(30) := 'DrainingDiskCopy';
  cField  CONSTANT VARCHAR2(30) := 'status';
BEGIN
  setObjStatusName(cObject, cField, 0, 'CREATED');
  setObjStatusName(cObject, cField, 2, 'PROCESSING');
  setObjStatusName(cObject, cField, 3, 'WAITD2D');
  setObjStatusName(cObject, cField, 4, 'FAILED');
END;
/
/* Primary key : a diskcopy appears at most once in DrainingDiskCopy */
ALTER TABLE DrainingDiskCopy ADD CONSTRAINT PK_DrainingDCs_DiskCopy
  PRIMARY KEY (diskCopy);

/* Check constraint : allowed status values */
ALTER TABLE DrainingDiskCopy ADD CONSTRAINT CK_DrainingDCs_Status
  CHECK (status IN (0, 2, 3, 4));

/* Foreign key : the filesystem must be registered in DrainingFileSystem */
ALTER TABLE DrainingDiskCopy ADD CONSTRAINT FK_DrainingDCs_FileSystem
  FOREIGN KEY (fileSystem) REFERENCES DrainingFileSystem (fileSystem);

/* Single column indexes */
CREATE INDEX I_DrainingDCs_FileSystem ON DrainingDiskCopy (fileSystem);
CREATE INDEX I_DrainingDCs_Status     ON DrainingDiskCopy (status);
CREATE INDEX I_DrainingDCs_Parent     ON DrainingDiskCopy (parent);

/* Composite index supporting the in-order processing, see drainFileSystem */
CREATE INDEX I_DrainingDCs_FSStPrioTimeDC
  ON DrainingDiskCopy (fileSystem, status, priority, creationTime, diskCopy);
This diff is collapsed.
......@@ -283,6 +283,52 @@ EXCEPTION WHEN NO_DATA_FOUND THEN
END;
/
/* Update a DrainingJob at the end of one of its disk to disk copies :
 * the success or failure counters are incremented and, when all files of a
 * RUNNING job have been accounted for, the job goes to FINISHED or FAILED.
 *   inDjId      : id of the DrainingJob to update
 *   inFileSize  : size of the copied file, added to nbSuccessBytes or nbFailedBytes
 *   inHasFailed : TRUE if the copy failed. Any other value counts as a success
 * Raises NO_DATA_FOUND if no DrainingJob has the given id.
 */
CREATE OR REPLACE PROCEDURE updateDrainingJobOnD2dEnd(inDjId IN INTEGER, inFileSize IN INTEGER,
                                                      inHasFailed IN BOOLEAN) AS
  varTotalFiles INTEGER;
  varNbFailedBytes INTEGER;
  varNbSuccessBytes INTEGER;
  varNbFailedFiles INTEGER;
  varNbSuccessFiles INTEGER;
  varStatus INTEGER;
BEGIN
  -- take a lock on the job : this ensures consistency of the counters when
  -- several disk to disk copies of the same job end concurrently
  SELECT status, totalFiles, nbFailedBytes, nbSuccessBytes, nbFailedFiles, nbSuccessFiles
    INTO varStatus, varTotalFiles, varNbFailedBytes, varNbSuccessBytes, varNbFailedFiles, varNbSuccessFiles
    FROM DrainingJob
   WHERE id = inDjId
     FOR UPDATE;
  -- update counters
  IF inHasFailed THEN
    -- case of failures
    varNbFailedBytes := varNbFailedBytes + inFileSize;
    varNbFailedFiles := varNbFailedFiles + 1;
  ELSE
    -- case of success
    varNbSuccessBytes := varNbSuccessBytes + inFileSize;
    varNbSuccessFiles := varNbSuccessFiles + 1;
  END IF;
  -- detect end of draining. Only a RUNNING job can end here, any other
  -- status is left untouched
  IF varStatus = dconst.DRAININGJOB_RUNNING AND
     varNbSuccessFiles + varNbFailedFiles = varTotalFiles THEN
    IF varNbFailedFiles = 0 THEN
      varStatus := dconst.DRAININGJOB_FINISHED;
    ELSE
      varStatus := dconst.DRAININGJOB_FAILED;
    END IF;
  END IF;
  -- update DrainingJob. totalFiles is only read by this procedure, so it is
  -- not written back
  UPDATE DrainingJob
     SET status = varStatus,
         nbFailedBytes = varNbFailedBytes,
         nbSuccessBytes = varNbSuccessBytes,
         nbFailedFiles = varNbFailedFiles,
         nbSuccessFiles = varNbSuccessFiles
   WHERE id = inDjId;
END;
/
/* PL/SQL method implementing disk2DiskCopyEnded
* Note that inDestDsName, inDestPath and inReplicaFileSize are not used when inErrorMessage is not NULL
......@@ -307,14 +353,15 @@ CREATE OR REPLACE PROCEDURE disk2DiskCopyEnded
varNewDcStatus INTEGER := dconst.DISKCOPY_VALID;
varLogMsg VARCHAR2(2048);
varComment VARCHAR2(2048);
-- id of the DrainingJob behind this copy, NULL when there is none. Typed as INTEGER
-- like Disk2DiskCopyJob.drainingJob, to avoid implicit number/string conversions
varDrainingJob INTEGER;
BEGIN
varLogMsg := CASE WHEN inErrorMessage IS NULL THEN dlf.D2D_D2DDONE_OK ELSE dlf.D2D_D2DFAILED END;
BEGIN
-- Get data from the disk2DiskCopy Job
SELECT castorFile, ouid, ogid, destDcId,
destSvcClass, replicationType, replacedDcId, retryCounter
INTO varCfId, varUid, varGid, varDestDcId,
varDestSvcClass, varRepType, varReplacedDcId, varRetryCounter
SELECT castorFile, ouid, ogid, destDcId, destSvcClass, replicationType,
replacedDcId, retryCounter, drainingJob
INTO varCfId, varUid, varGid, varDestDcId, varDestSvcClass, varRepType,
varReplacedDcId, varRetryCounter, varDrainingJob
FROM Disk2DiskCopyjob
WHERE transferId = inTransferId;
EXCEPTION WHEN NO_DATA_FOUND THEN
......@@ -389,6 +436,10 @@ BEGIN
-- Trigger the creation of additional copies of the file, if any
replicateOnClose(varCfId, varUid, varGid);
END IF;
-- In case of draining, update DrainingJob
IF varDrainingJob IS NOT NULL THEN
updateDrainingJobOnD2dEnd(varDrainingJob, varFileSize, False);
END IF;
ELSE
DECLARE
varMaxNbD2dRetries INTEGER := TO_NUMBER(getConfigOption('D2dCopy', 'MaxNbRetries', 2));
......@@ -403,8 +454,16 @@ BEGIN
logToDLF(NULL, dlf.LVL_SYSTEM, dlf.D2D_D2DDONE_RETRIED, varFileId, varNsHost, 'stagerd', varComment ||
' RetryNb=' || TO_CHAR(varRetryCounter+1) || ' maxNbRetries=' || TO_CHAR(varMaxNbD2dRetries));
ELSE
-- no more retries, let's fail the disk to disk copy
DELETE FROM Disk2DiskCopyjob WHERE transferId = inTransferId;
-- no more retries, let's delete the disk to disk job copy and remember the error
BEGIN
  DELETE FROM Disk2DiskCopyjob WHERE transferId = inTransferId;
  -- only copies triggered by a draining job have an error to remember :
  -- DrainingErrors.drainingJob is NOT NULL, so inserting a NULL drainingJob
  -- would raise an error that the handler below does not catch
  IF varDrainingJob IS NOT NULL THEN
    INSERT INTO DrainingErrors (drainingJob, errorMsg, fileId, nsHost)
    VALUES (varDrainingJob, inErrorMessage, varFileId, varNsHost);
  END IF;
EXCEPTION WHEN NO_DATA_FOUND THEN
  -- the Disk2DiskCopyjob was already dropped (e.g. because of an interrupted draining)
  -- in such a case, forget about the error
  -- NOTE(review): a DELETE matching no row does not raise NO_DATA_FOUND by itself,
  -- so this handler may never fire - confirm
  NULL;
END;
logToDLF(NULL, dlf.LVL_NOTICE, dlf.D2D_D2DDONE_NORETRY, varFileId, varNsHost, 'stagerd', varComment ||
' maxNbRetries=' || TO_CHAR(varMaxNbD2dRetries));
-- Fail waiting subrequests
......@@ -412,16 +471,20 @@ BEGIN
SET status = dconst.SUBREQUEST_FAILED,
lastModificationTime = getTime(),
errorCode = serrno.SEINTERNAL,
errorMessage = 'Disk to disk copy failed'
errorMessage = 'Disk to disk copy failed after ' || TO_CHAR(varMaxNbD2dRetries) ||
' retries. Last error was : ' || inErrorMessage
WHERE status = dconst.SUBREQUEST_WAITSUBREQ
AND castorfile = varCfId;
AND castorfile = varCfId;
-- In case of draining, update DrainingJob
IF varDrainingJob IS NOT NULL THEN
updateDrainingJobOnD2dEnd(varDrainingJob, varFileSize, True);
END IF;
END IF;
END;
END IF;
END;
/
/* PL/SQL method implementing disk2DiskCopyStart
* Note that cfId is only needed for proper logging in case the replication has been canceled.
*/
......@@ -865,7 +928,9 @@ BEGIN
END;
/