Commit 58b26d03 authored by Giuseppe Lo Presti's avatar Giuseppe Lo Presti
Browse files

CASTOR migration: added initial code and scripts, still WIP.

References:
https://gitlab.cern.ch/cta/CTA/issues/479 (on the CTA side)
Jira: CASTOR-5526: support the migration to CTA (on the CASTOR side)

The latter won't be updated further: all required code will
be committed here, and the GitLab tickets will be updated accordingly.
parent c4db1749
-- Flush whatever the current session still has pending before running the DDL below
COMMIT;

--------------------------------------------------
-- 1. Usage stats. This section should be ported
--    to PostgreSQL at some time in the future.
--------------------------------------------------

-- Namespace statistics, one row per (gid, timestamp). Same logic as in CASTOR.
-- The three seg* columns and the three seg2* columns hold two separate sets of
-- tape counters (see gatherCatalogueStats for how they are filled).
CREATE TABLE UsageStats (
  gid                 NUMBER(6) DEFAULT 0 CONSTRAINT NN_UsageStats_gid NOT NULL,
  timestamp           NUMBER    DEFAULT 0 CONSTRAINT NN_UsageStats_ts  NOT NULL,
  maxFileId           INTEGER,
  fileCount           INTEGER,
  fileSize            INTEGER,
  segCount            INTEGER,
  segSize             INTEGER,
  segCompressedSize   INTEGER,
  seg2Count           INTEGER,
  seg2Size            INTEGER,
  seg2CompressedSize  INTEGER
);

ALTER TABLE UsageStats
  ADD CONSTRAINT PK_UsageStats_gid_ts PRIMARY KEY (gid, timestamp);
-- This table will be used to safely store the legacy CASTOR usage statistics.
-- It has the same columns as UsageStats. The NOT NULL constraints carry their
-- own names because constraint names must be unique within an Oracle schema:
-- reusing NN_UsageStats_gid / NN_UsageStats_ts would make this CREATE fail.
CREATE TABLE CastorUsageStats (
  gid                 NUMBER(6) DEFAULT 0 CONSTRAINT NN_CastorUsageStats_gid NOT NULL,
  timestamp           NUMBER    DEFAULT 0 CONSTRAINT NN_CastorUsageStats_ts  NOT NULL,
  maxFileId           INTEGER,
  fileCount           INTEGER,
  fileSize            INTEGER,
  segCount            INTEGER,
  segSize             INTEGER,
  segCompressedSize   INTEGER,
  seg2Count           INTEGER,
  seg2Size            INTEGER,
  seg2CompressedSize  INTEGER
);
-- This table is used to store the mapping gid -> experiment name, like in CASTOR.
-- Still to be manually updated, in the lack of an automated mechanism.
-- The primary key is declared out-of-line: an inline (column-level) constraint
-- cannot be followed by a column list.
CREATE TABLE EXPERIMENTS (
  name VARCHAR2(20 BYTE),
  gid  NUMBER(6,0),
  CONSTRAINT GID_PK PRIMARY KEY (gid)
);
-- Helper procedure to insert/accumulate statistics in the UsageStats table.
-- A first call for a given (inGid, inTimestamp) creates the row; when the insert
-- hits a unique-constraint violation (ORA-00001) the counters passed in are added
-- to the existing row instead, and maxFileId is raised if the new value is larger.
CREATE OR REPLACE PROCEDURE insertNSStats(inGid IN INTEGER, inTimestamp IN NUMBER,
                                          inMaxFileId IN INTEGER, inFileCount IN INTEGER, inFileSize IN INTEGER,
                                          inSegCount IN INTEGER, inSegSize IN INTEGER, inSegCompressedSize IN INTEGER,
                                          inSeg2Count IN INTEGER, inSeg2Size IN INTEGER, inSeg2CompressedSize IN INTEGER) AS
BEGIN
  INSERT INTO UsageStats
    (gid, timestamp,
     maxFileId, fileCount, fileSize,
     segCount, segSize, segCompressedSize,
     seg2Count, seg2Size, seg2CompressedSize)
  VALUES
    (inGid, inTimestamp,
     inMaxFileId, inFileCount, inFileSize,
     inSegCount, inSegSize, inSegCompressedSize,
     inSeg2Count, inSeg2Size, inSeg2CompressedSize);
EXCEPTION
  -- DUP_VAL_ON_INDEX is the predefined PL/SQL name for ORA-00001
  WHEN DUP_VAL_ON_INDEX THEN
    UPDATE UsageStats
       SET maxFileId          = CASE WHEN inMaxFileId > maxFileId THEN inMaxFileId ELSE maxFileId END,
           fileCount          = fileCount + inFileCount,
           fileSize           = fileSize + inFileSize,
           segCount           = segCount + inSegCount,
           segSize            = segSize + inSegSize,
           segCompressedSize  = segCompressedSize + inSegCompressedSize,
           seg2Count          = seg2Count + inSeg2Count,
           seg2Size           = seg2Size + inSeg2Size,
           seg2CompressedSize = seg2CompressedSize + inSeg2CompressedSize
     WHERE gid = inGid
       AND timestamp = inTimestamp;
END;
/
-- This procedure is run as a database job to generate statistics from the namespace.
-- Taken as is from CASTOR, cf. https://gitlab.cern.ch/castor/CASTOR/tree/master/ns/oracleTrailer.sql
-- It writes, for one common timestamp, per-gid file counters, per-gid tape
-- counters (copy 1 in seg*, any other copy number in seg2*), and finally a
-- totals row stored under gid = -1. Each of the three steps is committed.
CREATE OR REPLACE PROCEDURE gatherCatalogueStats AS
  -- one snapshot time shared by all rows written during this run
  varTimestamp NUMBER := trunc(getTime());
BEGIN
  -- File-level statistics
  FOR fileStats IN (
    SELECT disk_file_gid,
           MAX(archive_file_id) maxId,
           COUNT(*)             fileCount,
           SUM(size_in_bytes)   fileSize
      FROM Archive_File
     WHERE creation_time < varTimestamp
     GROUP BY disk_file_gid
  ) LOOP
    insertNSStats(fileStats.disk_file_gid, varTimestamp,
                  fileStats.maxId, fileStats.fileCount, fileStats.fileSize,
                  0, 0, 0,
                  0, 0, 0);
  END LOOP;
  COMMIT;
  -- Tape-level statistics.
  -- Both the plain and the "compressed" size are computed from size_in_bytes here.
  FOR tapeStats IN (
    SELECT disk_file_gid,
           copy_nb,
           SUM(size_in_bytes) comprSize,
           SUM(size_in_bytes) plainSize,
           COUNT(*)           nbSegs
      FROM Tape_File
     INNER JOIN Archive_File
        ON Tape_File.archive_file_id = Archive_File.archive_file_id
     WHERE Archive_File.creation_time < varTimestamp
     GROUP BY disk_file_gid, copy_nb
  ) LOOP
    IF tapeStats.copy_nb = 1 THEN
      -- first copy goes to the seg* counters
      insertNSStats(tapeStats.disk_file_gid, varTimestamp,
                    0, 0, 0,
                    tapeStats.nbSegs, tapeStats.plainSize, tapeStats.comprSize,
                    0, 0, 0);
    ELSE
      -- everything else is accounted in the seg2* counters
      insertNSStats(tapeStats.disk_file_gid, varTimestamp,
                    0, 0, 0,
                    0, 0, 0,
                    tapeStats.nbSegs, tapeStats.plainSize, tapeStats.comprSize);
    END IF;
  END LOOP;
  COMMIT;
  -- Also compute totals over all gids of this run
  INSERT INTO UsageStats
    (gid, timestamp,
     maxFileId, fileCount, fileSize,
     segCount, segSize, segCompressedSize,
     seg2Count, seg2Size, seg2CompressedSize)
  SELECT -1, varTimestamp,
         MAX(maxFileId), SUM(fileCount), SUM(fileSize),
         SUM(segCount), SUM(segSize), SUM(segCompressedSize),
         SUM(seg2Count), SUM(seg2Size), SUM(seg2CompressedSize)
    FROM UsageStats
   WHERE timestamp = varTimestamp;
  COMMIT;
END;
/
/* Database job for the statistics: (re)creates the scheduler job StatsJob */
BEGIN
  -- Remove the job, if present, before recreating it.
  -- The lookup uses the upper-case name under which the job is listed.
  FOR existingJob IN (
    SELECT job_name
      FROM user_scheduler_jobs
     WHERE job_name = 'STATSJOB'
  ) LOOP
    DBMS_SCHEDULER.DROP_JOB(job_name => existingJob.job_name, force => TRUE);
  END LOOP;
  -- Create a db job to be run every day executing the gatherCatalogueStats
  -- procedure; the first run is scheduled one hour from now.
  DBMS_SCHEDULER.CREATE_JOB(
    job_name        => 'StatsJob',
    job_type        => 'PLSQL_BLOCK',
    job_action      => 'BEGIN gatherCatalogueStats(); END;',
    job_class       => 'CASTOR_JOB_CLASS',
    start_date      => SYSDATE + 60/1440,
    repeat_interval => 'FREQ=DAILY; INTERVAL=1',
    enabled         => TRUE,
    comments        => 'Gathering of catalogue usage statistics');
END;
/
---------------------------------------------
-- 2. CASTOR to CTA migration. This code is
-- only supported for Oracle.
---------------------------------------------
-- Create synonyms for all relevant tables
-- XXX TBD XXX
-- Import metadata from the CASTOR namespace.
-- Runs the CASTOR-side preparation for the given tape pool and returns two
-- cursors over the CASTOR helper tables:
--   inTapePool        tape pool to be exported, passed to the castor.* procedures
--   inEOSCTAInstance  EOS CTA instance name (accepted but not used in this body)
--   Dirs              OUT cursor over castor.CTADirsHelper
--   Files             OUT cursor over castor.CTAFilesHelper
CREATE OR REPLACE PROCEDURE importFromCASTOR(inTapePool VARCHAR2, inEOSCTAInstance VARCHAR2,
                                             Dirs OUT SYS_REFCURSOR, Files OUT SYS_REFCURSOR) AS
  -- number of files selected for export, as reported by castor.prepareCTAExport
  nbFiles INTEGER;
BEGIN
  -- XXX error handling is missing
  castor.prepareCTAExport(inTapePool, nbFiles);
  castor.dirsForCTAExport(inTapePool);
  -- Get all metadata for the EOS-side namespace
  -- NOTE(review): the helper tables are filled with direct-path (APPEND) inserts,
  -- which normally must be committed before the table can be queried - confirm
  -- that the castor.* procedures commit before these cursors are opened.
  OPEN Dirs FOR
    SELECT *
      FROM castor.CTADirsHelper;
  castor.filesForCTAExport(inTapePool);
  OPEN Files FOR
    SELECT *
      FROM castor.CTAFilesHelper;
END;
/
-- Populate the CTA catalogue (Archive_File and Tape_File) from the rows that
-- the CASTOR side prepared in castor.CTAFilesHelper.
--   inEOSCTAInstance  EOS CTA instance name: stored as disk_instance_name and
--                     used to build the EOS path of every file. The body always
--                     referenced this name, so it is now declared as a parameter.
-- Work is committed in batches of 10000 files, plus once at the end.
CREATE OR REPLACE PROCEDURE populateCTAFromCASTOR(inEOSCTAInstance IN VARCHAR2) AS
  pathInEos VARCHAR2(4000);
  -- number of files inserted since the last commit
  ct INTEGER := 0;
BEGIN
  FOR f IN (SELECT fileid, path, disk_gid, filesize, checksum, fileclass, atime,
                   vid, fseq, blockId, copyno, s_mtime
              FROM castor.CTAFilesHelper) LOOP
    pathInEos := '/eos/cta/' || inEOSCTAInstance || f.path;  -- XXX how to massage this?
    -- insert file metadata
    -- NOTE(review): gatherCatalogueStats reads Archive_File.disk_file_gid whereas
    -- this insert targets disk_gid - check which column name the catalogue has.
    INSERT INTO Archive_File (archive_file_id, disk_instance_name, disk_file_id, disk_file_path,
                              disk_gid, size_in_bytes, checksum_type, checksum_value,
                              storage_class_id, creation_time, reconciliation_time)
    VALUES (f.fileid, inEOSCTAInstance, f.fileid, pathInEos, f.disk_gid,
            f.filesize, 'AD', f.checksum, f.fileclass, f.atime, 0);
    -- insert tape metadata
    INSERT INTO Tape_File (archive_file_id, vid, fseq, block_id, copy_nb, creation_time)
    VALUES (f.fileid, f.vid, f.fseq, f.blockId, f.copyno, f.s_mtime);
    ct := ct + 1;
    IF ct >= 10000 THEN
      COMMIT;
      ct := 0;
    END IF;
  END LOOP;
  -- make the last, partial batch durable as well
  COMMIT;
END;
/
-- Tell the CASTOR side that the import is over, by calling castor.completeCTAExport.
--   inTapePool  optional tape pool name, forwarded as is. castor.completeCTAExport
--               declares a pool-name parameter, so one has to be supplied.
CREATE OR REPLACE PROCEDURE completeImportFromCASTOR(inTapePool IN VARCHAR2 DEFAULT NULL) AS
BEGIN
  castor.completeCTAExport(inTapePool);
END;
/
/*****************************************************************************
* castor_ctamigration_schema.sql
*
* This file is part of the Castor/CTA project.
* See http://castor.web.cern.ch/castor
*
* Copyright (C) 2003 CERN
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* This script adds the necessary tables and code to an existing
* CASTOR Nameserver schema in order to support the metadata migration
* to CTA, the CASTOR successor.
*
* @author Castor Dev team, castor-dev@cern.ch
*****************************************************************************/
UNDEF ctaSchema
ACCEPT ctaSchema CHAR PROMPT 'Enter the name of the CTA schema: ';

-- Table for the CTA export: the ids of the files being exported
CREATE TABLE CTAExport (
  fileid INTEGER NOT NULL PRIMARY KEY
);
-- Need to grant access from the CTA catalogue schema
GRANT SELECT ON CTAExport TO &ctaSchema;

-- Tables to store intermediate data for the export.
-- Temporary tables cannot be used here because of distributed transactions.

-- File metadata together with the tape segment columns (copyno .. s_mtime)
CREATE TABLE CTAFilesHelper (
  fileid     INTEGER NOT NULL PRIMARY KEY,
  path       VARCHAR2(2048),
  disk_uid   INTEGER,
  disk_gid   INTEGER,
  filemode   INTEGER,
  atime      INTEGER,
  mtime      INTEGER,
  fileclass  INTEGER,
  filesize   INTEGER,
  checksum   VARCHAR2(10),
  copyno     INTEGER,
  VID        VARCHAR2(6),
  fseq       INTEGER,
  blockId    INTEGER,
  s_mtime    INTEGER
);
GRANT SELECT ON CTAFilesHelper TO &ctaSchema;

-- Directory metadata
CREATE TABLE CTADirsHelper (
  fileid     INTEGER NOT NULL PRIMARY KEY,
  path       VARCHAR2(2048),
  disk_uid   INTEGER,
  disk_gid   INTEGER,
  filemode   INTEGER,
  atime      INTEGER,
  mtime      INTEGER,
  fileclass  INTEGER
);
GRANT SELECT ON CTADirsHelper TO &ctaSchema;
/* Procedure to extract the directory names for the export to CTA.
 * Collects the parent directories of all files having a segment on a
 * not-yet-exported tape of the given pool and (re)fills CTADirsHelper
 * with their metadata. The table is truncated first and the new content
 * is committed at the end.
 *   inPoolName  name of the tape pool being exported
 */
CREATE OR REPLACE PROCEDURE dirsForCTAExport(inPoolName IN VARCHAR2) AS
  fileids numList;
BEGIN
  SELECT DISTINCT F.parent_fileid
    BULK COLLECT INTO fileids
    FROM Cns_file_metadata F
   INNER JOIN Cns_seg_metadata S
      ON S.s_fileid = F.fileid
   WHERE S.vid IN (
           SELECT vid FROM Vmgr_tape_side
            WHERE poolName = inPoolName AND BITAND(status, 2) = 0  -- not already EXPORTED
         );
  EXECUTE IMMEDIATE 'TRUNCATE TABLE CTADirsHelper';
  INSERT /*+ APPEND */ INTO CTADirsHelper
    (fileid, path, disk_uid, disk_gid, filemode, atime, mtime, fileclass)
    -- strip the /castor/cern.ch prefix from all paths: substr() positions are
    -- 1-based, so the remainder starts one character after the prefix length
    SELECT F.fileid,
           substr(nvl(D.path, getPathForFileid(F.fileid)), length('/castor/cern.ch') + 1) AS path,
           F.owner_uid, F.gid,
           F.filemode, F.atime, F.mtime, F.fileclass  -- XXX fileclass?
      FROM Cns_file_metadata F
     INNER JOIN (SELECT column_value FROM TABLE(fileids)) DirIds
        ON DirIds.column_value = F.fileid
      -- Dirs_Full_Path is the optional side: when it has no row for a directory
      -- the path is computed with getPathForFileid() by the nvl() above
      LEFT JOIN Dirs_Full_Path D
        ON D.fileid = F.fileid;
  -- a direct-path insert has to be committed before the table can be read again
  COMMIT;
END;
/
/* Procedure to prepare the export to CTA.
 * Fills CTAExport with the ids of all files having a segment on a
 * not-yet-exported tape of the given pool.
 *   inPoolName  name of the tape pool being exported
 *   nbFiles     OUT: number of files registered in CTAExport
 * Raises -20000 when CTAExport is not empty (another export is ongoing) or
 * when some of the selected files are already flagged onCTA.
 */
CREATE OR REPLACE PROCEDURE prepareCTAExport(inPoolName IN VARCHAR2, nbFiles OUT INTEGER) AS
  nbAlreadyExported INTEGER;
BEGIN
  -- check if there's some ongoing export
  SELECT COUNT(*) INTO nbFiles FROM CTAExport;
  IF nbFiles > 0 THEN
    raise_application_error(-20000, 'Another export of ' || nbFiles || ' files to CTA is ongoing, ' ||
      'please terminate it with completeCTAExport() before starting a new one.');
  END IF;
  -- first extract the tape pool and tapes metadata
  -- now prepare the files.
  -- No APPEND hint here: CTAExport is queried again below within the same
  -- transaction, which is not possible after a direct-path insert.
  -- DISTINCT keeps a file with several matching segments from violating the
  -- primary key on fileid.
  INSERT INTO CTAExport (fileid)
    SELECT DISTINCT s_fileid
      FROM Cns_seg_metadata
     WHERE vid IN (
             SELECT vid FROM Vmgr_tape_side
              WHERE poolName = inPoolName AND BITAND(status, 2) = 0  -- not already EXPORTED
           );
  -- check if some files had already been exported
  SELECT COUNT(*) INTO nbAlreadyExported
    FROM Cns_file_metadata F
   INNER JOIN CTAExport E
      ON E.fileid = F.fileid
   WHERE F.onCTA = 1;
  IF nbAlreadyExported > 0 THEN
    raise_application_error(-20000, 'Warning: found ' || nbAlreadyExported || ' already exported, please manually check them.');
  END IF;
  -- all right, return the count of files to be exported
  SELECT COUNT(*) INTO nbFiles FROM CTAExport;
END;
/
/* Procedure to extract the files metadata for the export to CTA.
 * (Re)fills CTAFilesHelper with one row per file/segment pair for every
 * file listed in CTAExport. The table is truncated first and the new
 * content is committed at the end.
 *   inPoolName  name of the tape pool being exported (not used in the body)
 * NOTE(review): CTAFilesHelper has a primary key on fileid, so a file with
 * more than one segment row would make this insert fail - to be confirmed.
 */
CREATE OR REPLACE PROCEDURE filesForCTAExport(inPoolName IN VARCHAR2) AS
BEGIN
  EXECUTE IMMEDIATE 'TRUNCATE TABLE CTAFilesHelper';
  INSERT /*+ APPEND */ INTO CTAFilesHelper
    (fileid, path, disk_uid, disk_gid, filemode, atime, mtime, fileclass,
     filesize, checksum, copyno, vid, fseq, blockId, s_mtime)
    SELECT F.fileid,
           -- use the cached directory path when there is one, else compute the full path
           decode(D.path, NULL, getPathForFileid(F.fileid), D.path || '/' || F.name) AS path,
           F.owner_uid, F.gid, F.filemode, F.atime, F.mtime, F.fileclass,
           S.segsize, S.checksum, S.copyno, S.vid, S.fseq,
           utl_raw.cast_to_binary_integer(S.blockId),
           S.lastModificationTime AS s_mtime
      FROM CTAExport E
     INNER JOIN Cns_file_metadata F
        ON F.fileid = E.fileid
     INNER JOIN Cns_seg_metadata S
        ON S.s_fileid = F.fileid
     -- only files whose file class exists are exported
     INNER JOIN Cns_class_metadata C
        ON C.classid = F.fileclass
      -- Dirs_Full_Path is the optional side, see the decode() above
      LEFT JOIN Dirs_Full_Path D
        ON D.fileid = F.parent_fileid;
  -- a direct-path insert has to be committed before the table can be read again
  COMMIT;
END;
/
/* Procedure to terminate the export to CTA.
 * Flags every file listed in CTAExport, and its segments, with onCTA = 1,
 * removing the processed ids from CTAExport as it goes. Work is committed
 * in batches of at most 10000 files. CTAExport is truncated at the end.
 *   inPoolName  not used by the body, hence optional
 */
CREATE OR REPLACE PROCEDURE completeCTAExport(inPoolName IN VARCHAR2 DEFAULT NULL) AS
  CURSOR c IS SELECT fileid FROM CTAExport;
  ids numList;
BEGIN
  LOOP
    -- the cursor is reopened for every batch, as the previous batch has been
    -- deleted and committed in the meantime
    OPEN c;
    FETCH c BULK COLLECT INTO ids LIMIT 10000;
    -- close before the exit test, so that the cursor is never left open
    CLOSE c;
    EXIT WHEN ids.count = 0;
    FORALL i IN 1..ids.count
      UPDATE Cns_file_metadata SET onCTA = 1 WHERE fileid = ids(i);
    FORALL i IN 1..ids.count
      UPDATE Cns_seg_metadata SET onCTA = 1 WHERE s_fileid = ids(i);
    FORALL i IN 1..ids.count
      DELETE FROM CTAExport WHERE fileid = ids(i);
    COMMIT;
  END LOOP;
  EXECUTE IMMEDIATE 'TRUNCATE TABLE CTAExport';
END;
/
-- XXX TO BE CHECKED
-- Let the CTA catalogue schema run the export procedures.
-- These are plain GRANT statements: EXECUTE IMMEDIATE is a PL/SQL statement and
-- is only valid inside a PL/SQL block. The grantee is the schema name entered
-- at the ACCEPT prompt, as for the SELECT grants on the helper tables.
GRANT EXECUTE ON prepareCTAExport TO &ctaSchema;
GRANT EXECUTE ON dirsForCTAExport TO &ctaSchema;
GRANT EXECUTE ON filesForCTAExport TO &ctaSchema;
GRANT EXECUTE ON completeCTAExport TO &ctaSchema;
#!/bin/sh
#/******************************************************************************
# * castortapepooltocta.sh
# *
# * This file is part of the Castor/CTA project.
# * See http://castor.web.cern.ch/castor
# *
# * Copyright (C) 2003 CERN
# * This program is free software; you can redistribute it and/or
# * modify it under the terms of the GNU General Public License
# * as published by the Free Software Foundation; either version 2
# * of the License, or (at your option) any later version.
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# *
# * command line tool to prepare the export of a tapepool metadata
# * to CTA, the CASTOR successor
#
# * @author Castor Dev team, castor-dev@cern.ch
# *****************************************************************************/
# check arguments
if [ $# -ne 1 ]; then
  echo "usage: $0 tapepool"
  exit 1
fi
tapepool="$1"

# check that no migrations are pending/ongoing for this tapepool
# (the matching lines are printed before aborting)
if printmigrationstatus | grep -e "$tapepool"; then
  echo 'Migrations still ongoing, aborting'
  exit 1
fi

# backup relevant metadata
mkdir -p ~/ctaexport
cd ~/ctaexport || { echo 'Cannot enter ~/ctaexport, aborting'; exit 1; }

# Save the stager privileges only if no backup exists yet: once the stager
# has been paused, a second run must not overwrite the saved lists.
# The test is -e (file exists): the backup file is never executable.
if [ ! -e stager_listprivileges_output ]; then
  # pause the stager altogether (this destroys the B&W lists!),
  # but only after the lists have been saved successfully
  stager_listprivileges > stager_listprivileges_output && \
    stager_removeprivilege -U:
fi

# on the stager, make the tapepool unusable (the tapepool metadata can stay)
# The routes are saved once, in the file read back by unexportctatocastor.sh
if [ ! -e "migrationroutes_$tapepool" ]; then
  printmigrationroute | grep -e "$tapepool" > "migrationroutes_$tapepool"
fi
printmigrationroute | grep -e "$tapepool" | awk '{print $1}' | xargs -I {} deletemigrationroute {}

# on the VMGR, mark all tapes as Exported for the tape pool
vmgrlisttape -P "$tapepool" | awk '{print $1}' | xargs -I {} vmgrmodifytape -V {} --st EXPORTED

# execute the CTA DB extraction from the CTA DB
# (remote-linked with the CASTOR Nameserver)
# ...

# empty the CASTOR disk cache (not necessary)
#for h in `printdiskserver | grep cern.ch | awk '{print $1}'`; do
#  for f in `seq -w 1 24`; do
#    deletediskcopy $h:/srv/castor/$f/
#  done
#done

# to resume the stager once everything is completed:
#stager_addprivilege -U:
#!/bin/sh
#/******************************************************************************
# * unexportctatocastor.sh
# *
# * This file is part of the Castor/CTA project.
# * See http://castor.web.cern.ch/castor
# *
# * Copyright (C) 2003 CERN
# * This program is free software; you can redistribute it and/or
# * modify it under the terms of the GNU General Public License
# * as published by the Free Software Foundation; either version 2
# * of the License, or (at your option) any later version.
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# *
# * command line tool to undo the export of a tapepool metadata
# * to CTA by reenabling the tapes in CASTOR.
#
# * @author Castor Dev team, castor-dev@cern.ch
# *****************************************************************************/
# check arguments
if [ $# -ne 1 ]; then
  echo "usage: $0 tapepool"
  exit 1
fi
tapepool="$1"

# assume the relevant metadata was stored here.
# The braces matter: "cd ... || echo ... && exit 1" would exit even when cd succeeds.
cd ~/ctaexport || { echo 'Metadata from a previous export not found, aborting'; exit 1; }
if [ ! -r "migrationroutes_$tapepool" ]; then
  echo "Saved migration routes for $tapepool not found, aborting"
  exit 1
fi

# on the stager, restore the migration routes: header and separator lines are
# dropped, every other line is turned into an entermigrationroute command.
# -e is needed so that the pattern '---' is not parsed as an option
grep -v FILECLASS "migrationroutes_$tapepool" | grep -v -e '---' | awk '{print "entermigrationroute " $1 " " $2 ":" $4}' | sh

# on the VMGR, mark all tapes back as available (but full == read-only) for the tape pool
vmgrlisttape -P "$tapepool" | awk '{print $1}' | xargs -I {} vmgrmodifytape -V {} --st TAPE_FULL
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment