castorns_ctamigration_schema.sql 11.2 KB
Newer Older
1
/*****************************************************************************
2
 *              castorns_ctamigration_schema.sql
3
4
 *
 * This file is part of the Castor/CTA project.
5
 * See http://cern.ch/castor and http://cern.ch/eoscta
6
 * Copyright (C) 2019  CERN
7
8
9
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
10
 * as published by the Free Software Foundation; either version 3
11
12
13
14
15
16
 * of the License, or (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
17
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
18
19
20
21
22
23
24
25
26
 *
 * This script adds the necessary tables and code to an existing
 * CASTOR Nameserver schema in order to support the metadata migration
 * to CTA, the CASTOR successor.
 *
 * @author Castor Dev team, castor-dev@cern.ch
 *****************************************************************************/

UNDEF ctaSchema
27
ACCEPT ctaSchema CHAR PROMPT 'Enter the username of the CTA schema: ';
28

29
-- Helper tables to store intermediate data for the export
30
CREATE TABLE CTAFilesHelper (fileid INTEGER NOT NULL PRIMARY KEY, parent_fileid INTEGER, filename VARCHAR2(255), disk_uid INTEGER, disk_gid INTEGER,
31
                             filemode INTEGER, btime INTEGER, ctime INTEGER, mtime INTEGER, classid INTEGER,
32
                             filesize INTEGER, checksum NUMBER, copyno INTEGER, VID VARCHAR2(6), fseq INTEGER,
33
                             blockId INTEGER, s_mtime INTEGER);
34
CREATE INDEX I_CTAFiles_parent_id ON CTAFilesHelper (parent_fileid);
35
CREATE INDEX I_CTAFiles_vid_filesize ON CTAFilesHelper (vid, filesize);
36

37
38
CREATE OR REPLACE VIEW CTADirsHelper AS
  SELECT /*+ PARALLEL(F) PARALLEL(D) */
39
         F.fileid, F.parent_fileid, D.depth, substr(D.path, length('/castor/cern.ch/')) as path,
40
         F.owner_uid disk_uid, F.gid disk_gid,
41
         F.filemode, F.ctime, F.mtime, F.fileclass classid       -- no creation time available for directories, cf. #556
42
43
    FROM Cns_file_metadata F, Dirs_Full_Path D
   WHERE F.fileid = D.fileid;
44

45
46
47
48
49
50
51
52
CREATE TABLE CTADeltaDirsHelper (fileid INTEGER NOT NULL PRIMARY KEY, parent_fileid INTEGER, depth INTEGER,
                                 path VARCHAR2(2048), disk_uid INTEGER, disk_gid INTEGER, filemode INTEGER,
                                 ctime INTEGER, mtime INTEGER, classid INTEGER);

CREATE GLOBAL TEMPORARY TABLE CTADeltaTemp
  (fileid INTEGER NOT NULL PRIMARY KEY, parent_fileid INTEGER, depth INTEGER, path VARCHAR2(2048))
ON COMMIT PRESERVE ROWS;

53
-- Table to store the logs of the migration operations
Giuseppe Lo Presti's avatar
Giuseppe Lo Presti committed
54
CREATE TABLE CTAMigrationLog (timestamp NUMBER, client VARCHAR2(100), tapepool VARCHAR2(15), message VARCHAR2(2000));
55

56
57
-- Enable parallel access in all concerned tables
ALTER TABLE CTAFilesHelper PARALLEL;
58
ALTER TABLE CTADeltaDirsHelper PARALLEL;
59
60
ALTER TABLE Cns_file_metadata PARALLEL;
ALTER TABLE Cns_seg_metadata PARALLEL;
61
ALTER TABLE Dirs_Full_Path PARALLEL;
62

63

64
65
66
67
-- Internal procedure to log an action for a given migration process
CREATE OR REPLACE PROCEDURE ctaLog(inTapepool IN VARCHAR2, inMsg IN VARCHAR2) AS
PRAGMA AUTONOMOUS_TRANSACTION;
BEGIN
Giuseppe Lo Presti's avatar
Giuseppe Lo Presti committed
68
69
  INSERT INTO CTAMigrationLog (timestamp, client, tapepool, message)
    VALUES (getTime(), (SELECT SYS_CONTEXT('USERENV', 'HOST', 15) FROM Dual), inTapePool, inMsg);
70
71
72
  COMMIT;
END;
/
73

74
-- Helper function to pretty print an ETA
75
76
77
CREATE OR REPLACE FUNCTION prettyTime(inMins IN INTEGER) RETURN VARCHAR2 DETERMINISTIC AS
BEGIN
  IF inMins < 60 THEN
78
    RETURN round(inMins) || ' minutes';
79
  ELSE
Giuseppe Lo Presti's avatar
Giuseppe Lo Presti committed
80
    RETURN trunc(inMins/60) || ' hours ' || trunc(mod(inMins, 60)) || ' mins';
81
82
83
  END IF;
END;
/
84

85

86
/* Procedure to extract the directory names for the export to CTA */
87
88
CREATE OR REPLACE PROCEDURE filesAndDirsForCTAExport(inPoolName IN VARCHAR2) AS
  nbFiles INTEGER;
89
90
91
  dirsSnapshotTime INTEGER;
  dirIds numList;
  varPath VARCHAR2(2048);
92
BEGIN
93
94
95
96
97
  -- populate the helper table by selecting all required metadata
  INSERT /*+ APPEND PARALLEL(CTAFilesHelper) */ INTO CTAFilesHelper (
    SELECT /*+ PARALLEL(F) PARALLEL(S) */
           F.fileid, F.parent_fileid, F.name filename,    -- the full path is later built in CTA
           F.owner_uid disk_uid, F.gid disk_gid,
98
           F.filemode, F.atime, F.ctime, F.mtime, F.fileclass classid,   -- atime in CASTOR is creation time (#556)
99
100
101
102
103
104
105
106
107
108
109
110
           S.segsize as filesize, S.checksum, S.copyno, S.vid,
           S.fseq, utl_raw.cast_to_binary_integer(S.blockId), S.lastModificationTime as s_mtime
      FROM Cns_file_metadata F, Cns_seg_metadata S
     WHERE F.fileid = S.s_fileid
       AND S.vid IN (
        SELECT vid FROM Vmgr_tape_side
         WHERE poolName = inPoolName
           AND BITAND(status, 2) = 0 AND BITAND(status, 32) = 0    -- not already EXPORTED or ARCHIVED
       )
    );
  COMMIT;
  SELECT COUNT(*) INTO nbFiles FROM CTAFilesHelper;
111
112
113
  IF nbFiles = 0 THEN
    raise_application_error(-20000, 'No valid files found on this tape pool, aborting the import');
  END IF;
114
  ctaLog(inPoolName, 'Intermediate table for files prepared, '|| nbFiles ||' files to be imported. ETA: '||
115
116
117
118
119
                     prettyTime(nbFiles/7000/60));
  EXECUTE IMMEDIATE 'TRUNCATE TABLE CTADeltaDirsHelper';
  -- estimate the last update time of the Dirs_Full_Path snapshot. To be noted that
  -- in case the directory identified by max(fileid) got updated itself, the estimate
  -- will be incorrect: we just hope this is unlikely to happen as renames are very rare.
120
121
122
  SELECT mtime INTO dirsSnapshotTime FROM Cns_file_metadata
   WHERE fileid = (SELECT max(fileid) FROM Dirs_Full_Path);
  -- select any parent directory of the selected files, whose mtime or ctime has changed after
123
  -- Dirs_Full_Path was last updated: this typically happens because of newly added files
124
  -- but also because of renames!
125
  SELECT DISTINCT(D.fileid)
126
127
128
129
    BULK COLLECT INTO dirIds
    FROM CTAFilesHelper F, Cns_file_metadata D
   WHERE F.parent_fileid = D.fileid
     AND (D.mtime > dirsSnapshotTime OR D.ctime > dirsSnapshotTime);
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
  IF dirIds.count > 0 THEN
    -- found some, delete the renamed paths from the hierarchy snapshot
    ctaLog(inPoolName, 'Identified '|| dirIds.count ||' newly created/renamed directories');
    FOR i IN 1..dirIds.count LOOP
      BEGIN
        SELECT path INTO varPath FROM Dirs_Full_Path WHERE fileid = dirIds(i);
        DELETE FROM Dirs_Full_Path
         WHERE path LIKE varPath || '%';
      EXCEPTION WHEN NO_DATA_FOUND THEN
        -- this was a newly created path, will be inserted later
        NULL;
      END;
    END LOOP;
    COMMIT;
  END IF;
  -- do the selection again, this time with the anti-join to exactly identify
  -- which directories are missing for this export
  SELECT DISTINCT(D.fileid)
148
149
150
151
    BULK COLLECT INTO dirIds
    FROM CTAFilesHelper F, Cns_file_metadata D
   WHERE F.parent_fileid = D.fileid
     AND NOT EXISTS (SELECT 1 FROM Dirs_Full_Path DP WHERE DP.fileid = D.fileid);
152
153
154
155
156
157
158
159
160
  IF dirIds.count = 0 THEN
    ctaLog(inPoolName, 'Search for missing directories completed');
    RETURN;
  END IF;
  -- now populate the delta directories helper table with any directory that
  -- is not present in the Dirs_Full_Path snapshot. This accounts both for renames
  -- and for newly created directories after the Dirs_Full_Path was updated. Deleted
  -- directories are ignored
  ctaLog(inPoolName, 'Identified '|| dirIds.count ||' missing directories, preparing intermediate delta table');
161
  FOR i IN 1..dirIds.count LOOP
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
    MERGE INTO CTADeltaDirsHelper DD
      USING (SELECT /*+ NO_CONNECT_BY_COST_BASED */
                    fileid, parent_fileid, getPathDepthForFileId(fileid) as depth,
                    substr(getPathForFileId(fileid), length('/castor/cern.ch/')) as path,
                    owner_uid disk_uid, gid disk_gid,
                    filemode, ctime, mtime, fileclass classid
               FROM Cns_file_metadata
               START WITH fileid = dirIds(i)
               CONNECT BY fileid = PRIOR parent_fileid) D
    ON (DD.fileid = D.fileid)
    WHEN NOT MATCHED THEN
      INSERT (fileid, parent_fileid, depth, path, disk_uid, disk_gid,
              filemode, ctime, mtime, classid)
      VALUES (D.fileid, D.parent_fileid, D.depth, D.path, D.disk_uid, D.disk_gid,
              D.filemode, D.ctime, D.mtime, D.classid);
177
178
    COMMIT;
  END LOOP;
179
180
  -- drop null paths due to the above recursive query going to the top /castor level
  DELETE FROM CTADeltaDirsHelper WHERE path IS NULL;
181
182
183
  COMMIT;
  SELECT count(*) INTO nbFiles FROM CTADeltaDirsHelper;
  ctaLog(inPoolName, 'Intermediate delta table completed with '|| nbFiles ||' directories');
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
  -- also update the hierarchy snapshot with what was found
  MERGE /*+ PARALLEL(Dirs_Full_Path) */ INTO Dirs_Full_Path DP
    USING (SELECT /*+ PARALLEL(CTADeltaDirsHelper) */
                  fileid, parent_fileid, path, depth
             FROM CTADeltaDirsHelper) D
    ON (DP.fileid = D.fileid)
  WHEN MATCHED THEN
    UPDATE SET DP.parent = D.parent_fileid,
               DP.path = '/castor/cern.ch' || D.path,
               DP.depth = D.depth
  WHEN NOT MATCHED THEN
    INSERT (fileid, parent, path, depth)
    VALUES (D.fileid, D.parent_fileid, '/castor/cern.ch' || D.path, D.depth);
  COMMIT;
  ctaLog(inPoolName, 'Updated directories snapshot');
199
200
201
END;
/

202

203
/* Procedure to terminate the export to CTA and account it on the statistics */
204
CREATE OR REPLACE PROCEDURE completeCTAExport(inPoolName IN VARCHAR2, inDryRun IN INTEGER) AS
205
  CURSOR c IS SELECT fileid FROM CTAFilesHelper;
206
  ids numList;
207
  varUnused INTEGER;
Giuseppe Lo Presti's avatar
Giuseppe Lo Presti committed
208
  varStartTime INTEGER;
209
BEGIN
210
211
212
213
214
  BEGIN
    SELECT fileid INTO varUnused FROM CTAFilesHelper WHERE ROWNUM <= 1;
  EXCEPTION WHEN NO_DATA_FOUND THEN
    raise_application_error(-20000, 'No ongoing files export to CTA, nothing to do');
  END;
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
  IF inDryRun = 0 THEN
    ctaLog(inPoolName, 'Marking CASTOR files as exported to CTA');
    LOOP
      OPEN c;
      FETCH c BULK COLLECT INTO ids LIMIT 10000;
      EXIT WHEN ids.count = 0;
      CLOSE c;
      FORALL i IN 1..ids.count
        UPDATE Cns_file_metadata SET onCTA = 1 WHERE fileid = ids(i);
      FORALL i IN 1..ids.count
        UPDATE Cns_seg_metadata SET onCTA = 1 WHERE s_fileid = ids(i);
      FORALL i IN 1..ids.count
        DELETE FROM CTAFilesHelper WHERE fileid = ids(i);
      COMMIT;
    END LOOP;
  END IF;
231
  EXECUTE IMMEDIATE 'TRUNCATE TABLE CTAFilesHelper';
Giuseppe Lo Presti's avatar
Giuseppe Lo Presti committed
232
233
234
235
  SELECT max(timestamp) INTO varStartTime
    FROM CTAMigrationLog
   WHERE message LIKE 'CASTOR metadata import started%'
     AND tapepool = inPoolName;
236
  IF inDryRun = 1 THEN
Giuseppe Lo Presti's avatar
Giuseppe Lo Presti committed
237
    ctaLog(inPoolName, 'Export from CASTOR fully completed in '|| prettyTime((getTime()-varStartTime)/60) ||' [dry-run mode]');
238
  ELSE
Giuseppe Lo Presti's avatar
Giuseppe Lo Presti committed
239
    ctaLog(inPoolName, 'Export from CASTOR fully completed in '|| prettyTime((getTime()-varStartTime)/60));
240
  END IF;
241
242
243
END;
/

244

245
246
247
248
-- Need to grant access from the CTA catalogue schema
GRANT SELECT ON CTAFilesHelper TO &ctaSchema;
GRANT SELECT ON CTADirsHelper TO &ctaSchema;
GRANT SELECT ON CTAMigrationLog TO &ctaSchema;
249
GRANT SELECT ON Cns_class_metadata TO &ctaSchema;
250
GRANT SELECT ON Dirs_Full_Path TO &ctaSchema;
251
GRANT EXECUTE ON filesAndDirsForCTAExport TO &ctaSchema;
252
GRANT EXECUTE ON ctaLog TO &ctaSchema;