// The CERN Tape Archive (CTA) project
// Copyright (C) 2015  CERN
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
syntax = "proto2";
package cta.objectstore.serializers;

// The types of the objects. The type is carried in ObjectHeader.type and
// allows introspection of an object's contents.
enum ObjectType {
  RootEntry_t = 0;
  AgentRegister_t = 1;
  Agent_t = 2;
  DriveRegister_t = 3;
  DriveState_t = 4;
  // NOTE(review): value 5 is not assigned in this file; confirm it is not a
  // retired type before reusing it.
  SchedulerGlobalLock_t = 6;
  ArchiveRequest_t = 7;
  RetrieveRequest_t = 8;
  ArchiveQueue_t = 9;
  ArchiveQueueShard_t = 90;
  RetrieveQueue_t = 10;
  RetrieveQueueShard_t = 100;
  RepackRequest_t = 11;
  RepackIndex_t = 12;
  RepackQueue_t = 13;
  GenericObject_t = 1000;
}

// The base object header. This will allow introspection and automatic
// "garbage collection", i.e. returning an unprocessed object belonging
// to a dead agent to the right queue or container.
// - The type allows automatic management of the content without knowing its
// payload.
// - The version allows multiple schemas to be used at the object level (smooth
// schema evolution, on an object by object basis).
// - The owner is the authoritative source of ownership for the object.
// It allows arbitration of the actual owner in case the object is pointed
// to by several containers (during a transition, or after a failure).
// - The backup owner allows the object to be returned to a previous container
// in case of failure of an owner (when it is an agent).
// - The payload holds the object's content as raw bytes; presumably the
// serialized message designated by the type (confirm against the object
// store code).
message ObjectHeader {
  required ObjectType type = 1;
  required uint64 version = 2;
  required string owner = 3;
  required string backupowner = 4;
  required bytes payload = 5;
}

// A placeholder object for the implementation of neutral object handlers.
// It declares no fields.
message GenericObject {
}

// ===========================  Root Entry =====================================
// The objects making up the root entry.

// A user information record
message UserIdentity {
  // Name part of the identity.
  required string name = 10;
  // Group part of the identity.
  required string group = 11;
}

// Pointer to the drive register (register defined further)
message DriveRegisterPointer {
  // Address of the drive register object.
  required string address = 90;
  // User/host/time record attached to this pointer; presumably describes its
  // creation (confirm against the code that fills it).
  required EntryLog log = 91;
}

// Pointer to the agent register (register defined further)
message AgentRegisterPointer {
  // Address of the agent register object.
  required string address = 100;
  // User/host/time record attached to this pointer; presumably describes its
  // creation (confirm against the code that fills it).
  required EntryLog log = 101;
}

// Pointer to the repack index, referenced from
// RootEntry.repackindexpointer.
message RepackIndexPointer {
  // Address of the repack index object.
  required string address = 105;
}

// Pointer to a repack queue. RootEntry holds two of these: one for pending
// repack requests and one for repack requests to expand.
message RepackQueuePointer {
  // Address of the repack queue object.
  required string address = 107;
}


// Pointer to the scheduler global lock
message SchedulerGlobalLockPointer {
  // Address of the scheduler global lock object.
  required string address = 110;
  // User/host/time record attached to this pointer; presumably describes its
  // creation (confirm against the code that fills it).
  required EntryLog log = 111;
}

// Pointer to the archive queue
message ArchiveQueuePointer {
  // Address of the archive queue object.
  required string address = 120;
  // Name identifying the queue; presumably the tape pool name, since
  // ArchiveQueue is keyed by tapepool (confirm).
  required string name = 121;
}

// Pointer to the tape (retrieve) queue
message RetrieveQueuePointer {
  // Address of the retrieve queue object.
  required string address = 130;
  // Identifier of the queue; matches the vid field carried by RetrieveQueue.
  required string vid = 131;
}

// The root entry. This entry contains all the most static information, i.e.
// the admin handled configuration information
message RootEntry {
  // Archive queue pointers, one list per queue kind. Field numbers are not
  // contiguous and must not be renumbered: they identify the fields on the
  // wire.
  repeated ArchiveQueuePointer archive_queue_to_transfer_for_user_pointers = 1050;
  repeated ArchiveQueuePointer archive_queue_failed_pointers = 1062;
  repeated ArchiveQueuePointer archive_queue_to_report_for_user_pointers = 1068;
  repeated ArchiveQueuePointer archive_queue_to_transfer_for_repack_pointers = 1069;
  repeated ArchiveQueuePointer archive_queue_to_report_to_repack_for_success_pointers = 1072;
  repeated ArchiveQueuePointer archive_queue_to_report_to_repack_for_failure_pointers = 1073;
  // Retrieve queue pointers, one list per queue kind.
  repeated RetrieveQueuePointer retrieve_queue_to_transfer_for_user_pointers = 1060;
  repeated RetrieveQueuePointer retrieve_queue_to_report_for_user_pointers = 1063;
  repeated RetrieveQueuePointer retrieve_queue_failed_pointers = 1065;
  repeated RetrieveQueuePointer retrieve_queue_to_report_to_repack_for_success_pointers = 1066;
  repeated RetrieveQueuePointer retrieve_queue_to_report_to_repack_for_failure_pointers = 1067;
  repeated RetrieveQueuePointer retrieve_queue_to_transfer_for_repack_pointers = 1071;
  // Optional pointers to the registers, the repack index and the repack
  // queues.
  optional DriveRegisterPointer driveregisterpointer = 1070;
  optional AgentRegisterPointer agentregisterpointer = 1080;
  optional RepackIndexPointer repackindexpointer = 1085;
  optional RepackQueuePointer repackrequestspendingqueuepointer = 1086;
  optional RepackQueuePointer repackrequeststoexpandqueuepointer = 1088;
  // NOTE(review): presumably the address of an agent register whose creation
  // is in progress (an intent record) - confirm against the RootEntry code.
  optional string agentregisterintent = 1090;
  optional SchedulerGlobalLockPointer schedulerlockpointer = 1100;
}

//=========== Sub-objects ======================================================

// ------------- Agent handling ------------------------------------------------

// The agent object represents a running process. It is a payload to an object
// itself, and it can be owned by a watchdog or a global agent registry, which
// lists the agents not yet watched by a watchdog.
// - The description is a freetext string used for logging.
// - The watchdogs themselves own the agents they are watching, so the agents can
// be returned to the agent registry when they fail.
// - The heartbeat is incremented from time to time by the agent.
// - The timeout_us is the amount of microseconds after which the absence of
// change to the heartbeat can be interpreted as agent failure.
// - The ownedobject list is an ownership intent log that points to the objects
// that the agent is about to create, intends to own, or fully owns.
// The objects in this list can be considered for being returned to a backup
// owner.
// - being_garbage_collected defaults to false when absent; presumably set
// while the agent's objects are being returned to their backup owners
// (confirm against the garbage collector code).
message Agent {
  required string description = 2000;
  required uint64 heartbeat = 2001;
  required uint64 timeout_us = 2002;
  repeated string ownedobjects = 2003;
  optional bool being_garbage_collected = 2004 [default = false];
}

// The agent register: two lists of strings designating agents.
// NOTE(review): the entries are presumably agent object addresses, with
// untrackedagents listing the agents not yet watched by a watchdog - confirm.
message AgentRegister {
  repeated string agents = 2100;
  repeated string untrackedagents = 2101;
}

// ------------- Mount criteria and quota  -------------------------------------

// Thresholds and quota associated with mounting.
// NOTE(review): by their names the three max*BeforeMount fields bound the
// number of files, bytes and seconds accumulated before a mount is
// triggered - confirm against the scheduler code.
message MountCriteria {
  required uint64 maxFilesBeforeMount = 3200;
  required uint64 maxBytesBeforeMount = 3201;
  required uint64 maxSecondsBeforeMount = 3202;
  required uint32 quota = 3203;
}

// Description of one copy of a file on tape. ArchiveFile carries a repeated
// list of these (tapefiles).
message TapeFile {
  // Identifier of the tape volume holding the copy.
  required string vid = 9120;
  // File sequence number on the tape.
  required uint64 fseq = 9121;
  required uint64 blockid = 9122;
  required uint64 compressedsize = 9123;
  // Copy number of this tape copy.
  required uint32 copynb = 9124;
  // NOTE(review): time unit/epoch not visible here - confirm.
  required uint64 creationtime = 9125;
  required string checksumtype = 9126;
  required string checksumvalue = 9127;
}

// Information about the disk-side file, embedded in ArchiveFile,
// ArchiveRequest and SchedulerRetrieveRequest.
message DiskFileInfo {
  // Opaque bytes; content and format are not defined in this file.
  required bytes recoveryblob = 8900;
  required string group = 8910;
  required string owner = 8930;
  required string path = 8940;
}

// Description of an archived file: identifiers, size, checksum, disk-side
// information and the list of its tape copies.
message ArchiveFile {
  required uint64 archivefileid = 4351;
  required uint64 filesize = 4352;
  required string diskfileid = 4353;
  required string diskinstance = 4354;
  required DiskFileInfo diskfileinfo = 4355;
  required string checksumtype = 4356;
  required string checksumvalue = 4357;
  // NOTE(review): time unit/epoch not visible here - confirm.
  required uint64 creationtime = 4358;
  // One entry per copy of the file on tape.
  repeated TapeFile tapefiles = 4359;
  required uint64 reconciliationtime = 4360;
  required string storageclass = 4361;
}

// ------------- Drives handling  ----------------------------------------------

// State of one tape drive: identity, current session counters, the start
// times of the various phases, the desired state and the current/next mount.
// NOTE(review): the units of the *time fields and of latestbandwidth are not
// visible in this file - confirm before relying on them.
message DriveState {
  required string drivename = 5000;
  required string host = 5001;
  required string logicallibrary = 5002;
  optional uint64 sessionid = 5003;
  optional uint64 bytestransferedinsession = 5004;
  optional uint64 filestransferedinsession = 5005;
  optional double latestbandwidth = 5006;
  optional uint64 sessionstarttime = 5007;
  optional uint64 mountstarttime = 5008;
  optional uint64 transferstarttime = 5009;
  optional uint64 unloadstarttime = 5010;
  optional uint64 unmountstarttime = 5011;
  optional uint64 drainingstarttime = 5012;
  optional uint64 downorupstarttime = 5013;
  // Numbered 5026: declared here but allocated after 5025.
  optional uint64 probestarttime = 5026;
  optional uint64 cleanupstarttime = 5014;
  optional uint64 lastupdatetime = 5015;
  optional uint64 startstarttime = 5016;
  // Numbered 5027: declared here but allocated after 5026.
  optional uint64 shutdowntime = 5027;
  // mounttype and drivestatus are plain integers; presumably the numeric
  // values of enums defined outside this file (confirm).
  required uint32 mounttype = 5017;
  required uint32 drivestatus = 5018;
  required bool desiredUp = 5019;
  required bool desiredForceDown = 5020;
  optional string currentvid = 5021;
  optional string currenttapepool = 5022;
  optional uint32 nextmounttype = 5023;
  optional string nextvid = 5024;
  optional string nexttapepool = 5025;
// TODO: implement or remove: required EntryLog creationlog. The number 5023
// originally pencilled in for it is now used by nextmounttype, so a fresh,
// unused number is needed if this field is ever added.
}

// Entry of the drive register: associates a drive name with the address of
// its drive state.
message DriveStatePointer {
  required string drivename = 7010;
  required string drivestateaddress = 7011;
}

// The drive register: a list of pointers to drive states, one entry per
// DriveStatePointer.
message DriveRegister {
  repeated DriveStatePointer drives = 7001;
}

// ------------- Scheduler global lock handling  -------------------------------

// Payload of the scheduler global lock object.
message SchedulerGlobalLock {
  // NOTE(review): presumably the identifier to hand out to the next mount -
  // confirm against the scheduler code.
  required uint64 nextmountid = 8000;
}

// A user/host/time record. It is used for the `log` fields of the pointer
// messages and for creation/modification logs.
message EntryLog {
  required string username = 8950;
  required string host = 8960;
  // NOTE(review): time unit/epoch not visible here - confirm.
  required uint64 time = 8970;
}

// A named mount policy: priorities and minimum request ages for archive and
// retrieve, maximum number of drives allowed, plus creation/modification
// logs and a comment.
message MountPolicy {
  required string name = 8980;
  required uint64 archivepriority = 8981;
  required uint64 archiveminrequestage = 8982;
  required uint64 retrievepriority = 8983;
  // The field name is misspelt ("retieve"). It is kept as is because
  // renaming it would change the generated accessors used by existing code.
  required uint64 retieveminrequestage = 8984;
  required uint64 maxdrivesallowed = 8985;
  required EntryLog creationlog = 8986;
  required EntryLog lastmodificationlog = 8987;
  required string comment = 8988;
}

// ------------- Archive Jobs --------------------------------------------------

// The status of the individual archive jobs. The jobs are materialised
// by table entries in the ArchiveRequest.
// Their life cycle is represented by the following enum.

enum ArchiveJobStatus {
  AJS_ToTransferForUser = 1;
  AJS_ToReportToUserForTransfer = 2;
  AJS_Complete = 3;
  AJS_ToReportToUserForFailure = 997;
  AJS_Failed = 998;
  AJS_Abandoned = 999;
  // Statuses for archive jobs belonging to a repack.
  AJS_ToTransferForRepack = 1001;
  AJS_ToReportToRepackForSuccess = 1005;
  AJS_ToReportToRepackForFailure = 1006;
}

// One archive job, i.e. one entry of ArchiveRequest.jobs: the copy to write,
// its destination tape pool, its status and its retry accounting.
message ArchiveJob {
  required uint32 copynb = 4400;
  required string tapepool = 4401;
  required string archivequeueaddress = 4402;
  required string owner = 4403;
  required ArchiveJobStatus status = 4404;
  // Transfer retry counters and their limits.
  required uint32 totalretries = 4405;
  required uint32 retrieswithinmount = 4406;
  required uint64 lastmountwithfailure = 4407;
  required uint32 maxtotalretries = 4408;
  required uint32 maxretrieswithinmount = 4409;
  repeated string failurelogs = 4410;
  // Report retry counter, its limit and the associated logs.
  required uint32 maxreportretries = 4411;
  required uint32 totalreportretries = 4412;
  repeated string reportfailurelogs = 4413;
}

// Repack-specific information attached to an ArchiveRequest through its
// optional repack_info field.
message ArchiveRequestRepackInfo {
  required string repack_request_address = 4450;
  required string file_buffer_url = 4453;
  required uint64 fseq = 4455;
}

// An archive request: the description of the file to archive, the mount
// policy to apply, the report URLs and the list of archive jobs.
message ArchiveRequest {
  required uint64 archivefileid = 8990;
  required MountPolicy mountpolicy = 8995;
  required string checksumtype = 9000;
  required string checksumvalue = 9010;
  required uint64 creationtime = 9015;
  // The field name is misspelt (ArchiveFile uses "reconciliationtime"). It is
  // kept as is because renaming it would change the generated accessors used
  // by existing code.
  required uint64 reconcilationtime = 9017;
  required DiskFileInfo diskfileinfo = 9040;
  required string diskfileid = 9050;
  required string diskinstance = 9055;
  required string archivereporturl = 9057;
  required string archiveerrorreporturl = 9058;
  required uint64 filesize = 9060;
  required UserIdentity requester = 9070;
  required string srcurl = 9080;
  required string storageclass = 9090;
  required EntryLog creationlog = 9091;
  // The individual archive jobs of this request.
  repeated ArchiveJob jobs = 9092;
  required bool reportdecided = 9093;
  // Whether the request belongs to a repack; repack_info carries the
  // repack-specific details and is optional.
  required bool isrepack = 9095;
  optional ArchiveRequestRepackInfo repack_info = 9097;
}

// ------------- Retrieve Jobs -------------------------------------------------

// The status of the individual retrieve jobs. The jobs are materialised
// by table entries in the RetrieveRequest.
// Their life cycle is represented by the following enum.
// There is no complete state, as the completion of one job implies the
// completion of the whole request, and leads to the immediate deletion
// of the request.

enum RetrieveJobStatus {
  RJS_ToTransferForUser = 1;
  RJS_ToReportToUserForFailure = 997;
  RJS_Failed = 998;
  RJS_ToReportToRepackForSuccess = 1002; //For Retrieve request created by a Repack request
  RJS_ToReportToRepackForFailure = 1003;
  RJS_ToTransferForRepack = 1004;
}

// The retrieve request as carried in RetrieveRequest.schedulerrequest:
// requester, archive file id, destination URL, disk file information,
// entry log and error report URL.
message SchedulerRetrieveRequest {
  required UserIdentity requester = 9100;
  required uint64 ArchiveFileId = 9101;
  required string dstURL = 9102;
  required DiskFileInfo diskfileinfo = 9103;
  required EntryLog entrylog = 9106;
  required string retrieveerrorreporturl = 9110;
}

// One retrieve job, i.e. one entry of RetrieveRequest.jobs: the tape copy
// concerned, its status and its retry accounting.
message RetrieveJob {
  required uint32 copynb = 9200;
  // Transfer retry limits and counters.
  required uint32 maxtotalretries = 9201;
  required uint32 maxretrieswithinmount = 9202;
  required uint32 retrieswithinmount = 9203;
  required uint32 totalretries = 9204;
  required RetrieveJobStatus status = 9205;
  required uint64 lastmountwithfailure = 9206;
  repeated string failurelogs = 9207;
  // Report retry limit, counter and the associated logs.
  required uint32 maxreportretries = 9208;
  required uint32 totalreportretries = 9209;
  repeated string reportfailurelogs = 9210;
}

// Associates a copy number with a tape pool. Used as the element type of
// RetrieveRequestRepackInfo.archive_routes.
message RetrieveRequestArchiveRoute {
  required uint32 copynb = 9400;
  required string tapepool = 9410;
}

// Repack-specific information attached to a RetrieveRequest through its
// optional repack_info field.
message RetrieveRequestRepackInfo {
  // Copy number to tape pool associations.
  repeated RetrieveRequestArchiveRoute archive_routes = 9500;
  // Copy numbers to re-archive.
  repeated uint32 copy_nbs_to_rearchive = 9510;
  required string repack_request_address = 9520;
  required string file_buffer_url = 9530;
  required uint64 fseq = 9540;
}

// The different timings of the lifecycle of a RetrieveRequest (creation time,
// first select time, request complete). Every field is optional and defaults
// to 0 when absent.
message LifecycleTimings{
  optional uint64 creation_time = 9160 [default = 0];
  optional uint64 first_selected_time = 9161 [default = 0];
  optional uint64 completed_time = 9162 [default = 0];
}

// A retrieve request: the scheduler-level request, the mount policy, the
// archive file to retrieve, the retrieve jobs and the failure reporting
// information.
message RetrieveRequest {
  required SchedulerRetrieveRequest schedulerrequest = 9150;
  required MountPolicy mountpolicy = 9151;
  required ArchiveFile archivefile = 9152;
  // Copy number of the copy currently selected among the jobs.
  required uint32 activecopynb = 9153;
  repeated RetrieveJob jobs = 9154;
  required string failurereporturl = 9155;
  required string failurereportlog = 9156;
  // Whether the request belongs to a repack; repack_info carries the
  // repack-specific details and is optional.
  required bool isrepack = 9157;
  optional RetrieveRequestRepackInfo repack_info = 9158;
  optional LifecycleTimings lifecycle_timings = 9159;
}

// A (value, count) pair. The queues use repeated lists of these for their
// priority, minimum request age and maximum drives allowed maps.
message ValueCountPair {
  required uint64 value = 9301;
  required uint64 count = 9302;
}

// Entry of an ArchiveQueueShard: a reference to an archive job together with
// a copy of the values the queue needs about it.
message ArchiveJobPointer {
  required uint64 fileid = 3000;
  required uint64 size = 3001;
  // Address of the referenced object (presumably the ArchiveRequest holding
  // the job - confirm).
  required string address = 3002;
  required uint32 copynb = 3003;
  required uint64 priority = 3004;
  required uint64 minarchiverequestage = 3005;
  required uint64 maxdrivesallowed = 3006;
  required uint64 starttime = 3007;
}

// Entry of ArchiveQueue.archivequeueshards: points to one shard of the queue
// and carries the job and byte counts recorded for it.
message ArchiveQueueShardPointer {
  // Address of the shard object (presumably an ArchiveQueueShard - confirm).
  required string address = 10200;
  // Job count recorded for the shard.
  required uint64 shardjobscount = 10201;
  // Byte count recorded for the shard.
  required uint64 shardbytescount = 10202;
}

// A shard of an archive queue: a list of archive job pointers and their
// total size.
message ArchiveQueueShard {
  // The job pointers stored in this shard.
  repeated ArchiveJobPointer archivejobs = 10300;
  // Total size recorded for the jobs of this shard.
  required uint64 archivejobstotalsize = 10301;
}

// An archive queue for one tape pool. The jobs themselves live in shards
// referenced through archivequeueshards; the queue keeps summary values.
message ArchiveQueue {
  required string tapepool = 10000;
  repeated ArchiveQueueShardPointer archivequeueshards = 10010;
  // (value, count) summaries of the queued jobs' priority, minimum request
  // age and maximum drives allowed.
  repeated ValueCountPair prioritymap = 10031;
  repeated ValueCountPair minarchiverequestagemap = 10032;
  repeated ValueCountPair maxdrivesallowedmap = 10033;
  required uint64 archivejobstotalsize = 10040;
  required uint64 archivejobscount = 10045;
  required uint64 oldestjobcreationtime = 10050;
  // NOTE(review): presumably counts how many times the maps above were
  // rebuilt - confirm against the queue code.
  required uint64 mapsrebuildcount = 10060;
}

// Entry of a RetrieveQueueShard: a reference to a retrieve job together with
// a copy of the values the queue needs about it.
message RetrieveJobPointer {
  required uint64 size = 3101;
  // Address of the referenced object (presumably the RetrieveRequest holding
  // the job - confirm).
  required string address = 3102;
  required uint32 copynb = 3103;
  // Numbered 3107: declared here but allocated after 3106.
  required uint64 fseq = 3107;
  required uint64 priority = 3104;
  required uint64 minretrieverequestage = 3105;
  required uint64 maxdrivesallowed = 3106;
  required uint64 starttime = 3108;
}

// Entry of RetrieveQueue.retrievequeueshards: points to one shard of the
// queue and carries the job count, byte count and fseq bounds recorded for
// it.
message RetrieveQueueShardPointer {
  // Address of the shard object (presumably a RetrieveQueueShard - confirm).
  required string address = 10400;
  // Job count recorded for the shard.
  required uint64 shardjobscount = 10401;
  // Byte count recorded for the shard.
  required uint64 shardbytescount = 10402;
  // Lower and upper fseq bounds recorded for the shard.
  required uint64 minfseq = 10403;
  required uint64 maxfseq = 10404;
}

// A shard of a retrieve queue: a list of retrieve job pointers and their
// total size.
message RetrieveQueueShard {
  repeated RetrieveJobPointer retrievejobs = 10500;
  required uint64 retrievejobstotalsize = 10501;
}

// A retrieve queue for one tape (vid). The jobs themselves live in shards
// referenced through retrievequeueshards; the queue keeps summary values.
message RetrieveQueue {
  required string vid = 10100;
  repeated RetrieveQueueShardPointer retrievequeueshards = 10111;
  // (value, count) summaries of the queued jobs' priority, minimum request
  // age and maximum drives allowed.
  repeated ValueCountPair prioritymap = 10131;
  repeated ValueCountPair minretrieverequestagemap = 10132;
  repeated ValueCountPair maxdrivesallowedmap = 10133;
  required uint64 retrievejobstotalsize = 10140;
  required uint64 retrievejobscount = 10145;
  required uint64 oldestjobcreationtime = 10150;
  // NOTE(review): presumably counts how many times the maps above were
  // rebuilt - confirm against the queue code.
  required uint64 mapsrebuildcount = 10160;
  // NOTE(review): presumably the maximum number of jobs per shard - confirm.
  required uint64 maxshardsize = 10170;
}

// ------------- Repack data structures ----------------------------------------

// The status of a repack request.
enum RepackRequestStatus {
  // These values match the RepackInfo::Status values in
  // dataStructures/RepackInfo.hpp.
  RRS_Pending = 1;
  RRS_ToExpand = 2;
  RRS_Starting = 3;
  RRS_Running = 4;
  RRS_Complete = 5;
  RRS_Failed = 6;
}

// In order to properly handle retries in case of failure during reporting, we hold
// a series of booleans tracking the status of the statistics recording and deletion of the
// sub requests. Subrequest deleted records whether a sub request is expected to be present or not.
// The process deleting the sub request will
// first mark it as deleted with this status and then actually delete it. In case of a crash in this
// process, the switch of this status will be re-done and the sub request deleted on retry.
// On the opposite side, this information will allow the creation process to know if a reference to the
// sub request should be interpreted as an unfulfilled creation intent (deleted=false) and create the
// missing sub request or the completion of the request (which can happen anytime after sub request
// creation).
// Likewise, the "accounted" booleans or copyNbs will prevent double counting in case a report (for success or failure)
// needs to be retried after a process failure. The same flag is used for both success and failure. Archive requires a
// set of copyNbs as a single repack can lead to several archives (in case we create new copies).
message RepackSubRequestPointer {
  required string address = 10500;
  required uint64 fseq = 10510;
  required bool retrieve_accounted = 10530;
  repeated uint32 archive_copynb_accounted = 10534;
  required bool subrequest_deleted = 10540;
}

// A repack request for one tape (vid): its buffer URL, status, mode flags,
// progress counters and the list of its sub requests.
message RepackRequest {
  required string vid = 11000;
  required string buffer_url = 11005;
  required RepackRequestStatus status = 11010;
  required bool add_copies_mode = 11400;
  required bool move_mode = 11410;
  required uint64 totalfilestoretrieve = 11420;
  required uint64 totalbytestoretrieve = 11430;
  // We can have more files to archive than we have to retrieve if we plan to also archive a missing
  // copy of a multi copy file.
  required uint64 totalfilestoarchive = 11440;
  required uint64 totalbytestoarchive = 11450;
  required uint64 userprovidedfiles = 11460;
  required uint64 userprovidedbytes = 11470;
  required uint64 retrievedfiles = 11480;
  required uint64 retrievedbytes = 11490;
  required uint64 archivedfiles = 11500;
  required uint64 archivedbytes = 11510;
  required uint64 failedtoretrievefiles = 11520;
  required uint64 failedtoretrievebytes = 11530;
  required uint64 failedtoarchivefiles = 11540;
  required uint64 failedtoarchivebytes = 11550;
  required uint64 lastexpandedfseq = 11560;
  // One pointer per sub request, with its accounting and deletion flags.
  repeated RepackSubRequestPointer subrequests = 11570;
}

// Entry of the repack index: associates a vid with the address of a repack
// request.
message RepackRequestIndexPointer {
  required string vid = 12000;
  required string address = 12010;
}

// The repack index: a list of (vid, address) pointers to repack requests.
message RepackIndex {
  repeated RepackRequestIndexPointer repackrequestpointers = 12100;
}

// Entry of a repack queue: the address of a queued repack request.
message RepackRequestQueuePointer {
  required string address = 12101;
}

// A queue of repack requests, held as a list of pointers.
message RepackQueue {
  repeated RepackRequestQueuePointer repackrequestpointers = 12200;
}