Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
# LinkML schema header: schema identity, imported type library and prefix map.
name: MySchema
id: https://example.org/MySchema
imports:
  - linkml:types
prefixes:
  # CURIE expansion is plain string concatenation, so the namespace must end
  # in a separator; without it MySchema:owner would expand to
  # https://example.org/MySchemaowner.
  MySchema: https://example.org/MySchema/
  linkml: https://w3id.org/linkml/
# Elements without an explicit prefix are placed in the MySchema namespace.
default_prefix: MySchema
# Slots that do not declare a range are treated as strings.
default_range: string
enums:
  # Value set used as the range of the `type` slot.
  type_options:
    permissible_values:
      # Explicit empty mappings rather than bare keys, which parse as null.
      raw: {}
      derived: {}
# Slot (field) definitions. A slot without an explicit `range` falls back to
# the schema-level default_range. Lines prefixed with `#~` are slots that are
# currently disabled.
slots:
  owner:
    description: >-
      Owner or custodian of the dataset, usually first name + last name. The
      string may contain a list of persons, which should then be separated by
      semicolons.
    required: true
  ownerEmail:
    description: >-
      Email of the owner or custodian of the dataset. The string may contain a
      list of emails, which should then be separated by semicolons.
  orcidOfOwner:
    description: >-
      ORCID of the owner or custodian. The string may contain a list of ORCIDs,
      which should then be separated by semicolons.
  contactEmail:
    description: >-
      Email of the contact person for this dataset. The string may contain a
      list of emails, which should then be separated by semicolons.
  sourceFolder:
    description: >-
      Absolute file path on file server containing the files of this dataset,
      e.g. /some/path/to/sourcefolder. In case of a single file dataset, e.g.
      HDF5 data, it contains the path up to, but excluding the filename.
      Trailing slashes are removed.
  #~ sourceFolderHost:
  #~   description: DNS host name of file server hosting sourceFolder, optionally including
  #~     a protocol e.g. [protocol://]fileserver1.example.com
  size:
    description: >-
      Total size of all source files contained in source folder on disk when
      unpacked.
    range: float
  dataQualityMetrics:
    description: >-
      Data Quality Metrics is a number given by the user to rate the dataset.
    range: float
  #~ packedSize:
  #~   description: Total size of all datablock package files created for this dataset.
  #~   range: float
  #~ numberOfFiles:
  #~   description: Total number of files in all OrigDatablocks for this dataset.
  #~   range: float
  #~ numberOfFilesArchived:
  #~   description: Total number of files in all Datablocks for this dataset.
  #~   range: float
  #~   required: true
  creationTime:
    description: >-
      Time when dataset became fully available on disk, i.e. all containing
      files have been written. Format according to chapter 5.6 internet
      date/time format in RFC 3339. Local times without timezone/offset info
      are automatically transformed to UTC using the timezone of the API
      server.
  type:
    description: >-
      Characterize type of dataset, either 'raw' or 'derived'. Autofilled when
      choosing the proper inherited models.
    # Restricted to the values of the type_options enum.
    range: type_options
    required: true
  #~ validationStatus:
  #~   description: Defines a level of trust, e.g. a measure of how much data was verified
  #~     or used by other persons.
  # List-valued slot; no range is declared, so entries use the default range.
  keywords:
    multivalued: true
  description:
    description: Free text explanation of contents of dataset.
  datasetName:
    description: >-
      A name for the dataset, given by the creator to carry some semantic
      meaning. Useful for display purposes e.g. instead of displaying the pid.
      Will be autofilled if missing using info from sourceFolder.
  #~ classification:
  #~   description: ACIA information about AUthenticity,COnfidentiality,INtegrity and
  #~     AVailability requirements of dataset. E.g. AV(ailabilty)=medium could trigger
  #~     the creation of a two tape copies. Format 'AV=medium,CO=low'
  license:
    description: Name of the license under which the data can be used.
  #~ version:
  #~   description: Version of the API used in creation of the dataset.
  #~ isPublished:
  #~   description: Flag is true when data are made publicly available.
  #~   range: boolean
  techniques:
    multivalued: true
    # The range must resolve to a class defined in this schema or in one of
    # its imports.
    range: TechniqueClass
  #~ sharedWith:
  #~   multivalued: true
  #~ relationships:
  #~   multivalued: true
  #~   range: RelationshipClass
  scientificMetadata:
    description: JSON object containing the scientific metadata.
    # NOTE(review): `abstract` on a slot that a class lists directly is
    # unusual; confirm this is intended.
    abstract: true
  principalInvestigator:
    description: >-
      First name and last name of principal investigator(s). If multiple PIs
      are present, use a semicolon separated list. This field is required if
      the dataset is a Raw dataset.
    # NOTE(review): required unconditionally, although the description limits
    # the requirement to Raw datasets (creationLocation carries the same
    # wording but is optional); confirm which is intended.
    required: true
  endTime:
    description: >-
      End time of data acquisition for this dataset, format according to
      chapter 5.6 internet date/time format in RFC 3339. Local times without
      timezone/offset info are automatically transformed to UTC using the
      timezone of the API server.
  creationLocation:
    description: >-
      Unique location identifier where data was taken, usually in the form
      /Site-name/facility-name/instrumentOrBeamline-name. This field is
      required if the dataset is a Raw dataset.
  dataFormat:
    description: >-
      Defines the format of the data files in this dataset, e.g Nexus Version
      x.y.
  proposalId:
    description: The ID of the proposal to which the dataset belongs.
  #~ sampleId:
  #~   description: ID of the sample used when collecting the data.
  instrumentId:
    description: ID of the instrument where the data was created.
  #~ ownerGroup: {}
  #~ accessGroups: {}
  #~ instrumentGroup: {}
classes:
  # Object type used as the range of the `techniques` slot. The slot names
  # this class, so it has to be defined for the range to resolve.
  # NOTE(review): the pid/name field set is modelled on SciCat's
  # TechniqueClass; confirm it matches the intended data.
  TechniqueClass:
    description: A measurement technique, given by an identifier and a name.
    attributes:
      pid:
        description: Persistent identifier of the technique.
      name:
        description: Human-readable name of the technique.
  # Abstract base record for a dataset; it lists the slots a dataset carries.
  # Entries prefixed with `#~` are slots that are currently disabled.
  ScicatDataset:
    abstract: true
    slots:
      - owner
      - ownerEmail
      - orcidOfOwner
      - contactEmail
      - sourceFolder
      #~ - sourceFolderHost
      - size
      #~ - packedSize
      #~ - numberOfFiles
      #~ - numberOfFilesArchived
      - creationTime
      - type
      #~ - validationStatus
      - keywords
      - description
      - datasetName
      #~ - classification
      - license
      #~ - version
      #~ - isPublished
      - techniques
      #~ - sharedWith
      #~ - relationships
      - scientificMetadata
      - principalInvestigator
      - endTime
      - creationLocation
      - dataFormat
      - proposalId
      #~ - sampleId # not in use here since it refers to cici
      - instrumentId
      - dataQualityMetrics
      #~ - ownerGroup
      #~ - accessGroups
      #~ - instrumentGroup
#~ slot_usage:
#~ owner:
#~ description: Owner or custodian of the dataset, usually first name + last
#~ name. The string may contain a list of persons, which should then be separated
#~ by semicolons.
#~ required: true
#~ ownerEmail:
#~ description: Email of the owner or custodian of the dataset. The string may
#~ contain a list of emails, which should then be separated by semicolons.
#~ orcidOfOwner:
#~ description: ORCID of the owner or custodian. The string may contain a list
#~ of ORCIDs, which should then be separated by semicolons.
#~ contactEmail:
#~ description: Email of the contact person for this dataset. The string may
#~ contain a list of emails, which should then be separated by semicolons.
#~ required: true
#~ sourceFolder:
#~ description: Absolute file path on file server containing the files of this
#~ dataset, e.g. /some/path/to/sourcefolder. In case of a single file dataset,
#~ e.g. HDF5 data, it contains the path up to, but excluding the filename.
#~ Trailing slashes are removed.
#~ required: true
#~ sourceFolderHost:
#~ description: DNS host name of file server hosting sourceFolder, optionally
#~ including a protocol e.g. [protocol://]fileserver1.example.com
#~ size:
#~ description: Total size of all source files contained in source folder on
#~ disk when unpacked.
#~ range: float
#~ packedSize:
#~ description: Total size of all datablock package files created for this dataset.
#~ range: float
#~ numberOfFiles:
#~ description: Total number of files in all OrigDatablocks for this dataset.
#~ range: float
#~ numberOfFilesArchived:
#~ description: Total number of files in all Datablocks for this dataset.
#~ range: float
#~ required: true
#~ creationTime:
#~ description: Time when dataset became fully available on disk, i.e. all containing
#~ files have been written. Format according to chapter 5.6 internet date/time
#~ format in RFC 3339. Local times without timezone/offset info are automatically
#~ transformed to UTC using the timezone of the API server.
#~ required: true
#~ type:
#~ description: Characterize type of dataset, either 'raw' or 'derived'. Autofilled
#~ when choosing the proper inherited models.
#~ range: type_options
#~ required: true
#~ validationStatus:
#~ description: Defines a level of trust, e.g. a measure of how much data was
#~ verified or used by other persons.
#~ keywords:
#~ multivalued: true
#~ description:
#~ description: Free text explanation of contents of dataset.
#~ datasetName:
#~ description: A name for the dataset, given by the creator to carry some semantic
#~ meaning. Useful for display purposes e.g. instead of displaying the pid.
#~ Will be autofilled if missing using info from sourceFolder.
#~ classification:
#~ description: ACIA information about AUthenticity,COnfidentiality,INtegrity
#~ and AVailability requirements of dataset. E.g. AV(ailabilty)=medium could
#~ trigger the creation of a two tape copies. Format 'AV=medium,CO=low'
#~ license:
#~ description: Name of the license under which the data can be used.
#~ version:
#~ description: Version of the API used in creation of the dataset.
#~ isPublished:
#~ description: Flag is true when data are made publicly available.
#~ range: boolean
#~ techniques:
#~ multivalued: true
#~ range: TechniqueClass
#~ sharedWith:
#~ multivalued: true
#~ relationships:
#~ multivalued: true
#~ range: RelationshipClass
#~ scientificMetadata:
#~ description: JSON object containing the scientific metadata.
#~ principalInvestigator:
#~ description: First name and last name of principal investigator(s). If multiple
#~ PIs are present, use a semicolon separated list. This field is required
#~ if the dataset is a Raw dataset.
#~ required: true
#~ endTime:
#~ description: End time of data acquisition for this dataset, format according
#~ to chapter 5.6 internet date/time format in RFC 3339. Local times without
#~ timezone/offset info are automatically transformed to UTC using the timezone
#~ of the API server.
#~ creationLocation:
#~ description: Unique location identifier where data was taken, usually in the
#~ form /Site-name/facility-name/instrumentOrBeamline-name. This field is required
#~ if the dataset is a Raw dataset.
#~ required: true
#~ dataFormat:
#~ description: Defines the format of the data files in this dataset, e.g Nexus
#~ Version x.y.
#~ proposalId:
#~ description: The ID of the proposal to which the dataset belongs.
#~ sampleId:
#~ description: ID of the sample used when collecting the data.
#~ instrumentId:
#~ description: ID of the instrument where the data was created.
#~ ownerGroup:
#~ required: true
#~ accessGroups:
#~ multivalued: true
#~ instrumentGroup: {}