# File: base_scicat_dataset.yaml (5.54 KiB)
# Authored by: Anjali Aggarwal
---
# LinkML schema header: identity, imports, prefixes and shared enums.
name: SciCatDatasetSchema
id: https://example.org/base_scicat
imports:
  - linkml:types
prefixes:
  MySchema: https://example.org/MySchema
  linkml: https://w3id.org/linkml/
# NOTE(review): default_prefix points at the linkml namespace rather than a
# schema-specific prefix such as MySchema — confirm this is intended.
default_prefix: linkml
# Fallback range for every attribute that does not declare its own `range`.
default_range: string
enums:
  # Closed vocabulary referenced via `range: type_options`.
  type_options:
    permissible_values:
      raw:
      derived:
# Class definitions of the schema.
classes:
# ScicatDataset is flagged `abstract: true`; the attribute descriptions
# mention "inherited models", so it presumably serves as the common base
# for concrete dataset classes — verify against the rest of the schema.
ScicatDataset:
abstract: true
# Attributes declared inline on ScicatDataset follow.
attributes:
owner:
  # Mandatory attribute. Several owners are packed into a single string,
  # separated by semicolons (see description).
  description: >-
    Owner or custodian of the dataset, usually first name + last name.
    The string may contain a list of persons, which should then be
    separated by semicolons.
  required: true
ownerEmail:
  # No `required` flag is set here. Several addresses are packed into a
  # single semicolon-separated string (see description).
  description: >-
    Email of the owner or custodian of the dataset. The string may contain
    a list of emails, which should then be separated by semicolons.
orcidOfOwner:
  # No `required` flag is set here. Several ORCIDs are packed into a single
  # semicolon-separated string (see description).
  description: >-
    ORCID of the owner or custodian. The string may contain a list of
    ORCIDs, which should then be separated by semicolons.
contactEmail:
  # No `required` flag is set here. Several addresses are packed into a
  # single semicolon-separated string (see description).
  description: >-
    Email of the contact person for this dataset. The string may contain
    a list of emails, which should then be separated by semicolons.
sourceFolder:
  # Directory part only: for single-file datasets the filename is excluded.
  description: >-
    Absolute file path on file server containing the files of this dataset,
    e.g. /some/path/to/sourcefolder. In case of a single file dataset, e.g.
    HDF5 data, it contains the path up to, but excluding the filename.
    Trailing slashes are removed.
#~ sourceFolderHost:
#~ description: DNS host name of file server hosting sourceFolder, optionally including
#~ a protocol e.g. [protocol://]fileserver1.example.com
size:
  # Numeric attribute; the unit is not stated in this schema.
  range: float
  description: >-
    Total size of all source files contained in source folder on disk when
    unpacked.
dataQualityMetrics:
  # User-supplied rating; the scale is not defined in this schema.
  description: >-
    Data Quality Metrics is a number given by the user to rate the dataset.
  range: float
#~ packedSize:
#~ description: Total size of all datablock package files created for this dataset.
#~ range: float
#~ numberOfFiles:
#~ description: Total number of files in all OrigDatablocks for this dataset.
#~ range: float
#~ numberOfFilesArchived:
#~ description: Total number of files in all Datablocks for this dataset.
#~ range: float
#~ required: true
creationTime:
  # NOTE(review): no explicit range is declared here, so the value falls
  # back to the schema-wide default_range — confirm that a plain string is
  # intended for this RFC 3339 timestamp.
  description: >-
    Time when dataset became fully available on disk, i.e. all containing
    files have been written. Format according to chapter 5.6 internet
    date/time format in RFC 3339. Local times without timezone/offset info
    are automatically transformed to UTC using the timezone of the API
    server.
type:
  # Mandatory attribute whose values are constrained to the type_options
  # enum.
  required: true
  range: type_options
  description: >-
    Characterize type of dataset, either 'raw' or 'derived'. Autofilled
    when choosing the proper inherited models.
#~ validationStatus:
#~ description: Defines a level of trust, e.g. a measure of how much data was verified